SlideShare a Scribd company logo
data_selection
October 19, 2022
[1]: # Data Selection
[2]: import numpy as np
[3]: # This is weather data recorded in Memphis during summer
# (June to September).
# Column 0: month
# Column 1: temperature in Fahrenheit
# Column 2: precipitation in inches
data = np.array([
[6, 70, 3],
[7, 75, 3],
[6, 85, 4],
[7, 90, 4],
[7, 91, 5],
[8, 85, 2],
[8, 87, 4],
[6, 83, 5],
[8, 77, 3],
[6, 69, 6],
[9, 68, 1],
[6, 80, 6],
[9, 65, 3],
[9, 75, 4],
[9, 80, 5]])
[4]: data.shape
[4]: (15, 3)
[5]: # Select the data for the row 0:
data[0, :]
# row_selection: 0
# column_selection: all
[5]: array([ 6, 70, 3])
1
[6]: # Select the data of column 2:
data[:, 2]
# row_selection: all
# column_selection: 2
[6]: array([3, 3, 4, 4, 5, 2, 4, 5, 3, 6, 1, 6, 3, 4, 5])
[7]: # Get the data for the first five rows.
data[0:5, :]
[7]: array([[ 6, 70, 3],
[ 7, 75, 3],
[ 6, 85, 4],
[ 7, 90, 4],
[ 7, 91, 5]])
[8]: # Get the data for the first five rows,
# and the first two columns.
data[0:5, 0:2]
[8]: array([[ 6, 70],
[ 7, 75],
[ 6, 85],
[ 7, 90],
[ 7, 91]])
[9]: # Get the data for the last two columns,
# and the first five rows.
data[0:5, 1:3]
[9]: array([[70, 3],
[75, 3],
[85, 4],
[90, 4],
[91, 5]])
[10]: # or can be written as
data[:5, 1:]
[10]: array([[70, 3],
[75, 3],
[85, 4],
[90, 4],
[91, 5]])
[11]: # or can be written as
data[:5, -2:]
2
[11]: array([[70, 3],
[75, 3],
[85, 4],
[90, 4],
[91, 5]])
[12]: # Get the last 4 rows
data[-4:, :]
[12]: array([[ 6, 80, 6],
[ 9, 65, 3],
[ 9, 75, 4],
[ 9, 80, 5]])
[13]: # Find the temperature values, and store them in a variable
temp = data[:, 1]
[14]: temp
[14]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 68, 80, 65,
75, 80])
[15]: # Find the month values, and store them in a variable
month = data[:, 0]
[16]: month
[16]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9])
[17]: # Find the maximum temperature
np.max(temp)
[17]: 91
[18]: # Find the index (or position) of the maximum temperature
np.argmax(temp)
[18]: 4
[19]: # Find the month that corresponds to the maximum temperature
data[np.argmax(temp), 0]
[19]: 7
[20]: m = np.argmax(temp)
data[m, 0]
[20]: 7
3
[21]: # boolean selection
[22]: # Find all the temperatures below 70 degrees
data[temp < 70, 1]
[22]: array([69, 68, 65])
[23]: # Find the months with temperatures below 70 degrees
data[temp < 70, 0]
[23]: array([6, 9, 9])
[24]: np.unique(data[temp < 70, 0])
[24]: array([6, 9])
[25]: # Find all the temperatures for the month of August
data[month == 8, 1]
[25]: array([85, 87, 77])
[26]: # Find the average temperature for August
np.average(data[month == 8, 1])
[26]: 83.0
[27]: # Find the temperatures above 80 for June
data[(month == 6) & (temp > 80), 1]
# & means and
[27]: array([85, 83])
[28]: # Find the temperatures for the months of June, July, and August
data[month != 9, 1]
[28]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])
[29]: data[(month == 6) | (month == 7) | (month == 8), 1]
[29]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])
[30]: data[(month >= 6) & (month <= 8), 1]
[30]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])
[31]: # Print the average temperature for each month:
for x in [6, 7, 8, 9]:
print(np.average(data[month == x, 1]))
77.4
85.33333333333333
83.0
72.0
[32]: # Find the average temperature for each month, and store it in a list:
[np.average(data[month == x, 1]) for x in [6, 7, 8, 9]]
[32]: [77.4, 85.33333333333333, 83.0, 72.0]
[33]: month
[33]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9])
[34]: # Display the data set sorted by month
data[month.argsort()]
[34]: array([[ 6, 70, 3],
[ 6, 85, 4],
[ 6, 83, 5],
[ 6, 69, 6],
[ 6, 80, 6],
[ 7, 75, 3],
[ 7, 90, 4],
[ 7, 91, 5],
[ 8, 85, 2],
[ 8, 87, 4],
[ 8, 77, 3],
[ 9, 68, 1],
[ 9, 65, 3],
[ 9, 75, 4],
[ 9, 80, 5]])
[35]: month.argsort?
Docstring:
a.argsort(axis=-1, kind=None, order=None)
Returns the indices that would sort this array.
Refer to `numpy.argsort` for full documentation.
See Also
--------
numpy.argsort : equivalent function
Type: builtin_function_or_method
5
[ ]:
6
vis_matplotlib
October 25, 2022
[1]: # data visualization
[2]: import numpy as np
[3]: weather = np.genfromtxt('weather_madrid.csv',
delimiter=',', skip_header=True,
filling_values=0.0)
[4]: weather
[4]: array([[1.997e+03, 1.000e+00, 1.000e+00, …, 6.000e+00,
0.000e+00,
2.290e+02],
[1.997e+03, 1.000e+00, 2.000e+00, …, 5.000e+00, 0.000e+00,
1.430e+02],
[1.997e+03, 1.000e+00, 3.000e+00, …, 6.000e+00, 0.000e+00,
2.560e+02],
…,
[2.015e+03, 1.200e+01, 2.900e+01, …, 5.000e+00, 0.000e+00,
1.900e+02],
[2.015e+03, 1.200e+01, 3.000e+01, …, 6.000e+00, 0.000e+00,
2.560e+02],
[2.015e+03, 1.200e+01, 3.100e+01, …, 6.000e+00, 0.000e+00,
3.130e+02]])
[5]: np.set_printoptions(suppress=True)
[6]: weather
[6]: array([[1997., 1., 1., …, 6., 0., 229.],
[1997., 1., 2., …, 5., 0., 143.],
[1997., 1., 3., …, 6., 0., 256.],
…,
[2015., 12., 29., …, 5., 0., 190.],
[2015., 12., 30., …, 6., 0., 256.],
[2015., 12., 31., …, 6., 0., 313.]])
[7]: weather[1, :] # row1
1
[7]: array([1997., 1., 2., 7., 3., 0., 6., 3., 0.,
100., 92., 71., 1007., 1003., 997., 10., 9., 4.,
26., 8., 47., 0., 5., 0., 143.])
[8]: import matplotlib.pyplot as plt
[9]: # Plot 1: Plot of monthly average temperatures.
[10]: month = weather[:, 1]
[11]: # Step 1: find the average temperature for January
[12]: np.average(weather[month == 1, 4])
# since column 4 has the mean temperatures.
[12]: 5.688729874776387
[13]: # Step 2: find the average temperature for all months, and store them in array Y.
[14]: for x in range(1, 13): # 1 is included in the range, 13 is excluded.
print(x)
1
2
3
4
5
6
7
8
9
10
11
12
[15]: X = [x for x in range(1, 13)]
X
[15]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
[16]: Y = [np.average(weather[month == x, 4]) for x in X]
Y
[16]: [5.688729874776387,
6.947069943289225,
10.14874551971326,
12.85925925925926,
16.667235494880547,
22.037366548042705,
25.074702886247877,
24.742556917688265,
20.49473684210526,
15.152801358234296,
9.196491228070176,
5.859083191850594]
[17]: # step 3: create a plot
[18]: plt.plot(X, Y)
[18]: [<matplotlib.lines.Line2D at 0x125d1dbbe80>]
[19]: plt.bar(X, Y)
[19]: <BarContainer object of 12 artists>
3
[20]: # Plot 2: Monthly minimum (blue), mean (green), and
# maximum (red) recorded temperatures.
[21]: X = [x for x in range(1, 13)]
X
[21]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
[22]: Y_min = [np.min(weather[month == x, 5]) for x in X]
Y_mean = [np.average(weather[month == x, 4]) for x in X]
Y_max = [np.max(weather[month == x, 3]) for x in X]
[23]: plt.plot(X, Y_min, 'b')
plt.plot(X, Y_mean, 'g')
plt.plot(X, Y_max, 'r')
[23]: [<matplotlib.lines.Line2D at 0x125d27a1190>]
4
[24]: Y_min = [np.average(weather[month == x, 5]) for x in X]
Y_mean = [np.average(weather[month == x, 4]) for x in X]
Y_max = [np.average(weather[month == x, 3]) for x in X]
[25]: plt.plot(X, Y_min, 'b')
plt.plot(X, Y_mean, 'g')
plt.plot(X, Y_max, 'r')
[25]: [<matplotlib.lines.Line2D at 0x125d27fbe80>]
5
[26]: # Plot 3: Average yearly temperatures
[27]: # for the months:
# month = weather[:, 1]
# X = [x for x in range(1, 13)]
# Y = [np.average(weather[month == x, 4]) for x in X]
[28]: # for the years?
year = weather[:, 0]
# X = [x for x in range(1997, 2016)]
X = np.unique(year)
X.sort()
Y = [np.average(weather[year == x, 4]) for x in X]
[29]: plt.bar(X, Y)
[29]: <BarContainer object of 19 artists>
6
[30]: # Plot 4: Histogram of number of data points per temperature interval
# (visualizes the distribution of the data)
[31]: plt.hist(weather[:, 4], bins=5, ec='black')
[31]: (array([ 414., 2013., 1825., 1808., 752.]),
array([-3., 4., 11., 18., 25., 32.]),
<BarContainer object of 5 artists>)
7
[32]: # For example, we have about 2000 temperatures between
# 4 and 11 degrees (second bar).
[ ]:
8
Let data be a dataset having 4 columns.
For example data =
87, 29, 10, 88
93, 95, 40, 67
55, 98, 27, 96
15, 34, 78, 23
12, 58, 11, 20
68, 65, 93, 11
...
Answer the following questions using Python commands.
Your answers must work on any dataset that has 4 columns and
not only on the one provided above.
(a) Select all of row 0: [87, 29, 10, 88].
I am giving you the answer to the first question:
data[0, :]
(b) Select all of column 3: [88, 67, 96, 23, 20, 11].
(c) Select all of rows 2, 3, and 4:
55, 98, 27, 96
15, 34, 78, 23
12, 58, 11, 20.
(d) Find the maximum value in column 3. For the given
example, the value is 96.
(e) Find the row index of the maximum value in column 3. For
the given example the output is 2.
(f) Select the entire row that contains the maximum value for
column 3. For the given example, the row is [55, 98, 27, 96].
(g) Store the values of column 1 in a variable named col1.
(h) Using the variable col1, select all the values in column 1
that are greater than 50: [95, 98, 58, 65].
(i) Using the variable col1, select all the values in column 1 that
are greater than 50 and smaller than 90: [58, 65].
(j) Display the entire data set sorted on column 1. For the given
example, the output is
87, 29, 10, 88
15, 34, 78, 23
12, 58, 11, 20
68, 65, 93, 11
93, 95, 40, 67
55, 98, 27, 96

More Related Content

PPTX
NumPy_Broadcasting Data Science - Python.pptx
PDF
Pandas numpy Related Presentation.pptx.pdf
PPT
Multi dimensional arrays
PDF
R Programming Homework Help
PPTX
Time Series.pptx
PPTX
NUMPY LIBRARY study materials PPT 2.pptx
PDF
CE344L-200365-Lab2.pdf
PDF
C Language Lecture 10
NumPy_Broadcasting Data Science - Python.pptx
Pandas numpy Related Presentation.pptx.pdf
Multi dimensional arrays
R Programming Homework Help
Time Series.pptx
NUMPY LIBRARY study materials PPT 2.pptx
CE344L-200365-Lab2.pdf
C Language Lecture 10

Similar to data_selectionOctober 19, 2022[1] # Data Selection.docx (20)

PDF
Arrays in python
PPTX
07+08slide.pptx
PDF
Programming Fundamentals Arrays and Strings
PPTX
A quick introduction to R
PPTX
Array,MULTI ARRAY, IN C
PPT
Chapter 10.ppt
PPTX
r studio presentation.pptx
PPTX
r studio presentation.pptx
PDF
TENSOR DECOMPOSITION WITH PYTHON
DOCX
R Activity in Biostatistics
PPT
R Programming Intro
PDF
XIIInfo.Pract.PT3277.pdf
PDF
XIIInfo.Pract.PT3277.pdf
PDF
[1062BPY12001] Data analysis with R / week 2
PDF
05_Arrays C plus Programming language22.pdf
PPTX
CSE115 C Programming Multidimensional Array Introduction
PDF
Numpy questions with answers and practice
PDF
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
PDF
Intoduction to numpy
PDF
Data manipulation on r
Arrays in python
07+08slide.pptx
Programming Fundamentals Arrays and Strings
A quick introduction to R
Array,MULTI ARRAY, IN C
Chapter 10.ppt
r studio presentation.pptx
r studio presentation.pptx
TENSOR DECOMPOSITION WITH PYTHON
R Activity in Biostatistics
R Programming Intro
XIIInfo.Pract.PT3277.pdf
XIIInfo.Pract.PT3277.pdf
[1062BPY12001] Data analysis with R / week 2
05_Arrays C plus Programming language22.pdf
CSE115 C Programming Multidimensional Array Introduction
Numpy questions with answers and practice
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Intoduction to numpy
Data manipulation on r
Ad

More from richardnorman90310 (20)

DOCX
BUSI 520Discussion Board Forum InstructionsThreadMarket.docx
DOCX
BUSI 330Collaborative Marketing Plan Final Draft Instructions.docx
DOCX
BUSI 460 – LT Assignment Brief 1 ACADEMIC YEAR 2020 – S.docx
DOCX
BUS475 week#7Diversity in the work environment promotes accept.docx
DOCX
BUS475week#5In Chapter 11 of your textbook, you explored import.docx
DOCX
BUS475week#6Share a recent or current event in which a busine.docx
DOCX
BUS475v10Project PlanBUS475 v10Page 2 of 2Wk 4 – App.docx
DOCX
BUS472L – Unit 2 & 4 AssignmentStudent Name ___________________.docx
DOCX
BUS308 Week 4 Lecture 1 Examining Relationships Expect.docx
DOCX
BUS301 Memo Rubric Spring 2020 - Student.docxBUS301 Writing Ru.docx
DOCX
BUS 206 Milestone Two Template To simplify completi.docx
DOCX
BurkleyFirst edition Chapter 14Situational InfluencesC.docx
DOCX
BurkleyFirst edition Chapter 7BelongingCopyright © 201.docx
DOCX
BurkleyFirst edition Chapter 5AutonomyCopyright © 2018.docx
DOCX
Bunker Hill Community College MAT 093 Foundations of Mathema.docx
DOCX
BurkleyFirst edition Chapter 3Psychological Origins of M.docx
DOCX
Bullying and cyberbullying of adolescents have become increasingly p.docx
DOCX
Building an Information Technology Security Awareness an.docx
DOCX
Building a company with the help of IT is really necessary as most.docx
DOCX
Building a Comprehensive Health HistoryBuild a health histor.docx
BUSI 520Discussion Board Forum InstructionsThreadMarket.docx
BUSI 330Collaborative Marketing Plan Final Draft Instructions.docx
BUSI 460 – LT Assignment Brief 1 ACADEMIC YEAR 2020 – S.docx
BUS475 week#7Diversity in the work environment promotes accept.docx
BUS475week#5In Chapter 11 of your textbook, you explored import.docx
BUS475week#6Share a recent or current event in which a busine.docx
BUS475v10Project PlanBUS475 v10Page 2 of 2Wk 4 – App.docx
BUS472L – Unit 2 & 4 AssignmentStudent Name ___________________.docx
BUS308 Week 4 Lecture 1 Examining Relationships Expect.docx
BUS301 Memo Rubric Spring 2020 - Student.docxBUS301 Writing Ru.docx
BUS 206 Milestone Two Template To simplify completi.docx
BurkleyFirst edition Chapter 14Situational InfluencesC.docx
BurkleyFirst edition Chapter 7BelongingCopyright © 201.docx
BurkleyFirst edition Chapter 5AutonomyCopyright © 2018.docx
Bunker Hill Community College MAT 093 Foundations of Mathema.docx
BurkleyFirst edition Chapter 3Psychological Origins of M.docx
Bullying and cyberbullying of adolescents have become increasingly p.docx
Building an Information Technology Security Awareness an.docx
Building a company with the help of IT is really necessary as most.docx
Building a Comprehensive Health HistoryBuild a health histor.docx
Ad

Recently uploaded (20)

PDF
Indian roads congress 037 - 2012 Flexible pavement
PDF
OBE - B.A.(HON'S) IN INTERIOR ARCHITECTURE -Ar.MOHIUDDIN.pdf
PPTX
Digestion and Absorption of Carbohydrates, Proteina and Fats
PDF
A GUIDE TO GENETICS FOR UNDERGRADUATE MEDICAL STUDENTS
PDF
Complications of Minimal Access Surgery at WLH
PPTX
Chinmaya Tiranga Azadi Quiz (Class 7-8 )
PDF
1_English_Language_Set_2.pdf probationary
PPTX
Orientation - ARALprogram of Deped to the Parents.pptx
PPTX
1st Inaugural Professorial Lecture held on 19th February 2020 (Governance and...
PDF
medical_surgical_nursing_10th_edition_ignatavicius_TEST_BANK_pdf.pdf
PDF
IGGE1 Understanding the Self1234567891011
PDF
LDMMIA Reiki Yoga Finals Review Spring Summer
PDF
RTP_AR_KS1_Tutor's Guide_English [FOR REPRODUCTION].pdf
PPTX
Final Presentation General Medicine 03-08-2024.pptx
PDF
Supply Chain Operations Speaking Notes -ICLT Program
PPTX
202450812 BayCHI UCSC-SV 20250812 v17.pptx
PPTX
Unit 4 Skeletal System.ppt.pptxopresentatiom
PDF
Chinmaya Tiranga quiz Grand Finale.pdf
DOC
Soft-furnishing-By-Architect-A.F.M.Mohiuddin-Akhand.doc
PDF
Empowerment Technology for Senior High School Guide
Indian roads congress 037 - 2012 Flexible pavement
OBE - B.A.(HON'S) IN INTERIOR ARCHITECTURE -Ar.MOHIUDDIN.pdf
Digestion and Absorption of Carbohydrates, Proteina and Fats
A GUIDE TO GENETICS FOR UNDERGRADUATE MEDICAL STUDENTS
Complications of Minimal Access Surgery at WLH
Chinmaya Tiranga Azadi Quiz (Class 7-8 )
1_English_Language_Set_2.pdf probationary
Orientation - ARALprogram of Deped to the Parents.pptx
1st Inaugural Professorial Lecture held on 19th February 2020 (Governance and...
medical_surgical_nursing_10th_edition_ignatavicius_TEST_BANK_pdf.pdf
IGGE1 Understanding the Self1234567891011
LDMMIA Reiki Yoga Finals Review Spring Summer
RTP_AR_KS1_Tutor's Guide_English [FOR REPRODUCTION].pdf
Final Presentation General Medicine 03-08-2024.pptx
Supply Chain Operations Speaking Notes -ICLT Program
202450812 BayCHI UCSC-SV 20250812 v17.pptx
Unit 4 Skeletal System.ppt.pptxopresentatiom
Chinmaya Tiranga quiz Grand Finale.pdf
Soft-furnishing-By-Architect-A.F.M.Mohiuddin-Akhand.doc
Empowerment Technology for Senior High School Guide

data_selectionOctober 19, 2022[1] # Data Selection.docx

  • 1. data_selection October 19, 2022 [1]: # Data Selection [2]: import numpy as np [3]: # This is weather data recorded in Memphis during summer (June to September). # Column 0: month # Column 1: temperature in Fahrenheit # Column 2: precipitation in inches data = np.array([ [6, 70, 3], [7, 75, 3], [6, 85, 4], [7, 90, 4], [7, 91, 5], [8, 85, 2], [8, 87, 4], [6, 83, 5], [8, 77, 3], [6, 69, 6], [9, 68, 1], [6, 80, 6], [9, 65, 3], [9, 75, 4], [9, 80, 5]]) [4]: data.shape
  • 2. [4]: (15, 3) [5]: # Select the data for the row 0: data[0, :] # row_selection: 0 # column_selection: all [5]: array([ 6, 70, 3]) 1 [6]: # Select the data of column 2: data[:, 2] # row_selection: all # column_selection: 2 [6]: array([3, 3, 4, 4, 5, 2, 4, 5, 3, 6, 1, 6, 3, 4, 5]) [7]: # Get the data for the first five rows. data[0:5, :] [7]: array([[ 6, 70, 3], [ 7, 75, 3], [ 6, 85, 4], [ 7, 90, 4], [ 7, 91, 5]]) [8]: # Get the data for the first five rows, # and the first two columns. data[0:5, 0:2] [8]: array([[ 6, 70], [ 7, 75],
  • 3. [ 6, 85], [ 7, 90], [ 7, 91]]) [9]: # Get the data for the last two columns, # and the first five rows. data[0:5, 1:3] [9]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]]) [10]: # or can be written as data[:5, 1:] [10]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]]) [11]: # or can be written as data[:5, -2:] 2 [11]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])
  • 4. [12]: # Get the last 4 rows data[-4:, :] [12]: array([[ 6, 80, 6], [ 9, 65, 3], [ 9, 75, 4], [ 9, 80, 5]]) [13]: # Find the temperature values, and store them in a variable temp = data[:, 1] [14]: temp [14]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 68, 80, 65, 75, 80]) [15]: # Find the month values, and store them in a variable month = data[:, 0] [16]: month [16]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9]) [17]: # Find the maximum temperature np.max(temp) [17]: 91 [18]: # Find the index (or position) of the maximum temperature np.argmax(temp) [18]: 4 [19]: # Find the month that corresponds to the maximum temperature data[np.argmax(temp), 0]
  • 5. [19]: 7 [20]: m = np.argmax(temp) data[m, 0] [20]: 7 3 [21]: # boolean selection [22]: # Find all the temperatures below 70 degrees data[temp < 70, 1] [22]: array([69, 68, 65]) [23]: # Find the months with temperatures below 70 degrees data[temp < 70, 0] [23]: array([6, 9, 9]) [24]: np.unique(data[temp < 70, 0]) [24]: array([6, 9]) [25]: # Find all the temperatures for the month of August data[month == 8, 1] [25]: array([85, 87, 77]) [26]: # Find the average temperature for August np.average(data[month == 8, 1])
  • 6. [26]: 83.0 [27]: # Find the temperatures above 80 for June data[(month == 6) & (temp > 80), 1] # & means and [27]: array([85, 83]) [28]: # Find the temperatures for the months of June, July, and August data[month != 9, 1] [28]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80]) [29]: data[(month == 6) | (month == 7) | (month == 8), 1] [29]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80]) [30]: data[(month >= 6) & (month <= 8), 1] [30]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80]) [31]: # Print the average temperature for each month: for x in [6, 7, 8, 9]: print(np.average(data[month == x, 1])) 4 77.4 85.33333333333333 83.0 72.0
  • 7. [32]: # Find the average temperature for each month, and store it in a list: [np.average(data[month == x, 1]) for x in [6, 7, 8, 9]] [32]: [77.4, 85.33333333333333, 83.0, 72.0] [33]: month [33]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9]) [34]: # Display the the data set sorted by month data[month.argsort()] [34]: array([[ 6, 70, 3], [ 6, 85, 4], [ 6, 83, 5], [ 6, 69, 6], [ 6, 80, 6], [ 7, 75, 3], [ 7, 90, 4], [ 7, 91, 5], [ 8, 85, 2], [ 8, 87, 4], [ 8, 77, 3], [ 9, 68, 1], [ 9, 65, 3], [ 9, 75, 4], [ 9, 80, 5]]) [35]: month.argsort? Docstring: a.argsort(axis=-1, kind=None, order=None) Returns the indices that would sort this array.
  • 8. Refer to `numpy.argsort` for full documentation. See Also -------- numpy.argsort : equivalent function Type: builtin_function_or_method 5 [ ]: 6 vis_matplotlib October 25, 2022 [1]: # data visualization [2]: import numpy as np [3]: weather = np.genfromtxt('weather_madrid.csv', delimiter=',', skip_header=True,␣ ↪filling_values=0.0) [4]: weather [4]: array([[1.997e+03, 1.000e+00, 1.000e+00, …, 6.000e+00, 0.000e+00, 2.290e+02],
  • 9. [1.997e+03, 1.000e+00, 2.000e+00, …, 5.000e+00, 0.000e+00, 1.430e+02], [1.997e+03, 1.000e+00, 3.000e+00, …, 6.000e+00, 0.000e+00, 2.560e+02], …, [2.015e+03, 1.200e+01, 2.900e+01, …, 5.000e+00, 0.000e+00, 1.900e+02], [2.015e+03, 1.200e+01, 3.000e+01, …, 6.000e+00, 0.000e+00, 2.560e+02], [2.015e+03, 1.200e+01, 3.100e+01, …, 6.000e+00, 0.000e+00, 3.130e+02]]) [5]: np.set_printoptions(suppress=True) [6]: weather [6]: array([[1997., 1., 1., …, 6., 0., 229.], [1997., 1., 2., …, 5., 0., 143.], [1997., 1., 3., …, 6., 0., 256.], …, [2015., 12., 29., …, 5., 0., 190.], [2015., 12., 30., …, 6., 0., 256.], [2015., 12., 31., …, 6., 0., 313.]]) [7]: weather[1, :] # row1 1 [7]: array([1997., 1., 2., 7., 3., 0., 6., 3., 0., 100., 92., 71., 1007., 1003., 997., 10., 9., 4.,
  • 10. 26., 8., 47., 0., 5., 0., 143.]) [8]: import matplotlib.pyplot as plt [9]: # Plot 1: Plot of monthly average temperatures. [10]: month = weather[:, 1] [11]: # step1: find average temperature for january [12]: np.average(weather[month == 1, 4]) # since column 4 has the mean temperatures. [12]: 5.688729874776387 [13]: # step 2: find the average temperature for all months, store them in array Y. [14]: for x in range(1, 13): # 1 in included in the range, 13 is excluded. print(x) 1 2 3 4 5 6 7 8 9 10 11 12 [15]: X = [x for x in range(1, 13)]
  • 11. X [15]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [16]: Y = [np.average(weather[month == x, 4]) for x in X] Y [16]: [5.688729874776387, 6.947069943289225, 10.14874551971326, 12.85925925925926, 16.667235494880547, 2 22.037366548042705, 25.074702886247877, 24.742556917688265, 20.49473684210526, 15.152801358234296, 9.196491228070176, 5.859083191850594] [17]: # step 3: create a plot [18]: plt.plot(X, Y) [18]: [<matplotlib.lines.Line2D at 0x125d1dbbe80>] [19]: plt.bar(X, Y) [19]: <BarContainer object of 12 artists> 3
  • 12. [20]: # Plot 2: Monthly minimum (blue), mean (green), and maximum (red) recorded␣ ↪temperatures. [21]: X = [x for x in range(1, 13)] X [21]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [22]: Y_min = [np.min(weather[month == x, 5]) for x in X] Y_mean = [np.average(weather[month == x, 4]) for x in X] Y_max = [np.max(weather[month == x, 3]) for x in X] [23]: plt.plot(X, Y_min, 'b') plt.plot(X, Y_mean, 'g') plt.plot(X, Y_max, 'r') [23]: [<matplotlib.lines.Line2D at 0x125d27a1190>] 4 [24]: Y_min = [np.average(weather[month == x, 5]) for x in X] Y_mean = [np.average(weather[month == x, 4]) for x in X] Y_max = [np.average(weather[month == x, 3]) for x in X] [25]: plt.plot(X, Y_min, 'b') plt.plot(X, Y_mean, 'g') plt.plot(X, Y_max, 'r') [25]: [<matplotlib.lines.Line2D at 0x125d27fbe80>]
  • 13. 5 [26]: # Plot 3: Average yearly temperatures [27]: # for the months: # month = weather[:, 1] # X = [x for x in range(1, 13)] # Y = [np.average(weather[month == x, 4]) for x in X] [28]: # for the years? year = weather[:, 0] # X = [x for x in range(1997, 2016)] X = np.unique(year) X.sort() Y = [np.average(weather[year == x, 4]) for x in X] [29]: plt.bar(X, Y) [29]: <BarContainer object of 19 artists> 6 [30]: # Plot 4: Histogram of number of data points per temperature interval # visualize the distribution of the data [31]: plt.hist(weather[:, 4], bins=5, ec='black') [31]: (array([ 414., 2013., 1825., 1808., 752.]), array([-3., 4., 11., 18., 25., 32.]), <BarContainer object of 5 artists>)
  • 14. 7 [32]: # for example, we have about 2000 temperatures between 4 and 11 degrees (second␣ ↪bar). [ ]: 8 Let data be a dataset having 4 columns. For example data = 87, 29, 10, 88 93, 95, 40, 67 55, 98, 27, 96 15, 34, 78, 23 12, 58, 11, 20 68, 65, 93, 11 ... Answer the following questions using Python commands. Your answers must work on any dataset that has 4 columns and not only on the one provided above. (a) Select all of row 0: [87, 29, 10, 88]. I am giving you the answer to the first question: data[0, :] (b) Select all of column 3: [88, 67, 96, 23, 20, 11]. (c) Select all of rows 2, 3, and 4: 55, 98, 27, 96 15, 34, 78, 23 12, 58, 11, 20. (d) Find the maximum value in column 3. For the given example, the value is 96. (e) Find the row index of the maximum value in column 3. For
  • 15. the given example the output is 2. (f) Select the entire row that contains the maximum value for column 3. For the given example, the row is [55, 98, 27, 96]. (g) Store the values of column 1 in a variable named col1. (h) Using the variable col1, select all the values in column 1 that are greater than 50: [95, 98, 58, 65]. (i) Using the variable col1, select all the values in column 1 that are greater than 50 and smaller than 90: [58, 65]. (j) Display the entire data set sorted on column 1. For the given example, the output is 87, 29, 10, 88 15, 34, 78, 23 12, 58, 11, 20 68, 65, 93, 11 93, 95, 40, 67 55, 98, 27, 96