0% found this document useful (0 votes)
128 views4 pages

Pandas

This document discusses various pandas operations including: - Creating and manipulating pandas Series and DataFrames - Reading and writing CSV files with pandas - Indexing and selecting data from pandas objects - Cleaning and aggregating data in pandas - Merging DataFrames and appending Series to DataFrames

Uploaded by

dashsidd1
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
128 views4 pages

Pandas

This document discusses various pandas operations including: - Creating and manipulating pandas Series and DataFrames - Reading and writing CSV files with pandas - Indexing and selecting data from pandas objects - Cleaning and aggregating data in pandas - Merging DataFrames and appending Series to DataFrames

Uploaded by

dashsidd1
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 4

Pandas

1. Pandas Data Structures - Hands-on

import pandas as pd
import numpy as np

# Build height/weight Series for two classes of five students and combine
# each pair into a two-column DataFrame, printing a few basic attributes.
student_ids = ['s1', 's2', 's3', 's4', 's5']

# Class A: fixed measurements, labelled s1..s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=student_ids)
print(heights_A.shape)

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=student_ids)
print(weights_A.dtype)

df_A = pd.DataFrame(
    {
        'Student_height': heights_A,
        'Student_weight': weights_A,
    }
)
print(df_A.shape)

# Class B: normally distributed samples. The generator is re-seeded before
# each draw, so both samples come from the same underlying random sequence.
np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
heights_B = pd.Series(x, index=student_ids)

np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=student_ids)
print(heights_B.mean())

df_B = pd.DataFrame(
    {
        'Student_height': heights_B,
        'Student_weight': weights_B,
    },
    index=weights_B.index,
)
print(df_B.columns)

2. Working with CSVs

import pandas as pd
import numpy as np
import os

# Round-trip the class A and class B tables through CSV files to show how
# the index/header options of to_csv and read_csv change what is read back.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_A.shape)

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(weights_A.dtype)

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Written with the index, so the student labels become the first CSV column.
df_A.to_csv('classA.csv')
# Show the raw file from Python instead of shelling out to `cat`, which only
# exists on POSIX systems and can interleave with buffered print output.
with open('classA.csv', encoding='utf-8') as csv_file:
    print(csv_file.read(), end='')

# Default read: the saved index comes back as an ordinary unnamed column.
df_A2 = pd.read_csv('classA.csv')
print(df_A2)

# index_col=0 restores the student labels as the index.
df_A3 = pd.read_csv("classA.csv", index_col=0)
print(df_A3)

# Class B: the generator is re-seeded before each draw, so heights and
# weights are derived from the same underlying random sequence.
np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
heights_B = pd.Series(x, index=['s1', 's2', 's3', 's4', 's5'])
np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=['s1', 's2', 's3', 's4', 's5'])
df_B = pd.DataFrame(
    {'Student_height': heights_B, 'Student_weight': weights_B},
    index=weights_B.index,
)

# index=False drops the student labels from the file entirely.
df_B.to_csv("classB.csv", index=False)
with open("classB.csv", encoding='utf-8') as csv_file:
    print(csv_file.read(), end='')

df_B2 = pd.read_csv("classB.csv")
print(df_B2)

# header=None treats the header line as a data row.
df_B3 = pd.read_csv("classB.csv", header=None)
print(df_B3)

# skiprows=2 additionally skips the header line and the first data row.
df_B4 = pd.read_csv("classB.csv", header=None, skiprows=2)
print(df_B4)

3. Hands-on with Indexes

import pandas as pd

# Explore two index types: a DatetimeIndex (with a membership check) and a
# two-level MultiIndex built from parallel label arrays.

# Daily range from 1 Sep 2017 to 15 Sep 2017, both ends included.
dates = pd.date_range(start='2017/09/01', end='2017/09/15')
print(dates[2])

# Parse the lookup dates, then test each one for membership in `dates`.
datelist = ['14-Sep-2017', '9-Sep-2017']
dates_to_be_searched = pd.to_datetime(datelist)
print(dates_to_be_searched)
is_in_range = dates_to_be_searched.isin(dates)
print(is_in_range)

# Outer level: class name; inner level: student id, repeated per class.
class_labels = ['classA'] * 5 + ['classB'] * 5
student_labels = ['s1', 's2', 's3', 's4', 's5'] * 2
arraylist = [class_labels, student_labels]

mi_index = pd.MultiIndex.from_arrays(arraylist)
print(mi_index.levels)

4. Access Elements in Data Structures

#Write your code here


import pandas as pd
import numpy as np

# Demonstrate positional and label-based selection on a Series and a
# DataFrame whose rows are labelled s1..s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# The index holds strings, so integer keys must go through .iloc: plain
# heights_A[1] relies on a positional fallback that pandas has deprecated.
print(heights_A.iloc[1])
print(heights_A.iloc[[1, 2, 3]])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Selecting a single column yields a Series.
height = df_A['Student_height']
print(type(height))

# Rows by position: the first two students.
df_s1s2 = df_A.iloc[[0, 1]]
print(df_s1s2)

# Rows by position in a custom order: s2, s5, s1.
df_s2s5s1 = df_A.iloc[[1, 4, 0]]
print(df_s2s5s1)

# Rows by a boolean mask on the labels: those ending in '1' or '4'.
df_s1s4 = df_A.loc[(df_A.index.str.endswith('1') | df_A.index.str.endswith('4'))]
print(df_s1s4)

5. Data Cleaning - Handson

#Write your code here


import pandas as pd
import numpy as np

# Inject missing values into the class A table and drop the affected rows.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Blank out every value of student s3.
df_A.loc['s3'] = np.nan
# Blank out only the weight (second column) of student s5. A single
# .loc[row, column] assignment writes to df_A itself; the chained form
# df_A.loc['s5'][1] = ... assigns into a temporary and may leave df_A as is.
df_A.loc['s5', 'Student_weight'] = np.nan

# how='any' removes each row that has at least one missing value.
df_A2 = df_A.dropna(how='any')
print(df_A2)

6. Data Aggregation - Handson

import pandas as pd
import numpy as np

# Filter the class A table with boolean masks, then group it by gender and
# print the per-group column means.
student_ids = ['s1', 's2', 's3', 's4', 's5']

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=student_ids)
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=student_ids)

df_A = pd.DataFrame(
    {
        'Student_height': heights_A,
        'Student_weight': weights_A,
    }
)

# Students taller than 160.0 and lighter than 80.0.
is_tall = df_A['Student_height'] > 160.0
is_light = df_A['Student_weight'] < 80.0
df_A_filter1 = df_A[is_tall & is_light]
print(df_A_filter1)

# The row (kept as a DataFrame) whose label is 's5'.
is_s5 = df_A.index.isin(['s5'])
df_A_filter2 = df_A[is_s5]
print(df_A_filter2)

# Add a gender column (after the filters above), then average by gender.
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']

df_groups = df_A.groupby('Gender')
print(df_groups.mean())

7. Merging DataFrames

import pandas as pd
import numpy as np

# Join a master table of ten people with a small transaction table on the
# shared 'nameid' column and print the merged result.

# Master table: ids 101..110 paired with names person1..person10.
nameid = pd.Series(range(101, 111))
name = pd.Series(['person' + str(number) for number in range(1, 11)])
master = pd.DataFrame({'nameid': nameid, 'name': name})

# Transactions: three purchases by id 108 and one by id 103.
buyer_ids = [108, 108, 108, 103]
products = ['iPhone', 'Nokia', 'Micromax', 'Vivo']
transaction = pd.DataFrame({'nameid': buyer_ids, 'product': products})

# Default (inner) merge: only ids present in both tables are kept.
mdf = master.merge(transaction, on='nameid')
print(mdf)

8. Appending a Series to a DataFrame
#Write your code here
import pandas as pd
import numpy as np

# Add one new student row to the class A table, then stack the class B
# table underneath to form a single DataFrame of eleven students.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']

# New student s6; the Series name becomes the row label.
s = pd.Series(
    [165.4, 82.7, 'F'],
    index=['Student_height', 'Student_weight', 'Gender'],
    name='s6',
)

# DataFrame.append was removed in pandas 2.0. Turn the Series into a
# one-row frame and concatenate instead; infer_objects() restores numeric
# dtypes that the mixed-type Series would otherwise leave as object.
df_AA = pd.concat([df_A, s.to_frame().T.infer_objects()])
print(df_AA)

# Class B: the generator is re-seeded before each draw, so heights and
# weights are derived from the same underlying random sequence.
np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
heights_B = pd.Series(x, index=['s1', 's2', 's3', 's4', 's5'])
np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=['s1', 's2', 's3', 's4', 's5'])
df_B = pd.DataFrame(
    {'Student_height': heights_B, 'Student_weight': weights_B},
    index=weights_B.index,
)
# Relabel class B so its rows do not collide with s1..s6.
df_B.index = ['s7', 's8', 's9', 's10', 's11']
df_B['Gender'] = ['F', 'M', 'F', 'F', 'M']

df = pd.concat([df_AA, df_B])
print(df)

You might also like