Tuesday, October 24, 2017

Pandas and basic plotting

Untitled
In [177]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as rnd
%matplotlib inline
In [4]:
np.arange(0,10, 2)
Out[4]:
array([0, 2, 4, 6, 8])
In [5]:
np.linspace(10,23,6)
Out[5]:
array([ 10. ,  12.6,  15.2,  17.8,  20.4,  23. ])
In [23]:
rnd.seed(121)
rnd.random()
Out[23]:
0.08730148686581662
In [51]:
l1 = [1,2,3,4]
a = np.array(l1)
a = a.reshape(2,2)
print(a)
print(a[:,0])
[[1 2]
 [3 4]]
[1 3]
In [54]:
a.dtype
Out[54]:
dtype('int32')
In [64]:
np.transpose(np.ones(3).reshape(1,3)).dtype
Out[64]:
dtype('float64')
In [66]:
np.random.randn(2)
Out[66]:
array([-0.16930965,  1.30136014])
In [67]:
np.random.uniform(0,1,5)
Out[67]:
array([ 0.78556333,  0.11732278,  0.35629669,  0.27716212,  0.20262334])
In [72]:
plt.plot(np.random.uniform(0,1,5))
Out[72]:
[<matplotlib.lines.Line2D at 0x26aabf22278>]
In [85]:
countries = ['aus', 'Ind', 'pak', 'SL', 'Eng']
happiness_Index = ['8','4','4','6','8.5']

df = pd.DataFrame(happiness_Index, index = countries, columns = ['happiness_Index'])
df
df1 = pd.DataFrame(np.random.randn(5,5), index = "0,1,2,3,4".split(","), columns="a b c d e".split(" "))
df1
Out[85]:
a b c d e
0 0.650215 -0.810062 0.632912 -0.343608 1.385923
1 0.080158 1.485864 0.788142 -0.066174 -0.922874
2 -0.574159 -0.861349 -0.972300 -2.240135 0.591917
3 0.408051 1.422824 -1.108161 0.056692 1.261179
4 -0.634453 -0.140949 0.212248 0.351281 -0.538127
In [96]:
df1.loc[['0']]
Out[96]:
a b c d e
0 0.650215 -0.810062 0.632912 -0.343608 1.385923
In [99]:
df1.loc[['1','2'],['b','c']]
Out[99]:
b c
1 1.485864 0.788142
2 -0.861349 -0.972300
In [108]:
df1.loc[:,['b','c']]
Out[108]:
b c
0 -0.810062 0.632912
1 1.485864 0.788142
2 -0.861349 -0.972300
3 1.422824 -1.108161
4 -0.140949 0.212248
In [109]:
df1 > 0
Out[109]:
a b c d e
0 True False True False True
1 True True True False False
2 False False False False True
3 True True False True True
4 False False True True False
In [111]:
df1[df1 > 0][['a','e']]
Out[111]:
a e
0 0.650215 1.385923
1 0.080158 NaN
2 NaN 0.591917
3 0.408051 1.261179
4 NaN NaN
In [112]:
df1['d'] > 0
Out[112]:
0    False
1    False
2    False
3     True
4     True
Name: d, dtype: bool
In [114]:
df1[df1['d'] > 0]
Out[114]:
a b c d e
3 0.408051 1.422824 -1.108161 0.056692 1.261179
4 -0.634453 -0.140949 0.212248 0.351281 -0.538127
In [136]:
newind = 'a0 a1 a2 a3 a4'.split()
In [137]:
df1['new_index'] = newind
In [138]:
df1
Out[138]:
a b c d e new_index
0 0.650215 -0.810062 0.632912 -0.343608 1.385923 a0
1 0.080158 1.485864 0.788142 -0.066174 -0.922874 a1
2 -0.574159 -0.861349 -0.972300 -2.240135 0.591917 a2
3 0.408051 1.422824 -1.108161 0.056692 1.261179 a3
4 -0.634453 -0.140949 0.212248 0.351281 -0.538127 a4
In [139]:
df1.reset_index()
Out[139]:
index a b c d e new_index
0 0 0.650215 -0.810062 0.632912 -0.343608 1.385923 a0
1 1 0.080158 1.485864 0.788142 -0.066174 -0.922874 a1
2 2 -0.574159 -0.861349 -0.972300 -2.240135 0.591917 a2
3 3 0.408051 1.422824 -1.108161 0.056692 1.261179 a3
4 4 -0.634453 -0.140949 0.212248 0.351281 -0.538127 a4
In [141]:
df1.set_index('new_index')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-141-4b031fb7ee18> in <module>()
----> 1 df1.set_index('new_index')

TypeError: 'list' object is not callable
In [144]:
from collections import defaultdict
In [146]:
d = defaultdict(list)
d['a'].append(100)
d['a'].append(101)
d['a'].append(102)
d['b'].append(200)
d['b'].append(201)
In [151]:
d['b'].append(np.nan)
d
Out[151]:
defaultdict(list, {'a': [100, 101, 102], 'b': [200, 201, nan]})
In [154]:
df2 = pd.DataFrame(d)
df2
Out[154]:
a b
0 100 200.0
1 101 201.0
2 102 NaN
In [155]:
df2['Countries'] = 'aus ind eng'.split()
df2
Out[155]:
a b Countries
0 100 200.0 aus
1 101 201.0 ind
2 102 NaN eng
In [156]:
byCountry = df2.groupby('Countries')
In [162]:
df3 = pd.DataFrame(np.random.randn(3,3), index = 'a b c'.split(), columns = 'x y z'.split())
df3
Out[162]:
x y z
a -0.505596 1.574099 -0.822803
b 2.362696 0.507077 0.555769
c -0.394249 0.331578 -0.048262
In [171]:
x = np.linspace(0,10,5)
y2 = y**2
y3 = x**3
print(y2)
print(y3)
[  0.00000000e+00   2.44140625e+02   1.56250000e+04   1.77978516e+05
   1.00000000e+06]
[    0.       15.625   125.      421.875  1000.   ]
In [167]:
plt.plot(x,y3)
Out[167]:
[<matplotlib.lines.Line2D at 0x26aac1f9710>]
In [168]:
plt.subplot(1,2,1)
plt.plot(x,y2,'r--')
plt.subplot(1,2,2)
plt.plot(x,y3,'g--')
Out[168]:
[<matplotlib.lines.Line2D at 0x26aac2586d8>]
In [190]:
fig = plt.figure(figsize=(2,1), dpi=50)
axes = fig.add_axes([0,0,2,2])
axes.plot(x, y2, 'b')
Out[190]:
[<matplotlib.lines.Line2D at 0x26aaca5c9b0>]
In [197]:
fig, axes = plt.subplots(nrows=1, ncols=2)
In [204]:
for ax in axes:
    ax.plot(x, y, 'b')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('title')
fig
plt.tight_layout()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-204-f836af9c8ade> in <module>()
      5     ax.set_title('title')
      6 fig
----> 7 plt.tight_layout()

C:\Users\hp\AppData\Local\Continuum\Anaconda3\lib\site-packages\matplotlib\pyplot.py in tight_layout(pad, h_pad, w_pad, rect)
   1385 
   1386     fig = gcf()
-> 1387     fig.tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad, rect=rect)
   1388 
   1389 

C:\Users\hp\AppData\Local\Continuum\Anaconda3\lib\site-packages\matplotlib\figure.py in tight_layout(self, renderer, pad, h_pad, w_pad, rect)
   1750                                          renderer,
   1751                                          pad=pad, h_pad=h_pad, w_pad=w_pad,
-> 1752                                          rect=rect)
   1753 
   1754         self.subplots_adjust(**kwargs)

C:\Users\hp\AppData\Local\Continuum\Anaconda3\lib\site-packages\matplotlib\tight_layout.py in get_tight_layout_figure(fig, axes_list, subplotspec_list, renderer, pad, h_pad, w_pad, rect)
    320         subplots.append(ax)
    321 
--> 322     max_nrows = max(nrows_list)
    323     max_ncols = max(ncols_list)
    324 

ValueError: max() arg is an empty sequence
<matplotlib.figure.Figure at 0x26aae7c79e8>

lets look at seaborn library

In [205]:
import seaborn as sns
In [206]:
tips = sns.load_dataset('tips')
tips.head()
Out[206]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [207]:
sns.violinplot(x="day", y="total_bill", data=tips,hue='sex',palette='rainbow')
Out[207]:
<matplotlib.axes._subplots.AxesSubplot at 0x26aaf26b710>

1 comment:

5 States data in geoChart