>>> import pandas as pd
"""从列表创建"""
>>> pd.Series(range(5))
0    0
1    1
2    2
3    3
4    4
dtype: int64
"""自定义索引,用'a','b','c','d'作为1,2,3,4的索引"""
>>> pd.Series([1,2,3,4],['a','b','c','d'])  # 数据与索引长度必须一致,[1,2],['a','b','c','d']会报错;也可以写成这样:
>>> pd.Series([1,2,3,4],index=['a','b','c','d'])
a    1
b    2
c    3
d    4
dtype: int64
"""从标量创建,标量值会被填充到每一个索引上"""
>>> pd.Series(4,['a','b','c','d'])
a    4
b    4
c    4
d    4
dtype: int64
"""从字典创建"""
>>> pd.Series({'a':1,'b':2,'c':3,'d':4})
a    1
b    2
c    3
d    4
dtype: int64
"""从字典创建,然后用自定义的索引给series做索引"""
>>> pd.Series({'a':1,'b':2,'c':3,'d':4},index=['d','c','b','a','e'])
d    4.0
c    3.0
b    2.0
a    1.0
e    NaN
dtype: float64  # 是浮点数:索引'e'在字典中不存在,得到NaN,整数因此被转换为浮点数
"""从ndarray创建"""
>>> import numpy as np
>>> pd.Series(np.arange(5))
0    0
1    1
2    2
3    3
4    4
dtype: int64  # np.arange的默认整数位宽与平台有关,部分Windows环境下会显示int32
"""从ndarray创建,然后用自定义的索引给series做索引"""
>>> pd.Series(np.arange(5),index=np.arange(0,10,2))
0    0
2    1
4    2
6    3
8    4
dtype: int64
>>> a=pd.Series([1,2,3,4],['a','b','c','d'])
"""通过自动索引进行切片"""
>>> a[:4]
a    1
b    2
c    3
d    4
dtype: int64
"""对ndarray的运算和操作可用于Series类型"""
>>> a[a>a.median()]
c    3
d    4
dtype: int64
"""对ndarray的运算和操作可用于Series类型"""
>>> np.exp(a)
a     2.718282
b     7.389056
c    20.085537
d    54.598150
dtype: float64
>>> a=pd.Series([1,2,3,4],['a','b','c','d'])
>>> b=pd.Series([3,4,5],['a','y','z'])
>>> a+b
a    4.0  # 4.0=a['a']+b['a']=1+3
b    NaN  # NaN=a['b']+b['b']=2+NaN (b中没有索引'b',按缺失值NaN参与运算)
c    NaN
d    NaN
y    NaN
z    NaN
dtype: float64
name属性
Series有一个name属性,Series对象和索引都可以有一个名字,存储在属性.name中。
Series对象属性可以即刻修改
1 2 3 4 5 6 7 8 9 10
>>> a=pd.Series([1,2,3,4],['a','b','c','d'])
>>> a.name="Series a"
>>> a['a']=11
>>> a[['b','c']]=22  # 同时给多个标签赋值时要传入标签列表;新版pandas不再接受a['b','c']这种元组写法
>>> a
a    11
b    22
c    22
d     4
Name: Series a, dtype: int64
>>> import pandas as pd
>>> import numpy as np
"""由二维ndarray创建"""
>>> d=pd.DataFrame(np.arange(10).reshape(2,5))
>>> d
   0  1  2  3  4  # 0 1 2 3 4表示自动索引
0  0  1  2  3  4
1  5  6  7  8  9
"""由一维ndarray对象字典创建"""
# 创建一维ndarray对象字典
>>> dt={'one' : pd.Series([1,2,3],index=['a','b','c']),
...     'two' : pd.Series([9,8,7,6],index=['a','b','c','d'])}
>>> dt
{'one': a    1
b    2
c    3
dtype: int64, 'two': a    9
b    8
c    7
d    6
dtype: int64}
# 创建DataFrame对象
>>> d=pd.DataFrame(dt)
>>> d
   one  two
a  1.0    9
b  2.0    8
c  3.0    7
d  NaN    6  # 缺少的值为NaN
"""对DataFrame对象表进行拖动(按指定的index和columns重新选取),缺失的部分自动补齐为NaN"""
>>> pd.DataFrame(dt,index=['b','c','d'],columns=['two','three'])
   two three
b    8   NaN
c    7   NaN
d    6   NaN
"""由列表类型的字典创建"""
>>> dl={'one':[1,2,3,4],'two':[9,8,7,6]}
>>> d=pd.DataFrame(dl,index=['a','b','c','d'])
>>> d
   one  two
a    1    9
b    2    8
c    3    7
d    4    6

"""由列表类型的字典创建"""
>>> dl={'one':[1,2,3,4],'two':[9,8,7,6]}
>>> d=pd.DataFrame(dl,index=['a','b','c','d'])
>>> d
   one  two
a    1    9
b    2    8
c    3    7
d    4    6
"""reindex重排行索引index"""
>>> d=d.reindex(index=['d','c','b','a'])
>>> d
   one  two
d    4    6
c    3    7
b    2    8
a    1    9
"""reindex重排列索引columns"""
>>> d=d.reindex(columns=['two','one'])
>>> d
   two  one
d    6    4
c    7    3
b    8    2
a    9    1
"""reindex只能重排,不能重命名"""
>>> d=d.reindex(index=['A','B','C','D'])
>>> d
   two  one
A  NaN  NaN
B  NaN  NaN
C  NaN  NaN
D  NaN  NaN
"""reindex排错了 不可恢复"""
>>> d=d.reindex(index=['a','b','c','d'])
>>> d
   two  one
a  NaN  NaN
b  NaN  NaN
c  NaN  NaN
d  NaN  NaN
"""初始化d"""
>>> d=pd.DataFrame(dl,index=['a','b','c','d'])
"""插入新列,200填充"""
>>> new_d=d.columns.insert(2,'new')
>>> new_d
Index(['one', 'two', 'new'], dtype='object')
>>> newd=d.reindex(columns=new_d,fill_value=200)
>>> newd
   one  two  new
a    1    9  200
b    2    8  200
c    3    7  200
d    4    6  200

"""由列表类型的字典创建"""
>>> dl={'one':[1,2,3,4],'two':[9,8,7,6]}
>>> d=pd.DataFrame(dl,index=['a','b','c','d'])
>>> d
   one  two
a    1    9
b    2    8
c    3    7
d    4    6
"""根据索引修改数据"""
>>> dc=d.columns.delete(0)  # 删除one列,只剩two列
>>> di=d.index.insert(4,'e')  # 在4的位置(d的下边)插入e
>>> d_new=d.reindex(index=di,columns=dc,method='ffill')
>>> d_new
   two
a    9
b    8
c    7
d    6
e    6
"""删除指定行"""
>>> d_new=d_new.drop(['e'])
>>> d_new
   two
a    9
b    8
c    7
d    6
"""删除指定列"""
>>> d.drop('two',axis=1)  # Series只有0轴,DataFrame有0轴和1轴;axis=1表示按列删除
   one
a    1
b    2
c    3
d    4
>>> b=pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
>>> b
    0   1   2   3   4
c   0   1   2   3   4
a   5   6   7   8   9
d  10  11  12  13  14
b  15  16  17  18  19
>>> c=b.sort_index()
>>> c
    0   1   2   3   4
a   5   6   7   8   9
b  15  16  17  18  19
c   0   1   2   3   4
d  10  11  12  13  14
>>> b=c.sort_index(axis=1,ascending=False)
>>> b
    4   3   2   1   0
a   9   8   7   6   5
b  19  18  17  16  15
c   4   3   2   1   0
d  14  13  12  11  10
>>> b=pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
>>> b
    0   1   2   3   4
c   0   1   2   3   4
a   5   6   7   8   9
d  10  11  12  13  14
b  15  16  17  18  19
"""按照某一列的值进行排序"""
>>> c=b.sort_values(2,ascending=False)
>>> c
    0   1   2   3   4  # 参与排序的一列是
b  15  16  17  18  19  # 17
d  10  11  12  13  14  # 12
a   5   6   7   8   9  # 7
c   0   1   2   3   4  # 2
"""按照某一行的值进行排序"""
>>> c=c.sort_values('a',axis=1,ascending=False)
>>> c
    4   3   2   1   0  # 参与排序的一行是
b  19  18  17  16  15
d  14  13  12  11  10
a   9   8   7   6   5  # 9 8 7 6 5
c   4   3   2   1   0
"""NaN统一放到排序末尾"""
>>> a=pd.DataFrame(np.zeros((4,4),dtype=int),index=['c','a','d','b'])
>>> a
   0  1  2  3
c  0  0  0  0
a  0  0  0  0
d  0  0  0  0
b  0  0  0  0
>>> c=a+b
>>> c
    0   1   2   3   4
c   0   1   2   3 NaN
a   5   6   7   8 NaN
d  10  11  12  13 NaN
b  15  16  17  18 NaN
>>> c.sort_values('a',axis=1,ascending=False)
    3   2   1   0   4  # 参与排序的一行是
c   3   2   1   0 NaN
a   8   7   6   5 NaN  # 8 7 6 5,NaN排在最后
d  13  12  11  10 NaN
b  18  17  16  15 NaN
>>> a=pd.DataFrame(np.arange(16).reshape(4,4),index=['c','a','d','b'])
>>> a
    0   1   2   3
c   0   1   2   3
a   4   5   6   7
d   8   9  10  11
b  12  13  14  15
"""依次给出前1、2、…、n个数的和"""
>>> a.cumsum()
    0   1   2   3
c   0   1   2   3
a   4   6   8  10  # 4=0+4 6=1+5 8=2+6 10=3+7
d  12  15  18  21  # 12=4+8 15=6+9 18=8+10 21=10+11
b  24  28  32  36
"""依次给出前1、2、...、n个数的积"""
>>> a.cumprod()
   0    1     2     3
c  0    1     2     3
a  0    5    12    21  # 0=0*4 5=1*5
d  0   45   120   231  # 0=0*8 45=5*9
b  0  585  1680  3465
"""依次给出前1、2、...、n个数的最大值"""
>>> a.cummax()
    0   1   2   3
c   0   1   2   3
a   4   5   6   7
d   8   9  10  11
b  12  13  14  15
"""依次给出前1、2、...、n个数的最小值"""
>>> a.cummin()
   0  1  2  3
c  0  1  2  3
a  0  1  2  3
d  0  1  2  3
b  0  1  2  3
>>> b=pd.DataFrame(np.arange(16).reshape(4,4),index=['c','a','b','d'])
>>> b
    0   1   2   3
c   0   1   2   3
a   4   5   6   7
b   8   9  10  11
d  12  13  14  15
"""依次计算相邻2个行元素的和"""
>>> b.rolling(2).sum()
      0     1     2     3
c   NaN   NaN   NaN   NaN  # NaN: 第一行之前没有元素,窗口内不足2个值
a   4.0   6.0   8.0  10.0  # 4.0=0+4
b  12.0  14.0  16.0  18.0  # 12.0=4.0+8.0
d  20.0  22.0  24.0  26.0
"""依次计算相邻3个行元素的和"""
>>> b.rolling(3).sum()
      0     1     2     3
c   NaN   NaN   NaN   NaN
a   NaN   NaN   NaN   NaN
b  12.0  15.0  18.0  21.0
d  24.0  27.0  30.0  33.0
"""依次计算相邻3个列元素的和"""
>>> b.rolling(3,axis=1).sum()  # 注:较新版本的pandas已弃用rolling的axis参数,可改写为b.T.rolling(3).sum().T
    0   1     2     3
c NaN NaN   3.0   6.0
a NaN NaN  15.0  18.0
b NaN NaN  27.0  30.0
d NaN NaN  39.0  42.0