Pandas基本操作笔记
pandas(Python Data Analysis Library)是基于 NumPy 的一种工具,专为解决数据分析任务而创建。pandas 纳入了大量库和一些标准的数据模型,提供了高效操作大型数据集所需的工具,也提供了大量能使我们快速便捷地处理数据的函数和方法。你很快就会发现,它是使 Python 成为强大而高效的数据分析环境的重要因素之一。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# -*- coding: utf-8 -*-
# Demo script: basic pandas DataFrame creation, inspection, selection and
# assignment. Every print(...) call takes a single argument, so the script
# runs unchanged on both Python 2 and Python 3.
import numpy as np
import pandas as pd


def main():
    """Print a tour of basic pandas operations on a random 8x5 DataFrame.

    The frame is indexed by eight consecutive days starting 2017-08-01 and
    has columns A-E filled with standard-normal random values, so the
    numbers printed differ from run to run. Returns None; the only effect
    is text written to standard output.
    """
    # --- Build the tables ---
    dates = pd.date_range("20170801", periods=8)
    # Random frame: rows labelled by date, columns labelled A..E
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
    print(df)
    # A frame can also be built from a dict mapping column name -> values
    dff = pd.DataFrame({
        "A": np.random.randint(1, 10, 8),
        "B": pd.date_range("20170707", periods=8),
    })
    print(dff)

    # --- Basic inspection ---
    print(df.head(3))  # first three rows
    print(df.tail(3))  # last three rows
    print(df.index)
    print(df.values)
    print(df.T)  # transpose
    print(df.sort_values("C"))  # sort rows by column C, ascending
    print(df.sort_index(axis=1, ascending=False))  # sort by column label, descending
    print(df.describe())  # summary statistics per column

    # --- Selection ---
    print(df["A"])  # a single column
    print(df[:3])  # positional row slice
    print(df["20170801":"20170806"])  # label-based row slice (end inclusive)
    print(df.loc[dates[0]])  # one row by label
    print(df.loc["20170802":"20170806", ["B", "D"]])  # rows by label, chosen columns
    print(df.at[dates[0], "C"])  # one scalar by label
    print(df.iloc[1:3, 2:4])  # rows/columns by position
    print(df.iloc[1, 4])  # scalar at row 2, column 5 (positions are 0-based)
    print(df[df.B > 0])  # rows where column B is positive
    print(df[df < 0])  # keep negative cells, everything else becomes NaN
    print(df[df["E"].isin([1, 2])])  # rows whose E value is 1 or 2

    # --- Assignment ---
    sl = pd.Series(list(range(10, 18)), index=pd.date_range("20170801", periods=8))
    print(sl)
    df["F"] = sl  # new column, aligned on the date index
    print(df)
    df.at[dates[0], "A"] = 0  # set one cell by label
    print(df)
    df.iat[1, 4] = 666  # set one cell by position
    df.loc[:, "D"] = np.array([4] * len(df))  # overwrite a whole column
    print(df)
    df2 = df.copy()  # independent copy; df itself is left untouched below
    df2[df2 < 0] = -df2  # flip negative values to positive
    print(df2)


if __name__ == '__main__':
    main()
output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 A B 0 9 2017-07-07 1 3 2017-07-08 2 3 2017-07-09 3 6 2017-07-10 4 6 2017-07-11 5 4 2017-07-12 6 5 2017-07-13 7 6 2017-07-14 A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 A B C D E 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 DatetimeIndex(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04', '2017-08-05', '2017-08-06', '2017-08-07', '2017-08-08'], dtype='datetime64[ns]', freq='D') [[ 1.07524464 -0.60505755 0.71275536 2.41815902 -0.02894002] [-1.51331977 -0.51708246 1.36323759 0.6185221 -0.45040032] [-2.26771071 -2.23593917 2.28891947 2.60836214 -1.1816333 ] [-0.27608484 -1.13021474 -0.12282251 -0.54247504 -0.33188383] [-0.23136532 -0.48824379 1.75736371 -0.96758439 -1.44857541] [ 0.37751303 0.05175454 0.32217176 -0.46223914 0.82356261] [ 1.00914409 -0.67055311 0.26280966 -0.86527427 -0.72155023] [-0.25735124 0.98478455 -0.29728085 -0.23039814 0.69747694]] 2017-08-01 2017-08-02 2017-08-03 2017-08-04 2017-08-05 2017-08-06 \ A 1.075245 -1.513320 -2.267711 -0.276085 -0.231365 0.377513 B -0.605058 -0.517082 -2.235939 -1.130215 -0.488244 0.051755 C 0.712755 1.363238 2.288919 -0.122823 1.757364 0.322172 D 2.418159 0.618522 2.608362 -0.542475 -0.967584 -0.462239 E -0.028940 -0.450400 -1.181633 -0.331884 -1.448575 0.823563 2017-08-07 
2017-08-08 A 1.009144 -0.257351 B -0.670553 0.984785 C 0.262810 -0.297281 D -0.865274 -0.230398 E -0.721550 0.697477 A B C D E 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 E D C B A 2017-08-01 -0.028940 2.418159 0.712755 -0.605058 1.075245 2017-08-02 -0.450400 0.618522 1.363238 -0.517082 -1.513320 2017-08-03 -1.181633 2.608362 2.288919 -2.235939 -2.267711 2017-08-04 -0.331884 -0.542475 -0.122823 -1.130215 -0.276085 2017-08-05 -1.448575 -0.967584 1.757364 -0.488244 -0.231365 2017-08-06 0.823563 -0.462239 0.322172 0.051755 0.377513 2017-08-07 -0.721550 -0.865274 0.262810 -0.670553 1.009144 2017-08-08 0.697477 -0.230398 -0.297281 0.984785 -0.257351 A B C D E count 8.000000 8.000000 8.000000 8.000000 8.000000 mean -0.260491 -0.576319 0.785894 0.322134 -0.330243 std 1.158991 0.919133 0.928070 1.436732 0.812523 min -2.267711 -2.235939 -0.297281 -0.967584 -1.448575 25% -0.585394 -0.785469 0.166402 -0.623175 -0.836571 50% -0.244358 -0.561070 0.517464 -0.346319 -0.391142 75% 0.535421 -0.353244 1.461769 1.068431 0.152664 max 1.075245 0.984785 2.288919 2.608362 0.823563 2017-08-01 1.075245 2017-08-02 -1.513320 2017-08-03 -2.267711 2017-08-04 -0.276085 2017-08-05 -0.231365 2017-08-06 0.377513 2017-08-07 1.009144 2017-08-08 -0.257351 Freq: D, Name: A, dtype: float64 A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 
0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 A 1.075245 B -0.605058 C 0.712755 D 2.418159 E -0.028940 Name: 2017-08-01 00:00:00, dtype: float64 B D 2017-08-02 -0.517082 0.618522 2017-08-03 -2.235939 2.608362 2017-08-04 -1.130215 -0.542475 2017-08-05 -0.488244 -0.967584 2017-08-06 0.051755 -0.462239 0.71275536229 C D 2017-08-02 1.363238 0.618522 2017-08-03 2.288919 2.608362 -0.45040032497 A B C D E 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 A B C D E 2017-08-01 NaN -0.605058 NaN NaN -0.028940 2017-08-02 -1.513320 -0.517082 NaN NaN -0.450400 2017-08-03 -2.267711 -2.235939 NaN NaN -1.181633 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-05 -0.231365 -0.488244 NaN -0.967584 -1.448575 2017-08-06 NaN NaN NaN -0.462239 NaN 2017-08-07 NaN -0.670553 NaN -0.865274 -0.721550 2017-08-08 -0.257351 NaN -0.297281 -0.230398 NaN Empty DataFrame Columns: [A, B, C, D, E] Index: [] 2017-08-01 10 2017-08-02 11 2017-08-03 12 2017-08-04 13 2017-08-05 14 2017-08-06 15 2017-08-07 16 2017-08-08 17 Freq: D, dtype: int64 A B C D E F 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 10 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 11 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 12 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 13 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 14 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 15 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 16 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 17 A B C D E F 2017-08-01 0.000000 -0.605058 0.712755 2.418159 -0.028940 10 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 11 2017-08-03 -2.267711 -2.235939 2.288919 
2.608362 -1.181633 12 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 13 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 14 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 15 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 16 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 17 A B C D E F 2017-08-01 0.000000 -0.605058 0.712755 4 -0.028940 10 2017-08-02 -1.513320 -0.517082 1.363238 4 666.000000 11 2017-08-03 -2.267711 -2.235939 2.288919 4 -1.181633 12 2017-08-04 -0.276085 -1.130215 -0.122823 4 -0.331884 13 2017-08-05 -0.231365 -0.488244 1.757364 4 -1.448575 14 2017-08-06 0.377513 0.051755 0.322172 4 0.823563 15 2017-08-07 1.009144 -0.670553 0.262810 4 -0.721550 16 2017-08-08 -0.257351 0.984785 -0.297281 4 0.697477 17 A B C D E F 2017-08-01 0.000000 0.605058 0.712755 4 0.028940 10 2017-08-02 1.513320 0.517082 1.363238 4 666.000000 11 2017-08-03 2.267711 2.235939 2.288919 4 1.181633 12 2017-08-04 0.276085 1.130215 0.122823 4 0.331884 13 2017-08-05 0.231365 0.488244 1.757364 4 1.448575 14 2017-08-06 0.377513 0.051755 0.322172 4 0.823563 15 2017-08-07 1.009144 0.670553 0.262810 4 0.721550 16 2017-08-08 0.257351 0.984785 0.297281 4 0.697477 17 |
近期评论