10 Minutes to pandas

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Object Creation

s = pd.Series([1, 3, 5, np.nan, 6, 8])
s
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
dates = pd.date_range('20130101', periods=6)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df
                   A         B         C         D
2013-01-01  0.579917 -1.406484  0.584294 -0.939104
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-05  1.095598  0.577610  0.016606 -1.251764
2013-01-06  0.730875 -0.163791 -0.642668  1.419975

df2 = pd.DataFrame({'A': 1,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(['test', 'train', 'test', 'train']),
                    'F': 'foo'})
df2
   A          B    C  D      E    F
0  1 2013-01-02  1.0  3   test  foo
1  1 2013-01-02  1.0  3  train  foo
2  1 2013-01-02  1.0  3   test  foo
3  1 2013-01-02  1.0  3  train  foo

df2.dtypes
A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

Viewing Data

df.head()
                   A         B         C         D
2013-01-01  0.579917 -1.406484  0.584294 -0.939104
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-05  1.095598  0.577610  0.016606 -1.251764

df.tail(3)
                   A         B         C         D
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-05  1.095598  0.577610  0.016606 -1.251764
2013-01-06  0.730875 -0.163791 -0.642668  1.419975

df.index
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
df.columns
Index(['A', 'B', 'C', 'D'], dtype='object')
df.values
array([[ 0.57991721, -1.4064839 ,  0.58429379, -0.9391043 ],
       [ 0.53322459, -0.23365533, -0.08499029,  0.08471479],
       [ 0.05603044, -1.61856825,  1.71557651,  1.33503104],
       [ 0.95023069, -0.37093663, -0.74721488,  0.11169788],
       [ 1.09559791,  0.5776104 ,  0.01660636, -1.25176432],
       [ 0.73087463, -0.1637906 , -0.64266787,  1.41997504]])
df.describe()
              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.657646 -0.535971  0.140267  0.126758
std    0.365030  0.827892  0.910539  1.110920
min    0.056030 -1.618568 -0.747215 -1.251764
25%    0.544898 -1.147597 -0.503248 -0.683150
50%    0.655396 -0.302296 -0.034192  0.098206
75%    0.895392 -0.181257  0.442372  1.029198
max    1.095598  0.577610  1.715577  1.419975

df.T
   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.579917    0.533225    0.056030    0.950231    1.095598    0.730875
B   -1.406484   -0.233655   -1.618568   -0.370937    0.577610   -0.163791
C    0.584294   -0.084990    1.715577   -0.747215    0.016606   -0.642668
D   -0.939104    0.084715    1.335031    0.111698   -1.251764    1.419975

df.sort_index(axis=1, ascending=False)
                   D         C         B         A
2013-01-01 -0.939104  0.584294 -1.406484  0.579917
2013-01-02  0.084715 -0.084990 -0.233655  0.533225
2013-01-03  1.335031  1.715577 -1.618568  0.056030
2013-01-04  0.111698 -0.747215 -0.370937  0.950231
2013-01-05 -1.251764  0.016606  0.577610  1.095598
2013-01-06  1.419975 -0.642668 -0.163791  0.730875

df.sort_index(axis=0, ascending=False)
                   A         B         C         D
2013-01-06  0.730875 -0.163791 -0.642668  1.419975
2013-01-05  1.095598  0.577610  0.016606 -1.251764
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-01  0.579917 -1.406484  0.584294 -0.939104

df.sort_values(by='B')
                   A         B         C         D
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-01  0.579917 -1.406484  0.584294 -0.939104
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-06  0.730875 -0.163791 -0.642668  1.419975
2013-01-05  1.095598  0.577610  0.016606 -1.251764

Selection

While standard Python / NumPy expressions for selecting and setting are intuitive and come in handy for interactive work, for production code we recommend the optimized pandas data access methods: .at, .iat, .loc and .iloc (.ix also existed historically, but it has been deprecated).
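
As a quick orientation, here is a sketch showing the same scalar read through each accessor (all four expressions return the value at row 2013-01-01, column 'A' of the df defined above; .at and .iat are the scalar-only fast paths):

df.loc['2013-01-01', 'A']   # label-based
df.at[dates[0], 'A']        # label-based, optimized for a single scalar
df.iloc[0, 0]               # position-based
df.iat[0, 0]                # position-based, optimized for a single scalar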

Getting

df['A']
2013-01-01    0.579917
2013-01-02    0.533225
2013-01-03    0.056030
2013-01-04    0.950231
2013-01-05    1.095598
2013-01-06    0.730875
Freq: D, Name: A, dtype: float64
df[0:3]
                   A         B         C         D
2013-01-01  0.579917 -1.406484  0.584294 -0.939104
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031

df['20130102':'20130104']
                   A         B         C         D
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-04  0.950231 -0.370937 -0.747215  0.111698

Selection by Label

df.loc[dates[0]]
A    0.579917
B   -1.406484
C    0.584294
D   -0.939104
Name: 2013-01-01 00:00:00, dtype: float64
df.loc[:, ['A', 'B']]
                   A         B
2013-01-01  0.579917 -1.406484
2013-01-02  0.533225 -0.233655
2013-01-03  0.056030 -1.618568
2013-01-04  0.950231 -0.370937
2013-01-05  1.095598  0.577610
2013-01-06  0.730875 -0.163791

df.loc['20130102':'20130104', ['A', 'B']]
                   A         B
2013-01-02  0.533225 -0.233655
2013-01-03  0.056030 -1.618568
2013-01-04  0.950231 -0.370937

df.loc['20130102', ['A', 'B']]
A    0.533225
B   -0.233655
Name: 2013-01-02 00:00:00, dtype: float64
df.loc[dates[0], 'A']
0.57991721046639744

For fast access to a scalar (equivalent to the prior method):

df.at[dates[0], 'A']
0.57991721046639744

Selection by Position

df.iloc[3]
A    0.950231
B   -0.370937
C   -0.747215
D    0.111698
Name: 2013-01-04 00:00:00, dtype: float64
df
                   A         B         C         D
2013-01-01  0.579917 -1.406484  0.584294 -0.939104
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-05  1.095598  0.577610  0.016606 -1.251764
2013-01-06  0.730875 -0.163791 -0.642668  1.419975

df.iloc[3:5, 0:2]
                   A         B
2013-01-04  0.950231 -0.370937
2013-01-05  1.095598  0.577610

df.iloc[[1, 2, 4], [0, 2]]
                   A         C
2013-01-02  0.533225 -0.084990
2013-01-03  0.056030  1.715577
2013-01-05  1.095598  0.016606

df.iloc[1:3, :]
                   A         B         C         D
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031

df.iloc[:, 1:3]
                   B         C
2013-01-01 -1.406484  0.584294
2013-01-02 -0.233655 -0.084990
2013-01-03 -1.618568  1.715577
2013-01-04 -0.370937 -0.747215
2013-01-05  0.577610  0.016606
2013-01-06 -0.163791 -0.642668

df.iloc[1, 1]
-0.2336553320711513
df.iat[1, 1]
-0.2336553320711513

Boolean Indexing

df[df.B > 0]
                   A        B         C         D
2013-01-05  1.095598  0.57761  0.016606 -1.251764

df[df > 0]
                   A        B         C         D
2013-01-01  0.579917      NaN  0.584294       NaN
2013-01-02  0.533225      NaN       NaN  0.084715
2013-01-03  0.056030      NaN  1.715577  1.335031
2013-01-04  0.950231      NaN       NaN  0.111698
2013-01-05  1.095598  0.57761  0.016606       NaN
2013-01-06  0.730875      NaN       NaN  1.419975

df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2
                   A         B         C         D      E
2013-01-01  0.579917 -1.406484  0.584294 -0.939104    one
2013-01-02  0.533225 -0.233655 -0.084990  0.084715    one
2013-01-03  0.056030 -1.618568  1.715577  1.335031    two
2013-01-04  0.950231 -0.370937 -0.747215  0.111698  three
2013-01-05  1.095598  0.577610  0.016606 -1.251764   four
2013-01-06  0.730875 -0.163791 -0.642668  1.419975  three

df2[df2['E'].isin(['two', 'four'])]
                   A         B         C         D     E
2013-01-03  0.056030 -1.618568  1.715577  1.335031   two
2013-01-05  1.095598  0.577610  0.016606 -1.251764  four

Setting

s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
s1
2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64
df.at[dates[0], 'A'] = 0
df
                   A         B         C         D
2013-01-01  0.000000 -1.406484  0.584294 -0.939104
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-05  1.095598  0.577610  0.016606 -1.251764
2013-01-06  0.730875 -0.163791 -0.642668  1.419975

df.iat[0, 1] = 0
df
                   A         B         C         D
2013-01-01  0.000000  0.000000  0.584294 -0.939104
2013-01-02  0.533225 -0.233655 -0.084990  0.084715
2013-01-03  0.056030 -1.618568  1.715577  1.335031
2013-01-04  0.950231 -0.370937 -0.747215  0.111698
2013-01-05  1.095598  0.577610  0.016606 -1.251764
2013-01-06  0.730875 -0.163791 -0.642668  1.419975

df.loc[:, 'D'] = np.array([5] * len(df))
df
                   A         B         C  D
2013-01-01  0.000000  0.000000  0.584294  5
2013-01-02  0.533225 -0.233655 -0.084990  5
2013-01-03  0.056030 -1.618568  1.715577  5
2013-01-04  0.950231 -0.370937 -0.747215  5
2013-01-05  1.095598  0.577610  0.016606  5
2013-01-06  0.730875 -0.163791 -0.642668  5

df2 = df.copy()
df2[df2 > 0] = -df2
df2
                   A         B         C  D
2013-01-01  0.000000  0.000000 -0.584294 -5
2013-01-02 -0.533225 -0.233655 -0.084990 -5
2013-01-03 -0.056030 -1.618568 -1.715577 -5
2013-01-04 -0.950231 -0.370937 -0.747215 -5
2013-01-05 -1.095598 -0.577610 -0.016606 -5
2013-01-06 -0.730875 -0.163791 -0.642668 -5

Missing Data

pandas primarily uses the value np.nan to represent missing data. It is by default not included in computations.

df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
df1
                   A         B         C  D   E
2013-01-01  0.000000  0.000000  0.584294  5 NaN
2013-01-02  0.533225 -0.233655 -0.084990  5 NaN
2013-01-03  0.056030 -1.618568  1.715577  5 NaN
2013-01-04  0.950231 -0.370937 -0.747215  5 NaN

df1.loc[dates[0]:dates[1], 'E'] = 1
df1
                   A         B         C  D    E
2013-01-01  0.000000  0.000000  0.584294  5  1.0
2013-01-02  0.533225 -0.233655 -0.084990  5  1.0
2013-01-03  0.056030 -1.618568  1.715577  5  NaN
2013-01-04  0.950231 -0.370937 -0.747215  5  NaN

To drop any rows that have missing data.

df1.dropna(how='any')
                   A         B         C  D    E
2013-01-01  0.000000  0.000000  0.584294  5  1.0
2013-01-02  0.533225 -0.233655 -0.084990  5  1.0

Filling missing data

df1.fillna(value=5)
                   A         B         C  D    E
2013-01-01  0.000000  0.000000  0.584294  5  1.0
2013-01-02  0.533225 -0.233655 -0.084990  5  1.0
2013-01-03  0.056030 -1.618568  1.715577  5  5.0
2013-01-04  0.950231 -0.370937 -0.747215  5  5.0

pd.isna(df1)
                A      B      C      D      E
2013-01-01  False  False  False  False  False
2013-01-02  False  False  False  False  False
2013-01-03  False  False  False  False   True
2013-01-04  False  False  False  False   True

Operations

Stats

Operations in general exclude missing data.
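
A small illustration (s_na is a throwaway name for this sketch): NaN values are skipped rather than poisoning the result, unless you opt out with skipna=False.

s_na = pd.Series([1.0, np.nan, 3.0])
s_na.mean()              # 2.0 -- the NaN is excluded
s_na.mean(skipna=False)  # nan -- opt back in to NaN propagation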

df.mean()
A    0.560993
B   -0.301557
C    0.140267
D    5.000000
dtype: float64
df.mean(axis=1)
2013-01-01    1.396073
2013-01-02    1.303645
2013-01-03    1.288260
2013-01-04    1.208020
2013-01-05    1.672454
2013-01-06    1.231104
Freq: D, dtype: float64

Shift

s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
s
2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64
df.sub(s, axis='index')
                   A         B         C    D
2013-01-01       NaN       NaN       NaN  NaN
2013-01-02       NaN       NaN       NaN  NaN
2013-01-03 -0.943970 -2.618568  0.715577  4.0
2013-01-04 -2.049769 -3.370937 -3.747215  2.0
2013-01-05 -3.904402 -4.422390 -4.983394  0.0
2013-01-06       NaN       NaN       NaN  NaN

Apply

df.apply(np.cumsum)
                   A         B         C   D
2013-01-01  0.000000  0.000000  0.584294   5
2013-01-02  0.533225 -0.233655  0.499304  10
2013-01-03  0.589255 -1.852224  2.214880  15
2013-01-04  1.539486 -2.223160  1.467665  20
2013-01-05  2.635084 -1.645550  1.484271  25
2013-01-06  3.365958 -1.809340  0.841604  30

df
                   A         B         C  D
2013-01-01  0.000000  0.000000  0.584294  5
2013-01-02  0.533225 -0.233655 -0.084990  5
2013-01-03  0.056030 -1.618568  1.715577  5
2013-01-04  0.950231 -0.370937 -0.747215  5
2013-01-05  1.095598  0.577610  0.016606  5
2013-01-06  0.730875 -0.163791 -0.642668  5

df.apply(lambda x: x.max() - x.min())
A    1.095598
B    2.196179
C    2.462791
D    0.000000
dtype: float64

Histogramming

s = pd.Series(np.random.randint(0, 7, size=10))
s
0    3
1    0
2    4
3    5
4    5
5    5
6    3
7    3
8    1
9    0
dtype: int64
s.value_counts()
5    3
3    3
0    2
4    1
1    1
dtype: int64

String Methods

Series is equipped with a set of string processing methods in the str attribute that make it easy to operate on each element of the array, as in the code snippet below. Note that pattern-matching in str generally uses regular expressions by default (and in some cases always uses them).

s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s
0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object
s.str.lower()
0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object
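
Since str pattern matching defaults to regular expressions, a short follow-up sketch on the same s (the patterns here are only illustrative):

s.str.contains('^[AB]')  # regex match: starts with 'A' or 'B'; NaN stays NaN
s.str.count('a')         # number of regex matches per element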

Merge

Concat

pandas provides various facilities for easily combining together Series, DataFrame, and Panel objects (Panel has since been deprecated) with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.

df = pd.DataFrame(np.random.randn(10, 4))
df
          0         1         2         3
0 -0.172216  0.453155 -0.808231 -0.254001
1  0.547393  0.444988  0.664029  0.813927
2 -0.084124 -1.126096 -0.546502 -1.146282
3 -0.075791  1.405128 -0.976699  1.474235
4 -0.370291 -0.583410  0.635501  0.165744
5  1.027374 -0.303083  0.514810  0.048940
6  0.177177  1.875743 -0.406614 -0.413840
7 -0.556008 -0.153022  0.994405  0.798631
8  1.180055 -0.028602 -0.910111  1.187928
9  0.289311 -0.645251 -0.075802 -0.005709

pieces = [df[:3], df[3:7], df[7:]]
pieces
[          0         1         2         3
 0 -0.172216  0.453155 -0.808231 -0.254001
 1  0.547393  0.444988  0.664029  0.813927
 2 -0.084124 -1.126096 -0.546502 -1.146282,
           0         1         2         3
 3 -0.075791  1.405128 -0.976699  1.474235
 4 -0.370291 -0.583410  0.635501  0.165744
 5  1.027374 -0.303083  0.514810  0.048940
 6  0.177177  1.875743 -0.406614 -0.413840,
           0         1         2         3
 7 -0.556008 -0.153022  0.994405  0.798631
 8  1.180055 -0.028602 -0.910111  1.187928
 9  0.289311 -0.645251 -0.075802 -0.005709]
pd.concat(pieces)
          0         1         2         3
0 -0.172216  0.453155 -0.808231 -0.254001
1  0.547393  0.444988  0.664029  0.813927
2 -0.084124 -1.126096 -0.546502 -1.146282
3 -0.075791  1.405128 -0.976699  1.474235
4 -0.370291 -0.583410  0.635501  0.165744
5  1.027374 -0.303083  0.514810  0.048940
6  0.177177  1.875743 -0.406614 -0.413840
7 -0.556008 -0.153022  0.994405  0.798631
8  1.180055 -0.028602 -0.910111  1.187928
9  0.289311 -0.645251 -0.075802 -0.005709

Join

left = pd.DataFrame({'key':['foo', 'foo'], 'lval':[1, 2]})
right = pd.DataFrame({'key':['foo', 'foo'], 'rval':[4, 5]})
left
   key  lval
0  foo     1
1  foo     2

right
   key  rval
0  foo     4
1  foo     5

pd.merge(left, right, on='key')
   key  lval  rval
0  foo     1     4
1  foo     1     5
2  foo     2     4
3  foo     2     5

left = pd.DataFrame({'key':['foo', 'bar'], 'lval':[1, 2]})
right = pd.DataFrame({'key':['foo', 'bar'], 'rval':[4, 5]})
left
   key  lval
0  foo     1
1  bar     2

right
   key  rval
0  foo     4
1  bar     5

pd.merge(left, right, on='key')
   key  lval  rval
0  foo     1     4
1  bar     2     5

Append

df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
df
          A         B         C         D
0 -0.010348 -1.092837 -1.243382 -2.109873
1 -1.242149 -0.109368 -1.450247  0.707071
2 -1.026163  1.243612  0.775890 -0.781992
3  0.163503  1.237187  0.835743  0.957505
4  1.499561 -0.699390  0.617964 -1.165497
5 -0.777156  0.182997  0.940488  1.112792
6 -0.344106 -1.180250  0.199295  1.158537
7 -0.806075  1.524947  1.048141  0.532292

s = df.iloc[3]
s
A    0.163503
B    1.237187
C    0.835743
D    0.957505
Name: 3, dtype: float64
df.append(s, ignore_index=True)  # note: DataFrame.append was later deprecated and removed; pd.concat is the replacement
          A         B         C         D
0 -0.010348 -1.092837 -1.243382 -2.109873
1 -1.242149 -0.109368 -1.450247  0.707071
2 -1.026163  1.243612  0.775890 -0.781992
3  0.163503  1.237187  0.835743  0.957505
4  1.499561 -0.699390  0.617964 -1.165497
5 -0.777156  0.182997  0.940488  1.112792
6 -0.344106 -1.180250  0.199295  1.158537
7 -0.806075  1.524947  1.048141  0.532292
8  0.163503  1.237187  0.835743  0.957505

Grouping

By “group by” we are referring to a process involving one or more of the following steps:

  • Splitting the data into groups based on some criteria
  • Applying a function to each group independently
  • Combining the results into a data structure
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                         'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three',
                         'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})
df
     A      B         C         D
0  foo    one -1.114415 -1.077195
1  bar    one  1.607636 -0.917627
2  foo    two  1.276651 -0.199080
3  bar  three  1.080608  0.803381
4  foo    two  0.179543 -0.991834
5  bar    two  0.044457  0.450798
6  foo    one  1.026045  0.458458
7  foo  three  0.578885  0.919283

df.groupby('A').sum()
            C         D
A
bar  2.732701  0.336552
foo  1.946709 -0.890367

df.groupby(['A', 'B']).sum()
                  C         D
A   B
bar one    1.607636 -0.917627
    three  1.080608  0.803381
    two    0.044457  0.450798
foo one   -0.088370 -0.618736
    three  0.578885  0.919283
    two    1.456194 -1.190914
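
To make the “splitting” step visible, you can also iterate over the GroupBy object directly; a minimal sketch using the df above:

for name, group in df.groupby('A'):
    # each group is a sub-DataFrame holding the rows where A == name
    print(name, len(group))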

Reshaping

Stack

tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',
                     'foo', 'foo', 'qux', 'qux'],
                    ['one', 'two', 'one', 'two',
                     'one', 'two', 'one', 'two']]))
tuples
[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
index
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
df2 = df[:4]
df2
                     A         B
first second
bar   one     0.993661  1.034926
      two    -1.308428 -0.548574
baz   one     1.203660  1.603470
      two    -2.770462 -0.834044

df
                     A         B
first second
bar   one     0.993661  1.034926
      two    -1.308428 -0.548574
baz   one     1.203660  1.603470
      two    -2.770462 -0.834044
foo   one     1.384250 -0.198922
      two     1.831291 -0.232852
qux   one    -0.619321  0.766814
      two     0.971604 -0.044436

The stack() method “compresses” a level in the DataFrame’s columns.

stacked = df2.stack()
stacked
first  second   
bar    one     A    0.993661
               B    1.034926
       two     A   -1.308428
               B   -0.548574
baz    one     A    1.203660
               B    1.603470
       two     A   -2.770462
               B   -0.834044
dtype: float64
stacked.unstack()
                     A         B
first second
bar   one     0.993661  1.034926
      two    -1.308428 -0.548574
baz   one     1.203660  1.603470
      two    -2.770462 -0.834044

stacked.unstack(1)
second        one       two
first
bar   A  0.993661 -1.308428
      B  1.034926 -0.548574
baz   A  1.203660 -2.770462
      B  1.603470 -0.834044

stacked.unstack(0)
first         bar       baz
second
one    A  0.993661  1.203660
       B  1.034926  1.603470
two    A -1.308428 -2.770462
       B -0.548574 -0.834044

Pivot Tables

df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                   'B': ['A', 'B', 'C'] * 4,
                   'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': np.random.randn(12),
                   'E': np.random.randn(12)})
df
        A  B    C         D         E
0     one  A  foo  1.355760  1.065220
1     one  B  foo -0.178326 -0.485148
2     two  C  foo  0.688998 -0.444242
3   three  A  bar  1.220353  2.014127
4     one  B  bar  0.240032 -0.636026
5     one  C  bar -1.126072 -0.885649
6     two  A  foo  0.787169 -1.645967
7   three  B  foo  0.831231 -0.745306
8     one  C  foo  0.687459  0.903137
9     one  A  bar  0.265186  0.090426
10    two  B  bar -0.681013 -0.556909
11  three  C  bar  0.975782  1.240485

pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
C             bar       foo
A     B
one   A  0.265186  1.355760
      B  0.240032 -0.178326
      C -1.126072  0.687459
three A  1.220353       NaN
      B       NaN  0.831231
      C  0.975782       NaN
two   A       NaN  0.787169
      B -0.681013       NaN
      C       NaN  0.688998

Time Series

pandas has simple, powerful, and efficient functionality for performing resampling operations during frequency conversion (e.g., converting secondly data into 5-minutely data). This is extremely common in, but not limited to, financial applications.

rng = pd.date_range('1/1/2012', periods=100, freq='S')
rng
DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               '2012-01-01 00:00:10', '2012-01-01 00:00:11',
               '2012-01-01 00:00:12', '2012-01-01 00:00:13',
               '2012-01-01 00:00:14', '2012-01-01 00:00:15',
               '2012-01-01 00:00:16', '2012-01-01 00:00:17',
               '2012-01-01 00:00:18', '2012-01-01 00:00:19',
               '2012-01-01 00:00:20', '2012-01-01 00:00:21',
               '2012-01-01 00:00:22', '2012-01-01 00:00:23',
               '2012-01-01 00:00:24', '2012-01-01 00:00:25',
               '2012-01-01 00:00:26', '2012-01-01 00:00:27',
               '2012-01-01 00:00:28', '2012-01-01 00:00:29',
               '2012-01-01 00:00:30', '2012-01-01 00:00:31',
               '2012-01-01 00:00:32', '2012-01-01 00:00:33',
               '2012-01-01 00:00:34', '2012-01-01 00:00:35',
               '2012-01-01 00:00:36', '2012-01-01 00:00:37',
               '2012-01-01 00:00:38', '2012-01-01 00:00:39',
               '2012-01-01 00:00:40', '2012-01-01 00:00:41',
               '2012-01-01 00:00:42', '2012-01-01 00:00:43',
               '2012-01-01 00:00:44', '2012-01-01 00:00:45',
               '2012-01-01 00:00:46', '2012-01-01 00:00:47',
               '2012-01-01 00:00:48', '2012-01-01 00:00:49',
               '2012-01-01 00:00:50', '2012-01-01 00:00:51',
               '2012-01-01 00:00:52', '2012-01-01 00:00:53',
               '2012-01-01 00:00:54', '2012-01-01 00:00:55',
               '2012-01-01 00:00:56', '2012-01-01 00:00:57',
               '2012-01-01 00:00:58', '2012-01-01 00:00:59',
               '2012-01-01 00:01:00', '2012-01-01 00:01:01',
               '2012-01-01 00:01:02', '2012-01-01 00:01:03',
               '2012-01-01 00:01:04', '2012-01-01 00:01:05',
               '2012-01-01 00:01:06', '2012-01-01 00:01:07',
               '2012-01-01 00:01:08', '2012-01-01 00:01:09',
               '2012-01-01 00:01:10', '2012-01-01 00:01:11',
               '2012-01-01 00:01:12', '2012-01-01 00:01:13',
               '2012-01-01 00:01:14', '2012-01-01 00:01:15',
               '2012-01-01 00:01:16', '2012-01-01 00:01:17',
               '2012-01-01 00:01:18', '2012-01-01 00:01:19',
               '2012-01-01 00:01:20', '2012-01-01 00:01:21',
               '2012-01-01 00:01:22', '2012-01-01 00:01:23',
               '2012-01-01 00:01:24', '2012-01-01 00:01:25',
               '2012-01-01 00:01:26', '2012-01-01 00:01:27',
               '2012-01-01 00:01:28', '2012-01-01 00:01:29',
               '2012-01-01 00:01:30', '2012-01-01 00:01:31',
               '2012-01-01 00:01:32', '2012-01-01 00:01:33',
               '2012-01-01 00:01:34', '2012-01-01 00:01:35',
               '2012-01-01 00:01:36', '2012-01-01 00:01:37',
               '2012-01-01 00:01:38', '2012-01-01 00:01:39'],
              dtype='datetime64[ns]', freq='S')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts
2012-01-01 00:00:00    244
2012-01-01 00:00:01    321
2012-01-01 00:00:02    289
2012-01-01 00:00:03    285
2012-01-01 00:00:04     25
2012-01-01 00:00:05    236
2012-01-01 00:00:06    299
2012-01-01 00:00:07    373
2012-01-01 00:00:08    224
2012-01-01 00:00:09    342
2012-01-01 00:00:10     30
2012-01-01 00:00:11    460
2012-01-01 00:00:12    274
2012-01-01 00:00:13    125
2012-01-01 00:00:14     34
2012-01-01 00:00:15    293
2012-01-01 00:00:16    453
2012-01-01 00:00:17    300
2012-01-01 00:00:18    217
2012-01-01 00:00:19    422
2012-01-01 00:00:20     17
2012-01-01 00:00:21     12
2012-01-01 00:00:22    267
2012-01-01 00:00:23     10
2012-01-01 00:00:24    232
2012-01-01 00:00:25    305
2012-01-01 00:00:26    180
2012-01-01 00:00:27     89
2012-01-01 00:00:28    203
2012-01-01 00:00:29    477
                      ... 
2012-01-01 00:01:10    290
2012-01-01 00:01:11    149
2012-01-01 00:01:12     15
2012-01-01 00:01:13    279
2012-01-01 00:01:14    356
2012-01-01 00:01:15    316
2012-01-01 00:01:16    219
2012-01-01 00:01:17    471
2012-01-01 00:01:18     43
2012-01-01 00:01:19     91
2012-01-01 00:01:20    273
2012-01-01 00:01:21    134
2012-01-01 00:01:22    319
2012-01-01 00:01:23    150
2012-01-01 00:01:24    390
2012-01-01 00:01:25    412
2012-01-01 00:01:26     92
2012-01-01 00:01:27     43
2012-01-01 00:01:28    358
2012-01-01 00:01:29    146
2012-01-01 00:01:30    328
2012-01-01 00:01:31    109
2012-01-01 00:01:32    171
2012-01-01 00:01:33    247
2012-01-01 00:01:34    333
2012-01-01 00:01:35    173
2012-01-01 00:01:36    251
2012-01-01 00:01:37    375
2012-01-01 00:01:38     48
2012-01-01 00:01:39     28
Freq: S, Length: 100, dtype: int64
ts.resample('5Min').sum()
2012-01-01    22829
Freq: 5T, dtype: int64

Time Zone Representation

rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D')
ts = pd.Series(np.random.randn(len(rng)), rng)
ts
2012-03-06    1.252475
2012-03-07    1.710185
2012-03-08    0.054448
2012-03-09    1.164110
2012-03-10    0.477958
Freq: D, dtype: float64
ts_utc = ts.tz_localize('UTC')
ts_utc
2012-03-06 00:00:00+00:00    1.252475
2012-03-07 00:00:00+00:00    1.710185
2012-03-08 00:00:00+00:00    0.054448
2012-03-09 00:00:00+00:00    1.164110
2012-03-10 00:00:00+00:00    0.477958
Freq: D, dtype: float64

Convert to another time zone

ts_utc.tz_convert('US/Eastern')
2012-03-05 19:00:00-05:00    1.252475
2012-03-06 19:00:00-05:00    1.710185
2012-03-07 19:00:00-05:00    0.054448
2012-03-08 19:00:00-05:00    1.164110
2012-03-09 19:00:00-05:00    0.477958
Freq: D, dtype: float64
rng = pd.date_range('1/1/2012', periods=5, freq='M')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
2012-01-31   -0.829642
2012-02-29    1.487379
2012-03-31   -2.397151
2012-04-30   -1.783455
2012-05-31   -0.312436
Freq: M, dtype: float64
ps = ts.to_period()
ps
2012-01   -0.829642
2012-02    1.487379
2012-03   -2.397151
2012-04   -1.783455
2012-05   -0.312436
Freq: M, dtype: float64
ps.to_timestamp()
2012-01-01   -0.829642
2012-02-01    1.487379
2012-03-01   -2.397151
2012-04-01   -1.783455
2012-05-01   -0.312436
Freq: MS, dtype: float64
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
ts = pd.Series(np.random.randn(len(prng)), prng)
# Convert each quarter to 9am on the first day of the month following
# the quarter end ('e' = period end, 's' = period start).
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
ts.head()
1990-03-01 09:00    0.913565
1990-06-01 09:00   -1.160135
1990-09-01 09:00   -0.539973
1990-12-01 09:00    0.253567
1991-03-01 09:00    0.683897
Freq: H, dtype: float64
prng
PeriodIndex(['1990Q1', '1990Q2', '1990Q3', '1990Q4', '1991Q1', '1991Q2',
             '1991Q3', '1991Q4', '1992Q1', '1992Q2', '1992Q3', '1992Q4',
             '1993Q1', '1993Q2', '1993Q3', '1993Q4', '1994Q1', '1994Q2',
             '1994Q3', '1994Q4', '1995Q1', '1995Q2', '1995Q3', '1995Q4',
             '1996Q1', '1996Q2', '1996Q3', '1996Q4', '1997Q1', '1997Q2',
             '1997Q3', '1997Q4', '1998Q1', '1998Q2', '1998Q3', '1998Q4',
             '1999Q1', '1999Q2', '1999Q3', '1999Q4', '2000Q1', '2000Q2',
             '2000Q3', '2000Q4'],
            dtype='period[Q-NOV]', freq='Q-NOV')

Categoricals

df = pd.DataFrame({'id':[1, 2, 3, 4, 5, 6], 'raw_grade':['a', 'b', 'b', 'a', 'a', 'e']})
df
   id raw_grade
0   1         a
1   2         b
2   3         b
3   4         a
4   5         a
5   6         e

df['grade'] = df['raw_grade'].astype('category')
df
   id raw_grade grade
0   1         a     a
1   2         b     b
2   3         b     b
3   4         a     a
4   5         a     a
5   6         e     e

df['grade']
0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): [a, b, e]
df.dtypes
id              int64
raw_grade      object
grade        category
dtype: object
df['grade'].cat.categories = ['very good', 'good', 'very bad']
df
   id raw_grade      grade
0   1         a  very good
1   2         b       good
2   3         b       good
3   4         a  very good
4   5         a  very good
5   6         e   very bad

df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
df
   id raw_grade      grade
0   1         a  very good
1   2         b       good
2   3         b       good
3   4         a  very good
4   5         a  very good
5   6         e   very bad

df['grade']
0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): [very bad, bad, medium, good, very good]
df.sort_values(by='grade')
   id raw_grade      grade
5   6         e   very bad
1   2         b       good
2   3         b       good
0   1         a  very good
3   4         a  very good
4   5         a  very good

df.groupby('grade').size()
grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64

Plotting

ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
ts
2000-01-01   -2.162238
2000-01-02   -1.768627
2000-01-03    0.405371
2000-01-04   -0.315177
2000-01-05   -1.211224
2000-01-06    0.919112
2000-01-07   -0.195309
2000-01-08    0.091487
2000-01-09    0.936006
2000-01-10    0.056772
2000-01-11    0.637328
2000-01-12    0.250839
2000-01-13    0.660181
2000-01-14    0.367384
2000-01-15    1.480254
2000-01-16   -3.231503
2000-01-17    0.879241
2000-01-18   -0.066371
2000-01-19    0.565191
2000-01-20    0.197344
2000-01-21   -0.049743
2000-01-22   -1.651110
2000-01-23    0.158434
2000-01-24   -1.336556
2000-01-25   -0.130634
2000-01-26    0.373046
2000-01-27    0.486330
2000-01-28    1.789111
2000-01-29   -0.631727
2000-01-30    0.357759
                ...   
2002-08-28    0.783561
2002-08-29    1.981284
2002-08-30    0.236757
2002-08-31    0.548455
2002-09-01   -0.330292
2002-09-02   -1.025787
2002-09-03   -1.508711
2002-09-04   -1.594106
2002-09-05   -0.083620
2002-09-06   -0.220504
2002-09-07    0.077958
2002-09-08    1.248833
2002-09-09    1.778093
2002-09-10    0.296462
2002-09-11   -0.291047
2002-09-12    0.147860
2002-09-13    0.054481
2002-09-14   -0.598458
2002-09-15   -0.345856
2002-09-16    0.369345
2002-09-17   -0.542375
2002-09-18    0.784640
2002-09-19   -1.374230
2002-09-20   -1.132805
2002-09-21    0.385017
2002-09-22    0.709930
2002-09-23   -0.867157
2002-09-24   -0.011137
2002-09-25   -1.980906
2002-09-26    1.397507
Freq: D, Length: 1000, dtype: float64
ts = ts.cumsum()
ts
2000-01-01    -2.162238
2000-01-02    -3.930865
2000-01-03    -3.525494
2000-01-04    -3.840671
2000-01-05    -5.051895
2000-01-06    -4.132783
2000-01-07    -4.328092
2000-01-08    -4.236606
2000-01-09    -3.300600
2000-01-10    -3.243828
2000-01-11    -2.606499
2000-01-12    -2.355661
2000-01-13    -1.695480
2000-01-14    -1.328096
2000-01-15     0.152158
2000-01-16    -3.079345
2000-01-17    -2.200104
2000-01-18    -2.266475
2000-01-19    -1.701284
2000-01-20    -1.503940
2000-01-21    -1.553683
2000-01-22    -3.204794
2000-01-23    -3.046359
2000-01-24    -4.382915
2000-01-25    -4.513549
2000-01-26    -4.140503
2000-01-27    -3.654174
2000-01-28    -1.865063
2000-01-29    -2.496789
2000-01-30    -2.139030
                ...    
2002-08-28   -23.899497
2002-08-29   -21.918212
2002-08-30   -21.681456
2002-08-31   -21.133001
2002-09-01   -21.463292
2002-09-02   -22.489079
2002-09-03   -23.997790
2002-09-04   -25.591896
2002-09-05   -25.675517
2002-09-06   -25.896021
2002-09-07   -25.818062
2002-09-08   -24.569230
2002-09-09   -22.791137
2002-09-10   -22.494675
2002-09-11   -22.785722
2002-09-12   -22.637863
2002-09-13   -22.583382
2002-09-14   -23.181840
2002-09-15   -23.527696
2002-09-16   -23.158351
2002-09-17   -23.700727
2002-09-18   -22.916086
2002-09-19   -24.290317
2002-09-20   -25.423122
2002-09-21   -25.038104
2002-09-22   -24.328174
2002-09-23   -25.195331
2002-09-24   -25.206468
2002-09-25   -27.187374
2002-09-26   -25.789867
Freq: D, Length: 1000, dtype: float64
%matplotlib inline
ts.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fc735f83b70>

[figure: line plot of the cumulative-sum time series]

df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=['A', 'B', 'C', 'D'])
df = df.cumsum()
plt.figure()
df.plot()
plt.legend(loc='best')
<matplotlib.legend.Legend at 0x7fc7361722b0>

<matplotlib.figure.Figure at 0x7fc735fd6198>

[figure: line plot of the four cumulative-sum columns A-D, with legend]

Getting Data In/Out

CSV

df.to_csv('foo.csv')
pd.read_csv('foo.csv')
     Unnamed: 0          A          B          C         D
0    2000-01-01  -0.450872   0.395481   0.381578  1.370232
1    2000-01-02  -0.025248   0.637189   2.049532  0.466688
2    2000-01-03  -0.036668   0.087789   0.758993  0.217863
3    2000-01-04   0.170818   1.794943   0.323661  0.236688
4    2000-01-05   0.110068   1.556572   0.111850  0.922194
5    2000-01-06   0.178794   1.057431   0.419542  1.425594
6    2000-01-07  -0.950411   0.937755  -1.219101 -1.422304
7    2000-01-08  -1.036541   0.557909  -4.537197 -1.460412
8    2000-01-09  -0.032446   1.711535  -4.827508 -0.237993
9    2000-01-10  -1.807038   1.744254  -3.316153  1.231774
10   2000-01-11  -0.313481   1.862218  -2.685499  4.072020
11   2000-01-12  -0.472608   2.521596  -2.052473  5.084775
12   2000-01-13  -0.148892   3.313562  -0.227369  6.297979
13   2000-01-14   0.270380   2.962082   0.097617  6.147823
14   2000-01-15  -0.701600   4.053945  -0.245472  6.265729
15   2000-01-16   0.419939   3.971690   0.877146  7.414280
16   2000-01-17   0.772095   2.917321   1.542346  5.491818
17   2000-01-18   0.847531   2.595248   2.641126  6.147717
18   2000-01-19   0.489138   3.462223   2.534211  5.399866
19   2000-01-20  -0.568162   1.784301   2.697018  7.044541
20   2000-01-21  -0.190166   1.594794   2.383903  5.663474
21   2000-01-22   0.506410   0.744354   2.116034  7.488447
22   2000-01-23   1.749586   0.140688   2.512237  8.804555
23   2000-01-24  -1.300250  -0.798619   3.315671  8.504480
24   2000-01-25  -0.671120  -0.312907   4.687308  8.427058
25   2000-01-26   0.539004  -0.586919   3.529719  6.900905
26   2000-01-27  -0.322464  -1.432399   4.854299  8.102648
27   2000-01-28  -0.049216  -0.998291   3.351413  9.908898
28   2000-01-29   0.387226  -1.653189   3.613029  9.805569
29   2000-01-30   1.176226  -1.884252   2.938505  9.589174
..          ...        ...        ...        ...       ...
970  2002-08-28 -42.010720  32.657803  32.730176  5.405271
971  2002-08-29 -41.928908  31.961678  33.606972  4.608168
972  2002-08-30 -42.360815  30.102908  34.249017  4.320361
973  2002-08-31 -42.035245  31.719768  33.841647  5.316658
974  2002-09-01 -42.023462  31.277771  32.224844  4.491348
975  2002-09-02 -41.121725  30.576611  33.823500  3.783267
976  2002-09-03 -42.336403  29.668299  34.613157  3.856769
977  2002-09-04 -41.955082  28.741731  34.133296  3.824269
978  2002-09-05 -43.073106  29.933133  33.035272  3.558779
979  2002-09-06 -42.857949  29.825473  34.213828  2.164331
980  2002-09-07 -42.155893  29.107456  35.303512  2.868834
981  2002-09-08 -42.101632  29.097744  35.967004  3.297489
982  2002-09-09 -42.729712  29.139733  33.576082  3.727289
983  2002-09-10 -42.603725  29.138196  34.148339  3.314936
984  2002-09-11 -43.306158  29.303346  33.547137  3.292155
985  2002-09-12 -42.916630  31.208726  34.173067  5.066093
986  2002-09-13 -42.238570  30.869320  33.480628  4.901099
987  2002-09-14 -41.676238  31.758266  36.151867  3.944074
988  2002-09-15 -42.114611  30.320999  36.610312  1.602666
989  2002-09-16 -40.724451  30.896120  38.311355  2.015574
990  2002-09-17 -40.235064  31.141360  37.147181  3.570815
991  2002-09-18 -39.931452  31.655201  37.492007  4.146526
992  2002-09-19 -39.617204  29.786372  36.664344  3.721535
993  2002-09-20 -39.521188  30.992560  36.671998  3.753192
994  2002-09-21 -39.433329  32.390220  36.189576  3.943903
995  2002-09-22 -38.268539  32.991980  35.538837  4.397427
996  2002-09-23 -37.280640  31.944266  34.723265  3.195354
997  2002-09-24 -35.703543  29.162965  33.190010  3.719085
998  2002-09-25 -34.481000  28.463854  34.229579  3.300110
999  2002-09-26 -34.835530  26.924530  35.479308  5.618561

1000 rows × 5 columns


HDF5

df.to_hdf('foo.h5', 'df')
pd.read_hdf('foo.h5', 'df')
                    A          B          C         D
2000-01-01  -0.450872   0.395481   0.381578  1.370232
2000-01-02  -0.025248   0.637189   2.049532  0.466688
2000-01-03  -0.036668   0.087789   0.758993  0.217863
2000-01-04   0.170818   1.794943   0.323661  0.236688
2000-01-05   0.110068   1.556572   0.111850  0.922194
2000-01-06   0.178794   1.057431   0.419542  1.425594
2000-01-07  -0.950411   0.937755  -1.219101 -1.422304
2000-01-08  -1.036541   0.557909  -4.537197 -1.460412
2000-01-09  -0.032446   1.711535  -4.827508 -0.237993
2000-01-10  -1.807038   1.744254  -3.316153  1.231774
2000-01-11  -0.313481   1.862218  -2.685499  4.072020
2000-01-12  -0.472608   2.521596  -2.052473  5.084775
2000-01-13  -0.148892   3.313562  -0.227369  6.297979
2000-01-14   0.270380   2.962082   0.097617  6.147823
2000-01-15  -0.701600   4.053945  -0.245472  6.265729
2000-01-16   0.419939   3.971690   0.877146  7.414280
2000-01-17   0.772095   2.917321   1.542346  5.491818
2000-01-18   0.847531   2.595248   2.641126  6.147717
2000-01-19   0.489138   3.462223   2.534211  5.399866
2000-01-20  -0.568162   1.784301   2.697018  7.044541
2000-01-21  -0.190166   1.594794   2.383903  5.663474
2000-01-22   0.506410   0.744354   2.116034  7.488447
2000-01-23   1.749586   0.140688   2.512237  8.804555
2000-01-24  -1.300250  -0.798619   3.315671  8.504480
2000-01-25  -0.671120  -0.312907   4.687308  8.427058
2000-01-26   0.539004  -0.586919   3.529719  6.900905
2000-01-27  -0.322464  -1.432399   4.854299  8.102648
2000-01-28  -0.049216  -0.998291   3.351413  9.908898
2000-01-29   0.387226  -1.653189   3.613029  9.805569
2000-01-30   1.176226  -1.884252   2.938505  9.589174
...               ...        ...        ...       ...
2002-08-28 -42.010720  32.657803  32.730176  5.405271
2002-08-29 -41.928908  31.961678  33.606972  4.608168
2002-08-30 -42.360815  30.102908  34.249017  4.320361
2002-08-31 -42.035245  31.719768  33.841647  5.316658
2002-09-01 -42.023462  31.277771  32.224844  4.491348
2002-09-02 -41.121725  30.576611  33.823500  3.783267
2002-09-03 -42.336403  29.668299  34.613157  3.856769
2002-09-04 -41.955082  28.741731  34.133296  3.824269
2002-09-05 -43.073106  29.933133  33.035272  3.558779
2002-09-06 -42.857949  29.825473  34.213828  2.164331
2002-09-07 -42.155893  29.107456  35.303512  2.868834
2002-09-08 -42.101632  29.097744  35.967004  3.297489
2002-09-09 -42.729712  29.139733  33.576082  3.727289
2002-09-10 -42.603725  29.138196  34.148339  3.314936
2002-09-11 -43.306158  29.303346  33.547137  3.292155
2002-09-12 -42.916630  31.208726  34.173067  5.066093
2002-09-13 -42.238570  30.869320  33.480628  4.901099
2002-09-14 -41.676238  31.758266  36.151867  3.944074
2002-09-15 -42.114611  30.320999  36.610312  1.602666
2002-09-16 -40.724451  30.896120  38.311355  2.015574
2002-09-17 -40.235064  31.141360  37.147181  3.570815
2002-09-18 -39.931452  31.655201  37.492007  4.146526
2002-09-19 -39.617204  29.786372  36.664344  3.721535
2002-09-20 -39.521188  30.992560  36.671998  3.753192
2002-09-21 -39.433329  32.390220  36.189576  3.943903
2002-09-22 -38.268539  32.991980  35.538837  4.397427
2002-09-23 -37.280640  31.944266  34.723265  3.195354
2002-09-24 -35.703543  29.162965  33.190010  3.719085
2002-09-25 -34.481000  28.463854  34.229579  3.300110
2002-09-26 -34.835530  26.924530  35.479308  5.618561

1000 rows × 4 columns


Excel

df.to_excel('foo.xlsx', sheet_name='Sheet1')
pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
                    A          B          C         D
2000-01-01  -0.450872   0.395481   0.381578  1.370232
2000-01-02  -0.025248   0.637189   2.049532  0.466688
2000-01-03  -0.036668   0.087789   0.758993  0.217863
2000-01-04   0.170818   1.794943   0.323661  0.236688
2000-01-05   0.110068   1.556572   0.111850  0.922194
2000-01-06   0.178794   1.057431   0.419542  1.425594
2000-01-07  -0.950411   0.937755  -1.219101 -1.422304
2000-01-08  -1.036541   0.557909  -4.537197 -1.460412
2000-01-09  -0.032446   1.711535  -4.827508 -0.237993
2000-01-10  -1.807038   1.744254  -3.316153  1.231774
2000-01-11  -0.313481   1.862218  -2.685499  4.072020
2000-01-12  -0.472608   2.521596  -2.052473  5.084775
2000-01-13  -0.148892   3.313562  -0.227369  6.297979
2000-01-14   0.270380   2.962082   0.097617  6.147823
2000-01-15  -0.701600   4.053945  -0.245472  6.265729
2000-01-16   0.419939   3.971690   0.877146  7.414280
2000-01-17   0.772095   2.917321   1.542346  5.491818
2000-01-18   0.847531   2.595248   2.641126  6.147717
2000-01-19   0.489138   3.462223   2.534211  5.399866
2000-01-20  -0.568162   1.784301   2.697018  7.044541
2000-01-21  -0.190166   1.594794   2.383903  5.663474
2000-01-22   0.506410   0.744354   2.116034  7.488447
2000-01-23   1.749586   0.140688   2.512237  8.804555
2000-01-24  -1.300250  -0.798619   3.315671  8.504480
2000-01-25  -0.671120  -0.312907   4.687308  8.427058
2000-01-26   0.539004  -0.586919   3.529719  6.900905
2000-01-27  -0.322464  -1.432399   4.854299  8.102648
2000-01-28  -0.049216  -0.998291   3.351413  9.908898
2000-01-29   0.387226  -1.653189   3.613029  9.805569
2000-01-30   1.176226  -1.884252   2.938505  9.589174
...               ...        ...        ...       ...
2002-08-28 -42.010720  32.657803  32.730176  5.405271
2002-08-29 -41.928908  31.961678  33.606972  4.608168
2002-08-30 -42.360815  30.102908  34.249017  4.320361
2002-08-31 -42.035245  31.719768  33.841647  5.316658
2002-09-01 -42.023462  31.277771  32.224844  4.491348
2002-09-02 -41.121725  30.576611  33.823500  3.783267
2002-09-03 -42.336403  29.668299  34.613157  3.856769
2002-09-04 -41.955082  28.741731  34.133296  3.824269
2002-09-05 -43.073106  29.933133  33.035272  3.558779
2002-09-06 -42.857949  29.825473  34.213828  2.164331
2002-09-07 -42.155893  29.107456  35.303512  2.868834
2002-09-08 -42.101632  29.097744  35.967004  3.297489
2002-09-09 -42.729712  29.139733  33.576082  3.727289
2002-09-10 -42.603725  29.138196  34.148339  3.314936
2002-09-11 -43.306158  29.303346  33.547137  3.292155
2002-09-12 -42.916630  31.208726  34.173067  5.066093
2002-09-13 -42.238570  30.869320  33.480628  4.901099
2002-09-14 -41.676238  31.758266  36.151867  3.944074
2002-09-15 -42.114611  30.320999  36.610312  1.602666
2002-09-16 -40.724451  30.896120  38.311355  2.015574
2002-09-17 -40.235064  31.141360  37.147181  3.570815
2002-09-18 -39.931452  31.655201  37.492007  4.146526
2002-09-19 -39.617204  29.786372  36.664344  3.721535
2002-09-20 -39.521188  30.992560  36.671998  3.753192
2002-09-21 -39.433329  32.390220  36.189576  3.943903
2002-09-22 -38.268539  32.991980  35.538837  4.397427
2002-09-23 -37.280640  31.944266  34.723265  3.195354
2002-09-24 -35.703543  29.162965  33.190010  3.719085
2002-09-25 -34.481000  28.463854  34.229579  3.300110
2002-09-26 -34.835530  26.924530  35.479308  5.618561

1000 rows × 4 columns


Gotchas

if pd.Series([False, True, False]):
    print("I was true")
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-51-9cae3ab0f79f> in <module>()
----> 1 if pd.Series([False, True, False]):
      2     print("I was true")


/root/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in __nonzero__(self)
   1119         raise ValueError("The truth value of a {0} is ambiguous. "
   1120                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
-> 1121                          .format(self.__class__.__name__))
   1122 
   1123     __bool__ = __nonzero__


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
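
The error message points at the fix: reduce the Series to a single boolean explicitly. A minimal sketch:

s = pd.Series([False, True, False])
if s.any():      # True if at least one element is True
    print("at least one was true")
if not s.all():  # True unless every element is True
    print("not all were true")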