【时间序列分析】S04E03时间序列处理的核心技巧

# 1.时间序列的获取

from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
import seaborn
seaborn.set()  # switch matplotlib to seaborn's default plot styling

# Ticker symbol and date range handed to the Yahoo Finance reader.
stock_code = '0700.hk'
start_date = '2000-01-01'
end_date = '2019-05-01'
stock_info = data.get_data_yahoo(stock_code, start_date, end_date)

# Print the first five rows and then the container type, so this snippet
# actually produces the sample output listed right after it (the table of
# High/Low/Open/Close/Volume/Adj Close followed by the DataFrame class line).
print(stock_info.head())
print(type(stock_info))

# Draw the closing-price column as a blue line.
plt.plot(stock_info['Close'], 'b')
plt.show()

             High    Low   Open  Close        Volume  Adj Close
Date
2004-06-16  0.925  0.815  0.875  0.830  2.198875e+09   0.749649
2004-06-17  0.875  0.825  0.830  0.845  4.190075e+08   0.763197
2004-06-18  0.850  0.790  0.840  0.805  1.829900e+08   0.727070
2004-06-21  0.825  0.790  0.820  0.800  1.140850e+08   0.722554
2004-06-23  0.890  0.805  0.810  0.885  2.750800e+08   0.799325

<class 'pandas.core.frame.DataFrame'>

# 2.重采样和频率转换

resample是以数据累计为基础，它获取的不是指定频率下某个时间点的数值，而是整个重采样频率周期内的统计值（均值、和等等）。与之相对，asfreq是以数据选取为基础，它取的是指定频率下对应时间点上的数值。

from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
import seaborn
seaborn.set()  # switch matplotlib to seaborn's default plot styling

# Ticker symbol and date range handed to the Yahoo Finance reader.
stock_code = '0700.hk'
start_date = '2000-01-01'
end_date = '2019-05-01'
stock_info = data.get_data_yahoo(stock_code, start_date, end_date)
print(type(stock_info))

# Build the three series that get compared on one set of axes.
close_prices = stock_info['Close']
# resample('BA'): aggregate each business-year period, here with the mean.
yearly_mean = close_prices.resample('BA').mean()
# asfreq('BA'): select the value sitting on each business-year-end timestamp.
yearly_point = close_prices.asfreq('BA')

# Plot in the same order as the legend labels below.
close_prices.plot(color='b', alpha=0.5, style='-')
yearly_mean.plot(color='r', alpha=0.5, style=':')
yearly_point.plot(color='g', alpha=0.5, style='--')
plt.legend(['original', 'resample', 'asfreq'], loc='upper left')
plt.show()

# 3.关于缺失值的处理

2019-04-04    376.000000
2019-04-08    380.200012
2019-04-09    383.600006
2019-04-10    388.799988
2019-04-11    391.399994
2019-04-12    393.799988
2019-04-15    388.200012
2019-04-16    393.600006
2019-04-17    395.600006
2019-04-18    391.600006
2019-04-23    393.000000
2019-04-24    393.000000
2019-04-25    382.799988
2019-04-26    384.000000
2019-04-29    390.600006
2019-04-30    388.000000
2019-05-02    391.399994
2019-05-03    387.799988
Name: Close, dtype: float64

`