7장 시계열을 위한 상태공간 모델 - Python (BSTS)
시계열을 위한 상태공간 모델 중 BSTS에 대한 소스코드 입니다.
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = [8, 3]
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels
import scipy
from scipy.stats import pearsonr
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
print(matplotlib.__version__)
print(pd.__version__)
print(np.__version__)
print(statsmodels.__version__)
print(scipy.__version__)
## data obtained from https://datahub.io/core/global-temp#data
df = pd.read_csv("global_temps.csv")
df.head()
df.Mean[:100].plot()
df = df.pivot(index='Date', columns='Source', values='Mean')
df.head()
df.GCAG.plot()
type(df.index)
df.index = pd.to_datetime(df.index)
type(df.index)
df.GCAG.plot()
df['1880']
plt.plot(df['1880':'1950'][['GCAG', 'GISTEMP']])
plt.plot(df['1950':][['GISTEMP']])
plt.scatter(df['1880':'1900'][['GCAG']], df['1880':'1900'][['GISTEMP']])
plt.scatter(df['1880':'1899'][['GCAG']], df['1881':'1900'][['GISTEMP']])
pearsonr(df['1880':'1899'].GCAG, df['1881':'1900'].GISTEMP)
df['1880':'1899'][['GCAG']].head()
df['1881':'1900'][['GISTEMP']].head()
min(df.index)
max(df.index)
train = df['1960':]
# smooth trend model without seasonal or cyclical components
model = {
'level': 'smooth trend', 'cycle': False, 'seasonal': None,
}
# https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.structural.UnobservedComponents.html
gcag_mod = sm.tsa.UnobservedComponents(train['GCAG'], **model)
gcag_res = gcag_mod.fit()
fig = gcag_res.plot_components(legend_loc='lower right', figsize=(15, 9));
# Perform rolling prediction and multistep forecast
num_steps = 20
predict_res = gcag_res.get_prediction(dynamic=train['GCAG'].shape[0] - num_steps)
predict = predict_res.predicted_mean
ci = predict_res.conf_int()
plt.plot(predict)
plt.scatter(train['GCAG'], predict)
fig, ax = plt.subplots()
# Plot the results
ax.plot(train['GCAG'], 'k.', label='Observations');
ax.plot(train.index[:-num_steps], predict[:-num_steps], label='One-step-ahead Prediction');
ax.plot(train.index[-num_steps:], predict[-num_steps:], 'r', label='Multistep Prediction');
ax.plot(train.index[-num_steps:], ci.iloc[-num_steps:], 'k--');
# Cleanup the image
legend = ax.legend(loc='upper left');
fig, ax = plt.subplots()
# Plot the results
ax.plot(train.index[-40:], train['GCAG'][-40:], 'k.', label='Observations');
ax.plot(train.index[-40:-num_steps], predict[-40:-num_steps], label='One-step-ahead Prediction');
ax.plot(train.index[-num_steps:], predict[-num_steps:], 'r', label='Multistep Prediction');
ax.plot(train.index[-num_steps:], ci.iloc[-num_steps:], 'k--');
# Cleanup the image
legend = ax.legend(loc='upper left');
seasonal_model = {
'level': 'local linear trend',
'seasonal': 12
}
mod = sm.tsa.UnobservedComponents(train['GCAG'], **seasonal_model)
res = mod.fit(method='powell', disp=False)
fig = res.plot_components(legend_loc='lower right', figsize=(15, 9));
pearsonr(gcag_res.predict(), train['GCAG'])
np.mean(np.abs(gcag_res.predict() - train['GCAG']))
np.mean(np.abs(res.predict() - train['GCAG']))
seasonal_model = {
'level': 'local level',
'seasonal': 12
}
llmod = sm.tsa.UnobservedComponents(train['GCAG'], **seasonal_model)
ll_level_res = llmod.fit(method='powell', disp=False)
fig = ll_level_res.plot_components(legend_loc='lower right', figsize=(15, 9));
np.mean(np.abs(ll_level_res.predict() - train['GCAG']))
train[:48].GCAG.plot()
pearsonr(ll_level_res.predict(), train['GCAG'])
pearsonr(train['GCAG'].iloc[:-1, ], train['GCAG'].iloc[1:, ])
np.mean(np.abs(ll_level_res.predict() - train['GCAG']))
np.mean(np.abs(train['GCAG'].iloc[:-1, ].values, train['GCAG'].iloc[1:, ].values))