Chapter 9: Machine Learning for Time Series (Decision Trees)
This chapter covers tree-based methods, including decision trees, random forests, and XGBoost, applied to features extracted from EEG time series.
# Install cesium, which provides the EEG dataset and feature extraction used below
!pip install cesium
from cesium import datasets
# Load the Andrzejak EEG dataset (raw recordings plus one class label each)
eeg = datasets.fetch_andrzejak()
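A quick look at what fetch_andrzejak() returned is useful before plotting; the sketch below touches only the same keys that are accessed later in the chapter, so nothing new is assumed about the data structure.

# How many recordings were loaded, which class labels occur, and how long one series is
print(len(eeg["measurements"]))
print(sorted(set(eeg["classes"])))
print(len(eeg["measurements"][0]))  # number of samples in the first series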
import matplotlib.pyplot as plt

# Plot example series from three different classes to see how the traces differ
plt.figure(figsize=(10, 7))
plt.subplot(3, 1, 1)
plt.plot(eeg["measurements"][0])
plt.legend([eeg["classes"][0]])   # wrap the label in a list so it is not split per character
plt.subplot(3, 1, 2)
plt.plot(eeg["measurements"][300])
plt.legend([eeg["classes"][300]])
plt.subplot(3, 1, 3)
plt.plot(eeg["measurements"][450])
plt.legend([eeg["classes"][450]])
from cesium.featurize import featurize_time_series as ft

# Hand-pick a small set of summary features to extract from each raw series
features_to_use = ["amplitude",
                   "percent_beyond_1_std",
                   "percent_close_to_median",
                   "skew",
                   "max_slope"]

fset_cesium = ft(times=eeg["times"],
                 values=eeg["measurements"],
                 errors=None,
                 features_to_use=features_to_use,
                 scheduler=None)

fset_cesium.head()
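Before modeling, it is worth confirming that featurization produced one row per raw series; a minimal check, assuming fset_cesium behaves like a pandas DataFrame (consistent with the .head() and .values calls used in this chapter):

# One row of features per input series, one column per requested feature
print(fset_cesium.shape)
print(len(eeg["measurements"]))  # should equal the number of rows above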
import numpy as np

# Sanity-check the percent_beyond_1_std feature by hand for the first series:
# its mean is roughly -4.13 and its standard deviation roughly 40.4
np.std(eeg["measurements"][0])
np.mean(eeg["measurements"][0])

sample_ts = eeg["measurements"][0]
sz = len(sample_ts)
ll = -4.13 - 40.4   # lower limit: mean - 1 std
ul = -4.13 + 40.4   # upper limit: mean + 1 std
quals = [i for i in range(sz) if sample_ts[i] < ll or sample_ts[i] > ul]
len(quals)/sz       # fraction of points more than one std from the mean
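The same check can be written without the hard-coded mean (-4.13) and standard deviation (40.4); a short sketch that recomputes both from the series itself:

# Recompute the fraction of points more than one standard deviation from the mean
arr = np.asarray(sample_ts)
mean, std = arr.mean(), arr.std()
np.mean(np.abs(arr - mean) > std)  # should match len(quals)/sz above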
from sklearn.model_selection import train_test_split

# Split the extracted features and class labels into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    fset_cesium.values, eeg["classes"], random_state=21)

from sklearn.ensemble import RandomForestClassifier

# A deliberately small random forest: 10 shallow trees
rf_clf = RandomForestClassifier(n_estimators=10,
                                max_depth=3,
                                random_state=21)
rf_clf.fit(X_train, y_train)
rf_clf.score(X_test, y_test)
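Accuracy alone does not show which classes the forest confuses; scikit-learn's standard metrics give a per-class view:

# Per-class breakdown of the random forest's test-set predictions
from sklearn.metrics import classification_report, confusion_matrix
rf_preds = rf_clf.predict(X_test)
print(confusion_matrix(y_test, rf_preds))
print(classification_report(y_test, rf_preds))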
import xgboost as xgb

# An XGBoost classifier with matching hyperparameters, for comparison with the forest
xgb_clf = xgb.XGBClassifier(n_estimators=10,
                            max_depth=3,
                            random_state=21)
xgb_clf.fit(X_train, y_train)
xgb_clf.score(X_test, y_test)
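One caveat: recent xgboost releases require integer-encoded class labels, so if the fit above raises an error about invalid classes, encoding the labels first is the usual workaround (a sketch; only needed on those versions):

# Only needed on xgboost versions that reject string labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)
xgb_clf.fit(X_train, y_train_enc)
xgb_clf.score(X_test, y_test_enc)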
import time

# Compare wall-clock training time of the two models on this small dataset
start = time.time()
xgb_clf.fit(X_train, y_train)
end = time.time()
end - start

start = time.time()
rf_clf.fit(X_train, y_train)
end = time.time()
end - start
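A single wall-clock measurement on a dataset this small is noisy; averaging several fits with the standard library's timeit gives a steadier comparison (a sketch):

# Average ten fits per model to smooth out timing noise
import timeit
print(timeit.timeit(lambda: xgb_clf.fit(X_train, y_train), number=10) / 10)
print(timeit.timeit(lambda: rf_clf.fit(X_train, y_train), number=10) / 10)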
# Shrink the trees to max_depth=2 and compare test accuracy again
xgb_clf = xgb.XGBClassifier(n_estimators=10,
                            max_depth=2,
                            random_state=21)
xgb_clf.fit(X_train, y_train)
xgb_clf.score(X_test, y_test)

rf_clf = RandomForestClassifier(n_estimators=10,
                                max_depth=2,
                                random_state=21)
rf_clf.fit(X_train, y_train)
rf_clf.score(X_test, y_test)

# ...and once more with stumps (max_depth=1); a consolidated sweep follows below
xgb_clf = xgb.XGBClassifier(n_estimators=10,
                            max_depth=1,
                            random_state=21)
xgb_clf.fit(X_train, y_train)
xgb_clf.score(X_test, y_test)

rf_clf = RandomForestClassifier(n_estimators=10,
                                max_depth=1,
                                random_state=21)
rf_clf.fit(X_train, y_train)
rf_clf.score(X_test, y_test)
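The three manual runs above vary only max_depth, so they collapse into a single sweep; a sketch that prints both models' test accuracy per depth (reuse whichever label encoding worked for XGBoost above):

# Sweep tree depth once and compare random forest vs. XGBoost test accuracy
for depth in (1, 2, 3):
    rf = RandomForestClassifier(n_estimators=10, max_depth=depth, random_state=21)
    bst = xgb.XGBClassifier(n_estimators=10, max_depth=depth, random_state=21)
    rf.fit(X_train, y_train)
    bst.fit(X_train, y_train)
    print(depth, rf.score(X_test, y_test), bst.score(X_test, y_test))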