# Notebook shell escape: install the cesium package, which the cells below
# import for the dataset loader and the time-series featurizer.
!pip install cesium
Collecting cesium
  Downloading https://files.pythonhosted.org/packages/c8/73/c998e9983df653e49238a7da0956bb375b23d0559f01f411cca2220a44f7/cesium-0.9.12-cp37-cp37m-manylinux1_x86_64.whl (220kB)
     |████████████████████████████████| 225kB 8.9MB/s 
Requirement already satisfied: toolz in /usr/local/lib/python3.7/dist-packages (from cesium) (0.11.1)
Requirement already satisfied: joblib>=0.14.1 in /usr/local/lib/python3.7/dist-packages (from cesium) (1.0.1)
Requirement already satisfied: dask>=2.5.0 in /usr/local/lib/python3.7/dist-packages (from cesium) (2.12.0)
Collecting gatspy>=0.3.0
  Downloading https://files.pythonhosted.org/packages/b0/fa/a075f6cd3f40255a883e8a966df17322825f6b86cb4907edce06098aa566/gatspy-0.3.tar.gz (554kB)
     |████████████████████████████████| 563kB 8.7MB/s 
Requirement already satisfied: scipy>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from cesium) (1.4.1)
Requirement already satisfied: pandas>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from cesium) (1.1.5)
Requirement already satisfied: scikit-learn>=0.22.1 in /usr/local/lib/python3.7/dist-packages (from cesium) (0.22.2.post1)
Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from cesium) (1.3.0)
Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.7/dist-packages (from scipy>=0.16.0->cesium) (1.19.5)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.17.0->cesium) (2018.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.17.0->cesium) (2.8.1)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.17.0->cesium) (1.15.0)
Building wheels for collected packages: gatspy
  Building wheel for gatspy (setup.py) ... done
  Created wheel for gatspy: filename=gatspy-0.3-cp37-none-any.whl size=43807 sha256=8a8fd8cd1014706c5c49b62bf86eb8ee4147006cf3e4f0c8390711806409a44b
  Stored in directory: /root/.cache/pip/wheels/4f/8f/fa/0d7b250ef21828ca373b21f6b3b6ef0f2a0e3560b69c91e55d
Successfully built gatspy
Installing collected packages: gatspy, cesium
Successfully installed cesium-0.9.12 gatspy-0.3
from cesium import datasets
# Fetch the Andrzejak data set through cesium's loader. The result is indexed
# below by the keys "times", "measurements" and "classes".
eeg = datasets.fetch_andrzejak()
Downloading data from https://github.com/cesium-ml/cesium-data/raw/master/andrzejak/
import matplotlib.pyplot as plt
# Draw three sample recordings (dataset indices 0, 300 and 450) as vertically
# stacked panels of one figure, each annotated with its class label.
plt.figure(figsize=(10, 7))

for panel, idx in enumerate((0, 300, 450), start=1):
    plt.subplot(3, 1, panel)
    plt.plot(eeg["measurements"][idx])
    # legend() takes a *sequence* of labels. A bare string is itself a
    # sequence of characters, so passing it directly would label the single
    # plotted line with only the first character of the class name. Wrapping
    # it in a list keeps the whole label intact.
    plt.legend([eeg["classes"][idx]])
<matplotlib.legend.Legend at 0x7f6a8cacbd10>
from cesium.featurize import featurize_time_series as ft

# Names of the per-series summary features requested from cesium.
features_to_use = [
    "amplitude",
    "percent_beyond_1_std",
    "percent_close_to_median",
    "skew",
    "max_slope",
]

# Featurize every series in the data set. No measurement errors are supplied
# and no scheduler is passed.
fset_cesium = ft(
    times=eeg["times"],
    values=eeg["measurements"],
    errors=None,
    features_to_use=features_to_use,
    scheduler=None,
)

# Peek at the first few rows of the resulting feature table.
fset_cesium.head()
feature amplitude percent_beyond_1_std percent_close_to_median skew max_slope
channel 0 0 0 0 0
0 143.5 0.327313 0.505004 0.032805 11107.796610
1 211.5 0.290212 0.640469 -0.092715 20653.559322
2 165.0 0.302660 0.515987 -0.004100 13537.627119
3 171.5 0.300952 0.541128 0.063678 17008.813559
4 170.0 0.305101 0.566268 0.142753 13016.949153
import numpy as np

# Standard deviation of the first series; used below to sanity-check the
# "percent_beyond_1_std" feature by hand.
np.std(eeg["measurements"][0])
40.41100040099819
# Mean of the first series; the centre of the one-standard-deviation band
# checked below.
np.mean(eeg['measurements'][0])
-4.1320478398828415
# Hand-check of the "percent_beyond_1_std" feature for the first series:
# the fraction of samples lying more than one standard deviation away from
# the series mean.
sample_ts = eeg["measurements"][0]
sz = len(sample_ts)

# Derive the band limits from the series itself rather than from hand-copied,
# rounded constants, so the check stays exact and still works if a different
# series is selected above.
ts_mean = np.mean(sample_ts)
ts_std = np.std(sample_ts)
ll = ts_mean - ts_std
ul = ts_mean + ts_std

# Indices of the samples that fall outside [ll, ul].
quals = [i for i, v in enumerate(sample_ts) if v < ll or v > ul]
len(quals)/sz
0.3273126678057115
from sklearn.model_selection import train_test_split

# Split the feature matrix and the class labels into train and test portions.
# The seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    fset_cesium.values,
    eeg["classes"],
    random_state=21,
)
from sklearn.ensemble import RandomForestClassifier

# Small random forest baseline: 10 trees, each limited to depth 3, seeded for
# reproducibility.
rf_clf = RandomForestClassifier(n_estimators=10, max_depth=3, random_state=21)

# Kept as the final expression so the notebook echoes the fitted estimator.
rf_clf.fit(X_train, y_train)
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=3, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=21, verbose=0,
                       warm_start=False)
# Score the fitted forest on the held-out split.
rf_clf.score(X_test, y_test)
0.616
import xgboost as xgb
# Gradient-boosted counterpart of the forest above, with the same budget:
# 10 estimators, depth 3, same seed. fit() hands back the estimator itself,
# so construction and training are chained.
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    max_depth=3,
    random_state=21,
).fit(X_train, y_train)

# Score on the held-out split.
xgb_clf.score(X_test, y_test)
0.648
import time

# Time a single refit of the boosted model on the training split.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted, which makes it unreliable for a sub-second interval.
start = time.perf_counter()
xgb_clf.fit(X_train, y_train)
end = time.perf_counter()

# Elapsed seconds for this one run (a single sample, so expect some noise).
end - start
0.01726841926574707
# Time a single refit of the random forest on the training split.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted, which makes it unreliable for a sub-second interval.
start = time.perf_counter()
rf_clf.fit(X_train, y_train)
end = time.perf_counter()

# Elapsed seconds for this one run (a single sample, so expect some noise).
end - start
0.01841139793395996
# Repeat the boosted model with shallower trees (depth 2); estimator count
# and seed are unchanged.
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    max_depth=2,
    random_state=21,
).fit(X_train, y_train)

# Score on the held-out split.
xgb_clf.score(X_test, y_test)
0.616
# Repeat the random forest with shallower trees (depth 2); tree count and
# seed are unchanged.
rf_clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=2,
    random_state=21,
).fit(X_train, y_train)

# Score on the held-out split.
rf_clf.score(X_test, y_test)
0.544
# Boosted model restricted to single-split trees (depth 1); estimator count
# and seed are unchanged.
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    max_depth=1,
    random_state=21,
).fit(X_train, y_train)

# Score on the held-out split.
xgb_clf.score(X_test, y_test)
0.632
# Random forest restricted to single-split trees (depth 1); tree count and
# seed are unchanged.
rf_clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=1,
    random_state=21,
).fit(X_train, y_train)

# Score on the held-out split.
rf_clf.score(X_test, y_test)
0.376