Chapter 9: Machine Learning for Time Series (Decision Trees)
This chapter covers tree-based methods, including decision trees, random forests, and XGBoost, applied to features extracted from EEG time series.
# Install cesium, which provides the EEG dataset and feature extraction used below
!pip install cesium
from cesium import datasets
# Load the Andrzejak EEG dataset (raw recordings plus one class label each)
eeg = datasets.fetch_andrzejak()
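A quick look at what fetch_andrzejak() returned is useful before plotting; the sketch below touches only the same keys that are accessed later in the chapter, so nothing new is assumed about the data structure.

# How many recordings were loaded, which class labels occur, and how long one series is
print(len(eeg["measurements"]))
print(sorted(set(eeg["classes"])))
print(len(eeg["measurements"][0]))  # number of samples in the first series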
import matplotlib.pyplot as plt

# Plot example series from three different classes to see how the traces differ
plt.figure(figsize=(10, 7))
plt.subplot(3, 1, 1)
plt.plot(eeg["measurements"][0])
plt.legend([eeg["classes"][0]])   # wrap the label in a list so it is not split per character
plt.subplot(3, 1, 2)
plt.plot(eeg["measurements"][300])
plt.legend([eeg["classes"][300]])
plt.subplot(3, 1, 3)
plt.plot(eeg["measurements"][450])
plt.legend([eeg["classes"][450]])
from cesium.featurize import featurize_time_series as ft

# Hand-pick a small set of summary features to extract from each raw series
features_to_use = ["amplitude",
                   "percent_beyond_1_std",
                   "percent_close_to_median",
                   "skew",
                   "max_slope"]

fset_cesium = ft(times=eeg["times"],
                 values=eeg["measurements"],
                 errors=None,
                 features_to_use=features_to_use,
                 scheduler=None)

fset_cesium.head()
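Before modeling, it is worth confirming that featurization produced one row per raw series; a minimal check, assuming fset_cesium behaves like a pandas DataFrame (consistent with the .head() and .values calls used in this chapter):

# One row of features per input series, one column per requested feature
print(fset_cesium.shape)
print(len(eeg["measurements"]))  # should equal the number of rows above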
import numpy as np

# Sanity-check the percent_beyond_1_std feature by hand for the first series:
# its mean is roughly -4.13 and its standard deviation roughly 40.4
np.std(eeg["measurements"][0])
np.mean(eeg["measurements"][0])

sample_ts = eeg["measurements"][0]
sz = len(sample_ts)
ll = -4.13 - 40.4   # lower limit: mean - 1 std
ul = -4.13 + 40.4   # upper limit: mean + 1 std
quals = [i for i in range(sz) if sample_ts[i] < ll or sample_ts[i] > ul]
len(quals)/sz       # fraction of points more than one std from the mean
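The same check can be written without the hard-coded mean (-4.13) and standard deviation (40.4); a short sketch that recomputes both from the series itself:

# Recompute the fraction of points more than one standard deviation from the mean
arr = np.asarray(sample_ts)
mean, std = arr.mean(), arr.std()
np.mean(np.abs(arr - mean) > std)  # should match len(quals)/sz above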
from sklearn.model_selection import train_test_split

# Split the extracted features and class labels into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    fset_cesium.values, eeg["classes"], random_state=21)

from sklearn.ensemble import RandomForestClassifier

# A deliberately small random forest: 10 shallow trees
rf_clf = RandomForestClassifier(n_estimators=10,
                                max_depth=3,
                                random_state=21)
rf_clf.fit(X_train, y_train)
rf_clf.score(X_test, y_test)
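Accuracy alone does not show which classes the forest confuses; scikit-learn's standard metrics give a per-class view:

# Per-class breakdown of the random forest's test-set predictions
from sklearn.metrics import classification_report, confusion_matrix
rf_preds = rf_clf.predict(X_test)
print(confusion_matrix(y_test, rf_preds))
print(classification_report(y_test, rf_preds))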
import xgboost as xgb

# An XGBoost classifier with matching hyperparameters, for comparison with the forest
xgb_clf = xgb.XGBClassifier(n_estimators=10,
                            max_depth=3,
                            random_state=21)
xgb_clf.fit(X_train, y_train)
xgb_clf.score(X_test, y_test)
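One caveat: recent xgboost releases require integer-encoded class labels, so if the fit above raises an error about invalid classes, encoding the labels first is the usual workaround (a sketch; only needed on those versions):

# Only needed on xgboost versions that reject string labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)
xgb_clf.fit(X_train, y_train_enc)
xgb_clf.score(X_test, y_test_enc)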
import time

# Compare wall-clock training time of the two models on this small dataset
start = time.time()
xgb_clf.fit(X_train, y_train)
end = time.time()
end - start

start = time.time()
rf_clf.fit(X_train, y_train)
end = time.time()
end - start
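A single wall-clock measurement on a dataset this small is noisy; averaging several fits with the standard library's timeit gives a steadier comparison (a sketch):

# Average ten fits per model to smooth out timing noise
import timeit
print(timeit.timeit(lambda: xgb_clf.fit(X_train, y_train), number=10) / 10)
print(timeit.timeit(lambda: rf_clf.fit(X_train, y_train), number=10) / 10)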
# Shrink the trees to max_depth=2 and compare test accuracy again
xgb_clf = xgb.XGBClassifier(n_estimators=10,
                            max_depth=2,
                            random_state=21)
xgb_clf.fit(X_train, y_train)
xgb_clf.score(X_test, y_test)

rf_clf = RandomForestClassifier(n_estimators=10,
                                max_depth=2,
                                random_state=21)
rf_clf.fit(X_train, y_train)
rf_clf.score(X_test, y_test)

# ...and once more with stumps (max_depth=1); a consolidated sweep follows below
xgb_clf = xgb.XGBClassifier(n_estimators=10,
                            max_depth=1,
                            random_state=21)
xgb_clf.fit(X_train, y_train)
xgb_clf.score(X_test, y_test)

rf_clf = RandomForestClassifier(n_estimators=10,
                                max_depth=1,
                                random_state=21)
rf_clf.fit(X_train, y_train)
rf_clf.score(X_test, y_test)
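The three manual runs above vary only max_depth, so they collapse into a single sweep; a sketch that prints both models' test accuracy per depth (reuse whichever label encoding worked for XGBoost above):

# Sweep tree depth once and compare random forest vs. XGBoost test accuracy
for depth in (1, 2, 3):
    rf = RandomForestClassifier(n_estimators=10, max_depth=depth, random_state=21)
    bst = xgb.XGBClassifier(n_estimators=10, max_depth=depth, random_state=21)
    rf.fit(X_train, y_train)
    bst.fit(X_train, y_train)
    print(depth, rf.score(X_test, y_test), bst.score(X_test, y_test))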