12.1 오차 행렬

# Train a shallow decision tree (depth 3) as the classifier under evaluation;
# random_state pins the tie-breaking so results are reproducible.
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=42, splitter='best')
# Hard class predictions on the held-out test split.
y_predict = dt.predict(X_test)

# Confusion-matrix cells computed by hand; positive class is 1 (survived).
hit = y_test == y_predict
tp = ((y_test == 1) & hit).sum()   # true positives: 123
tn = ((y_test == 0) & hit).sum()   # true negatives: 199
fp = ((y_test == 0) & ~hit).sum()  # false positives: 25
fn = ((y_test == 1) & ~hit).sum()  # false negatives: 46
from sklearn.metrics import confusion_matrix

# Predicted labels for the test split.
y_predict = dt.predict(X_test)

# Wrap sklearn's raw 2x2 array in a labeled DataFrame so rows (truth)
# and columns (prediction) are readable.
pred_labels = ["Predict died", "Predict Survive"]
true_labels = ["True Death", "True Survive"]
pd.DataFrame(
    confusion_matrix(y_test, y_predict),
    columns=pred_labels,
    index=true_labels,
)
              Predict died  Predict Survive
True Death             199               25
True Survive            48              121
import matplotlib.pyplot as plt
from yellowbrick.classifier import ConfusionMatrix

# Map the integer classes to human-readable names for the plot.
mapping = {0: "died", 1: "survived"}

fig, ax = plt.subplots(figsize=(6, 6))

# Yellowbrick heatmap of the confusion matrix for the fitted model.
cm_viz = ConfusionMatrix(
    dt,
    classes=["died", "survived"],
    label_encoder=mapping,
)
cm_viz.score(X_test, y_test)
cm_viz.poof()

12.3 정확도

# Accuracy: correct predictions over all predictions.
(tn + tp) / (tn + tp + fn + fp)
0.8142493638676844
from sklearn.metrics import accuracy_score

# Cross-check the hand-computed accuracy with sklearn.
y_predict = dt.predict(X_test)
accuracy_score(y_test, y_predict)
0.8142493638676844

12.4 재현율

# Recall (sensitivity): fraction of actual positives that were found.
tp / (fn + tp)
0.7159763313609467
from sklearn.metrics import recall_score

# Cross-check the hand-computed recall with sklearn.
y_predict = dt.predict(X_test)
recall_score(y_test, y_predict)
0.7159763313609467

12.5 정밀도

# Precision: fraction of positive predictions that were correct.
tp / (fp + tp)
0.8287671232876712
from sklearn.metrics import precision_score

# Cross-check the hand-computed precision with sklearn.
y_predict = dt.predict(X_test)
precision_score(y_test, y_predict)
0.8287671232876712

12.6 F1

# F1 score: harmonic mean of precision and recall.
rec = tp / (tp + fn)  # recall
pre = tp / (tp + fp)  # precision
(2 * pre * rec) / (pre + rec)
0.7682539682539683
from sklearn.metrics import f1_score

# Cross-check the hand-computed F1 with sklearn.
y_predict = dt.predict(X_test)
f1_score(y_test, y_predict)
0.7682539682539683

12.7 분류 보고서

import matplotlib.pyplot as plt
from yellowbrick.classifier import ClassificationReport

fig, ax = plt.subplots(figsize=(6, 3))

# Heatmap of per-class precision / recall / F1 for the fitted model.
# (Renamed from cm_viz: this is a classification report, not a
# confusion matrix.)
report_viz = ClassificationReport(
    dt,
    classes=["died", "survived"],
    label_encoder=mapping,
)
report_viz.score(X_test, y_test)
report_viz.poof()

12.8 ROC

from sklearn.metrics import roc_auc_score

# ROC AUC is a ranking metric and must be computed from continuous
# scores, not hard 0/1 labels: with .predict() the ROC "curve" collapses
# to a single operating point and the AUC is understated.
y_score = dt.predict_proba(X_test)[:, 1]  # P(class == 1)
roc_auc_score(y_test, y_score)
0.8021845942519018
from yellowbrick.classifier import ROCAUC

fig, ax = plt.subplots(figsize=(6, 6))

# Per-class ROC curves with AUC for the fitted model.
roc_viz = ROCAUC(dt)
roc_viz.score(X_test, y_test)
roc_viz.poof()

12.9 정밀도-재현율 곡선

from sklearn.metrics import average_precision_score

# Average precision summarizes the precision-recall curve and needs
# continuous scores; hard .predict() labels collapse the curve to a
# single point and misstate the metric.
y_score = dt.predict_proba(X_test)[:, 1]  # P(class == 1)
average_precision_score(y_test, y_score)
0.7155150490642249
from yellowbrick.classifier import PrecisionRecallCurve

fig, ax = plt.subplots(figsize=(6, 4))

# The visualizer re-fits a fresh tree itself; pin random_state=42 so the
# plot is reproducible and consistent with the dt model used elsewhere
# in this chapter (the original omitted it).
viz = PrecisionRecallCurve(
    DecisionTreeClassifier(max_depth=3, random_state=42)
)
viz.fit(X_train, y_train)
print(viz.score(X_test, y_test))
viz.poof()
0.8177126373723864

12.10 누적 이득 도표

import scikitplot

fig, ax = plt.subplots(figsize=(6, 6))

# Cumulative-gains chart needs per-class probabilities, not hard labels.
y_probas = dt.predict_proba(X_test)
scikitplot.metrics.plot_cumulative_gain(y_test, y_probas, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f95c41d89d0>

12.11 리프트 곡선

fig, ax = plt.subplots(figsize=(6, 6))

# Lift curve from per-class probabilities (gain relative to random).
y_probas = dt.predict_proba(X_test)
scikitplot.metrics.plot_lift_curve(y_test, y_probas, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f95c41d4750>

12.12 범주의 균형

from yellowbrick.classifier import ClassBalance

fig, ax = plt.subplots(figsize=(6, 6))

# Bar chart of class frequencies in the test labels.
cb_viz = ClassBalance(labels=["Died", "Survived"])
cb_viz.fit(y_test)
cb_viz.poof()

12.13 범주 예측 오류

from yellowbrick.classifier import ClassPredictionError

fig, ax = plt.subplots(figsize=(6, 6))

# Stacked bars: for each true class, how predictions were distributed.
cpe_viz = ClassPredictionError(dt, classes=["died", "survived"])
cpe_viz.score(X_test, y_test)
cpe_viz.poof()

12.14 차별 임계치

from yellowbrick.classifier import DiscriminationThreshold

fig, ax = plt.subplots(figsize=(6, 5))

# Precision / recall / F1 / queue-rate as the decision threshold sweeps
# over [0, 1].
# NOTE(review): fits on the full X, y rather than the train split, so
# the curves include test rows — confirm this is intended.
dt_viz = DiscriminationThreshold(dt)
dt_viz.fit(X, y)

dt_viz.poof()