Chapter 12: Metrics and Classification
This chapter covers a variety of metrics (evaluation measures) for classification.
- 12.1 Confusion Matrix
- 12.3 Accuracy
- 12.4 Recall
- 12.5 Precision
- 12.6 F1
- 12.7 Classification Report
- 12.8 ROC
- 12.9 Precision-Recall Curve
- 12.10 Cumulative Gains Plot
- 12.11 Lift Curve
- 12.12 Class Balance
- 12.13 Class Prediction Error
- 12.14 Discrimination Threshold
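The snippets below follow the conventions of the earlier chapters: X_train, X_test, y_train, y_test (and the full X, y) hold the cleaned Titanic features and died/survived labels, and the counts quoted in comments (123, 199, ...) come from that split. As a minimal stand-in so the code runs on its own, any binary dataset with these variable names works; the make_classification parameters below are arbitrary placeholders, not the book's data.

# Hypothetical stand-in data, for running the snippets
# without the Titanic pipeline from earlier chapters.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
    n_samples=1000, n_features=10, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)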
# Train a shallow decision tree and generate
# predictions on the hold-out set.
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(
    random_state=42, max_depth=3
)
dt.fit(X_train, y_train)
y_predict = dt.predict(X_test)

# Count the four confusion-matrix cells by hand.
tp = ((y_test == 1) & (y_test == y_predict)).sum()  # 123
tn = ((y_test == 0) & (y_test == y_predict)).sum()  # 199
fp = ((y_test == 0) & (y_test != y_predict)).sum()  # 25
fn = ((y_test == 1) & (y_test != y_predict)).sum()  # 46
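The same four counts come straight out of scikit-learn: for a binary problem, confusion_matrix flattens in tn, fp, fn, tp order.

from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(
    y_test, y_predict
).ravel()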
# The same matrix via scikit-learn, laid out as a
# labeled DataFrame.
import pandas as pd
from sklearn.metrics import confusion_matrix

y_predict = dt.predict(X_test)
pd.DataFrame(
    confusion_matrix(y_test, y_predict),
    columns=[
        "Predicted died",
        "Predicted survived",
    ],
    index=["Actual died", "Actual survived"],
)
# Yellowbrick draws the confusion matrix as a heatmap.
import matplotlib.pyplot as plt
from yellowbrick.classifier import (
    ConfusionMatrix,
)

mapping = {0: "died", 1: "survived"}
fig, ax = plt.subplots(figsize=(6, 6))
cm_viz = ConfusionMatrix(
    dt,
    classes=["died", "survived"],
    label_encoder=mapping,
)
cm_viz.score(X_test, y_test)
cm_viz.poof()  # renamed show() in Yellowbrick 1.0+
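Without Yellowbrick, recent scikit-learn releases can draw the same plot natively; a sketch, assuming a scikit-learn version (1.0+) where from_estimator is available:

# scikit-learn's built-in confusion matrix plot.
from sklearn.metrics import ConfusionMatrixDisplay

ConfusionMatrixDisplay.from_estimator(
    dt,
    X_test,
    y_test,
    display_labels=["died", "survived"],
)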
# Accuracy: the fraction of all predictions that are correct.
(tp + tn) / (tp + tn + fp + fn)
from sklearn.metrics import accuracy_score

y_predict = dt.predict(X_test)
accuracy_score(y_test, y_predict)
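For classifiers, the estimator's own score method reports the same accuracy:

dt.score(X_test, y_test)  # same value as accuracy_score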
# Recall (sensitivity): the fraction of actual
# positives that the model finds.
tp / (tp + fn)
from sklearn.metrics import recall_score

y_predict = dt.predict(X_test)
recall_score(y_test, y_predict)
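recall_score defaults to the positive class (survived, 1); passing pos_label flips the perspective to the other class:

# Recall of the "died" class, i.e. the specificity
# of the "survived" class.
recall_score(y_test, y_predict, pos_label=0)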
# Precision: the fraction of positive predictions
# that are correct.
tp / (tp + fp)
from sklearn.metrics import precision_score

y_predict = dt.predict(X_test)
precision_score(y_test, y_predict)
# F1: the harmonic mean of precision and recall.
pre = tp / (tp + fp)
rec = tp / (tp + fn)
(2 * pre * rec) / (pre + rec)
from sklearn.metrics import f1_score

y_predict = dt.predict(X_test)
f1_score(y_test, y_predict)
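When recall should count more (or less) than precision, fbeta_score generalizes F1; the beta=2 below is just an illustrative choice that weights recall twice as heavily as precision.

from sklearn.metrics import fbeta_score

# beta > 1 favors recall; beta < 1 favors precision.
fbeta_score(y_test, y_predict, beta=2)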
# Yellowbrick's classification report shows precision,
# recall, and F1 for each class as a heatmap.
import matplotlib.pyplot as plt
from yellowbrick.classifier import (
    ClassificationReport,
)

fig, ax = plt.subplots(figsize=(6, 3))
cr_viz = ClassificationReport(
    dt,
    classes=["died", "survived"],
    label_encoder=mapping,
)
cr_viz.score(X_test, y_test)
cr_viz.poof()
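scikit-learn prints the same table as plain text, which is handy outside of a notebook:

from sklearn.metrics import classification_report

print(
    classification_report(
        y_test,
        y_predict,
        target_names=["died", "survived"],
    )
)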
# ROC AUC measures ranking quality, so pass class
# probabilities rather than hard 0/1 predictions.
from sklearn.metrics import roc_auc_score

y_score = dt.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, y_score)
# Yellowbrick plots the full ROC curve along with its AUC.
from yellowbrick.classifier import ROCAUC

fig, ax = plt.subplots(figsize=(6, 6))
roc_viz = ROCAUC(dt)
roc_viz.score(X_test, y_test)
roc_viz.poof()
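To see the points behind the plot, roc_curve returns the FPR/TPR pairs for every threshold; a minimal matplotlib sketch:

from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_test, y_score)
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr)
ax.plot([0, 1], [0, 1], linestyle="--")  # chance line
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")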
# Average precision summarizes the precision-recall
# curve; like ROC AUC, it expects scores, not labels.
from sklearn.metrics import (
    average_precision_score,
)

y_score = dt.predict_proba(X_test)[:, 1]
average_precision_score(y_test, y_score)
# Yellowbrick fits a fresh model and plots its
# precision-recall curve.
from yellowbrick.classifier import (
    PrecisionRecallCurve,
)

fig, ax = plt.subplots(figsize=(6, 4))
viz = PrecisionRecallCurve(
    DecisionTreeClassifier(
        random_state=42, max_depth=3
    )
)
viz.fit(X_train, y_train)
print(viz.score(X_test, y_test))
viz.poof()
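The raw curve points are available from scikit-learn as well:

from sklearn.metrics import precision_recall_curve

precision, recall, thresholds = precision_recall_curve(
    y_test, y_score
)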
# Cumulative gains: how many of the positives are
# captured as we work down the test set sorted by
# predicted probability.
import scikitplot

fig, ax = plt.subplots(figsize=(6, 6))
y_probas = dt.predict_proba(X_test)
scikitplot.metrics.plot_cumulative_gain(
    y_test, y_probas, ax=ax
)

# Lift: the gain at each point divided by the
# baseline rate of random targeting.
fig, ax = plt.subplots(figsize=(6, 6))
scikitplot.metrics.plot_lift_curve(
    y_test, y_probas, ax=ax
)
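Both plots come from the same computation: sort by the predicted probability of the positive class, accumulate the hits, and compare against the baseline. A hand-rolled sketch, assuming 0/1 labels:

import numpy as np

# Sort labels by descending predicted probability of class 1.
order = np.argsort(y_probas[:, 1])[::-1]
hits = np.asarray(y_test)[order]
gain = np.cumsum(hits) / hits.sum()  # cumulative gain
pct = np.arange(1, len(hits) + 1) / len(hits)
lift = gain / pct  # lift over random targeting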
# How balanced are the classes? (In newer Yellowbrick
# versions, ClassBalance lives in yellowbrick.target.)
from yellowbrick.classifier import ClassBalance

fig, ax = plt.subplots(figsize=(6, 6))
cb_viz = ClassBalance(
    labels=["died", "survived"]
)
cb_viz.fit(y_test)
cb_viz.poof()
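The counts behind the bar chart are one line of pandas:

pd.Series(y_test).value_counts()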
# Class prediction error: stacked bars showing, for
# each actual class, how its members were predicted.
from yellowbrick.classifier import (
    ClassPredictionError,
)

fig, ax = plt.subplots(figsize=(6, 6))
cpe_viz = ClassPredictionError(
    dt, classes=["died", "survived"]
)
cpe_viz.score(X_test, y_test)
cpe_viz.poof()
# How precision, recall, F1, and queue rate trade off
# as the probability threshold moves away from 0.5.
# The visualizer runs its own internal splits, so it
# takes the full X and y.
from yellowbrick.classifier import (
    DiscriminationThreshold,
)

fig, ax = plt.subplots(figsize=(6, 5))
dt_viz = DiscriminationThreshold(dt)
dt_viz.fit(X, y)
dt_viz.poof()
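Once a threshold is read off the plot, applying it is a comparison against the positive-class probability; the 0.4 below is an arbitrary example value, not a recommendation:

# Predict "survived" whenever P(survived) clears the threshold.
threshold = 0.4  # hypothetical value read off the plot
y_thresh = (
    dt.predict_proba(X_test)[:, 1] >= threshold
).astype(int)
recall_score(y_test, y_thresh)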