# Chapter 13: Model Explanation
# Explores various approaches for explaining models.
from sklearn.tree import DecisionTreeClassifier

# A shallow tree (max depth 3) is small enough to interpret directly.
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)
from lime import lime_tabular

# Build a LIME explainer over the training features so individual
# predictions can be explained with a local surrogate model.
explainer = lime_tabular.LimeTabularExplainer(
    X_train.values,
    feature_names=X.columns,
    class_names=["died", "survived"],
)

# Explain the tree's prediction for the last training row.
last_row = X_train.iloc[-1].values
exp = explainer.explain_instance(last_row, dt.predict_proba)

fig = exp.as_pyplot_figure()
fig.tight_layout()
# What-if probe: copy the second-to-last training row, score it, then
# flip one feature and score again to see how the probability moves.
data = X_train.iloc[-2].values.copy()
dt.predict_proba([data])

# NOTE(review): index 5 presumably corresponds to a binary column
# (e.g. sex_male) — confirm against X_train's column order.
data[5] = 1
dt.predict_proba([data])
# Random forest used by the explanation tools below.
# max_features="auto" was deprecated in scikit-learn 1.1 and removed in
# 1.3; "sqrt" is the exact equivalent for classifiers, so behavior is
# unchanged while staying compatible with current releases. The
# needless **{...} unpacking is replaced with plain keyword arguments.
rf5 = ensemble.RandomForestClassifier(
    max_features="sqrt",
    min_samples_leaf=0.1,
    n_estimators=200,
    random_state=42,
)
rf5.fit(X_train, y_train)
from treeinterpreter import treeinterpreter as ti

# Decompose the forest's predictions for the first two rows into the
# trainset-mean bias plus a per-feature contribution for each row.
instances = X.iloc[:2]
prediction, bias, contribs = ti.predict(rf5, instances)

i = 0
print("Instance", i)
print("Prediction", prediction[i])
print("Bias (trainset mean)", bias[i])
print("Feature contributions:")
for contrib, feature in zip(contribs[i], instances.columns):
    print("   {} {}".format(feature, contrib))
# Re-create the same forest (identical hyperparameters) before the
# partial-dependence plots below.
# max_features="auto" was deprecated in scikit-learn 1.1 and removed in
# 1.3; "sqrt" is the exact equivalent for classifiers, so behavior is
# unchanged while staying compatible with current releases.
rf5 = ensemble.RandomForestClassifier(
    max_features="sqrt",
    min_samples_leaf=0.1,
    n_estimators=200,
    random_state=42,
)
rf5.fit(X_train, y_train)
from pdpbox import pdp

# One-way partial dependence: how predicted survival changes with age,
# with individual conditional-expectation lines overlaid.
feat_name = "age"
iso = pdp.pdp_isolate(rf5, X, X.columns, feat_name)
fig, _ = pdp.pdp_plot(iso, feat_name, plot_lines=True, figsize=(30, 15))

# Two-way interaction between fare and sex.
features = ["fare", "sex_male"]
inter = pdp.pdp_interact(rf5, X, X.columns, features)
fig, _ = pdp.pdp_interact_plot(inter, features)
from sklearn import svm

# SVMs are hard to interpret directly, so fit a surrogate decision
# tree to the SVM's own predictions and read feature importances off
# the tree instead.
sv = svm.SVC()
sv.fit(X_train, y_train)

# Use the DecisionTreeClassifier imported at the top of this file —
# the original called tree.DecisionTreeClassifier(), but the bare
# `tree` module is never imported here.
sur_dt = DecisionTreeClassifier()
sur_dt.fit(X_test, sv.predict(X_test))

# Print the surrogate tree's seven most important features.
for col, val in sorted(
    zip(X_test.columns, sur_dt.feature_importances_),
    key=lambda x: x[1],
    reverse=True,
)[:7]:
    print(f"{col:10}{val:10.3f}")
# Inspect the forest's probabilities for row 20 before explaining it.
rf5.predict_proba(X_test.iloc[[20]])

import shap

shap.initjs()

# TreeExplainer computes exact SHAP values for tree ensembles.
explainer = shap.TreeExplainer(rf5)
shap_vals = explainer.shap_values(X_test)

# Force plot for a single test row, for class index 1.
target_idx = 1
shap.force_plot(
    explainer.expected_value[target_idx],
    shap_vals[target_idx][20, :],
    feature_names=X_test.columns,
)

# Force plot over every test row at once for class index 1.
shap.initjs()
shap.force_plot(
    explainer.expected_value[1],
    shap_vals[1],
    feature_names=X_test.columns,
)

# Global summary of feature impact for class index 0.
fig, ax = plt.subplots(figsize=(10, 8))
shap.summary_plot(shap_vals[0], X_test)