from sklearn.ensemble import (
  RandomForestRegressor,
)

# Fit a random-forest regressor on the Boston housing training split.
# `bos_X_train` / `bos_y_train` are assumed to be defined earlier in the
# file (train/test split) — not visible in this chunk; verify upstream.
rfr = RandomForestRegressor(
  random_state=42, n_estimators=100
)
rfr.fit(bos_X_train, bos_y_train)
# REPL echo of the fitted estimator's repr, kept as a comment: left as
# code it would re-instantiate the estimator, and `criterion='mse'` is
# rejected by modern scikit-learn (renamed to 'squared_error').
# RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
#                       max_features='auto', max_leaf_nodes=None,
#                       min_impurity_decrease=0.0, min_impurity_split=None,
#                       min_samples_leaf=1, min_samples_split=2,
#                       min_weight_fraction_leaf=0.0, n_estimators=100,
#                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
#                       warm_start=False)

# 15.1 평가 지표 (evaluation metrics)

from sklearn import metrics

# R^2 on the held-out test split — a regressor's .score() is R^2.
rfr.score(bos_X_test, bos_y_test)
# => 0.8721182042634867

# Same value computed explicitly from the predictions.
bos_y_test_pred = rfr.predict(bos_X_test)
metrics.r2_score(bos_y_test, bos_y_test_pred)
# => 0.8721182042634867

# Mean absolute error, in the units of the target variable.
metrics.mean_absolute_error(
  bos_y_test, bos_y_test_pred
)
# => 2.0839802631578945

# 15.2 잔차 도표 (residuals plot)

import matplotlib.pyplot as plt
from yellowbrick.regressor import ResidualsPlot

# Residuals vs. predicted values for both the train and test splits; a
# structureless horizontal band suggests a well-specified model.
fig, ax = plt.subplots(figsize=(10, 8))
rpv = ResidualsPlot(rfr)
rpv.fit(bos_X_train, bos_y_train)
rpv.score(bos_X_test, bos_y_test)
# NOTE(review): `poof()` is deprecated in yellowbrick >= 1.0 in favor of
# `show()` — kept for compatibility with the older pinned version.
rpv.poof()

# 15.3 이분산성 (heteroscedasticity)

import statsmodels.stats.api as sms

# Breusch-Pagan test: does the residual variance depend on the
# explanatory variables? Small p-values indicate heteroscedasticity.
resids = bos_y_test - rfr.predict(bos_X_test)
hb = sms.het_breuschpagan(resids, bos_X_test)
labels = [
  "Lagrange multiplier statistic",
  "p-value",
  "f-value",
  "f p-value",
]

# `{num:.2}` formats each statistic to two significant digits.
for name, num in zip(labels, hb):
  print(f"{name}: {num:.2}")
# Output:
# Lagrange multiplier statistic: 3.6e+01
# p-value: 0.00036
# f-value: 3.3
# f p-value: 0.00022

# 15.4 정규 잔차 (normality of residuals)

# Histogram of the test-set residuals.
fig, ax = plt.subplots(figsize=(8, 6))

resids = bos_y_test - rfr.predict(bos_X_test)
# NOTE(review): `pd` (pandas) is not imported anywhere in this chunk —
# presumably imported earlier in the file; verify.
pd.Series(resids, name="residuals").plot.hist(
  bins=20, ax=ax, title="Residual Histogram"
)
# => <matplotlib.axes._subplots.AxesSubplot at 0x7f364ad8b290>

# Q-Q (probability) plot plus a Kolmogorov-Smirnov test of normality.
from scipy import stats

fig, ax = plt.subplots(figsize=(8, 6))
_ = stats.probplot(resids, plot=ax)
# NOTE(review): kstest with cdf="norm" compares against the *standard*
# normal N(0, 1); the residuals are not standardized here, so the tiny
# p-value partly reflects location/scale, not only shape — confirm intent.
stats.kstest(resids, cdf="norm")
# => KstestResult(statistic=0.1962230021010155, pvalue=1.3283596864962378e-05)

# 15.5 예측 오차 도표 (prediction-error plot)

from yellowbrick.regressor import (
  PredictionError,
)

# Actual vs. predicted test targets; points near the identity line mean
# accurate predictions.
fig, ax = plt.subplots(figsize=(10, 8))

pev = PredictionError(rfr)
pev.fit(bos_X_train, bos_y_train)
pev.score(bos_X_test, bos_y_test)
# NOTE(review): `poof()` is deprecated in yellowbrick >= 1.0; prefer
# `show()` when upgrading.
pev.poof()