# 15장 평가 지표와 회귀의 평가
# 회귀를 평가하는 지표를 다룹니다.
from sklearn.ensemble import (
RandomForestRegressor,
)
# Build a 100-tree random forest regressor with a fixed seed and fit it on
# the training data.
rfr = RandomForestRegressor(n_estimators=100, random_state=42)
rfr.fit(bos_X_train, bos_y_train)
from sklearn import metrics
# Evaluate the fitted model on the test data: first through the estimator's
# own score() method, then through explicit metric functions applied to the
# stored predictions.
rfr.score(X=bos_X_test, y=bos_y_test)
bos_y_test_pred = rfr.predict(X=bos_X_test)
metrics.r2_score(y_true=bos_y_test, y_pred=bos_y_test_pred)
metrics.mean_absolute_error(y_true=bos_y_test, y_pred=bos_y_test_pred)
import matplotlib.pyplot as plt
from yellowbrick.regressor import ResidualsPlot
# Residuals plot: fit the visualizer on the training data, score it on the
# test data, then render the figure.
fig, ax = plt.subplots(figsize=(10, 8))
# Pass the axes explicitly so the plot lands on the figure created above
# instead of relying on whichever axes happen to be current.
rpv = ResidualsPlot(rfr, ax=ax)
rpv.fit(bos_X_train, bos_y_train)
rpv.score(bos_X_test, bos_y_test)
# Yellowbrick 1.0 renamed poof() to show() and deprecated the old name;
# prefer show() and fall back to poof() on older releases.
render_rpv = getattr(rpv, "show", None) or rpv.poof
render_rpv()
import statsmodels.api as sm
import statsmodels.stats.api as sms

# Breusch-Pagan test for heteroscedasticity of the test-set residuals
# (actual minus predicted).
resids = bos_y_test - rfr.predict(bos_X_test)
# het_breuschpagan requires the exogenous matrix to contain a constant
# column (recent statsmodels raises ValueError otherwise). add_constant
# leaves the data untouched when a constant column is already present.
exog = sm.add_constant(bos_X_test)
hb = sms.het_breuschpagan(resids, exog)
labels = [
    "Lagrange multiplier statistic",
    "p-value",
    "f-value",
    "f p-value",
]
# Print each returned statistic next to its label, two significant digits.
for name, num in zip(labels, hb):
    print(f"{name}: {num:.2}")
# Histogram (20 bins) of the test-set residuals, i.e. actual minus predicted.
fig, ax = plt.subplots(figsize=(8, 6))
resids = bos_y_test - rfr.predict(bos_X_test)
residual_series = pd.Series(resids, name="residuals")
residual_series.plot.hist(bins=20, ax=ax, title="Residual Histogram")
from scipy import stats
# Probability (Q-Q) plot of the residuals, drawn on a fresh figure.
fig, ax = plt.subplots(figsize=(8, 6))
_ = stats.probplot(resids, plot=ax)
# kstest with cdf="norm" and no distribution args compares against the
# *standard* normal (mean 0, sd 1). Raw residuals would be rejected merely
# for having a different location or scale, so standardize them first to
# make this a test of distributional shape.
# NOTE: because mean and sd are estimated from the same sample, the
# reported p-value is only approximate (it tends to be conservative).
stats.kstest(stats.zscore(resids), cdf="norm")
from yellowbrick.regressor import (
PredictionError,
)
# Prediction error plot: fit the visualizer on the training data, score it
# on the test data, then render the figure.
fig, ax = plt.subplots(figsize=(10, 8))
# Pass the axes explicitly so the plot lands on the figure created above
# instead of relying on whichever axes happen to be current.
pev = PredictionError(rfr, ax=ax)
pev.fit(bos_X_train, bos_y_train)
pev.score(bos_X_test, bos_y_test)
# Yellowbrick 1.0 renamed poof() to show() and deprecated the old name;
# prefer show() and fall back to poof() on older releases.
render_pev = getattr(pev, "show", None) or pev.poof
render_pev()