9장 불균형 범주의 문제
불균형 범주의 문제를 다룹니다.
from sklearn.utils import resample
# Split the frame into rows where survived == 1 and all remaining rows.
mask = df["survived"].eq(1)
surv_df = df.loc[mask]
death_df = df.loc[~mask]

# Draw survivor rows with replacement until there are as many of them
# as there are rows in death_df; the fixed seed keeps the draw repeatable.
df_upsample = resample(
    surv_df, replace=True, n_samples=death_df.shape[0], random_state=42
)

# Stack the untouched death_df rows on top of the resampled survivor rows
# and show the resulting per-class counts.
df2 = pd.concat([death_df, df_upsample], axis=0)
df2["survived"].value_counts()
from imblearn.over_sampling import (
RandomOverSampler,
)
# Oversample with imbalanced-learn's RandomOverSampler, seeded for
# reproducibility.
ros = RandomOverSampler(random_state=42)
# fit_resample is the supported method name. The older fit_sample alias was
# deprecated and later removed from imbalanced-learn, so calling it on a
# current release raises AttributeError.
X_ros, y_ros = ros.fit_resample(X, y)
# Show how many samples each class has after resampling.
pd.Series(y_ros).value_counts()
from sklearn.utils import resample
# Split the frame into rows where survived == 1 and all remaining rows.
mask = df["survived"].eq(1)
surv_df = df.loc[mask]
death_df = df.loc[~mask]

# Draw from death_df without replacement, keeping only as many rows as
# there are in surv_df; the fixed seed keeps the draw repeatable.
df_downsample = resample(
    death_df, replace=False, n_samples=surv_df.shape[0], random_state=42
)

# Stack the untouched survivor rows on top of the reduced death_df sample
# and show the resulting per-class counts.
df3 = pd.concat([surv_df, df_downsample], axis=0)
df3["survived"].value_counts()