필요한 패키지

5.1 열의 이름

import janitor as jn
# pandas is imported here because this example uses `pd` before the file's
# later `import pandas as pd` line is reached when reading top to bottom.
import pandas as pd

# Demo frame with untidy column labels (an uppercase name and one padded
# with spaces) and one missing value in each column.
Xbad = pd.DataFrame(
  {
    "A": [1, None, 3],
    " sales numbers ": [20.0, 30.0, None],
  }
)

# Normalize the column labels with pyjanitor. In the output shown below the
# names are lowercased and each space becomes an underscore, so
# " sales numbers " turns into "_sales_numbers_" (outer spaces are kept as "_").
jn.clean_names(Xbad)
a _sales_numbers_
0 1.0 20.0
1 NaN 30.0
2 3.0 NaN
def clean_col(name):
  """Return a tidy version of a column label.

  Surrounding whitespace is trimmed, the text is lowercased, and every
  remaining space is replaced with an underscore.
  """
  lowered = name.strip().lower()
  # Splitting on a literal space and re-joining swaps each space for "_".
  return "_".join(lowered.split(" "))

# pandas-only alternative: clean_col is passed as the column mapper, so it is
# called on each column label. The result is not assigned, so the name Xbad
# still refers to the frame with the original labels.
Xbad.rename(columns=clean_col)
a sales_numbers
0 1.0 20.0
1 NaN 30.0
2 3.0 NaN

5.2 누락된 값의 교체

# Collapse the two listed columns into a single "val" column. Judging by the
# output below, each row takes the first non-missing value in the listed
# order (row 1 yields 30.0 because "A" is missing there).
# NOTE(review): the coalesce() signature may differ between pyjanitor
# releases -- confirm these keyword names against the installed version.
jn.coalesce(
  Xbad,
  columns=["A", " sales numbers "],
  new_column_name="val",
)
val
0 1.0
1 30.0
2 3.0
# Replace every missing value in the frame with the constant 10. The result
# is only displayed; it is not assigned back to Xbad.
Xbad.fillna(10)
A sales numbers
0 1.0 20.0
1 10.0 30.0
2 3.0 10.0
# pyjanitor counterpart of fillna: fill missing values with 10, restricted to
# the listed columns. Both columns are listed here, so the output below
# matches the Xbad.fillna(10) result.
jn.fill_empty(
  Xbad,
  columns=["A", " sales numbers "],
  value=10,
)
A sales numbers
0 1.0 20.0
1 10.0 30.0
2 3.0 10.0
import pandas as pd

# Download the Titanic spreadsheet and load it into a DataFrame
# (requires network access).
url = "https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic3.xls"
df = pd.read_excel(url)
# NOTE(review): this binds a second name to the same object, not a copy.
# In-place changes made through `df` will also be visible via `orig_df`;
# use df.copy() if an untouched snapshot is intended.
orig_df = df

# True if at least one cell anywhere in the frame is missing: the first
# any() reduces per column, the second reduces across the columns.
df.isna().any().any()
True