7장 데이터 전처리
표준화, 범위 조정, 범주형 인코딩, 날짜형 특징, 결측치 표시 등 모델 학습 전에 데이터를 전처리하는 방법을 다룹니다.
• Chansung Park • 22 min read
- 7.1 표준화
- 7.2 범위 조정
- 7.3 더미 변수
- 7.4 레이블 인코더
- 7.5 프리퀀시 인코딩
- 7.6 문자열에서 범주 가져오기
- 7.7 그 밖의 범주형 인코딩
- 7.8 날짜형 데이터의 특징 공학
- 7.9 col_na 특징의 추가
- 7.10 수동적 특징 공학
# Toy frame for the scaling examples: "a" is 0..4, "b" spans -100..1000.
X2 = pd.DataFrame(
    dict(
        a=range(5),
        b=[-100, -50, 0, 200, 1000],
    )
)
X2
| | a | b |
---|---|---|
0 | 0 | -100 |
1 | 1 | -50 |
2 | 2 | 0 |
3 | 3 | 200 |
4 | 4 | 1000 |
from sklearn import preprocessing
# Standardize every column of X2; fit_transform returns a NumPy array
# (see the output below).
std = preprocessing.StandardScaler()
std.fit_transform(X2)
array([[-1.41421356, -0.75995002], [-0.70710678, -0.63737744], [ 0. , -0.51480485], [ 0.70710678, -0.02451452], [ 1.41421356, 1.93664683]])
std.scale_  # per-column scale learned during fit (square root of var_)
array([ 1.41421356, 407.92156109])
std.mean_  # per-column mean learned during fit
array([ 2., 210.])
std.var_  # per-column variance learned during fit
array([2.000e+00, 1.664e+05])
# Manual standardization with pandas. DataFrame.std() defaults to the sample
# standard deviation (ddof=1), which is why these values differ from the
# StandardScaler array shown earlier.
X_std = X2.sub(X2.mean()).div(X2.std())
X_std
| | a | b |
---|---|---|
0 | -1.264911 | -0.679720 |
1 | -0.632456 | -0.570088 |
2 | 0.000000 | -0.460455 |
3 | 0.632456 | -0.021926 |
4 | 1.264911 | 1.732190 |
X_std.mean()  # column means are numerically zero after standardization
a 4.440892e-17 b 0.000000e+00 dtype: float64
X_std.std()  # each column now has a standard deviation of 1
a 1.0 b 1.0 dtype: float64
# Work on a copy so X2 itself is not modified.
X3 = X2.copy()
# scale_vars is a fastai helper imported locally (its definition is not shown
# here). NOTE(review): judging by the outputs below it standardizes X3 in
# place -- confirm against the helper's source.
scale_vars(X3, mapper=None) #fastai function locally imported
X3.std()
a 1.118034 b 1.118034 dtype: float64
X3.mean()  # column means are numerically zero after scale_vars
a 0.000000e+00 b 4.440892e-17 dtype: float64
from sklearn import preprocessing
# Rescale each column of X2 to the [0, 1] range (see the array below).
mms = preprocessing.MinMaxScaler()
mms.fit(X2)
mms.transform(X2)
array([[0. , 0. ], [0.25 , 0.04545455], [0.5 , 0.09090909], [0.75 , 0.27272727], [1. , 1. ]])
# Same min-max rescaling done by hand with pandas: (x - min) / (max - min).
X2.sub(X2.min()).div(X2.max() - X2.min())
| | a | b |
---|---|---|
0 | 0.00 | 0.000000 |
1 | 0.25 | 0.045455 |
2 | 0.50 | 0.090909 |
3 | 0.75 | 0.272727 |
4 | 1.00 | 1.000000 |
# Small categorical frame reused by the encoding examples.
X_cat = pd.DataFrame(
    dict(
        name=["George", "Paul"],
        inst=["Bass", "Guitar"],
    )
)
X_cat
| | name | inst |
---|---|---|
0 | George | Bass |
1 | Paul | Guitar |
# One-hot encode X_cat, dropping the first level of each column.
# dtype=int keeps the 0/1 output shown below: pandas >= 2.0 otherwise returns
# boolean True/False dummy columns.
pd.get_dummies(X_cat, drop_first=True, dtype=int)
| | name_Paul | inst_Guitar |
---|---|---|
0 | 0 | 0 |
1 | 1 | 1 |
import janitor as jn
# Frame whose "names" column packs several comma-separated values per row.
X_cat2 = pd.DataFrame(
    dict(
        A=[1, None, 3],
        names=["Fred,George", "George", "John,Paul"],
    )
)
# Add one 0/1 indicator column per comma-separated value found in "names"
# (see the output below).
jn.expand_column(X_cat2, "names", sep=",")
| | A | names | Fred | George | John | Paul |
---|---|---|---|---|---|---|
0 | 1.0 | Fred,George | 1 | 1 | 0 | 0 |
1 | NaN | George | 0 | 1 | 0 | 0 |
2 | 3.0 | John,Paul | 0 | 0 | 1 | 1 |
from sklearn import preprocessing
# Encode the "inst" labels as integers (Bass -> 0, Guitar -> 1 per the
# outputs below).
lab = preprocessing.LabelEncoder()
lab.fit_transform(X_cat['inst'])
array([0, 1])
lab.inverse_transform([1, 1, 0])  # map integer codes back to the original labels
array(['Guitar', 'Guitar', 'Bass'], dtype=object)
# Label-encode with pandas alone: categorical codes start at 0, so shift them
# by one to start at 1.
X_cat.name.astype("category").cat.as_ordered().cat.codes.add(1)
0 1 1 2 dtype: int8
# Frequency encoding: replace each name with the number of times it occurs.
mapping = X_cat["name"].value_counts()
X_cat["name"].map(mapping)
0 1 1 1 Name: name, dtype: int64
# df is defined outside this section; the output below shows 1309 passenger
# rows and 10 columns.
df
| | pclass | survived | name | sex | age | sibsp | parch | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | Allen, Miss. Elisabeth Walton | female | 29.0000 | 0 | 0 | 211.3375 | B5 | S |
1 | 1 | 1 | Allison, Master. Hudson Trevor | male | 0.9167 | 1 | 2 | 151.5500 | C22 C26 | S |
2 | 1 | 0 | Allison, Miss. Helen Loraine | female | 2.0000 | 1 | 2 | 151.5500 | C22 C26 | S |
3 | 1 | 0 | Allison, Mr. Hudson Joshua Creighton | male | 30.0000 | 1 | 2 | 151.5500 | C22 C26 | S |
4 | 1 | 0 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0000 | 1 | 2 | 151.5500 | C22 C26 | S |
5 | 1 | 1 | Anderson, Mr. Harry | male | 48.0000 | 0 | 0 | 26.5500 | E12 | S |
6 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.0000 | 1 | 0 | 77.9583 | D7 | S |
7 | 1 | 0 | Andrews, Mr. Thomas Jr | male | 39.0000 | 0 | 0 | 0.0000 | A36 | S |
8 | 1 | 1 | Appleton, Mrs. Edward Dale (Charlotte Lamson) | female | 53.0000 | 2 | 0 | 51.4792 | C101 | S |
9 | 1 | 0 | Artagaveytia, Mr. Ramon | male | 71.0000 | 0 | 0 | 49.5042 | NaN | C |
10 | 1 | 0 | Astor, Col. John Jacob | male | 47.0000 | 1 | 0 | 227.5250 | C62 C64 | C |
11 | 1 | 1 | Astor, Mrs. John Jacob (Madeleine Talmadge Force) | female | 18.0000 | 1 | 0 | 227.5250 | C62 C64 | C |
12 | 1 | 1 | Aubart, Mme. Leontine Pauline | female | 24.0000 | 0 | 0 | 69.3000 | B35 | C |
13 | 1 | 1 | Barber, Miss. Ellen "Nellie" | female | 26.0000 | 0 | 0 | 78.8500 | NaN | S |
14 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 80.0000 | 0 | 0 | 30.0000 | A23 | S |
15 | 1 | 0 | Baumann, Mr. John D | male | NaN | 0 | 0 | 25.9250 | NaN | S |
16 | 1 | 0 | Baxter, Mr. Quigg Edmond | male | 24.0000 | 0 | 1 | 247.5208 | B58 B60 | C |
17 | 1 | 1 | Baxter, Mrs. James (Helene DeLaudeniere Chaput) | female | 50.0000 | 0 | 1 | 247.5208 | B58 B60 | C |
18 | 1 | 1 | Bazzani, Miss. Albina | female | 32.0000 | 0 | 0 | 76.2917 | D15 | C |
19 | 1 | 0 | Beattie, Mr. Thomson | male | 36.0000 | 0 | 0 | 75.2417 | C6 | C |
20 | 1 | 1 | Beckwith, Mr. Richard Leonard | male | 37.0000 | 1 | 1 | 52.5542 | D35 | S |
21 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0000 | 1 | 1 | 52.5542 | D35 | S |
22 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0000 | 0 | 0 | 30.0000 | C148 | C |
23 | 1 | 1 | Bidois, Miss. Rosalie | female | 42.0000 | 0 | 0 | 227.5250 | NaN | C |
24 | 1 | 1 | Bird, Miss. Ellen | female | 29.0000 | 0 | 0 | 221.7792 | C97 | S |
25 | 1 | 0 | Birnbaum, Mr. Jakob | male | 25.0000 | 0 | 0 | 26.0000 | NaN | C |
26 | 1 | 1 | Bishop, Mr. Dickinson H | male | 25.0000 | 1 | 0 | 91.0792 | B49 | C |
27 | 1 | 1 | Bishop, Mrs. Dickinson H (Helen Walton) | female | 19.0000 | 1 | 0 | 91.0792 | B49 | C |
28 | 1 | 1 | Bissette, Miss. Amelia | female | 35.0000 | 0 | 0 | 135.6333 | C99 | S |
29 | 1 | 1 | Bjornstrom-Steffansson, Mr. Mauritz Hakan | male | 28.0000 | 0 | 0 | 26.5500 | C52 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1279 | 3 | 0 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0000 | 0 | 0 | 7.8542 | NaN | S |
1280 | 3 | 0 | Vovk, Mr. Janko | male | 22.0000 | 0 | 0 | 7.8958 | NaN | S |
1281 | 3 | 0 | Waelens, Mr. Achille | male | 22.0000 | 0 | 0 | 9.0000 | NaN | S |
1282 | 3 | 0 | Ware, Mr. Frederick | male | NaN | 0 | 0 | 8.0500 | NaN | S |
1283 | 3 | 0 | Warren, Mr. Charles William | male | NaN | 0 | 0 | 7.5500 | NaN | S |
1284 | 3 | 0 | Webber, Mr. James | male | NaN | 0 | 0 | 8.0500 | NaN | S |
1285 | 3 | 0 | Wenzel, Mr. Linhart | male | 32.5000 | 0 | 0 | 9.5000 | NaN | S |
1286 | 3 | 1 | Whabee, Mrs. George Joseph (Shawneene Abi-Saab) | female | 38.0000 | 0 | 0 | 7.2292 | NaN | C |
1287 | 3 | 0 | Widegren, Mr. Carl/Charles Peter | male | 51.0000 | 0 | 0 | 7.7500 | NaN | S |
1288 | 3 | 0 | Wiklund, Mr. Jakob Alfred | male | 18.0000 | 1 | 0 | 6.4958 | NaN | S |
1289 | 3 | 0 | Wiklund, Mr. Karl Johan | male | 21.0000 | 1 | 0 | 6.4958 | NaN | S |
1290 | 3 | 1 | Wilkes, Mrs. James (Ellen Needs) | female | 47.0000 | 1 | 0 | 7.0000 | NaN | S |
1291 | 3 | 0 | Willer, Mr. Aaron ("Abi Weller") | male | NaN | 0 | 0 | 8.7125 | NaN | S |
1292 | 3 | 0 | Willey, Mr. Edward | male | NaN | 0 | 0 | 7.5500 | NaN | S |
1293 | 3 | 0 | Williams, Mr. Howard Hugh "Harry" | male | NaN | 0 | 0 | 8.0500 | NaN | S |
1294 | 3 | 0 | Williams, Mr. Leslie | male | 28.5000 | 0 | 0 | 16.1000 | NaN | S |
1295 | 3 | 0 | Windelov, Mr. Einar | male | 21.0000 | 0 | 0 | 7.2500 | NaN | S |
1296 | 3 | 0 | Wirz, Mr. Albert | male | 27.0000 | 0 | 0 | 8.6625 | NaN | S |
1297 | 3 | 0 | Wiseman, Mr. Phillippe | male | NaN | 0 | 0 | 7.2500 | NaN | S |
1298 | 3 | 0 | Wittevrongel, Mr. Camille | male | 36.0000 | 0 | 0 | 9.5000 | NaN | S |
1299 | 3 | 0 | Yasbeck, Mr. Antoni | male | 27.0000 | 1 | 0 | 14.4542 | NaN | C |
1300 | 3 | 1 | Yasbeck, Mrs. Antoni (Selini Alexander) | female | 15.0000 | 1 | 0 | 14.4542 | NaN | C |
1301 | 3 | 0 | Youseff, Mr. Gerious | male | 45.5000 | 0 | 0 | 7.2250 | NaN | C |
1302 | 3 | 0 | Yousif, Mr. Wazli | male | NaN | 0 | 0 | 7.2250 | NaN | C |
1303 | 3 | 0 | Yousseff, Mr. Gerious | male | NaN | 0 | 0 | 14.4583 | NaN | C |
1304 | 3 | 0 | Zabour, Miss. Hileni | female | 14.5000 | 1 | 0 | 14.4542 | NaN | C |
1305 | 3 | 0 | Zabour, Miss. Thamine | female | NaN | 1 | 0 | 14.4542 | NaN | C |
1306 | 3 | 0 | Zakarian, Mr. Mapriededer | male | 26.5000 | 0 | 0 | 7.2250 | NaN | C |
1307 | 3 | 0 | Zakarian, Mr. Ortin | male | 27.0000 | 0 | 0 | 7.2250 | NaN | C |
1308 | 3 | 0 | Zimmerman, Mr. Leo | male | 29.0000 | 0 | 0 | 7.8750 | NaN | S |
1309 rows × 10 columns
from collections import Counter
# Module-level tally of every three-character substring seen so far.
c = Counter()


def triples(val):
    """Count every three-character substring of ``val`` in the global ``c``.

    Only full-length windows are counted; the final one- and two-character
    tails are not triples and are skipped. Strings shorter than three
    characters contribute nothing.

    Args:
        val: The string to scan.

    Returns:
        None. The counts accumulate in the module-level Counter ``c``.
    """
    # The last full window starts at len(val) - 3, hence the "- 2" bound.
    for start in range(len(val) - 2):
        c[val[start : start + 3]] += 1
# Run triples over every passenger name; the counts accumulate in the Counter c.
df.name.apply(triples)
# Show the ten most frequent fragments.
c.most_common(10)
[(', M', 1282), (' Mr', 954), ('r. ', 830), ('Mr.', 757), ('s. ', 460), ('n, ', 320), (' Mi', 283), ('iss', 261), ('ss.', 261), ('Mis', 260)]
# Capture the run of letters directly before a period (the title, e.g. "Miss").
# The pattern is a raw string: "\." in a plain literal is an invalid escape
# sequence that newer Python versions warn about.
df.name.str.extract(
    r"([A-Za-z]+)\.", expand=False
).head()
0 Miss 1 Master 2 Miss 3 Mr 4 Mrs Name: name, dtype: object
# Tally how often each extracted title occurs. The pattern is a raw string:
# "\." in a plain literal is an invalid escape sequence that newer Python
# versions warn about.
df.name.str.extract(
    r"([A-Za-z]+)\.", expand=False
).value_counts()
Mr 757 Miss 260 Mrs 197 Master 61 Dr 8 Rev 8 Col 4 Ms 2 Mlle 2 Major 2 Don 1 Jonkheer 1 Sir 1 Capt 1 Countess 1 Mme 1 Dona 1 Lady 1 Name: name, dtype: int64
import category_encoders as ce
# Hash the categorical columns of X_cat into a fixed set of col_N columns
# (eight of them in the output below).
he = ce.HashingEncoder(verbose=1)
he.fit_transform(X_cat)
| | col_0 | col_1 | col_2 | col_3 | col_4 | col_5 | col_6 | col_7 |
---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
# Frame with an ordered "size" category for the ordinal-encoding example.
size_df = pd.DataFrame(
    dict(
        name=["Fred", "John", "Matt"],
        size=["small", "med", "xxl"],
    )
)
# Explicit ordinal mapping for the "size" column. "xxl" has no entry in the
# mapping; the output below shows it encoded as -1.
ore = ce.OrdinalEncoder(
    mapping=[
        dict(
            col="size",
            mapping=dict(small=1, med=2, lg=3),
        )
    ]
)
ore.fit_transform(size_df)
| | name | size |
---|---|---|
0 | Fred | 1.0 |
1 | John | 2.0 |
2 | Matt | -1.0 |
def get_title(df):
    """Extract each passenger's title (e.g. "Mr", "Miss") from ``df["name"]``.

    The title is the first run of ASCII letters that is immediately followed
    by a period.

    Args:
        df: DataFrame with a string column called ``name``.

    Returns:
        A Series aligned with ``df`` holding the captured title, or NaN where
        the name contains no such pattern.
    """
    # Raw string so the backslash reaches the regex engine without an
    # invalid-escape warning.
    return df["name"].str.extract(r"([A-Za-z]+)\.", expand=False)
# Target-encode the extracted title against the survival label.
te = ce.TargetEncoder(cols="Title")
te.fit_transform(
    df.assign(Title=get_title),  # assign() calls get_title(df) to build the column
    df.survived,
)["Title"].head()
0 0.676923 1 0.508197 2 0.676923 3 0.162483 4 0.786802 Name: Title, dtype: float64
import numpy as np
# Parse each string on its own: the two inputs use different date formats, and
# pandas >= 2.0 raises when the format inferred from the first element does
# not fit the others.
dates = pd.DataFrame(
    {
        "A": pd.to_datetime(
            [pd.Timestamp(text) for text in ("9/17/2001", "Jan 1, 2002")]
        )
    }
)
# add_datepart is defined outside this section (NOTE(review): presumably the
# fastai helper -- confirm). Per the output below it expands column "A" into
# A-prefixed date-part columns on `dates` itself.
add_datepart(dates, "A")
dates.T  # transpose so each generated feature appears on its own row
| | 0 | 1 |
---|---|---|
AYear | 2001 | 2002 |
AMonth | 9 | 1 |
AWeek | 38 | 1 |
ADay | 17 | 1 |
ADayofweek | 0 | 1 |
ADayofyear | 260 | 1 |
AIs_month_end | False | False |
AIs_month_start | False | True |
AIs_quarter_end | False | False |
AIs_quarter_start | False | True |
AIs_year_end | False | False |
AIs_year_start | False | True |
AElapsed | 1000684800 | 1009843200 |
from pandas.api.types import is_numeric_dtype
def fix_missing(df, col, name, na_dict):
    """Fill missing values of a numeric column and flag where they were.

    Non-numeric columns are left untouched. A numeric column is processed when
    it contains at least one null, or when ``name`` already has an entry in
    ``na_dict``.

    Args:
        df: DataFrame that receives the filled column and the ``<name>_na``
            indicator column (modified in place).
        col: The column to inspect, e.g. ``df[name]``.
        name: Column label; also used to build the ``<name>_na`` label.
        na_dict: Maps column labels to fill values. An existing entry is
            reused as the filler; otherwise the column median is used and
            stored (the dict is modified in place).

    Returns:
        The same ``na_dict`` object that was passed in.
    """
    if not is_numeric_dtype(col):
        return na_dict

    # Compute the null mask once; it drives both the check and the indicator.
    is_missing = pd.isnull(col)
    if is_missing.sum() or name in na_dict:
        df[name + "_na"] = is_missing
        filler = na_dict[name] if name in na_dict else col.median()
        df[name] = col.fillna(filler)
        na_dict[name] = filler
    return na_dict
# One missing value in "A"; start with an empty filler dictionary.
data = pd.DataFrame(dict(A=[0, None, 5, 100]))
fix_missing(data, data["A"], "A", dict())
{'A': 5.0}
from pandas.api.types import is_numeric_dtype
def fix_missing(df, col, name, na_dict):
    """Fill missing values of a numeric column and flag where they were.

    Non-numeric columns are left untouched. A numeric column is processed when
    it contains at least one null, or when ``name`` already has an entry in
    ``na_dict``.

    Args:
        df: DataFrame that receives the filled column and the ``<name>_na``
            indicator column (modified in place).
        col: The column to inspect, e.g. ``df[name]``.
        name: Column label; also used to build the ``<name>_na`` label.
        na_dict: Maps column labels to fill values. An existing entry is
            reused as the filler; otherwise the column median is used and
            stored (the dict is modified in place).

    Returns:
        The same ``na_dict`` object that was passed in.
    """
    if not is_numeric_dtype(col):
        return na_dict

    # Compute the null mask once; it drives both the check and the indicator.
    is_missing = pd.isnull(col)
    if is_missing.sum() or name in na_dict:
        df[name + "_na"] = is_missing
        filler = na_dict[name] if name in na_dict else col.median()
        df[name] = col.fillna(filler)
        na_dict[name] = filler
    return na_dict
# One missing value in "A"; start with an empty filler dictionary.
data = pd.DataFrame(dict(A=[0, None, 5, 100]))
fix_missing(data, data["A"], "A", dict())
{'A': 5.0}
data  # now holds the filled "A" column plus the "A_na" indicator (see below)
| | A | A_na |
---|---|---|
0 | 0.0 | False |
1 | 5.0 | True |
2 | 5.0 | False |
3 | 100.0 | False |
# The same result by hand: record where "A" was missing, then fill the gaps
# with the column median.
data = pd.DataFrame(dict(A=[0, None, 5, 100]))
data["A_na"] = data["A"].isna()
data["A"] = data["A"].fillna(data["A"].median())
# Aggregate only the numeric columns. Recent pandas versions raise a TypeError
# when "mean" reaches a text column such as name or sex instead of silently
# dropping it, and the table below contains numeric aggregates only.
numeric_cols = [
    label
    for label in df.select_dtypes(include="number").columns
    if label != "cabin"  # the grouping key must not be aggregated as well
]
agg = (
    df.groupby("cabin")[numeric_cols]
    .agg(["min", "max", "mean", "sum"])
    .reset_index()
)
# Flatten the (column, statistic) MultiIndex into names like "age_mean";
# strip("_") turns the ("cabin", "") pair back into plain "cabin".
agg.columns = ["_".join(pair).strip("_") for pair in agg.columns.values]
# Attach the per-cabin statistics to every passenger of that cabin. Rows with
# a missing cabin have no group to join (295 rows remain in the output below).
agg_df = df.merge(agg, on="cabin")
agg_df
| | pclass | survived | name | sex | age | sibsp | parch | fare | cabin | embarked | pclass_min | pclass_max | pclass_mean | pclass_sum | survived_min | survived_max | survived_mean | survived_sum | age_min | age_max | age_mean | age_sum | sibsp_min | sibsp_max | sibsp_mean | sibsp_sum | parch_min | parch_max | parch_mean | parch_sum | fare_min | fare_max | fare_mean | fare_sum |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | Allen, Miss. Elisabeth Walton | female | 29.0000 | 0 | 0 | 211.3375 | B5 | S | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 15.0000 | 29.0 | 22.000000 | 44.0000 | 0 | 0 | 0.000000 | 0 | 0 | 1 | 0.5 | 1 | 211.3375 | 211.3375 | 211.337500 | 422.6750 |
1 | 1 | 1 | Madill, Miss. Georgette Alexandra | female | 15.0000 | 0 | 1 | 211.3375 | B5 | S | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 15.0000 | 29.0 | 22.000000 | 44.0000 | 0 | 0 | 0.000000 | 0 | 0 | 1 | 0.5 | 1 | 211.3375 | 211.3375 | 211.337500 | 422.6750 |
2 | 1 | 1 | Allison, Master. Hudson Trevor | male | 0.9167 | 1 | 2 | 151.5500 | C22 C26 | S | 1 | 1 | 1 | 4 | 0 | 1 | 0.250000 | 1 | 0.9167 | 30.0 | 14.479175 | 57.9167 | 1 | 1 | 1.000000 | 4 | 2 | 2 | 2.0 | 8 | 151.5500 | 151.5500 | 151.550000 | 606.2000 |
3 | 1 | 0 | Allison, Miss. Helen Loraine | female | 2.0000 | 1 | 2 | 151.5500 | C22 C26 | S | 1 | 1 | 1 | 4 | 0 | 1 | 0.250000 | 1 | 0.9167 | 30.0 | 14.479175 | 57.9167 | 1 | 1 | 1.000000 | 4 | 2 | 2 | 2.0 | 8 | 151.5500 | 151.5500 | 151.550000 | 606.2000 |
4 | 1 | 0 | Allison, Mr. Hudson Joshua Creighton | male | 30.0000 | 1 | 2 | 151.5500 | C22 C26 | S | 1 | 1 | 1 | 4 | 0 | 1 | 0.250000 | 1 | 0.9167 | 30.0 | 14.479175 | 57.9167 | 1 | 1 | 1.000000 | 4 | 2 | 2 | 2.0 | 8 | 151.5500 | 151.5500 | 151.550000 | 606.2000 |
5 | 1 | 0 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0000 | 1 | 2 | 151.5500 | C22 C26 | S | 1 | 1 | 1 | 4 | 0 | 1 | 0.250000 | 1 | 0.9167 | 30.0 | 14.479175 | 57.9167 | 1 | 1 | 1.000000 | 4 | 2 | 2 | 2.0 | 8 | 151.5500 | 151.5500 | 151.550000 | 606.2000 |
6 | 1 | 1 | Anderson, Mr. Harry | male | 48.0000 | 0 | 0 | 26.5500 | E12 | S | 1 | 1 | 1 | 1 | 1 | 1 | 1.000000 | 1 | 48.0000 | 48.0 | 48.000000 | 48.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 26.5500 | 26.5500 | 26.550000 | 26.5500 |
7 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.0000 | 1 | 0 | 77.9583 | D7 | S | 1 | 1 | 1 | 1 | 1 | 1 | 1.000000 | 1 | 63.0000 | 63.0 | 63.000000 | 63.0000 | 1 | 1 | 1.000000 | 1 | 0 | 0 | 0.0 | 0 | 77.9583 | 77.9583 | 77.958300 | 77.9583 |
8 | 1 | 0 | Andrews, Mr. Thomas Jr | male | 39.0000 | 0 | 0 | 0.0000 | A36 | S | 1 | 1 | 1 | 1 | 0 | 0 | 0.000000 | 0 | 39.0000 | 39.0 | 39.000000 | 39.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 0.0000 | 0.0000 | 0.000000 | 0.0000 |
9 | 1 | 1 | Appleton, Mrs. Edward Dale (Charlotte Lamson) | female | 53.0000 | 2 | 0 | 51.4792 | C101 | S | 1 | 1 | 1 | 3 | 1 | 1 | 1.000000 | 3 | 53.0000 | 59.0 | 55.666667 | 167.0000 | 2 | 2 | 2.000000 | 6 | 0 | 0 | 0.0 | 0 | 25.7000 | 51.4792 | 42.886133 | 128.6584 |
10 | 1 | 1 | Brown, Mrs. John Murray (Caroline Lane Lamson) | female | 59.0000 | 2 | 0 | 51.4792 | C101 | S | 1 | 1 | 1 | 3 | 1 | 1 | 1.000000 | 3 | 53.0000 | 59.0 | 55.666667 | 167.0000 | 2 | 2 | 2.000000 | 6 | 0 | 0 | 0.0 | 0 | 25.7000 | 51.4792 | 42.886133 | 128.6584 |
11 | 1 | 1 | Cornell, Mrs. Robert Clifford (Malvina Helen L... | female | 55.0000 | 2 | 0 | 25.7000 | C101 | S | 1 | 1 | 1 | 3 | 1 | 1 | 1.000000 | 3 | 53.0000 | 59.0 | 55.666667 | 167.0000 | 2 | 2 | 2.000000 | 6 | 0 | 0 | 0.0 | 0 | 25.7000 | 51.4792 | 42.886133 | 128.6584 |
12 | 1 | 0 | Astor, Col. John Jacob | male | 47.0000 | 1 | 0 | 227.5250 | C62 C64 | C | 1 | 1 | 1 | 2 | 0 | 1 | 0.500000 | 1 | 18.0000 | 47.0 | 32.500000 | 65.0000 | 1 | 1 | 1.000000 | 2 | 0 | 0 | 0.0 | 0 | 227.5250 | 227.5250 | 227.525000 | 455.0500 |
13 | 1 | 1 | Astor, Mrs. John Jacob (Madeleine Talmadge Force) | female | 18.0000 | 1 | 0 | 227.5250 | C62 C64 | C | 1 | 1 | 1 | 2 | 0 | 1 | 0.500000 | 1 | 18.0000 | 47.0 | 32.500000 | 65.0000 | 1 | 1 | 1.000000 | 2 | 0 | 0 | 0.0 | 0 | 227.5250 | 227.5250 | 227.525000 | 455.0500 |
14 | 1 | 1 | Aubart, Mme. Leontine Pauline | female | 24.0000 | 0 | 0 | 69.3000 | B35 | C | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 24.0000 | 24.0 | 24.000000 | 48.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 69.3000 | 69.3000 | 69.300000 | 138.6000 |
15 | 1 | 1 | Sagesser, Mlle. Emma | female | 24.0000 | 0 | 0 | 69.3000 | B35 | C | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 24.0000 | 24.0 | 24.000000 | 48.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 69.3000 | 69.3000 | 69.300000 | 138.6000 |
16 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 80.0000 | 0 | 0 | 30.0000 | A23 | S | 1 | 1 | 1 | 1 | 1 | 1 | 1.000000 | 1 | 80.0000 | 80.0 | 80.000000 | 80.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 30.0000 | 30.0000 | 30.000000 | 30.0000 |
17 | 1 | 0 | Baxter, Mr. Quigg Edmond | male | 24.0000 | 0 | 1 | 247.5208 | B58 B60 | C | 1 | 1 | 1 | 3 | 0 | 1 | 0.666667 | 2 | 24.0000 | 50.0 | 33.666667 | 101.0000 | 0 | 1 | 0.333333 | 1 | 1 | 1 | 1.0 | 3 | 247.5208 | 247.5208 | 247.520800 | 742.5624 |
18 | 1 | 1 | Baxter, Mrs. James (Helene DeLaudeniere Chaput) | female | 50.0000 | 0 | 1 | 247.5208 | B58 B60 | C | 1 | 1 | 1 | 3 | 0 | 1 | 0.666667 | 2 | 24.0000 | 50.0 | 33.666667 | 101.0000 | 0 | 1 | 0.333333 | 1 | 1 | 1 | 1.0 | 3 | 247.5208 | 247.5208 | 247.520800 | 742.5624 |
19 | 1 | 1 | Douglas, Mrs. Frederick Charles (Mary Helene B... | female | 27.0000 | 1 | 1 | 247.5208 | B58 B60 | C | 1 | 1 | 1 | 3 | 0 | 1 | 0.666667 | 2 | 24.0000 | 50.0 | 33.666667 | 101.0000 | 0 | 1 | 0.333333 | 1 | 1 | 1 | 1.0 | 3 | 247.5208 | 247.5208 | 247.520800 | 742.5624 |
20 | 1 | 1 | Bazzani, Miss. Albina | female | 32.0000 | 0 | 0 | 76.2917 | D15 | C | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 32.0000 | 60.0 | 46.000000 | 92.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 76.2917 | 76.2917 | 76.291700 | 152.5834 |
21 | 1 | 1 | Bucknell, Mrs. William Robert (Emma Eliza Ward) | female | 60.0000 | 0 | 0 | 76.2917 | D15 | C | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 32.0000 | 60.0 | 46.000000 | 92.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 76.2917 | 76.2917 | 76.291700 | 152.5834 |
22 | 1 | 0 | Beattie, Mr. Thomson | male | 36.0000 | 0 | 0 | 75.2417 | C6 | C | 1 | 1 | 1 | 2 | 0 | 0 | 0.000000 | 0 | 36.0000 | 46.0 | 41.000000 | 82.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 75.2417 | 75.2417 | 75.241700 | 150.4834 |
23 | 1 | 0 | McCaffry, Mr. Thomas Francis | male | 46.0000 | 0 | 0 | 75.2417 | C6 | C | 1 | 1 | 1 | 2 | 0 | 0 | 0.000000 | 0 | 36.0000 | 46.0 | 41.000000 | 82.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 75.2417 | 75.2417 | 75.241700 | 150.4834 |
24 | 1 | 1 | Beckwith, Mr. Richard Leonard | male | 37.0000 | 1 | 1 | 52.5542 | D35 | S | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 37.0000 | 47.0 | 42.000000 | 84.0000 | 1 | 1 | 1.000000 | 2 | 1 | 1 | 1.0 | 2 | 52.5542 | 52.5542 | 52.554200 | 105.1084 |
25 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0000 | 1 | 1 | 52.5542 | D35 | S | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 37.0000 | 47.0 | 42.000000 | 84.0000 | 1 | 1 | 1.000000 | 2 | 1 | 1 | 1.0 | 2 | 52.5542 | 52.5542 | 52.554200 | 105.1084 |
26 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0000 | 0 | 0 | 30.0000 | C148 | C | 1 | 1 | 1 | 1 | 1 | 1 | 1.000000 | 1 | 26.0000 | 26.0 | 26.000000 | 26.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 30.0000 | 30.0000 | 30.000000 | 30.0000 |
27 | 1 | 1 | Bird, Miss. Ellen | female | 29.0000 | 0 | 0 | 221.7792 | C97 | S | 1 | 1 | 1 | 1 | 1 | 1 | 1.000000 | 1 | 29.0000 | 29.0 | 29.000000 | 29.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 221.7792 | 221.7792 | 221.779200 | 221.7792 |
28 | 1 | 1 | Bishop, Mr. Dickinson H | male | 25.0000 | 1 | 0 | 91.0792 | B49 | C | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 19.0000 | 25.0 | 22.000000 | 44.0000 | 1 | 1 | 1.000000 | 2 | 0 | 0 | 0.0 | 0 | 91.0792 | 91.0792 | 91.079200 | 182.1584 |
29 | 1 | 1 | Bishop, Mrs. Dickinson H (Helen Walton) | female | 19.0000 | 1 | 0 | 91.0792 | B49 | C | 1 | 1 | 1 | 2 | 1 | 1 | 1.000000 | 2 | 19.0000 | 25.0 | 22.000000 | 44.0000 | 1 | 1 | 1.000000 | 2 | 0 | 0 | 0.0 | 0 | 91.0792 | 91.0792 | 91.079200 | 182.1584 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
265 | 2 | 1 | Brown, Miss. Amelia "Mildred" | female | 24.0000 | 0 | 0 | 13.0000 | F33 | S | 2 | 2 | 2 | 8 | 1 | 1 | 1.000000 | 4 | 22.0000 | 34.0 | 27.250000 | 109.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.125000 | 44.5000 |
266 | 2 | 1 | Cook, Mrs. (Selena Rogers) | female | 22.0000 | 0 | 0 | 10.5000 | F33 | S | 2 | 2 | 2 | 8 | 1 | 1 | 1.000000 | 4 | 22.0000 | 34.0 | 27.250000 | 109.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.125000 | 44.5000 |
267 | 2 | 1 | Lemore, Mrs. (Amelia Milley) | female | 34.0000 | 0 | 0 | 10.5000 | F33 | S | 2 | 2 | 2 | 8 | 1 | 1 | 1.000000 | 4 | 22.0000 | 34.0 | 27.250000 | 109.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.125000 | 44.5000 |
268 | 2 | 1 | Nye, Mrs. (Elizabeth Ramell) | female | 29.0000 | 0 | 0 | 10.5000 | F33 | S | 2 | 2 | 2 | 8 | 1 | 1 | 1.000000 | 4 | 22.0000 | 34.0 | 27.250000 | 109.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.125000 | 44.5000 |
269 | 2 | 1 | Keane, Miss. Nora A | female | NaN | 0 | 0 | 12.3500 | E101 | Q | 2 | 2 | 2 | 6 | 1 | 1 | 1.000000 | 3 | 27.0000 | 32.5 | 29.750000 | 59.5000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.950000 | 35.8500 |
270 | 2 | 1 | Troutt, Miss. Edwina Celia "Winnie" | female | 27.0000 | 0 | 0 | 10.5000 | E101 | S | 2 | 2 | 2 | 6 | 1 | 1 | 1.000000 | 3 | 27.0000 | 32.5 | 29.750000 | 59.5000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.950000 | 35.8500 |
271 | 2 | 1 | Webber, Miss. Susan | female | 32.5000 | 0 | 0 | 13.0000 | E101 | S | 2 | 2 | 2 | 6 | 1 | 1 | 1.000000 | 3 | 27.0000 | 32.5 | 29.750000 | 59.5000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 13.0000 | 11.950000 | 35.8500 |
272 | 2 | 0 | Mack, Mrs. (Mary) | female | 57.0000 | 0 | 0 | 10.5000 | E77 | S | 2 | 2 | 2 | 2 | 0 | 0 | 0.000000 | 0 | 57.0000 | 57.0 | 57.000000 | 57.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 10.5000 | 10.5000 | 10.500000 | 10.5000 |
273 | 2 | 1 | Navratil, Master. Edmond Roger | male | 2.0000 | 1 | 1 | 26.0000 | F2 | S | 2 | 2 | 2 | 8 | 0 | 1 | 0.500000 | 2 | 2.0000 | 36.5 | 16.875000 | 67.5000 | 0 | 1 | 0.500000 | 2 | 0 | 2 | 1.0 | 4 | 13.0000 | 26.0000 | 22.750000 | 91.0000 |
274 | 2 | 1 | Navratil, Master. Michel M | male | 3.0000 | 1 | 1 | 26.0000 | F2 | S | 2 | 2 | 2 | 8 | 0 | 1 | 0.500000 | 2 | 2.0000 | 36.5 | 16.875000 | 67.5000 | 0 | 1 | 0.500000 | 2 | 0 | 2 | 1.0 | 4 | 13.0000 | 26.0000 | 22.750000 | 91.0000 |
275 | 2 | 0 | Navratil, Mr. Michel ("Louis M Hoffman") | male | 36.5000 | 0 | 2 | 26.0000 | F2 | S | 2 | 2 | 2 | 8 | 0 | 1 | 0.500000 | 2 | 2.0000 | 36.5 | 16.875000 | 67.5000 | 0 | 1 | 0.500000 | 2 | 0 | 2 | 1.0 | 4 | 13.0000 | 26.0000 | 22.750000 | 91.0000 |
276 | 2 | 0 | Nesson, Mr. Israel | male | 26.0000 | 0 | 0 | 13.0000 | F2 | S | 2 | 2 | 2 | 8 | 0 | 1 | 0.500000 | 2 | 2.0000 | 36.5 | 16.875000 | 67.5000 | 0 | 1 | 0.500000 | 2 | 0 | 2 | 1.0 | 4 | 13.0000 | 26.0000 | 22.750000 | 91.0000 |
277 | 2 | 1 | Nourney, Mr. Alfred ("Baron von Drachstedt") | male | 20.0000 | 0 | 0 | 13.8625 | D38 | C | 2 | 2 | 2 | 2 | 1 | 1 | 1.000000 | 1 | 20.0000 | 20.0 | 20.000000 | 20.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 13.8625 | 13.8625 | 13.862500 | 13.8625 |
278 | 2 | 0 | Swane, Mr. George | male | 18.5000 | 0 | 0 | 13.0000 | F | S | 2 | 2 | 2 | 2 | 0 | 0 | 0.000000 | 0 | 18.5000 | 18.5 | 18.500000 | 18.5000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 13.0000 | 13.0000 | 13.000000 | 13.0000 |
279 | 3 | 1 | Abelseth, Mr. Olaus Jorgensen | male | 25.0000 | 0 | 0 | 7.6500 | F G63 | S | 3 | 3 | 3 | 6 | 0 | 1 | 0.500000 | 1 | 25.0000 | 42.0 | 33.500000 | 67.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.6500 | 7.6500 | 7.650000 | 15.3000 |
280 | 3 | 0 | Humblen, Mr. Adolf Mathias Nicolai Olsen | male | 42.0000 | 0 | 0 | 7.6500 | F G63 | S | 3 | 3 | 3 | 6 | 0 | 1 | 0.500000 | 1 | 25.0000 | 42.0 | 33.500000 | 67.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.6500 | 7.6500 | 7.650000 | 15.3000 |
281 | 3 | 1 | Krekorian, Mr. Neshan | male | 25.0000 | 0 | 0 | 7.2292 | F E57 | C | 3 | 3 | 3 | 3 | 1 | 1 | 1.000000 | 1 | 25.0000 | 25.0 | 25.000000 | 25.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.2292 | 7.2292 | 7.229200 | 7.2292 |
282 | 3 | 0 | Mardirosian, Mr. Sarkis | male | NaN | 0 | 0 | 7.2292 | F E46 | C | 3 | 3 | 3 | 3 | 0 | 0 | 0.000000 | 0 | NaN | NaN | NaN | 0.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.2292 | 7.2292 | 7.229200 | 7.2292 |
283 | 3 | 0 | Moen, Mr. Sigurd Hansen | male | 25.0000 | 0 | 0 | 7.6500 | F G73 | S | 3 | 3 | 3 | 6 | 0 | 0 | 0.000000 | 0 | 19.0000 | 25.0 | 22.000000 | 44.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.6500 | 7.6500 | 7.650000 | 15.3000 |
284 | 3 | 0 | Soholt, Mr. Peter Andreas Lauritz Andersen | male | 19.0000 | 0 | 0 | 7.6500 | F G73 | S | 3 | 3 | 3 | 6 | 0 | 0 | 0.000000 | 0 | 19.0000 | 25.0 | 22.000000 | 44.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.6500 | 7.6500 | 7.650000 | 15.3000 |
285 | 3 | 1 | Moor, Master. Meier | male | 6.0000 | 0 | 1 | 12.4750 | E121 | S | 3 | 3 | 3 | 6 | 1 | 1 | 1.000000 | 2 | 6.0000 | 27.0 | 16.500000 | 33.0000 | 0 | 0 | 0.000000 | 0 | 1 | 1 | 1.0 | 2 | 12.4750 | 12.4750 | 12.475000 | 24.9500 |
286 | 3 | 1 | Moor, Mrs. (Beila) | female | 27.0000 | 0 | 1 | 12.4750 | E121 | S | 3 | 3 | 3 | 6 | 1 | 1 | 1.000000 | 2 | 6.0000 | 27.0 | 16.500000 | 33.0000 | 0 | 0 | 0.000000 | 0 | 1 | 1 | 1.0 | 2 | 12.4750 | 12.4750 | 12.475000 | 24.9500 |
287 | 3 | 1 | Peter, Miss. Anna | female | NaN | 1 | 1 | 22.3583 | F E69 | C | 3 | 3 | 3 | 3 | 1 | 1 | 1.000000 | 1 | NaN | NaN | NaN | 0.0000 | 1 | 1 | 1.000000 | 1 | 1 | 1 | 1.0 | 1 | 22.3583 | 22.3583 | 22.358300 | 22.3583 |
288 | 3 | 1 | Pickard, Mr. Berk (Berk Trembisky) | male | 32.0000 | 0 | 0 | 8.0500 | E10 | S | 3 | 3 | 3 | 3 | 1 | 1 | 1.000000 | 1 | 32.0000 | 32.0 | 32.000000 | 32.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 8.0500 | 8.0500 | 8.050000 | 8.0500 |
289 | 3 | 1 | Sandstrom, Miss. Beatrice Irene | female | 1.0000 | 1 | 1 | 16.7000 | G6 | S | 3 | 3 | 3 | 15 | 0 | 1 | 0.600000 | 3 | 1.0000 | 29.0 | 12.000000 | 60.0000 | 0 | 1 | 0.600000 | 3 | 1 | 2 | 1.2 | 6 | 10.4625 | 16.7000 | 14.205000 | 71.0250 |
290 | 3 | 1 | Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengt... | female | 24.0000 | 0 | 2 | 16.7000 | G6 | S | 3 | 3 | 3 | 15 | 0 | 1 | 0.600000 | 3 | 1.0000 | 29.0 | 12.000000 | 60.0000 | 0 | 1 | 0.600000 | 3 | 1 | 2 | 1.2 | 6 | 10.4625 | 16.7000 | 14.205000 | 71.0250 |
291 | 3 | 1 | Sandstrom, Miss. Marguerite Rut | female | 4.0000 | 1 | 1 | 16.7000 | G6 | S | 3 | 3 | 3 | 15 | 0 | 1 | 0.600000 | 3 | 1.0000 | 29.0 | 12.000000 | 60.0000 | 0 | 1 | 0.600000 | 3 | 1 | 2 | 1.2 | 6 | 10.4625 | 16.7000 | 14.205000 | 71.0250 |
292 | 3 | 0 | Strom, Miss. Telma Matilda | female | 2.0000 | 0 | 1 | 10.4625 | G6 | S | 3 | 3 | 3 | 15 | 0 | 1 | 0.600000 | 3 | 1.0000 | 29.0 | 12.000000 | 60.0000 | 0 | 1 | 0.600000 | 3 | 1 | 2 | 1.2 | 6 | 10.4625 | 16.7000 | 14.205000 | 71.0250 |
293 | 3 | 0 | Strom, Mrs. Wilhelm (Elna Matilda Persson) | female | 29.0000 | 1 | 1 | 10.4625 | G6 | S | 3 | 3 | 3 | 15 | 0 | 1 | 0.600000 | 3 | 1.0000 | 29.0 | 12.000000 | 60.0000 | 0 | 1 | 0.600000 | 3 | 1 | 2 | 1.2 | 6 | 10.4625 | 16.7000 | 14.205000 | 71.0250 |
294 | 3 | 0 | Tobin, Mr. Roger | male | NaN | 0 | 0 | 7.7500 | F38 | Q | 3 | 3 | 3 | 3 | 0 | 0 | 0.000000 | 0 | NaN | NaN | NaN | 0.0000 | 0 | 0 | 0.000000 | 0 | 0 | 0 | 0.0 | 0 | 7.7500 | 7.7500 | 7.750000 | 7.7500 |
295 rows × 34 columns