Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Apply( ), Map( ), & ApplyMap( )

import pandas as pd
# Load the Titanic passenger table from a CSV file relative to the working directory.
titanic = pd.read_csv("data/titanic.csv")
# "age" and "fare" contain "?" entries (presumably the file's marker for missing
# values -- confirm against the CSV). Swap them for None so that each column can
# be cast to float.
titanic["age"] = titanic["age"].replace(["?"], [None]).astype("float")
titanic["fare"] = titanic["fare"].replace(["?"], [None]).astype("float")

The Series apply( ) method

def years_to_days(yrs):
    """Convert a duration in years to days, counting 365 days per year.

    Works on anything that supports multiplication by an int, so it can be
    passed to ``Series.apply`` or called on a scalar.
    """
    days_per_year = 365
    return yrs * days_per_year

titanic["age"].apply(years_to_days)
0 10585.0000 1 334.5955 2 730.0000 3 10950.0000 4 9125.0000 ... 1304 5292.5000 1305 NaN 1306 9672.5000 1307 9855.0000 1308 10585.0000 Name: age, Length: 1309, dtype: float64
titanic["age"] * 365
0 10585.0000 1 334.5955 2 730.0000 3 10950.0000 4 9125.0000 ... 1304 5292.5000 1305 NaN 1306 9672.5000 1307 9855.0000 1308 10585.0000 Name: age, Length: 1309, dtype: float64
def get_age_group(age):
    """Classify an age in years into a named age bracket.

    Brackets (upper bounds are exclusive):
        age < 2   -> "infant"
        age < 12  -> "child"
        age < 18  -> "teen"
        age < 50  -> "adult"
        otherwise -> "senior"

    A missing age (``None`` or NaN) yields "unknown". Every comparison with
    NaN is False, so without this guard a missing age would fall through all
    the branches and be counted as "senior".

    Args:
        age: Age in years, or ``None``/NaN when the age is not known.

    Returns:
        One of "infant", "child", "teen", "adult", "senior" or "unknown".
    """
    # NaN is the only float that is not equal to itself, which makes this a
    # dependency-free missing-value check.
    if age is None or age != age:
        return "unknown"
    if age < 2:
        return "infant"
    elif age < 12:
        return "child"
    elif age < 18:
        return "teen"
    elif age < 50:
        return "adult"
    else:
        return "senior"
titanic["age_group"] = titanic["age"].apply(get_age_group)
titanic.age_group.value_counts()
adult 782 senior 373 child 69 teen 63 infant 22 Name: age_group, dtype: int64
titanic.groupby("age_group").survived.mean()
age_group adult 0.386189 child 0.492754 infant 0.772727 senior 0.313673 teen 0.476190 Name: survived, dtype: float64
titanic.groupby(["age_group", "sex"]).survived.mean()
age_group sex adult female 0.748201 male 0.186508 child female 0.515152 male 0.472222 infant female 0.888889 male 0.692308 senior female 0.698276 male 0.140078 teen female 0.833333 male 0.151515 Name: survived, dtype: float64
titanic["fare"] * 24
0 5072.1000 1 3637.2000 2 3637.2000 3 3637.2000 4 3637.2000 ... 1304 346.9008 1305 346.9008 1306 173.4000 1307 173.4000 1308 189.0000 Name: fare, Length: 1309, dtype: float64
titanic["fare"].apply(lambda x: f"${x*24}")
0 $5072.1 1 $3637.2000000000003 2 $3637.2000000000003 3 $3637.2000000000003 4 $3637.2000000000003 ... 1304 $346.9008 1305 $346.9008 1306 $173.39999999999998 1307 $173.39999999999998 1308 $189.0 Name: fare, Length: 1309, dtype: object
def convert_currency(num, multiplier):
    """Scale *num* by *multiplier* and return it as a "$"-prefixed string.

    The product is rendered with its default string formatting, so no
    rounding is applied to the result.
    """
    converted = num * multiplier
    return "${}".format(converted)
titanic["fare"].apply(convert_currency, args=(24,))
0 $5072.1 1 $3637.2000000000003 2 $3637.2000000000003 3 $3637.2000000000003 4 $3637.2000000000003 ... 1304 $346.9008 1305 $346.9008 1306 $173.39999999999998 1307 $173.39999999999998 1308 $189.0 Name: fare, Length: 1309, dtype: object

The DataFrame apply( ) method

df = titanic[["pclass", "survived", "age", "fare"]]
df
Loading...
def get_range(s):
    """Return the spread of *s*: its maximum minus its minimum.

    *s* must provide ``max()`` and ``min()`` methods; ``DataFrame.apply``
    passes one column (axis=0) or one row (axis=1) at a time.
    """
    highest = s.max()
    lowest = s.min()
    return highest - lowest
df.apply(get_range)
pclass 2.0000 survived 1.0000 age 79.8333 fare 512.3292 dtype: float64
df.apply(get_range, axis=0)
pclass 2.0000 survived 1.0000 age 79.8333 fare 512.3292 dtype: float64
df.apply(get_range, axis=1)
0 210.3375 1 150.6333 2 151.5500 3 151.5500 4 151.5500 ... 1304 14.5000 1305 14.4542 1306 26.5000 1307 27.0000 1308 29.0000 Length: 1309, dtype: float64
def get_fam_size(s):
    """Label a row by the size of the family group it records.

    The group size is the sum of the row's ``sibsp`` and ``parch`` fields:
    0 gives "solo", 1 to 4 gives "average", and anything else gives "large".
    Intended for ``DataFrame.apply(..., axis=1)``, where *s* is one row.
    """
    companions = s.sibsp + s.parch
    if companions == 0:
        return "solo"
    if companions < 5:
        return "average"
    return "large"
titanic.apply(get_fam_size, axis=1)
0 solo 1 average 2 average 3 average 4 average ... 1304 average 1305 average 1306 solo 1307 solo 1308 solo Length: 1309, dtype: object
titanic["fam_size"] = titanic.apply(get_fam_size, axis=1)
titanic["fam_size"].value_counts()
solo 790 average 459 large 60 Name: fam_size, dtype: int64
titanic.groupby("fam_size").survived.mean()
fam_size average 0.549020 large 0.150000 solo 0.302532 Name: survived, dtype: float64
titanic.groupby(["fam_size", "sex"]).survived.mean()
fam_size sex average female 0.771429 male 0.294393 large female 0.296296 male 0.030303 solo female 0.731959 male 0.162752 Name: survived, dtype: float64

The map( ) method

Only works with Series.

titanic["pclass"]
0 1 1 1 2 1 3 1 4 1 .. 1304 3 1305 3 1306 3 1307 3 1308 3 Name: pclass, Length: 1309, dtype: int64
titanic["pclass"].map({1:"1st", 2:"2nd", 3:"3rd"})
0 1st 1 1st 2 1st 3 1st 4 1st ... 1304 3rd 1305 3rd 1306 3rd 1307 3rd 1308 3rd Name: pclass, Length: 1309, dtype: object
titanic["age"].map(lambda a: a < 18)
0 False 1 True 2 True 3 False 4 False ... 1304 True 1305 False 1306 False 1307 False 1308 False Name: age, Length: 1309, dtype: bool

The applymap( ) method

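Only works with DataFrames. Note: `DataFrame.applymap` is deprecated since pandas 2.1.0; newer versions provide the same element-wise behavior as `DataFrame.map`.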

titanic[["name", "sex", "age_group"]].applymap(str.upper)
Loading...
df.applymap(lambda el: el * 7)
Loading...
df * 7
Loading...
titanic[["name", "sex", "age_group"]].applymap(len)
Loading...