Apply( ), Map( ), & ApplyMap( )

import pandas as pd

# Load the passenger data, then turn the "?" placeholders in the age and fare
# columns into missing values so both columns can be stored as floats.
titanic = pd.read_csv("data/titanic.csv")
for numeric_col in ("age", "fare"):
    titanic[numeric_col] = (
        titanic[numeric_col].replace(["?"], [None]).astype("float")
    )

The Series apply() method

def years_to_days(yrs):
    """Convert a duration in years to days, counting 365 days per year.

    Works on anything that supports multiplication by an int, so it can be
    called on a single number or handed to ``Series.apply``.
    """
    days_per_year = 365
    return yrs * days_per_year

# Call years_to_days on each age value; the result is a new Series and is not
# assigned back to titanic.
titanic["age"].apply(years_to_days)

0       10585.0000
1         334.5955
2         730.0000
3       10950.0000
4        9125.0000
           ...    
1304     5292.5000
1305           NaN
1306     9672.5000
1307     9855.0000
1308    10585.0000
Name: age, Length: 1309, dtype: float64

# Same conversion written as arithmetic on the whole column instead of apply().
titanic["age"] * 365

0       10585.0000
1         334.5955
2         730.0000
3       10950.0000
4        9125.0000
           ...    
1304     5292.5000
1305           NaN
1306     9672.5000
1307     9855.0000
1308    10585.0000
Name: age, Length: 1309, dtype: float64

def get_age_group(age):
    """Return the age-bucket label for a single age in years.

    Buckets (upper bounds are exclusive):
        age < 2   -> "infant"
        age < 12  -> "child"
        age < 18  -> "teen"
        age < 50  -> "adult"
        otherwise -> "senior"

    A missing age (NaN or None) is labelled "unknown".
    """
    # NaN compares False against every threshold, so without this guard a
    # missing age would fall through every branch and be counted as "senior".
    if pd.isna(age):
        return "unknown"
    if age < 2:
        return "infant"
    elif age < 12:
        return "child"
    elif age < 18:
        return "teen"
    elif age < 50:
        return "adult"
    else:
        return "senior"

# Store the label get_age_group returns for each age in a new "age_group" column.
titanic["age_group"] = titanic["age"].apply(get_age_group)

# Count how many rows carry each age-group label.
titanic.age_group.value_counts()

adult     782
senior    373
child      69
teen       63
infant     22
Name: age_group, dtype: int64

# Mean of the survived column within each age group.
titanic.groupby("age_group").survived.mean()

age_group
adult     0.386189
child     0.492754
infant    0.772727
senior    0.313673
teen      0.476190
Name: survived, dtype: float64

# Mean of the survived column for every (age_group, sex) combination.
titanic.groupby(["age_group", "sex"]).survived.mean()

age_group  sex   
adult      female    0.748201
           male      0.186508
child      female    0.515152
           male      0.472222
infant     female    0.888889
           male      0.692308
senior     female    0.698276
           male      0.140078
teen       female    0.833333
           male      0.151515
Name: survived, dtype: float64

# Scale every fare by 24 with column arithmetic; the result stays numeric.
titanic["fare"] * 24

0       5072.1000
1       3637.2000
2       3637.2000
3       3637.2000
4       3637.2000
          ...    
1304     346.9008
1305     346.9008
1306     173.4000
1307     173.4000
1308     189.0000
Name: fare, Length: 1309, dtype: float64

# apply() with a lambda: scale each fare by 24 and format it as a "$"-prefixed
# string, so the resulting Series holds strings (object dtype), not floats.
titanic["fare"].apply(lambda x: f"${x*24}")

0                   $5072.1
1       $3637.2000000000003
2       $3637.2000000000003
3       $3637.2000000000003
4       $3637.2000000000003
               ...         
1304              $346.9008
1305              $346.9008
1306    $173.39999999999998
1307    $173.39999999999998
1308                 $189.0
Name: fare, Length: 1309, dtype: object

def convert_currency(num, multiplier):
    """Scale ``num`` by ``multiplier`` and return it as a "$"-prefixed string.

    The product is formatted with default string conversion, so no rounding
    is applied to the result.
    """
    converted = num * multiplier
    return f"${converted}"

# Extra positional arguments for the applied function are passed through
# args; here convert_currency is called with multiplier=24 for every fare.
titanic["fare"].apply(convert_currency, args=(24,))

0                   $5072.1
1       $3637.2000000000003
2       $3637.2000000000003
3       $3637.2000000000003
4       $3637.2000000000003
               ...         
1304              $346.9008
1305              $346.9008
1306    $173.39999999999998
1307    $173.39999999999998
1308                 $189.0
Name: fare, Length: 1309, dtype: object

The DataFrame apply() method

# Keep four numeric columns for the DataFrame.apply() examples.
df = titanic[["pclass", "survived", "age", "fare"]]

# Display the subset.
df

def get_range(s):
    """Return the spread of ``s``: its maximum minus its minimum.

    ``s`` must provide ``max()`` and ``min()`` methods (for example a pandas
    Series).
    """
    highest = s.max()
    lowest = s.min()
    return highest - lowest

# With no axis argument, get_range receives each column as a Series, giving
# one result per column.
df.apply(get_range)

pclass        2.0000
survived      1.0000
age          79.8333
fare        512.3292
dtype: float64

# axis=0 is the default, so this gives the same per-column result as omitting it.
df.apply(get_range, axis=0)

pclass        2.0000
survived      1.0000
age          79.8333
fare        512.3292
dtype: float64

# axis=1 passes each row to get_range instead, giving one result per row.
df.apply(get_range, axis=1)

0       210.3375
1       150.6333
2       151.5500
3       151.5500
4       151.5500
          ...   
1304     14.5000
1305     14.4542
1306     26.5000
1307     27.0000
1308     29.0000
Length: 1309, dtype: float64

def get_fam_size(s):
    """Label a row by the sum of its ``sibsp`` and ``parch`` fields.

    Returns "solo" when the sum is 0, "average" when it is below 5, and
    "large" otherwise.

    NOTE(review): sibsp/parch are presumably the siblings/spouses and
    parents/children counts of the Titanic dataset; confirm against the CSV.
    """
    relatives = s.sibsp + s.parch
    if relatives == 0:
        return "solo"
    return "average" if relatives < 5 else "large"

# Row-wise apply: get_fam_size receives each row and reads its sibsp and
# parch fields.
titanic.apply(get_fam_size, axis=1)

0          solo
1       average
2       average
3       average
4       average
         ...   
1304    average
1305    average
1306       solo
1307       solo
1308       solo
Length: 1309, dtype: object

# Save the family-size labels as a new "fam_size" column.
titanic["fam_size"] = titanic.apply(get_fam_size, axis=1)

# Count how many rows carry each family-size label.
titanic["fam_size"].value_counts()

solo       790
average    459
large       60
Name: fam_size, dtype: int64

# Mean of the survived column within each family-size group.
titanic.groupby("fam_size").survived.mean()

fam_size
average    0.549020
large      0.150000
solo       0.302532
Name: survived, dtype: float64

# Mean of the survived column for every (fam_size, sex) combination.
titanic.groupby(["fam_size", "sex"]).survived.mean()

fam_size  sex   
average   female    0.771429
          male      0.294393
large     female    0.296296
          male      0.030303
solo      female    0.731959
          male      0.162752
Name: survived, dtype: float64

The map() method

Only works with Series.

# Look at the raw pclass values before mapping them to labels.
titanic["pclass"]

0       1
1       1
2       1
3       1
4       1
       ..
1304    3
1305    3
1306    3
1307    3
1308    3
Name: pclass, Length: 1309, dtype: int64

# map() with a dict replaces each value by its dictionary entry; a value with
# no matching key would become NaN.
titanic["pclass"].map({1:"1st", 2:"2nd", 3:"3rd"})

0       1st
1       1st
2       1st
3       1st
4       1st
       ... 
1304    3rd
1305    3rd
1306    3rd
1307    3rd
1308    3rd
Name: pclass, Length: 1309, dtype: object

# map() also accepts a function. A missing age (NaN) yields False here,
# because NaN < 18 evaluates to False.
titanic["age"].map(lambda a: a < 18)

0       False
1        True
2        True
3       False
4       False
        ...  
1304     True
1305    False
1306    False
1307    False
1308    False
Name: age, Length: 1309, dtype: bool

The applymap() method

Only works with DataFrames.

# applymap() calls the function on every individual cell; here it upper-cases
# each value in the three selected columns.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favor of
# DataFrame.map; confirm which pandas version this notebook targets.
titanic[["name", "sex", "age_group"]].applymap(str.upper)

# Multiply every cell of df by 7, one element at a time.
df.applymap(lambda el: el * 7)

# The same multiplication written as arithmetic on the whole DataFrame.
df * 7

# Apply len to every cell of the three selected columns.
titanic[["name", "sex", "age_group"]].applymap(len)