Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Merging DataFrames & Series

pd.concat( ) with Series

import pandas as pd
s1 = pd.Series(['a', 'b', 'c'])
s2 = pd.Series(['d', 'e', 'f', 'z'])
pd.concat([s1,s2])
0 a 1 b 2 c 0 d 1 e 2 f 3 z dtype: object
pd.concat([s2, s1])
0 d 1 e 2 f 3 z 0 a 1 b 2 c dtype: object
pd.concat([s2, s1], ignore_index=True)
0 d 1 e 2 f 3 z 4 a 5 b 6 c dtype: object
nums = pd.Series([65,69,99,23,56,54])
nums2 = pd.Series([11,111,1111])
pd.concat([s2, s1, nums], ignore_index=True)
0 d 1 e 2 f 3 z 4 a 5 b 6 c 7 65 8 69 9 99 10 23 11 56 12 54 dtype: object
pd.concat([nums, nums2, s1], ignore_index=True)
0 65 1 69 2 99 3 23 4 56 5 54 6 11 7 111 8 1111 9 a 10 b 11 c dtype: object

pd.concat( ) with Series by Index

c1 = pd.Series(["red", "orange", "yellow"])
c2 = pd.Series(["green", "blue", "purple"])
pd.concat([c1,c2])
0 red 1 orange 2 yellow 0 green 1 blue 2 purple dtype: object
pd.concat([c1,c2], axis=1)
Loading...
fruits = pd.Series(
    data=["apple", "banana", "cherry"], 
    index=["a","b", "c"]
)

animals = pd.Series(
    data=["badger", "cougar", "anaconda"], 
    index=["b", "c", "a"]
)
fruits
a apple b banana c cherry dtype: object
animals
b badger c cougar a anaconda dtype: object
pd.concat([fruits, animals], axis=1)
Loading...
pd.concat([fruits, animals], axis=1, keys=["fruit", "animal"])
Loading...
pd.concat([fruits, animals], axis=0, keys=["fruit", "animal"])
fruit a apple b banana c cherry animal b badger c cougar a anaconda dtype: object

pd.concat( ) with DataFrames

fruits = pd.Series(
    data=["apple", "banana", "cherry", "durian"], 
    index=["a","b", "c", "d"]
)

animals = pd.Series(
    data=["badger", "cougar", "anaconda", "elk", "pika"], 
    index=["b", "c", "a", "e", "p"]
)
pd.concat([animals, fruits], ignore_index=True)
0 badger 1 cougar 2 anaconda 3 elk 4 pika 5 apple 6 banana 7 cherry 8 durian dtype: object
pd.concat([animals, fruits], axis=1)
Loading...
pd.concat([animals, fruits], axis=1, join="inner")
Loading...
harvest_21 = pd.DataFrame(
    [['potatoes', 900], ['garlic', 1350], ['onions', 875]], 
    columns=['crop', 'qty']
)

harvest_22 = pd.DataFrame(
    [['garlic', 1600], ['spinach', 560], ['turnips', 999], ['onions', 1000]], 
    columns=['crop', 'qty']
)
harvest_21
Loading...
harvest_22
Loading...
pd.concat([harvest_21, harvest_22], ignore_index=True)
Loading...
pd.concat([harvest_21, harvest_22], keys=[2021, 2022])
Loading...
harvest_23 = pd.DataFrame(
    [['potatoes', 900, 500], ['garlic', 1350, 1200], ['onions', 875, 950]], 
    columns=['crop', 'qty', 'profit']
)
pd.concat([harvest_21, harvest_22, harvest_23])
Loading...
pd.concat([harvest_21, harvest_22, harvest_23], join="inner")
Loading...
livestock = pd.DataFrame(
    [['pasture', 9], ['stable', 3], ['coop', 34]], 
    columns=['location', 'qty'],
    index=['alpaca', 'horse', 'chicken']
)
weights = pd.DataFrame(
    [[4,10], [900, 2000], [1.2, 4], [110, 150]], 
    columns=['min_weight', 'max_weight'],
    index=['chicken', 'horse', 'duck', 'alpaca']
)
livestock
Loading...
weights
Loading...
pd.concat([livestock, weights])
Loading...
pd.concat([livestock, weights], axis=1)
Loading...
pd.concat([livestock, weights], axis=1, join="inner")
Loading...

Combining DataFrames With Merge

teams = pd.DataFrame(
    [
        ["Suns", "Phoenix", 20, 4], 
        ["Mavericks", "Dallas", 11,12], 
        ["Rockets", "Houston", 7, 16],
        ['Nuggets', "Denver", 11, 12]
    ], 
    columns=["team", "city", "wins", "losses"]
)
cities = pd.DataFrame(
    [
        ["Houston", "Texas", 2310000], 
        ["Phoenix", "Arizona", 1630000], 
        ["San Diego", "California", 1410000],
        ["Dallas", "Texas", 1310000]
    ],
    columns=["city", "state", "population"]
)
teams
Loading...
cities
Loading...
teams.merge(cities)
Loading...
teams.merge(cities, on="city")
Loading...
teams.merge(cities, on="city", how="inner")
Loading...
teams.merge(cities, how="left")
Loading...
teams.merge(cities, how="right")
Loading...
teams.merge(cities, how="outer")
Loading...
cities.merge(teams, on="city", how="left")
Loading...
teams.merge(cities, on="city", how="right")
Loading...
midterms = pd.DataFrame(
    [['alex', 'padilla', 92], ['rayna', 'wilson', 83], ['juan', 'gomez', 78], ['angela', 'smith', 66],['stephen', 'yu', 98]], 
    columns=['first', 'last', 'score']
)
finals = pd.DataFrame(
    [['alex','padilla', 97, False], ['rayna', 'wilson', 88, False], ['alex', 'smith', 86, True], ['juan', 'gomez', 71, True], ['stephen', 'yu', 78, False], ['sakura', 'steel', 100, True]], 
    columns=['first', 'last','score', 'extra_credit']
)
midterms
Loading...
finals
Loading...
midterms.merge(finals, on="score")
Loading...
midterms.merge(finals, on="first")
Loading...
midterms.merge(finals, on=["first", "last"], how="inner")
Loading...
midterms.merge(finals, on=["first", "last"], how="inner", suffixes=("_midterms", "_finals"))
Loading...
combo = midterms.merge(finals, on=["first", "last"], how="inner", suffixes=("_m", "_f"))
combo["avg"] = (combo["score_m"] + combo["score_f"])/2
combo
Loading...
combo.loc[combo["extra_credit"] == True, 'avg'] += 5
combo
Loading...