import pandas as pd
titanic = pd.read_csv("data/titanic.csv")
titanic['age'] = titanic['age'].replace(['?'], [None]).astype('float')
titanic['fare'] = titanic['fare'].replace(['?'], [None]).astype('float')
Upper( ) and Lower( )¶
titanic["name"].str.upper()
0 ALLEN, MISS. ELISABETH WALTON
1 ALLISON, MASTER. HUDSON TREVOR
2 ALLISON, MISS. HELEN LORAINE
3 ALLISON, MR. HUDSON JOSHUA CREIGHTON
4 ALLISON, MRS. HUDSON J C (BESSIE WALDO DANIELS)
...
1304 ZABOUR, MISS. HILENI
1305 ZABOUR, MISS. THAMINE
1306 ZAKARIAN, MR. MAPRIEDEDER
1307 ZAKARIAN, MR. ORTIN
1308 ZIMMERMAN, MR. LEO
Name: name, Length: 1309, dtype: object
titanic["name"]
0 Allen, Miss. Elisabeth Walton
1 Allison, Master. Hudson Trevor
2 Allison, Miss. Helen Loraine
3 Allison, Mr. Hudson Joshua Creighton
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
...
1304 Zabour, Miss. Hileni
1305 Zabour, Miss. Thamine
1306 Zakarian, Mr. Mapriededer
1307 Zakarian, Mr. Ortin
1308 Zimmerman, Mr. Leo
Name: name, Length: 1309, dtype: object
titanic["lower_name"] = titanic["name"].str.lower()
titanic
Loading...
titanic.lower_name.str.capitalize()
0 Allen, miss. elisabeth walton
1 Allison, master. hudson trevor
2 Allison, miss. helen loraine
3 Allison, mr. hudson joshua creighton
4 Allison, mrs. hudson j c (bessie waldo daniels)
...
1304 Zabour, miss. hileni
1305 Zabour, miss. thamine
1306 Zakarian, mr. mapriededer
1307 Zakarian, mr. ortin
1308 Zimmerman, mr. leo
Name: lower_name, Length: 1309, dtype: object
String Indexing¶
titanic["cabin"].str[0]
0 B
1 C
2 C
3 C
4 C
..
1304 ?
1305 ?
1306 ?
1307 ?
1308 ?
Name: cabin, Length: 1309, dtype: object
titanic["deck"] = titanic["cabin"].str[0]
titanic
Loading...
titanic.groupby("deck").mean()
Loading...
titanic.groupby("deck")["survived"].mean().sort_values().plot(kind="bar")
Strip( )¶
s = pd.Series(['1. Hawk. ', '2. Pickle!\n', '3. Melonhead?\t'])
s
0 1. Hawk.
1 2. Pickle!\n
2 3. Melonhead?\t
dtype: object
s.str.strip()
0 1. Hawk.
1 2. Pickle!
2 3. Melonhead?
dtype: object
s.str.strip(to_strip="123. \n \t")
0 Hawk
1 Pickle!
2 Melonhead?
dtype: object
s.str.rstrip()
0 1. Hawk.
1 2. Pickle!
2 3. Melonhead?
dtype: object
s.str.lstrip()
0 1. Hawk.
1 2. Pickle!\n
2 3. Melonhead?\t
dtype: object
s.str.strip(to_strip="123.")
0 Hawk.
1 Pickle!\n
2 Melonhead?\t
dtype: object
Split( )¶
titanic["home.dest"].str.split("/", expand=True)
Loading...
titanic["home"] = titanic["home.dest"].str.split("/", expand=True)[0]
titanic
Loading...
titanic["destination"] = titanic["home.dest"].str.split("/", expand=True)[1]
titanic
Loading...
titanic["home"].value_counts()
? 564
New York, NY 64
Cornwall 19
London 14
London 13
...
Middleburg Heights, OH 1
Barre, Co Washington, VT 1
Oslo, Norway Bayonne, NJ 1
England Oglesby, IL 1
?Havana, Cuba 1
Name: home, Length: 339, dtype: int64
titanic["destination"].value_counts()
Akron, OH 11
Montreal, PQ 9
New York, NY 9
Detroit, MI 6
Cooperstown, NY 5
..
Pennsylvania 1
Greenwich CT 1
Arlington, NJ 1
Toledo, OH 1
Birmingham 1
Name: destination, Length: 77, dtype: int64
titanic["home.dest"].str.split("/", n=1, expand=True)
Loading...
Replace( )¶
ufos = pd.read_csv("data/nuforc_reports.csv")
ufos
Loading...
ufos["duration"]
0 5 seconds
1 3-5 seconds
2 NaN
3 10 seconds
4 2 minutes
...
88120 3 minutes
88121 20 seconds
88122 20 seconds
88123 2 minutes
88124 3 minutes
Name: duration, Length: 88125, dtype: object
ufos["duration"].str.replace("seconds", "s")
0 5 s
1 3-5 s
2 NaN
3 10 s
4 2 minutes
...
88120 3 minutes
88121 20 s
88122 20 s
88123 2 minutes
88124 3 minutes
Name: duration, Length: 88125, dtype: object
ufos["duration"].str.replace("seconds|minutes", "", regex=True)
0 5
1 3-5
2 NaN
3 10
4 2
...
88120 3
88121 20
88122 20
88123 2
88124 3
Name: duration, Length: 88125, dtype: objectdef abbrv(reo):
return reo.group()[0]ufos["duration"].str.replace("seconds|minutes|hours", abbrv)<ipython-input-269-dce69a0bac36>:1: FutureWarning: The default value of regex will change from True to False in a future version.
ufos["duration"].str.replace("seconds|minutes|hours", abbrv)
0 5 s
1 3-5 s
2 NaN
3 10 s
4 2 m
...
88120 3 m
88121 20 s
88122 20 s
88123 2 m
88124 3 m
Name: duration, Length: 88125, dtype: object
Contains( )¶
ufos["duration"].str.contains("hour")
0 False
1 False
2 NaN
3 False
4 False
...
88120 False
88121 False
88122 False
88123 False
88124 False
Name: duration, Length: 88125, dtype: object
ufos[ufos["duration"].str.contains("day|week|month", na=False)]
Loading...