# Preparing the data for analysis
# A summary of the lecture "Analyzing Police Activity with pandas", via DataCamp
import pandas as pd
# Load './dataset/police.csv' into the DataFrame `ri`
ri = pd.read_csv('./dataset/police.csv')
# Preview the first five rows (five is the default for head())
print(ri.head())
# Report how many values are missing in each column
print(ri.isna().sum())
# Report the DataFrame's dimensions as (rows, columns)
print(ri.shape)
# Remove the 'county_name' and 'state' columns from `ri` in place
ri.drop(columns=['county_name', 'state'], inplace=True)
# Show the dimensions again now that two columns are gone
print(ri.shape)
# Re-count the missing values in each remaining column
print(ri.isna().sum())
# Discard, in place, every row that has no 'driver_gender' value
ri.dropna(axis='index', subset=['driver_gender'], inplace=True)
# Re-count the missing values in each column after the row drop
print(ri.isna().sum())
# Show the dimensions again to see how many rows remain
print(ri.shape)
# Show every column's dtype. A bare `ri.dtypes` expression is only echoed in
# an interactive session; run as a script its value is silently discarded,
# so it has to be printed explicitly.
print(ri.dtypes)
# Preview the first few 'is_arrested' values before converting them
print(ri.is_arrested.head())
# Change the data type of 'is_arrested' to 'bool'.
# NOTE(review): astype('bool') turns missing values (NaN) into True, so this
# assumes 'is_arrested' has no missing values at this point -- confirm.
ri['is_arrested'] = ri.is_arrested.astype('bool')
# Check the data type of 'is_arrested'
print(ri['is_arrested'].dtype)
# Join each row's 'stop_date' and 'stop_time' strings with a single space
combined = ri['stop_date'].str.cat(ri['stop_time'], sep=' ')
# Parse the joined strings into datetimes and store them as 'stop_datetime'
ri['stop_datetime'] = pd.to_datetime(combined)
# Show the column dtypes, which now include 'stop_datetime'
print(ri.dtypes)
# Make 'stop_datetime' the index in place; drop=True (the default) removes it
# from the regular columns once it becomes the index
ri.set_index(keys='stop_datetime', drop=True, inplace=True)
# Show the new index
print(ri.index)
# Show the columns that remain
print(ri.columns)