import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
%matplotlib inline
def _load_csv(filename):
    """Read one of the input CSV files, all of which are opened as cp1252."""
    return pd.read_csv(filename, encoding='cp1252')


# Four city-level tables (later merged on 'Geographic Area' / 'City') and the
# table of individual police-killing records.
income = _load_csv('MedianHouseholdIncome2015.csv')
poverty = _load_csv('PercentagePeopleBelowPovertyLevel.csv')
highschool = _load_csv('PercentOver25CompletedHighSchool.csv')
race = _load_csv('ShareRaceByCity.csv')
killings = _load_csv('PoliceKillingsUS.csv')

# Preview the first rows of every table.
income.head()
poverty.head()
highschool.head()
race.head()
killings.head()
# Shape of each city-level data frame.
income.shape, poverty.shape, highschool.shape, race.shape

# Join the four city-level tables on their shared state/city key columns.
# No `how` is given, so each step uses the pandas default join type.
merge_keys = ['Geographic Area', 'City']
geo_info = (
    income
    .merge(poverty, on=merge_keys)
    .merge(highschool, on=merge_keys)
    .merge(race, on=merge_keys)
)
geo_info.head()

# Shorter, snake_case column names used by the rest of the analysis.
column_names = {
    'Geographic Area': 'state',
    'City': 'city',
    'Median Income': 'median_income',
    'share_white': 'white',
    'share_black': 'black',
    'share_native_american': 'native_american',
    'share_asian': 'asian',
    'share_hispanic': 'hispanic',
}
geo_info = geo_info.rename(columns=column_names)
geo_info.head()
# The identifier columns are not referenced anywhere in the analysis below.
killings = killings.drop(columns=['id', 'name'])

# Columns that must be numeric before they can be aggregated per state.
cols = [
    'median_income', 'poverty_rate', 'percent_completed_hs',
    'white', 'black', 'native_american', 'asian', 'hispanic',
]
for column in cols:
    # errors='coerce' turns any value that cannot be parsed into NaN
    # instead of raising.
    geo_info[column] = pd.to_numeric(geo_info[column], errors='coerce')
import plotly.express as px

# Number of records per state, largest first, limited to the top 20 states.
cases_per_state = killings.groupby('state')['state'].count()
busiest_states = cases_per_state.sort_values(ascending=False).to_frame().head(20)
states_with_most_killings = (
    busiest_states
    .rename(columns={'state': 'Number of Cases'})
    .reset_index()
    .rename(columns={'state': 'State'})
)

# Bar chart with the case count printed above each bar; the y axis ticks and
# grid are hidden because the bar labels already carry the values.
fig = px.bar(
    states_with_most_killings,
    x='State',
    y='Number of Cases',
    text='Number of Cases',
    template="none",
)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', marker_color='khaki')
fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    title="Number of Cases in Each State",
)
fig.update_yaxes(showticklabels=False, showgrid=False)
fig.show()
# Per-state summary of the city-level table: the median of the city median
# incomes, and the mean of every rate / race-share column.
mean_columns = [
    'poverty_rate', 'percent_completed_hs',
    'white', 'black', 'native_american', 'asian', 'hispanic',
]
aggregations = {'median_income': 'median'}
aggregations.update({column: 'mean' for column in mean_columns})

# Human-readable column titles used in the charts.
display_names = {
    'state': 'State',
    'median_income': 'Median Income',
    'poverty_rate': 'Poverty Rate',
    'percent_completed_hs': 'HS Completion Rate',
    'white': 'White',
    'black': 'Black',
    'native_american': 'Native American',
    'asian': 'Asian',
    'hispanic': 'Hispanic',
}

per_state = geo_info.groupby('state').agg(aggregations)
states_info = per_state.reset_index().rename(columns=display_names)
# One-row slices of the per-state summary for the four charted states.
CA_info = states_info.loc[states_info['State'] == 'CA']
TX_info = states_info.loc[states_info['State'] == 'TX']
FL_info = states_info.loc[states_info['State'] == 'FL']
AZ_info = states_info.loc[states_info['State'] == 'AZ']

race_labels = ['White', 'Black', 'Native American', 'Asian', 'Hispanic']
colors = ['rgb(250, 235, 235)', 'rgb(79, 96, 58)', 'rgb(34, 139, 34)', 'rgb(219, 225, 45)', 'rgb(223, 89, 125)']

# 2x2 grid of pie subplots, one per state.
fig = make_subplots(
    rows=2,
    cols=2,
    specs=[
        [{"type": "pie"}, {"type": "pie"}],
        [{"type": "pie"}, {"type": "pie"}],
    ],
    subplot_titles=['California', 'Texas', 'Florida', 'Arizona'],
    horizontal_spacing=0.08,
    vertical_spacing=0.01,
)

# (state slice, grid row, grid col, extra Pie arguments). Only the first
# trace is given the explicit palette.
pie_grid = [
    (CA_info, 1, 1, {'marker_colors': colors}),
    (TX_info, 1, 2, {}),
    (FL_info, 2, 1, {}),
    (AZ_info, 2, 2, {}),
]
for state_row, grid_row, grid_col, extra in pie_grid:
    # Slice sizes are the state's mean share for each race column.
    shares = list(state_row[race_labels].iloc[0].values)
    fig.add_trace(
        go.Pie(labels=race_labels, values=shares, **extra),
        row=grid_row,
        col=grid_col,
    )

# Donut style: a hole in the middle, no text on the slices, label and value
# on hover.
fig.update_traces(hole=.4, hoverinfo='label+value', textinfo='none')

# State names placed at the centre of each donut (paper coordinates).
# NOTE(review): assigning layout annotations here appears to replace the
# annotations make_subplots created for subplot_titles - confirm.
donut_captions = [
    ('California', 0.23, 0.735),
    ('Texas', 0.77, 0.735),
    ('Florida', 0.23, 0.23),
    ('Arizona', 0.77, 0.23),
]
fig.update_layout(
    height=1000,
    width=1000,
    title_text="Percentage of Race in Top 4 States",
    annotations=[
        dict(text=caption, x=x_pos, y=y_pos, font_size=20, showarrow=False)
        for caption, x_pos, y_pos in donut_captions
    ],
    font=dict(color='rgb(64, 64, 64)'),
)
fig.show()
# Per-state summary rows for the four charted states. The rows keep the order
# they have in states_info, which is not the order of the list passed to isin.
top4 = states_info.loc[states_info['State'].isin(['CA', 'TX', 'FL', 'AZ'])].reset_index(drop=True)

# Build the hover labels from the data, one per row of top4 and in the same
# row order. The previous hard-coded list was positional, so it could attach a
# rank and case count to the wrong state, and its numbers were not tied to
# the data at all.
cases_by_state = killings.groupby('state')['state'].count()
rank_by_state = cases_by_state.rank(ascending=False, method='min').astype(int)
poverty_hover = [
    f"Top {rank_by_state[code]}: {cases_by_state[code]} Cases"
    for code in top4['State']
]

# Switch to full state names for display, and express the rate as a fraction
# so the '%' tick format below renders it as a percentage.
top4.replace({'AZ': 'Arizona', 'CA': 'California', 'FL': 'Florida', 'TX': 'Texas'}, inplace=True)
top4['Poverty Rate'] = top4['Poverty Rate'] * 0.01

# Horizontal bar chart with a boxed reference note in the lower-right corner.
# NOTE(review): the 16% figure in the note is a fixed string, not computed
# from the data in this file - confirm its source.
fig = go.Figure(
    go.Bar(x=top4['Poverty Rate'], y=top4['State'], orientation='h'),
    layout=go.Layout(annotations=[go.layout.Annotation(
        text='Average Poverty Rate <br> 16%',
        align='center',
        showarrow=False,
        xref='paper',
        yref='paper',
        x=0.99,
        y=0.01,
        bordercolor='grey',
        borderwidth=3,
    )]),
)
fig.update_layout(title="Poverty Rate in Top 4 States", template='none', xaxis_tickformat='%',
                  margin=dict(pad=20), yaxis={'categoryorder': 'total ascending'})
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6,
                  hovertext=poverty_hover,
                  hoverinfo='text')
fig.update_xaxes(showgrid=False)
fig.show()
# Express the completion rate as a fraction so the '%' tick format below
# renders it as a percentage.
top4['HS Completion Rate'] = top4['HS Completion Rate'] * 0.01

# Build the hover labels from the data, one per row of top4 and in the same
# row order. The previous hard-coded list was positional, so it could attach a
# rank and case count to the wrong state. top4['State'] holds full state names
# at this point, so map them back to the two-letter codes used in killings.
name_to_code = {'Arizona': 'AZ', 'California': 'CA', 'Florida': 'FL', 'Texas': 'TX'}
cases_by_state = killings.groupby('state')['state'].count()
rank_by_state = cases_by_state.rank(ascending=False, method='min').astype(int)
hs_hover = [
    f"Top {rank_by_state[name_to_code[name]]}: {cases_by_state[name_to_code[name]]} Cases"
    for name in top4['State']
]

# Horizontal bar chart with a boxed reference note in the lower-right corner.
# NOTE(review): the 82% figure in the note is a fixed string, not computed
# from the data in this file - confirm its source.
fig = go.Figure(
    go.Bar(x=top4['HS Completion Rate'], y=top4['State'], orientation='h'),
    layout=go.Layout(annotations=[go.layout.Annotation(
        text='Average <br> Completion Rate <br> 82%',
        align='center',
        showarrow=False,
        xref='paper',
        yref='paper',
        x=1,
        y=0.01,
        bordercolor='grey',
        borderwidth=3,
    )]),
)
fig.update_layout(title="High School Completion Rate in Top 4 States", template='none', xaxis_tickformat='%',
                  margin=dict(pad=20), yaxis={'categoryorder': 'total ascending'})
fig.update_traces(marker_color='rgb(177,235,150)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6,
                  hovertext=hs_hover,
                  hoverinfo='text')
fig.update_xaxes(showgrid=False)
fig.show()
# Number of records for each manner of death, as a two-column table with
# capitalised category labels for display.
manner_counts = killings.groupby('manner_of_death')['date'].count()
cause_of_death = manner_counts.to_frame().reset_index()
cause_of_death = cause_of_death.rename(
    columns={'manner_of_death': 'Manner of Death', 'date': 'Count'}
)
cause_of_death = cause_of_death.replace(
    {'shot': 'Shot', 'shot and Tasered': 'Shot and Tasered'}
)

fig = px.pie(
    cause_of_death,
    names='Manner of Death',
    values='Count',
    title='Cause of Death',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig.show()
# The seven most frequent values of the 'armed' column, with their counts.
weapon_counts = killings.groupby('armed')['date'].count().sort_values(ascending=False)
armed = weapon_counts.head(7).reset_index().rename(
    columns={'armed': 'Armed Weapon', 'date': 'Count'}
)
# Title-case the weapon labels for display.
armed['Armed Weapon'] = [weapon.title() for weapon in armed['Armed Weapon']]

fig = px.pie(
    armed,
    names='Armed Weapon',
    values='Count',
    title='Types of Armed Weapons of Victims',
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
)
fig.show()
# Number of records per gender, with the single-letter codes spelled out.
gender = killings.groupby('gender').count()['date'].to_frame().reset_index()\
    .rename(columns={'gender': 'Gender', 'date': 'Count'})\
    .replace({'F': 'Female', 'M': 'Male'})

# Number of records per race code, with the codes spelled out. This table is
# deliberately NOT called `race`: that name holds the race-share table read
# from ShareRaceByCity.csv, and rebinding it would break the geo_info merge
# whenever that step is run again.
race_counts = killings.groupby('race').count()['date'].to_frame().reset_index()\
    .rename(columns={'race': 'Race', 'date': 'Count'})\
    .replace({'A': 'Asian', 'B': 'Black', 'H': 'Hispanic',
              'N': 'Native American', 'O': 'Others', 'W': 'White'})

# Two pies side by side: gender on the left, race on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "pie"}]],
                    subplot_titles=("Gender", "Race"))
fig.add_trace(go.Pie(values=list(gender['Count']), labels=list(gender['Gender'])),
              row=1, col=1)
fig.add_trace(go.Pie(values=list(race_counts['Count']), labels=list(race_counts['Race'])),
              row=1, col=2)
fig.update_layout(height=700, showlegend=False)
# Labels and percentages are drawn on the slices, so the legend is hidden.
# `colors` is the palette defined alongside the state donut charts.
fig.update_traces(hoverinfo='value', textinfo='label+percent', textfont_size=13,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.show()
# Box plot of the 'age' column with every individual point drawn next to it.
fig = (
    px.box(killings, y="age", points='all', template="none")
    .update_traces(marker_color='darkblue')
    .update_layout(title="Box Plot of Age of the Victims")
)
fig.show()
# Number of records for each value of the signs_of_mental_illness flag.
illness_counts = killings.groupby('signs_of_mental_illness')['date'].count()
mental = illness_counts.to_frame().reset_index().rename(
    columns={'signs_of_mental_illness': 'Signs of Mental Illness', 'date': 'Count'}
)

# One coloured bar per flag value; the colour legend identifies the bars, so
# the tick labels on both axes are hidden.
fig = px.bar(
    mental,
    x="Signs of Mental Illness",
    y="Count",
    color='Signs of Mental Illness',
    template='none',
)
fig.update_yaxes(showticklabels=False, showgrid=False)
fig.update_xaxes(showticklabels=False)
fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    title="Number of Cases in Mental Illness",
    height=400,
    yaxis_title=" ",
)
fig.show()
# Number of records on each distinct date.
time = killings.groupby('date').count()['manner_of_death'].to_frame().reset_index()\
    .rename(columns={'date': 'Date', 'manner_of_death': 'Count'})

# NOTE(review): pd.to_datetime infers the date format here; if the CSV stores
# day-first dates, pass dayfirst=True or an explicit format - confirm against
# the data.
time['Date'] = pd.to_datetime(time['Date'])
time.sort_values(by='Date', inplace=True)

# Roll the daily counts up to one value per month. The daily counts must be
# SUMMED: calling .count() here would only count how many distinct dates fall
# in each month, not how many records there were.
time = time.groupby(pd.Grouper(key='Date', freq='M')).sum().reset_index()

fig = px.line(time, x='Date', y='Count', template='none')
fig.update_yaxes(showgrid=False)
fig.update_layout(title="Fatal Police Shootings Time Series")
fig.show()
# Number of records per state, as a State / Count table.
records_per_state = killings.groupby('state')['date'].count()
state_k = records_per_state.reset_index().rename(columns={'state': 'State', 'date': 'Count'})

# Attach the count to the per-state summary by matching on the state code.
count_lookup = state_k.set_index('State')['Count']
states_info['Killings'] = states_info['State'].map(count_lookup)

# One marker per state: poverty rate against high-school completion rate,
# coloured by the number of records.
fig = px.scatter(
    states_info,
    x="Poverty Rate",
    y="HS Completion Rate",
    color='Killings',
    hover_data=['State', 'Killings'],
    template='none',
    color_continuous_scale=px.colors.sequential.Burg,
)
fig.update_traces(mode='markers', marker_line_width=2, marker_size=12)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(title='Poverty Rate, High School Completion Rate and Fatal Police Shootings')
fig.show()