Fatal Police Shootings in the US¶

EDA¶

State with the most fatal police shootings
- State with median income, poverty rate, percentage of high school completion
- Race percentages
Most common manner of death
Armed or not
Gender and race
Age of the victims
Signs of mental illness and fleeing rate
Time period of incidents

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
%matplotlib inline

# Read the five source tables, in this order: median income, poverty rate,
# high-school completion, race share by city, and the shootings records.
# Every file is decoded as cp1252.
income, poverty, highschool, race, killings = (
    pd.read_csv(csv_path, encoding='cp1252')
    for csv_path in (
        'MedianHouseholdIncome2015.csv',
        'PercentagePeopleBelowPovertyLevel.csv',
        'PercentOver25CompletedHighSchool.csv',
        'ShareRaceByCity.csv',
        'PoliceKillingsUS.csv',
    )
)

# Preview the first rows of each table.
income.head()

poverty.head()

highschool.head()

race.head()

killings.head()

Data Cleaning¶

1. Combining¶

Since the income, poverty and highschool dataframes cover the same geographic areas, we can combine them into one dataframe.¶

# (rows, columns) of each source table, in the order
# income, poverty, highschool, race
tuple(frame.shape for frame in (income, poverty, highschool, race))

((29322, 3), (29329, 3), (29329, 3), (29268, 7))

Merge the data frames using geographic area code and city name.¶

# Join the income, poverty, high-school and race-share tables on the
# (state code, city name) pair, one table at a time. merge() defaults to an
# inner join, so only cities present in every table are kept.
join_keys = ['Geographic Area', 'City']

geo_info = income
for other_table in (poverty, highschool, race):
    geo_info = geo_info.merge(other_table, on=join_keys)

geo_info.head()

Let's rename columns.¶

# Short snake_case names for the key columns; the race-share columns simply
# lose their 'share_' prefix.
column_names = {'Geographic Area': 'state', 'City': 'city', 'Median Income': 'median_income'}
for group in ('white', 'black', 'native_american', 'asian', 'hispanic'):
    column_names['share_' + group] = group

geo_info.rename(columns=column_names, inplace=True)

geo_info.head()

Drop unnecessary columns such as ID and Name, since the index already identifies each row.¶

# Remove the identifier columns in place. errors='ignore' keeps this cell safe
# to re-run: once the columns are gone, a second drop would otherwise raise
# KeyError.
killings.drop(columns=['id', 'name'], inplace=True, errors='ignore')

2. Type Change¶

# Columns of geo_info to convert to a numeric dtype.
cols = ['median_income', 'poverty_rate', 'percent_completed_hs',
        'white', 'black', 'native_american', 'asian', 'hispanic']

# errors='coerce' turns any value that cannot be parsed as a number into NaN
# instead of raising.
for numeric_col in cols:
    geo_info[numeric_col] = pd.to_numeric(geo_info[numeric_col], errors='coerce')

EDA¶

1. State with the Most Fatal Police Shootings¶

# Number of shootings per state, largest first, limited to the top 20 states.
# Result columns: 'State' and 'Number of Cases'.
states_with_most_killings = (
    killings.groupby('state')['state'].count()
    .sort_values(ascending=False)
    .head(20)
    .rename('Number of Cases')
    .rename_axis('State')
    .reset_index()
)

import plotly.express as px

fig = px.bar(states_with_most_killings, y='Number of Cases', x='State', text='Number of Cases',
             template="none")
# Label each bar with its exact count. A '.2s' number format would round the
# label to two significant digits (424 -> "420"), and because the y-axis tick
# labels are hidden below, the bar labels are the only numeric readout.
fig.update_traces(texttemplate='%{text}', textposition='outside', marker_color='khaki')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_yaxes(showticklabels=False, showgrid=False)
fig.update_layout(title="Number of Cases in Each State")
fig.show()

a) Race¶

# Per-state summary of the city-level table: median of median income, and the
# plain (unweighted) mean over cities for every other measure.
state_aggregations = {'median_income': 'median'}
for mean_col in ('poverty_rate', 'percent_completed_hs',
                 'white', 'black', 'native_american', 'asian', 'hispanic'):
    state_aggregations[mean_col] = 'mean'

# Presentation names used by the charts further down.
display_names = {
    'state': 'State',
    'median_income': 'Median Income',
    'poverty_rate': 'Poverty Rate',
    'percent_completed_hs': 'HS Completion Rate',
    'white': 'White',
    'black': 'Black',
    'native_american': 'Native American',
    'asian': 'Asian',
    'hispanic': 'Hispanic',
}

states_info = (
    geo_info.groupby('state')
    .agg(state_aggregations)
    .reset_index()
    .rename(columns=display_names)
)

# One-row slices of states_info for the four states charted below.
CA_info, TX_info, FL_info, AZ_info = (
    states_info.loc[states_info['State'] == state_code]
    for state_code in ('CA', 'TX', 'FL', 'AZ')
)

# 2 x 2 grid of pie cells, one per state.
fig = make_subplots(rows=2, cols=2,
                    specs=[[{"type": "pie"} for _ in range(2)] for _ in range(2)],
                    subplot_titles=['California', 'Texas', 'Florida', 'Arizona'],
                    horizontal_spacing=0.08, vertical_spacing=0.01)

# Palette reused by a later chart as well; keep the name and order.
colors = ['rgb(250, 235, 235)', 'rgb(79, 96, 58)', 'rgb(34, 139, 34)', 'rgb(219, 225, 45)', 'rgb(223, 89, 125)']

race_labels = ['White', 'Black', 'Native American', 'Asian', 'Hispanic']


def race_share_pie(state_row, **pie_options):
    """Build a pie trace of the five race shares held in a one-row state frame."""
    return go.Pie(labels=race_labels,
                  values=list(state_row[race_labels].iloc[0].values),
                  **pie_options)


# Only the first trace is given the palette explicitly, as before.
fig.add_trace(race_share_pie(CA_info, marker_colors=colors), row=1, col=1)
fig.add_trace(race_share_pie(TX_info), row=1, col=2)
fig.add_trace(race_share_pie(FL_info), row=2, col=1)
fig.add_trace(race_share_pie(AZ_info), row=2, col=2)

# Donut style: 40% hole, no text on the slices, label and value on hover.
fig.update_traces(hole=.4, hoverinfo='label+value', textinfo='none')

# State captions at hand-tuned paper coordinates.
# NOTE(review): subplot_titles above already creates one annotation per
# panel, and this list appears to update those in place rather than add new
# ones - confirm before removing either of the two.
donut_captions = [('California', 0.23, 0.735), ('Texas', 0.77, 0.735),
                  ('Florida', 0.23, 0.23), ('Arizona', 0.77, 0.23)]
fig.update_layout(height=1000, width=1000, title_text="Percentage of Race in Top 4 States",
                  annotations=[dict(text=caption, x=x_pos, y=y_pos, font_size=20, showarrow=False)
                               for caption, x_pos, y_pos in donut_captions],
                  font=dict(color='rgb(64, 64, 64)'))

fig.show()

b) Poverty Rate¶

# Summary rows for the four highlighted states. isin() keeps states_info's row
# order (sorted by state code from the groupby), so the rows are NOT ordered by
# number of cases.
top4 = states_info.loc[states_info['State'].isin(['CA', 'TX', 'FL', 'AZ'])].reset_index(drop=True)

# Build each bar's hover label from the shootings table, so every label shows
# the rank and count of the state it is attached to. A hand-typed list is
# matched to rows by position only and mislabels states whenever the row order
# is not the rank order.
cases_by_state = killings['state'].value_counts()
case_rank = cases_by_state.rank(ascending=False, method='min').astype(int)
poverty_hover = ["Top {}: {} Cases".format(case_rank[code], cases_by_state[code])
                 for code in top4['State']]

# Full state names for the axis labels.
top4['State'] = top4['State'].replace({'AZ': 'Arizona', 'CA': 'California',
                                       'FL': 'Florida', 'TX': 'Texas'})
# Percent -> fraction, because the '%' tick format below multiplies by 100.
top4['Poverty Rate'] = top4['Poverty Rate'] * 0.01

# NOTE(review): the 16% in the annotation is hard-coded text - confirm it
# still matches the data.
fig = go.Figure(go.Bar(x=top4['Poverty Rate'], y=top4['State'], orientation='h'),
                layout=go.Layout(annotations=[go.layout.Annotation(text='Average Poverty Rate <br> 16%',
                                                                   align='center',
                                                                   showarrow=False,
                                                                   xref='paper',
                                                                   yref='paper',
                                                                   x=0.99,
                                                                   y=0.01,
                                                                   bordercolor='grey',
                                                                   borderwidth=3)]))

fig.update_layout(title="Poverty Rate in Top 4 States", template='none', xaxis_tickformat='%',
                  margin=dict(pad=20), yaxis={'categoryorder': 'total ascending'})
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6,
                  hovertext=poverty_hover,
                  hoverinfo='text')
fig.update_xaxes(showgrid=False)

fig.show()

c) High School Completion Rate¶

# Percent -> fraction, because the '%' tick format below multiplies by 100.
# NOTE: this rescales top4 in place, so run this cell once per fresh top4.
top4['HS Completion Rate'] = top4['HS Completion Rate'] * 0.01

# Build each bar's hover label from the shootings table, so every label shows
# the rank and count of the state it is attached to (a hand-typed list is
# matched to rows by position only). top4['State'] may hold full names at this
# point, so map them back to the two-letter codes used in `killings`.
state_codes = {'Arizona': 'AZ', 'California': 'CA', 'Florida': 'FL', 'Texas': 'TX'}
cases_by_state = killings['state'].value_counts()
case_rank = cases_by_state.rank(ascending=False, method='min').astype(int)


def _cases_label(state):
    """Return the 'Top <rank>: <count> Cases' hover label for one state."""
    code = state_codes.get(state, state)
    return "Top {}: {} Cases".format(case_rank[code], cases_by_state[code])


hs_hover = [_cases_label(state) for state in top4['State']]

# NOTE(review): the 82% in the annotation is hard-coded text - confirm it
# still matches the data.
fig = go.Figure(go.Bar(x=top4['HS Completion Rate'], y=top4['State'], orientation='h'),
                layout=go.Layout(annotations=[go.layout.Annotation(text='Average <br> Completion Rate <br> 82%',
                                                                   align='center',
                                                                   showarrow=False,
                                                                   xref='paper',
                                                                   yref='paper',
                                                                   x=1,
                                                                   y=0.01,
                                                                   bordercolor='grey',
                                                                   borderwidth=3)]))

fig.update_layout(title="High School Completion Rate in Top 4 States", template='none', xaxis_tickformat='%',
                  margin=dict(pad=20), yaxis={'categoryorder': 'total ascending'})
fig.update_traces(marker_color='rgb(177,235,150)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6,
                  hovertext=hs_hover,
                  hoverinfo='text')
fig.update_xaxes(showgrid=False)

fig.show()

2. Most Common Manner of Death¶

# Number of records (non-null 'date') for each manner of death, with the
# category labels capitalised for display.
manner_counts = killings.groupby('manner_of_death')['date'].count()
cause_of_death = (
    manner_counts.rename('Count')
    .rename_axis('Manner of Death')
    .reset_index()
    .replace({'shot': 'Shot', 'shot and Tasered': 'Shot and Tasered'})
)

fig = px.pie(cause_of_death, values='Count', names='Manner of Death', title='Cause of Death',
             color_discrete_sequence=px.colors.sequential.RdBu)

fig.show()

3. Armed or Not?¶

# The seven most frequent values of 'armed', by number of records
# (non-null 'date'), largest first.
weapon_counts = killings.groupby('armed')['date'].count().sort_values(ascending=False)
armed = (
    weapon_counts.head(7)
    .rename('Count')
    .rename_axis('Armed Weapon')
    .reset_index()
)
# Title-case the weapon names for the legend.
armed['Armed Weapon'] = armed['Armed Weapon'].str.title()

fig = px.pie(armed, values='Count', names='Armed Weapon', title='Types of Armed Weapons of Victims',
             color_discrete_sequence=px.colors.sequential.Aggrnyl)

fig.show()

4. Gender and Race¶

# Number of records (non-null 'date') per gender code, with readable labels.
gender = killings.groupby('gender').count()['date'].to_frame().reset_index()\
                        .rename(columns={'gender': 'Gender', 'date': 'Count'})\
                        .replace({'F': 'Female', 'M': 'Male'})

# Number of records per race code, with readable labels. Stored as
# `race_counts` rather than `race`, because `race` already holds the
# ShareRaceByCity table loaded at the top of the notebook; reusing the name
# would break the merge and shape cells if they were re-run afterwards.
race_counts = killings.groupby('race').count()['date'].to_frame().reset_index()\
                        .rename(columns={'race': 'Race', 'date': 'Count'})\
                        .replace({'A': 'Asian', 'B': 'Black', 'H': 'Hispanic',
                                  'N': 'Native American', 'O': 'Others', 'W': 'White'})

# Two pies side by side: gender on the left, race on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "pie"}]],
                    subplot_titles=("Gender", "Race"))

fig.add_trace(go.Pie(values=list(gender['Count']), labels=list(gender['Gender'])),
              row=1, col=1)
fig.add_trace(go.Pie(values=list(race_counts['Count']), labels=list(race_counts['Race'])),
              row=1, col=2)

fig.update_layout(height=700, showlegend=False)
# `colors` is the five-entry palette defined with the race donut charts.
fig.update_traces(hoverinfo='value', textinfo='label+percent', textfont_size=13,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.show()

5. Age of the Victims¶

# Box plot of the 'age' column with every individual point drawn alongside.
fig = (
    px.box(killings, y="age", points='all', template="none")
    .update_traces(marker_color='darkblue')
    .update_layout(title="Box Plot of Age of the Victims")
)
fig.show()

6. Existence of Mental Illness¶

# Number of records (non-null 'date') for each value of the
# signs_of_mental_illness flag.
illness_counts = killings.groupby('signs_of_mental_illness')['date'].count()
mental = (
    illness_counts.rename('Count')
    .rename_axis('Signs of Mental Illness')
    .reset_index()
)

# One coloured bar per flag value; both axes hide their tick labels, so the
# colour legend is what identifies each bar.
fig = (
    px.bar(mental, x="Signs of Mental Illness", y="Count", color='Signs of Mental Illness',
           template='none')
    .update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    .update_yaxes(showticklabels=False, showgrid=False)
    .update_xaxes(showticklabels=False)
    .update_layout(title="Number of Cases in Mental Illness", height=400,
                   yaxis_title=" ")
)
fig.show()

7. Time Period of Fatal Police Shootings¶

# Incidents per calendar day (records with a non-null manner_of_death).
time = killings.groupby('date')['manner_of_death'].count().reset_index()\
                            .rename(columns={'date': 'Date', 'manner_of_death': 'Count'})

# The raw strings are day-first (DD/MM/YY): the data preview shows consecutive
# records dated 02/01/15, 03/01/15, 04/01/15. Without an explicit format
# pandas would read them month-first and place incidents in the wrong month.
time['Date'] = pd.to_datetime(time['Date'], format='%d/%m/%y')

# Monthly totals: SUM the daily counts. Counting rows here would give the
# number of distinct days with an incident, not the number of incidents.
# NOTE(review): pandas >= 2.2 prefers the alias 'ME' over 'M' for month-end.
time = (
    time.sort_values(by='Date')
    .groupby(pd.Grouper(key='Date', freq='M'))['Count']
    .sum()
    .reset_index()
)

fig = px.line(time, x='Date', y='Count', template='none')
fig.update_yaxes(showgrid=False)
fig.update_layout(title="Fatal Police Shootings Time Series")
fig.show()

8. Relationship Between Poverty Rate, High School Completion Rate and Fatal Police Shootings¶

# Records per state (non-null 'date'), attached to the per-state summary as a
# new 'Killings' column. States absent from the shootings table get NaN.
cases_per_state = killings.groupby('state')['date'].count()
state_k = cases_per_state.reset_index().rename(columns={'state': 'State', 'date': 'Count'})
states_info['Killings'] = states_info['State'].map(cases_per_state)

# One marker per state: poverty rate on x, high-school completion on y,
# coloured by the number of cases.
fig = (
    px.scatter(states_info, x="Poverty Rate", y="HS Completion Rate",
               color='Killings', hover_data=['State', 'Killings'],
               template='none', color_continuous_scale=px.colors.sequential.Burg)
    .update_traces(mode='markers', marker_line_width=2, marker_size=12)
    .update_yaxes(showgrid=False)
    .update_xaxes(showgrid=False)
    .update_layout(title='Poverty Rate, High School Completion Rate and Fatal Police Shootings')
)
fig.show()

	Geographic Area	City	Median Income
0	AL	Abanda CDP	11207
1	AL	Abbeville city	25615
2	AL	Adamsville city	42575
3	AL	Addison town	37083
4	AL	Akron town	21667

	Geographic Area	City	poverty_rate
0	AL	Abanda CDP	78.8
1	AL	Abbeville city	29.1
2	AL	Adamsville city	25.5
3	AL	Addison town	30.7
4	AL	Akron town	42

	Geographic Area	City	percent_completed_hs
0	AL	Abanda CDP	21.2
1	AL	Abbeville city	69.1
2	AL	Adamsville city	78.9
3	AL	Addison town	81.4
4	AL	Akron town	68.6

	Geographic Area	City	share_white	share_black	share_native_american	share_asian	share_hispanic
0	AL	Abanda CDP	67.2	30.2	0	0	1.6
1	AL	Abbeville city	54.4	41.4	0.1	1	3.1
2	AL	Adamsville city	52.3	44.9	0.5	0.3	2.3
3	AL	Addison town	99.1	0.1	0	0.1	0.4
4	AL	Akron town	13.2	86.5	0	0	0.3

	id	name	date	manner_of_death	armed	age	gender	race	city	state	signs_of_mental_illness	threat_level	flee	body_camera
0	3	Tim Elliot	02/01/15	shot	gun	53.0	M	A	Shelton	WA	True	attack	Not fleeing	False
1	4	Lewis Lee Lembke	02/01/15	shot	gun	47.0	M	W	Aloha	OR	False	attack	Not fleeing	False
2	5	John Paul Quintero	03/01/15	shot and Tasered	unarmed	23.0	M	H	Wichita	KS	False	other	Not fleeing	False
3	8	Matthew Hoffman	04/01/15	shot	toy weapon	32.0	M	W	San Francisco	CA	True	attack	Not fleeing	False
4	9	Michael Rodriguez	04/01/15	shot	nail gun	39.0	M	H	Evans	CO	False	attack	Not fleeing	False