namdini committed on
Commit
29ce454
·
1 Parent(s): 56398dd

Add application file

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import altair as alt
4
+
# Streamlit app: monthly totals of crash injuries and fatalities, optionally
# restricted to crashes between one specific pair of unit types.

CSV_PATH = "1.08_Crash_Data_Report_(detail).csv"

# Calendar order: used both as the categorical ordering of the 'Month' column
# and as the explicit sort order of the chart's x-axis.
MONTH_ORDER = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# Dropdown entry meaning "do not filter by unit-type pair".
TOTAL_OPTION = 'Total'

# Source column -> legend label shown in the chart.
MEASURE_LABELS = {
    'Totalinjuries': 'Total Injuries',
    'Totalfatalities': 'Total Fatalities',
}


def prepare_crash_data(raw):
    """Return the cleaned subset of the crash report used by the chart.

    Args:
        raw: DataFrame as read from the crash report CSV. It must contain the
            columns X, Y, Incidentid, DateTime, Year, Latitude, Longitude,
            Totalinjuries, Totalfatalities, Unittype_One and Unittype_Two.

    Returns:
        A new DataFrame with DateTime (parsed), Totalinjuries,
        Totalfatalities, Unittype_One, Unittype_Two and an ordered
        categorical 'Month' column holding the month name.

    Raises:
        KeyError: if any of the required columns is missing.
    """
    # Drop duplicate columns (X, Y are the same as Latitude and Longitude)
    # -> from Janhavi's Part 1
    data = raw.drop(columns=['X', 'Y'])

    # Drop rows with missing values in critical columns -> from Janhavi's Part 1
    data = data.dropna(
        subset=['Incidentid', 'DateTime', 'Year', 'Latitude', 'Longitude']
    )

    # Keep only the columns the chart needs, and only fully populated rows.
    data = data[
        ['DateTime', 'Totalinjuries', 'Totalfatalities',
         'Unittype_One', 'Unittype_Two']
    ].dropna()

    # Unparseable timestamps become NaT; their month is then missing and the
    # row is ignored by the monthly grouping.
    timestamps = pd.to_datetime(data['DateTime'], errors='coerce')
    months = pd.Categorical(
        timestamps.dt.month_name(), categories=MONTH_ORDER, ordered=True
    )
    return data.assign(DateTime=timestamps, Month=months)


def load_crash_data(csv_path=CSV_PATH):
    """Read the crash report CSV at *csv_path* and return it cleaned."""
    return prepare_crash_data(pd.read_csv(csv_path))


def unit_type_pair_options(data):
    """Map each dropdown label to the unit-type pair it stands for.

    Pairs are unordered: ("A", "B") and ("B", "A") share the single label
    "A vs B". The pair where both units are 'Driverless' is left out.

    Args:
        data: DataFrame with 'Unittype_One' and 'Unittype_Two' columns.

    Returns:
        dict of label -> (unit_a, unit_b), ordered alphabetically by label.
    """
    # De-duplicate first so the Python-level loop only visits distinct pairs
    # instead of every crash row.
    distinct = (
        data[['Unittype_One', 'Unittype_Two']].dropna().drop_duplicates()
    )
    labelled = {}
    for unit_one, unit_two in distinct.itertuples(index=False, name=None):
        if unit_one == 'Driverless' and unit_two == 'Driverless':
            continue
        first, second = sorted((unit_one, unit_two))
        labelled[f'{first} vs {second}'] = (first, second)
    return dict(sorted(labelled.items()))


def filter_by_unit_pair(data, pair):
    """Return the rows of *data* whose two unit types match *pair*.

    Args:
        data: DataFrame with 'Unittype_One' and 'Unittype_Two' columns.
        pair: (unit_a, unit_b) tuple, matched in either order, or None to
            keep every row.
    """
    if pair is None:
        return data
    unit_a, unit_b = pair
    one, two = data['Unittype_One'], data['Unittype_Two']
    same_order = (one == unit_a) & (two == unit_b)
    swapped = (one == unit_b) & (two == unit_a)
    return data[same_order | swapped]


def monthly_totals(data):
    """Sum injuries and fatalities per month, in long form for plotting.

    Args:
        data: DataFrame with a categorical 'Month' column plus
            'Totalinjuries' and 'Totalfatalities'.

    Returns:
        DataFrame with columns Month, Value and Measure: one row per month
        and measure, all injury rows first, then all fatality rows.
    """
    # observed=False is passed explicitly so that months without any crash
    # still appear with a total of 0, independent of the pandas default.
    monthly_sum = (
        data.groupby('Month', observed=False)[list(MEASURE_LABELS)]
        .sum()
        .reset_index()
    )
    parts = [
        monthly_sum[['Month', column]]
        .rename(columns={column: 'Value'})
        .assign(Measure=label)
        for column, label in MEASURE_LABELS.items()
    ]
    return pd.concat(parts, ignore_index=True)


def build_line_chart(combined_data, unit_type):
    """Build the Altair line chart: one line per measure, months on the x-axis.

    Args:
        combined_data: long-form frame as returned by monthly_totals().
        unit_type: the selected dropdown label, shown in the chart title.
    """
    # A line chart is used rather than bars for better trend visualization.
    return alt.Chart(combined_data).mark_line(point=True).encode(
        x=alt.X('Month:N', sort=MONTH_ORDER, title='Month'),
        y=alt.Y('Value:Q', title='Total Injuries & Fatalities'),
        color=alt.Color(
            'Measure:N',
            title='',
            scale=alt.Scale(
                domain=['Total Injuries', 'Total Fatalities'],
                range=['blue', 'red'],
            ),
        ),
        tooltip=['Month', 'Measure:N', 'Value:Q'],
    ).properties(
        title=f'Total Injuries and Fatalities by Month for Unit Type Pair: {unit_type}',
        width=600,
        height=400,
    )


def main():
    """Render the app: title, unit-type-pair dropdown and the line chart."""
    st.title("Total Injuries and Fatalities by Month (Season)")

    crash_data = load_crash_data()

    pair_options = unit_type_pair_options(crash_data)
    unit_type = st.selectbox(
        "Select Unit Type Pair",
        options=[TOTAL_OPTION] + list(pair_options),
    )

    # 'Total' is not a key of pair_options, so .get() yields None = no filter.
    filtered_data = filter_by_unit_pair(crash_data, pair_options.get(unit_type))
    combined_data = monthly_totals(filtered_data)

    # Display chart in Streamlit
    st.altair_chart(
        build_line_chart(combined_data, unit_type), use_container_width=True
    )


# `streamlit run` executes this file as `__main__`, so the app still renders;
# importing the module no longer reads the CSV or issues any UI calls.
if __name__ == "__main__":
    main()