msa17 committed on
Commit
e7b1439
·
verified ·
1 Parent(s): ffa7f90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -246
app.py CHANGED
@@ -5,6 +5,7 @@ import altair as alt
5
  import folium
6
  from folium.plugins import HeatMap, MarkerCluster
7
  from streamlit_folium import st_folium
 
8
 
9
  @st.cache_data
10
  def load_and_preprocess_data(file_path):
@@ -23,9 +24,11 @@ def load_and_preprocess_data(file_path):
23
  for col in numeric:
24
  df[col].fillna(df[col].median(), inplace=True)
25
 
26
- categorical = ['Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
27
- 'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
28
- 'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet']
 
 
29
  for col in categorical:
30
  df[col].fillna('Unknown', inplace=True)
31
 
@@ -44,66 +47,53 @@ def load_and_preprocess_data(file_path):
44
  df['Age_Group_Drv1'] = pd.cut(df['Age_Drv1'], bins=bins, labels=labels)
45
  df['Age_Group_Drv2'] = pd.cut(df['Age_Drv2'], bins=bins, labels=labels)
46
 
 
 
 
47
  return df
48
 
49
- def create_severity_violation_chart(df, age_group=None):
50
- # Apply age group filter if selected
51
- if age_group != 'All Ages':
52
- df = df[(df['Age_Group_Drv1'] == age_group) | (df['Age_Group_Drv2'] == age_group)]
53
-
54
- # Combine violations from both drivers
55
- violations_1 = df.groupby(['Violation1_Drv1', 'Injuryseverity']).size().reset_index(name='count')
56
- violations_2 = df.groupby(['Violation1_Drv2', 'Injuryseverity']).size().reset_index(name='count')
57
-
58
- violations_1.columns = ['Violation', 'Severity', 'count']
59
- violations_2.columns = ['Violation', 'Severity', 'count']
60
-
61
- violations = pd.concat([violations_1, violations_2])
62
- violations = violations.groupby(['Violation', 'Severity'])['count'].sum().reset_index()
63
-
64
- # Create visualization
65
  fig = px.bar(
66
  violations,
67
  x='Violation',
68
- y='count',
69
- color='Severity',
70
- title=f'Crash Severity Distribution by Violation Type - {age_group}',
71
- labels={'count': 'Number of Incidents', 'Violation': 'Violation Type'},
72
  height=600
73
  )
 
74
 
75
- fig.update_layout(
76
- xaxis_tickangle=-45,
77
- legend_title='Severity Level',
78
- barmode='stack'
 
 
 
 
 
 
 
 
 
 
 
79
  )
 
80
 
81
  return fig
82
 
83
- def get_top_violations(df, age_group):
84
- if age_group == 'All Ages':
85
- violations = pd.concat([
86
- df['Violation1_Drv1'].value_counts(),
87
- df['Violation1_Drv2'].value_counts()
88
- ]).groupby(level=0).sum()
89
- else:
90
- filtered_df = df[
91
- (df['Age_Group_Drv1'] == age_group) |
92
- (df['Age_Group_Drv2'] == age_group)
93
- ]
94
- violations = pd.concat([
95
- filtered_df['Violation1_Drv1'].value_counts(),
96
- filtered_df['Violation1_Drv2'].value_counts()
97
- ]).groupby(level=0).sum()
98
-
99
- # Convert to DataFrame and format
100
- violations_df = violations.reset_index()
101
- violations_df.columns = ['Violation Type', 'Count']
102
- violations_df['Percentage'] = (violations_df['Count'] / violations_df['Count'].sum() * 100).round(2)
103
- violations_df['Percentage'] = violations_df['Percentage'].map('{:.2f}%'.format)
104
-
105
- return violations_df.head()
106
-
107
  @st.cache_data
108
  def create_map(df, selected_year):
109
  filtered_df = df[df['Year'] == selected_year]
@@ -130,35 +120,17 @@ def create_map(df, selected_year):
130
  return m
131
 
132
  def create_injuries_fatalities_chart(crash_data, unit_type):
133
-
134
- # 5th visualization title
135
- st.header("5. Total Injuries and Fatalities by Month")
136
-
137
- # Filter rows where we have valid data for all necessary columns
138
  crash_data = crash_data[['DateTime', 'Totalinjuries', 'Totalfatalities', 'Unittype_One', 'Unittype_Two']].dropna()
139
 
140
- # Convert "DateTime" to datetime type
141
  crash_data['DateTime'] = pd.to_datetime(crash_data['DateTime'], errors='coerce')
142
  crash_data['Month'] = crash_data['DateTime'].dt.month_name()
143
 
144
- # sort months in order
145
- month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
146
  crash_data['Month'] = pd.Categorical(crash_data['Month'], categories=month_order, ordered=True)
147
 
148
- # Dropdown for Unit Type selection
149
- # Dropdown for Unit Type selection
150
- # st.sidebar.selectbox("Select Unit Type", options=['Total'] + crash_data['Unittype_One'].dropna().unique().tolist()) # previous location of dropdown in sidebar
151
- # unit_type = st.selectbox("Select Unit Type", options=['Total'] + crash_data['Unittype_One'].dropna().unique().tolist())
152
- # unit_type_pairs = set()
153
- # for _, row in crash_data[['Unittype_One', 'Unittype_Two']].dropna().iterrows():
154
- # if row['Unittype_One'] != 'Driverless' or row['Unittype_Two'] != 'Driverless':
155
- # pair = ' vs '.join(sorted([row['Unittype_One'], row['Unittype_Two']]))
156
- # unit_type_pairs.add(pair)
157
- # # unit_type_pairs = list(unit_type_pairs) # modified as below to sort the dropdown options in alphabetical order
158
- # unit_type_pairs = sorted(list(unit_type_pairs))
159
- # unit_type = st.selectbox("Select Unit Type Pair", options=['Total'] + unit_type_pairs)
160
-
161
- # Filter data based on the selected unit type
162
  if unit_type == 'Total':
163
  filtered_data = crash_data
164
  else:
@@ -166,10 +138,8 @@ def create_injuries_fatalities_chart(crash_data, unit_type):
166
  filtered_data = crash_data[((crash_data['Unittype_One'] == unit_one) & (crash_data['Unittype_Two'] == unit_two)) |
167
  ((crash_data['Unittype_One'] == unit_two) & (crash_data['Unittype_Two'] == unit_one))]
168
 
169
- # Group data by month and calculate total injuries and fatalities
170
  monthly_sum = filtered_data.groupby('Month').agg({'Totalinjuries': 'sum', 'Totalfatalities': 'sum'}).reset_index()
171
 
172
- # Reshape the data for easier plotting
173
  injuries = monthly_sum[['Month', 'Totalinjuries']].rename(columns={'Totalinjuries': 'Value'})
174
  injuries['Measure'] = 'Total Injuries'
175
 
@@ -178,46 +148,6 @@ def create_injuries_fatalities_chart(crash_data, unit_type):
178
 
179
  combined_data = pd.concat([injuries, fatalities])
180
 
181
- # Originally tried to use bar chart but switched to line chart for better trend visualization
182
- # alt.Chart(monthly_sum).mark_bar().encode(
183
- # x=alt.X('Month', sort=month_order, title='Month'),
184
- # y=alt.Y('Totalinjuries', title='Total Injuries', axis=alt.Axis(titleColor='blue', labelColor='blue', tickColor='blue')),
185
- # color=alt.value('blue'),
186
- # tooltip=['Month', 'Totalinjuries']
187
- # ).properties(
188
- # title='Total Injuries and Fatalities by Month',
189
- # width=300,
190
- # height=300
191
- # ) + alt.Chart(monthly_sum).mark_bar().encode(
192
- # x=alt.X('Month', sort=month_order, title='Month'),
193
- # y=alt.Y('Totalfatalities', title='Total Fatalities', axis=alt.Axis(titleColor='red', labelColor='red', tickColor='red')),
194
- # color=alt.value('red'),
195
- # tooltip=['Month', 'Totalfatalities']
196
- # )
197
-
198
- # Tried to figure out how to plot a legend using altair
199
- # line_chart = alt.Chart(monthly_sum).mark_line(point=True).encode(
200
- # x=alt.X('Month', sort=month_order, title='Month'),
201
- # y=alt.Y('Totalinjuries', title='Total Injuries & Fatalities', axis=alt.Axis(titleColor='black')),
202
- # color=alt.value('blue'),
203
- # tooltip=['Month', 'Totalinjuries']
204
- # ).properties(
205
- # title=f'Total Injuries and Fatalities by Month for Unit Type Pair: {unit_type}',
206
- # width=600,
207
- # height=400
208
- # ) + alt.Chart(monthly_sum).mark_line(point=True).encode(
209
- # x=alt.X('Month', sort=month_order, title='Month'),
210
- # y=alt.Y('Totalfatalities', axis=alt.Axis(titleColor='red')),
211
- # color=alt.value('red'),
212
- # tooltip=['Month', 'Totalfatalities']
213
- # ).configure_legend(
214
- # titleFontSize=14,
215
- # labelFontSize=12,
216
- # titleColor='black',
217
- # labelColor='black'
218
- # )
219
-
220
- # Plot line chart
221
  line_chart = alt.Chart(combined_data).mark_line(point=True).encode(
222
  x=alt.X('Month:N', sort=month_order, title='Month'),
223
  y=alt.Y('Value:Q', title='Total Injuries & Fatalities'),
@@ -229,29 +159,15 @@ def create_injuries_fatalities_chart(crash_data, unit_type):
229
  height=400
230
  )
231
 
232
- # # Combine the charts (trying to make legend)
233
- # combined_chart = alt.layer(line_chart_injuries, line_chart_fatalities).properties(
234
- # title=f'Total Injuries and Fatalities by Month for Unit Type Pair: {unit_type}',
235
- # width=600,
236
- # height=400
237
- # ).configure_legend(
238
- # titleFontSize=14,
239
- # labelFontSize=12,
240
- # titleColor='black',
241
- # labelColor='black'
242
- # )
243
-
244
  return line_chart
245
 
246
  def create_crash_trend_chart(df, weather=None):
247
  if weather and weather != 'All Conditions':
248
  df = df[df['Weather'] == weather]
249
 
250
- # Group data by year and count unique Incident IDs
251
  trend_data = df.groupby('Year')['Incidentid'].nunique().reset_index()
252
  trend_data.columns = ['Year', 'Crash Count']
253
 
254
- # Create line graph
255
  fig = px.line(
256
  trend_data,
257
  x='Year',
@@ -268,18 +184,13 @@ def create_crash_trend_chart(df, weather=None):
268
  return fig
269
 
270
  def create_category_distribution_chart(df, selected_category, selected_year):
271
- # Filter by selected year
272
  if selected_year != 'All Years':
273
  df = df[df['Year'] == int(selected_year)]
274
 
275
- # Group by selected category and Injury Severity
276
  grouped_data = df.groupby([selected_category, 'Injuryseverity']).size().reset_index(name='Count')
277
-
278
- # Calculate percentages for each category value
279
  total_counts = grouped_data.groupby(selected_category)['Count'].transform('sum')
280
  grouped_data['Percentage'] = (grouped_data['Count'] / total_counts * 100).round(2)
281
 
282
- # Create the stacked bar chart using Plotly
283
  fig = px.bar(
284
  grouped_data,
285
  x=selected_category,
@@ -291,7 +202,6 @@ def create_category_distribution_chart(df, selected_category, selected_year):
291
  height=600,
292
  )
293
 
294
- # Customize the chart appearance
295
  fig.update_traces(texttemplate='%{text}%', textposition='inside')
296
  fig.update_layout(
297
  barmode='stack',
@@ -313,24 +223,15 @@ def main():
313
  - Nirmal Attarde
314
  - Maanas Sandeep Agrawa
315
  """)
316
-
317
 
318
  st.markdown("""
319
  ### Introduction to the Traffic Accident Dataset
320
- This dataset contains detailed information about traffic accidents in the city of **Tempe**. It includes various attributes of the accidents, such as the severity of injuries, the demographics of the drivers involved, the locations of the incidents, and the conditions at the time of the accidents. The dataset covers accidents that occurred over several years, with data on factors like **weather conditions**, **road surface conditions**, the **time of day**, and the type of **violations** (e.g., alcohol or drug use) that may have contributed to the accident.
321
-
322
- The data was sourced from **Tempe City's traffic incident reports** and provides a comprehensive view of the factors influencing road safety and accident severity in the city. By analyzing this dataset, we can gain insights into the key contributors to traffic incidents and uncover trends that could help improve traffic safety measures, urban planning, and law enforcement policies in the city.
323
  """)
324
-
325
-
326
-
327
  # Load data
328
  df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')
329
 
330
- if 'Weather' not in df.columns:
331
- df['Weather'] = 'Unknown'
332
-
333
- # Create tabs for different visualizations
334
  tab1, tab2, tab3, tab4, tab5 = st.tabs(["Crash Statistics", "Crash Map", "Crash Trend", "Crash Injuries/Fatalities","Distribution by Category"])
335
 
336
  with tab1:
@@ -338,11 +239,20 @@ def main():
338
  age_groups = ['All Ages', '16-25', '26-35', '36-45', '46-55', '56-65', '65+']
339
  selected_age = st.selectbox('Select Age Group:', age_groups)
340
 
341
- # Create and display chart
342
- fig = create_severity_violation_chart(df, selected_age)
 
 
 
343
  st.plotly_chart(fig, use_container_width=True)
344
 
345
- # Display statistics
 
 
 
 
 
 
346
  if selected_age == 'All Ages':
347
  total_incidents = len(df)
348
  else:
@@ -351,88 +261,32 @@ def main():
351
  (df['Age_Group_Drv2'] == selected_age)
352
  ])
353
 
354
- # Create two columns for statistics
355
- col1, col2 = st.columns(2)
356
-
357
- with col1:
358
- st.markdown(f"### Total Incidents")
359
- st.markdown(f"**{total_incidents:,}** incidents for {selected_age}")
360
-
361
- with col2:
362
- st.markdown("### Top Violations")
363
- top_violations = get_top_violations(df, selected_age)
364
- st.table(top_violations)
365
-
366
  with tab2:
367
- # Year selection for map
368
  years = sorted(df['Year'].unique())
369
  selected_year = st.selectbox('Select Year:', years)
370
 
371
- # Create and display map
372
  st.markdown("### Crash Location Map")
373
- map_placeholder = st.empty()
374
- with map_placeholder:
375
- m = create_map(df, selected_year)
376
- map_data = st_folium(
377
- m,
378
- width=800,
379
- height=600,
380
- key=f"map_{selected_year}",
381
- returned_objects=["null_drawing"]
382
- )
383
-
384
- st.markdown("""
385
- ### Traffic Crash Location Map
386
- This interactive map visualizes traffic accidents in Tempe for the selected year. It combines **marker clustering** and a **heatmap** to show:
387
- 1. **Accident Markers**: Red markers indicate individual accidents, with popups displaying the coordinates, date/time, and severity of each incident.
388
- 2. **Heatmap**: The heatmap highlights accident hotspots with colors ranging from blue (low frequency) to yellow (moderate) and red (high frequency), showing areas with more frequent accidents.
389
-
390
- **Key Features:**
391
- * **Interactive Year Selection**: Users can select a year to view accidents for that specific time.
392
- * **Accident Patterns**: The map reveals accident-prone areas and severity patterns, helping identify dangerous locations.
393
-
394
- **Color Scheme:**
395
- * **Red**: Individual accident markers.
396
- * **Blue to Red**: Heatmap colors indicate accident frequency, from low (blue) to high (red).
397
-
398
- This map provides insights into accident trends and can help guide safety improvements in the city.
399
- """)
400
 
401
-
402
-
403
  with tab3:
404
- # Weather condition filter
405
  weather = ['All Conditions'] + sorted(df['Weather'].unique())
406
  selected_weather = st.selectbox('Select Weather Condition:', weather)
407
 
408
- # Create and display line graph
409
  st.markdown("### Crash Trend Over Time")
410
  trend_fig = create_crash_trend_chart(df, selected_weather)
411
  st.plotly_chart(trend_fig, use_container_width=True)
412
 
413
- st.markdown("""
414
- ## **Crash Trend Over Time**
415
- This interactive line chart visualizes the trend of unique traffic crashes over the years, optionally filtered by weather conditions. It highlights how crash frequency changes over time, helping identify trends and potential contributing factors.
416
-
417
- **Key Features:**
418
- * **Time Trend Analysis**: Displays the total number of unique crashes for each year, showing long-term patterns.
419
- * **Weather Filter**: Users can filter the data by weather conditions (e.g., "Rainy", "Sunny") to analyze how weather impacts crash trends.
420
- * **Interactive Tooltips**: Hovering over data points reveals the exact crash count for each year, providing detailed insights.
421
-
422
- **Color Scheme and Design:**
423
- * **Line and Markers**: A smooth line connects data points, with prominent markers for each year to highlight trends clearly.
424
- * **Dynamic Title**: The chart updates its title to reflect the selected weather condition or "All Conditions" for the overall trend.
425
-
426
- **Insights:**
427
- This chart helps uncover:
428
- * Annual fluctuations in crash incidents.
429
- * Correlations between weather conditions and crash frequencies.
430
- * Historical patterns that can guide future safety measures and urban planning decisions
431
- """)
432
-
433
-
434
  with tab4:
435
- # Dropdown for Unit Type selection
436
  unit_type_pairs = set()
437
  for _, row in df[['Unittype_One', 'Unittype_Two']].dropna().iterrows():
438
  if row['Unittype_One'] != 'Driverless' or row['Unittype_Two'] != 'Driverless':
@@ -441,21 +295,10 @@ def main():
441
  unit_type_pairs = sorted(list(unit_type_pairs))
442
  unit_type = st.selectbox("Select Unit Type Pair", options=['Total'] + unit_type_pairs)
443
 
444
- # Create 5th Visualization: Injuries and fatalities chart
445
  injuries_fatalities_chart = create_injuries_fatalities_chart(df, unit_type)
446
  st.altair_chart(injuries_fatalities_chart, use_container_width=True)
447
- st.markdown("""
448
- This line chart shows the **total number of injuries and fatalities by month for the selected unit type pair**. The blue line represents total injuries, while the red line represents total fatalities. Observing the trends over the months can help identify any seasonal patterns or peaks in traffic incidents involving specific unit types.
449
-
450
- - **Total Injuries**: The blue line indicates how injuries vary over different months, highlighting any particular spikes or declines.
451
- - **Total Fatalities**: The red line shows the trend for fatalities, which is generally much lower compared to injuries.
452
- - **Unit Types**: The dropdown selection allows users to filter the data by specific unit type pairs (e.g., Driver vs Pedestrian) or view the overall trend across all types.
453
-
454
- This visualization aims to provide an intuitive understanding of how injuries and fatalities are distributed across the year, helping stakeholders develop targeted safety measures.
455
- """)
456
-
457
  with tab5:
458
- # Dropdown for category selection
459
  categories = [
460
  'Collisionmanner',
461
  'Lightcondition',
@@ -465,28 +308,12 @@ def main():
465
  'Gender_Drv1',
466
  ]
467
  selected_category = st.selectbox("Select Category:", categories)
468
-
469
- # Dropdown for year selection
470
  years = ['All Years'] + sorted(df['Year'].dropna().unique().astype(int).tolist())
471
  selected_year = st.selectbox("Select Year:", years)
472
-
473
- # Generate and display the distribution chart
474
  st.markdown(f"### Distribution of Incidents by {selected_category}")
475
  distribution_chart = create_category_distribution_chart(df, selected_category, selected_year)
476
  st.plotly_chart(distribution_chart, use_container_width=True)
477
 
478
- st.markdown("""
479
- ## Distribution by Category
480
- This visualization explores the distribution of traffic incidents across various categories, such as Collision Manner, Weather, Surface Condition, Alcohol Use, and Driver Gender. Each bar represents a specific category value (e.g., "Male" or "Female" for Gender), and the bars are divided into segments based on Injury Severity (e.g., Minor, Moderate, Serious, Fatal).
481
-
482
- **Key features include:**
483
- * Interactive Filters: Select a category and filter by year to analyze trends over time.
484
- * Insightful Tooltips: Hover over each segment to view the exact count and percentage of incidents for a given severity level.
485
- * Comparative Analysis: Quickly identify how different conditions or behaviors correlate with injury severity.
486
-
487
- This chart provides actionable insights into factors contributing to traffic incidents and their outcomes, helping stakeholders target interventions and improve road safety.
488
- """)
489
-
490
-
491
  if __name__ == "__main__":
492
- main()
 
5
  import folium
6
  from folium.plugins import HeatMap, MarkerCluster
7
  from streamlit_folium import st_folium
8
+ from streamlit_plotly_events import plotly_events
9
 
10
  @st.cache_data
11
  def load_and_preprocess_data(file_path):
 
24
  for col in numeric:
25
  df[col].fillna(df[col].median(), inplace=True)
26
 
27
+ categorical = [
28
+ 'Gender_Drv1', 'Violation1_Drv1', 'AlcoholUse_Drv1', 'DrugUse_Drv1',
29
+ 'Gender_Drv2', 'Violation1_Drv2', 'AlcoholUse_Drv2', 'DrugUse_Drv2',
30
+ 'Unittype_Two', 'Traveldirection_Two', 'Unitaction_Two', 'CrossStreet'
31
+ ]
32
  for col in categorical:
33
  df[col].fillna('Unknown', inplace=True)
34
 
 
47
  df['Age_Group_Drv1'] = pd.cut(df['Age_Drv1'], bins=bins, labels=labels)
48
  df['Age_Group_Drv2'] = pd.cut(df['Age_Drv2'], bins=bins, labels=labels)
49
 
50
+ if 'Weather' not in df.columns:
51
+ df['Weather'] = 'Unknown'
52
+
53
  return df
54
 
55
+ def create_violation_distribution_chart(df, selected_age='All Ages'):
56
+ # Filter by age group if needed
57
+ if selected_age != 'All Ages':
58
+ df = df[(df['Age_Group_Drv1'] == selected_age) | (df['Age_Group_Drv2'] == selected_age)]
59
+
60
+ # Combine violations
61
+ violations = pd.concat([
62
+ df['Violation1_Drv1'].value_counts(),
63
+ df['Violation1_Drv2'].value_counts()
64
+ ]).groupby(level=0).sum().reset_index()
65
+ violations.columns = ['Violation', 'Count']
66
+
 
 
 
 
67
  fig = px.bar(
68
  violations,
69
  x='Violation',
70
+ y='Count',
71
+ title=f'Number of Incidents per Violation Type - {selected_age}',
72
+ labels={'Count': 'Number of Incidents', 'Violation': 'Violation Type'},
 
73
  height=600
74
  )
75
+ fig.update_layout(clickmode='event+select', xaxis_tickangle=-45)
76
 
77
+ return fig, violations
78
+
79
+ def create_severity_distribution_for_violation(df, violation):
80
+ # Filter for the selected violation
81
+ filtered_df = df[(df['Violation1_Drv1'] == violation) | (df['Violation1_Drv2'] == violation)]
82
+ severity_count = filtered_df['Injuryseverity'].value_counts().reset_index()
83
+ severity_count.columns = ['Severity', 'Count']
84
+
85
+ fig = px.bar(
86
+ severity_count,
87
+ x='Severity',
88
+ y='Count',
89
+ title=f'Severity Distribution for {violation}',
90
+ labels={'Count': 'Number of Incidents', 'Severity': 'Injury Severity'},
91
+ height=400
92
  )
93
+ fig.update_layout(xaxis_tickangle=-45)
94
 
95
  return fig
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  @st.cache_data
98
  def create_map(df, selected_year):
99
  filtered_df = df[df['Year'] == selected_year]
 
120
  return m
121
 
122
  def create_injuries_fatalities_chart(crash_data, unit_type):
123
+ # This function remains the same as your original code for that visualization
124
+ # ...
 
 
 
125
  crash_data = crash_data[['DateTime', 'Totalinjuries', 'Totalfatalities', 'Unittype_One', 'Unittype_Two']].dropna()
126
 
 
127
  crash_data['DateTime'] = pd.to_datetime(crash_data['DateTime'], errors='coerce')
128
  crash_data['Month'] = crash_data['DateTime'].dt.month_name()
129
 
130
+ month_order = ['January', 'February', 'March', 'April', 'May', 'June',
131
+ 'July', 'August', 'September', 'October', 'November', 'December']
132
  crash_data['Month'] = pd.Categorical(crash_data['Month'], categories=month_order, ordered=True)
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  if unit_type == 'Total':
135
  filtered_data = crash_data
136
  else:
 
138
  filtered_data = crash_data[((crash_data['Unittype_One'] == unit_one) & (crash_data['Unittype_Two'] == unit_two)) |
139
  ((crash_data['Unittype_One'] == unit_two) & (crash_data['Unittype_Two'] == unit_one))]
140
 
 
141
  monthly_sum = filtered_data.groupby('Month').agg({'Totalinjuries': 'sum', 'Totalfatalities': 'sum'}).reset_index()
142
 
 
143
  injuries = monthly_sum[['Month', 'Totalinjuries']].rename(columns={'Totalinjuries': 'Value'})
144
  injuries['Measure'] = 'Total Injuries'
145
 
 
148
 
149
  combined_data = pd.concat([injuries, fatalities])
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  line_chart = alt.Chart(combined_data).mark_line(point=True).encode(
152
  x=alt.X('Month:N', sort=month_order, title='Month'),
153
  y=alt.Y('Value:Q', title='Total Injuries & Fatalities'),
 
159
  height=400
160
  )
161
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  return line_chart
163
 
164
  def create_crash_trend_chart(df, weather=None):
165
  if weather and weather != 'All Conditions':
166
  df = df[df['Weather'] == weather]
167
 
 
168
  trend_data = df.groupby('Year')['Incidentid'].nunique().reset_index()
169
  trend_data.columns = ['Year', 'Crash Count']
170
 
 
171
  fig = px.line(
172
  trend_data,
173
  x='Year',
 
184
  return fig
185
 
186
  def create_category_distribution_chart(df, selected_category, selected_year):
 
187
  if selected_year != 'All Years':
188
  df = df[df['Year'] == int(selected_year)]
189
 
 
190
  grouped_data = df.groupby([selected_category, 'Injuryseverity']).size().reset_index(name='Count')
 
 
191
  total_counts = grouped_data.groupby(selected_category)['Count'].transform('sum')
192
  grouped_data['Percentage'] = (grouped_data['Count'] / total_counts * 100).round(2)
193
 
 
194
  fig = px.bar(
195
  grouped_data,
196
  x=selected_category,
 
202
  height=600,
203
  )
204
 
 
205
  fig.update_traces(texttemplate='%{text}%', textposition='inside')
206
  fig.update_layout(
207
  barmode='stack',
 
223
  - Nirmal Attarde
224
  - Maanas Sandeep Agrawa
225
  """)
 
226
 
227
  st.markdown("""
228
  ### Introduction to the Traffic Accident Dataset
229
+ This dataset contains detailed information about traffic accidents in the city of **Tempe**. It includes various attributes of the accidents, such as the severity of injuries, the demographics of the drivers involved, the locations of the incidents, and the conditions at the time of the accidents.
 
 
230
  """)
231
+
 
 
232
  # Load data
233
  df = load_and_preprocess_data('1.08_Crash_Data_Report_(detail).csv')
234
 
 
 
 
 
235
  tab1, tab2, tab3, tab4, tab5 = st.tabs(["Crash Statistics", "Crash Map", "Crash Trend", "Crash Injuries/Fatalities","Distribution by Category"])
236
 
237
  with tab1:
 
239
  age_groups = ['All Ages', '16-25', '26-35', '36-45', '46-55', '56-65', '65+']
240
  selected_age = st.selectbox('Select Age Group:', age_groups)
241
 
242
+ # Create and display the main violation distribution chart
243
+ fig, violations = create_violation_distribution_chart(df, selected_age)
244
+
245
+ # Use plotly_events to capture click
246
+ selected_points = plotly_events(fig, click_event=True, hover_event=False, select_event=True)
247
  st.plotly_chart(fig, use_container_width=True)
248
 
249
+ # If user clicked on a bar, selected_points will contain data about that click
250
+ if selected_points:
251
+ clicked_violation = violations.iloc[selected_points[0]['pointIndex']]['Violation']
252
+ severity_fig = create_severity_distribution_for_violation(df, clicked_violation)
253
+ st.plotly_chart(severity_fig, use_container_width=True)
254
+
255
+ # Display total incidents info
256
  if selected_age == 'All Ages':
257
  total_incidents = len(df)
258
  else:
 
261
  (df['Age_Group_Drv2'] == selected_age)
262
  ])
263
 
264
+ st.markdown(f"### Total Incidents for {selected_age}")
265
+ st.markdown(f"**{total_incidents:,}** incidents")
266
+
 
 
 
 
 
 
 
 
 
267
  with tab2:
 
268
  years = sorted(df['Year'].unique())
269
  selected_year = st.selectbox('Select Year:', years)
270
 
 
271
  st.markdown("### Crash Location Map")
272
+ m = create_map(df, selected_year)
273
+ st_folium(
274
+ m,
275
+ width=800,
276
+ height=600,
277
+ key=f"map_{selected_year}",
278
+ returned_objects=["null_drawing"]
279
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
 
 
281
  with tab3:
 
282
  weather = ['All Conditions'] + sorted(df['Weather'].unique())
283
  selected_weather = st.selectbox('Select Weather Condition:', weather)
284
 
 
285
  st.markdown("### Crash Trend Over Time")
286
  trend_fig = create_crash_trend_chart(df, selected_weather)
287
  st.plotly_chart(trend_fig, use_container_width=True)
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  with tab4:
 
290
  unit_type_pairs = set()
291
  for _, row in df[['Unittype_One', 'Unittype_Two']].dropna().iterrows():
292
  if row['Unittype_One'] != 'Driverless' or row['Unittype_Two'] != 'Driverless':
 
295
  unit_type_pairs = sorted(list(unit_type_pairs))
296
  unit_type = st.selectbox("Select Unit Type Pair", options=['Total'] + unit_type_pairs)
297
 
 
298
  injuries_fatalities_chart = create_injuries_fatalities_chart(df, unit_type)
299
  st.altair_chart(injuries_fatalities_chart, use_container_width=True)
300
+
 
 
 
 
 
 
 
 
 
301
  with tab5:
 
302
  categories = [
303
  'Collisionmanner',
304
  'Lightcondition',
 
308
  'Gender_Drv1',
309
  ]
310
  selected_category = st.selectbox("Select Category:", categories)
 
 
311
  years = ['All Years'] + sorted(df['Year'].dropna().unique().astype(int).tolist())
312
  selected_year = st.selectbox("Select Year:", years)
313
+
 
314
  st.markdown(f"### Distribution of Incidents by {selected_category}")
315
  distribution_chart = create_category_distribution_chart(df, selected_category, selected_year)
316
  st.plotly_chart(distribution_chart, use_container_width=True)
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  if __name__ == "__main__":
319
+ main()