idk3
Browse files
app.py
CHANGED
|
@@ -45,15 +45,15 @@ NUMERIC_COLS = ['NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
|
|
| 45 |
'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
|
| 46 |
'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED']
|
| 47 |
|
| 48 |
-
# Get unique values for dropdowns
|
| 49 |
-
boroughs = ['All'] + sorted([b for b in df['BOROUGH'].dropna().unique() if str(b) != 'nan'])
|
| 50 |
-
years = ['All'] +
|
| 51 |
-
months = ['All'] + sorted(df['CRASH_MONTH'].unique())
|
| 52 |
vehicles = ['All'] + sorted(VALID_VEHICLE_TYPES + ['OTHER'])
|
| 53 |
-
person_types = ['All'] + sorted([s for s in df['PERSON_TYPE'].dropna().unique() if str(s) != 'nan'])
|
| 54 |
-
injury_types = ['All'] + sorted([t for t in df['PERSON_INJURY'].dropna().unique() if str(t) != 'nan'])
|
| 55 |
-
genders = ['All'] + sorted([g for g in df['PERSON_SEX'].dropna().unique() if str(g) != 'nan'])
|
| 56 |
-
safety_equip = ['All'] + sorted([s for s in df['SAFETY_EQUIPMENT'].dropna().unique()
|
| 57 |
if str(s) not in ['nan', 'NOT APPLICABLE', 'NOT REPORTED', 'DOES NOT APPLY']][:15])
|
| 58 |
|
| 59 |
|
|
@@ -160,12 +160,15 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 160 |
if borough != 'All':
|
| 161 |
filtered_df = filtered_df[filtered_df['BOROUGH'] == borough]
|
| 162 |
if year != 'All':
|
| 163 |
-
filtered_df = filtered_df[filtered_df['CRASH_YEAR'] == year]
|
| 164 |
if month != 'All':
|
| 165 |
-
filtered_df = filtered_df[filtered_df['CRASH_MONTH'] == month]
|
| 166 |
-
if dow_list:
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
| 169 |
if vehicle != 'All':
|
| 170 |
filtered_df = filtered_df[filtered_df['VEHICLE TYPE CODE 1'] == vehicle]
|
| 171 |
if person_type != 'All':
|
|
@@ -185,19 +188,19 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 185 |
|
| 186 |
# Summary Statistics
|
| 187 |
total_records = len(filtered_df)
|
| 188 |
-
total_injuries = filtered_df['NUMBER OF PERSONS INJURED'].sum()
|
| 189 |
-
total_fatalities = filtered_df['NUMBER OF PERSONS KILLED'].sum()
|
| 190 |
injury_rate = (total_injuries / total_records * 100) if total_records > 0 else 0
|
| 191 |
|
| 192 |
summary_text = f"""
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
"""
|
| 202 |
|
| 203 |
# Chart 1: Trend Analysis
|
|
@@ -223,10 +226,10 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 223 |
|
| 224 |
# Chart 3: Categorical Analysis
|
| 225 |
if c3_y == 'count':
|
| 226 |
-
chart3_data = filtered_df[c3_x].value_counts().head(c3_top)
|
| 227 |
y_label = 'Number of Records'
|
| 228 |
else:
|
| 229 |
-
chart3_data = filtered_df.groupby(c3_x)[c3_y].sum().sort_values(ascending=False).head(c3_top)
|
| 230 |
y_label = c3_y
|
| 231 |
|
| 232 |
fig3 = px.bar(x=chart3_data.index, y=chart3_data.values,
|
|
@@ -273,6 +276,7 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 273 |
fig6 = go.Figure()
|
| 274 |
fig6.add_annotation(text="No secondary factors", xref="paper", yref="paper",
|
| 275 |
x=0.5, y=0.5, showarrow=False)
|
|
|
|
| 276 |
|
| 277 |
# Chart 7: Injury Rate Comparison
|
| 278 |
compare_data = filtered_df.groupby(compare_cat).agg({
|
|
@@ -308,6 +312,7 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 308 |
title='Day × Hour Heatmap', template='plotly_white', height=500)
|
| 309 |
else:
|
| 310 |
fig8 = go.Figure()
|
|
|
|
| 311 |
|
| 312 |
return summary_text, fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8
|
| 313 |
|
|
@@ -331,7 +336,7 @@ with gr.Blocks(title="NYC Motor Vehicle Crashes Dashboard") as demo:
|
|
| 331 |
month = gr.Dropdown(choices=months, value='All', label="Month")
|
| 332 |
dow = gr.CheckboxGroup(choices=[('Mon', 0), ('Tue', 1), ('Wed', 2), ('Thu', 3),
|
| 333 |
('Fri', 4), ('Sat', 5), ('Sun', 6)],
|
| 334 |
-
label="Day of Week")
|
| 335 |
hour_min = gr.Slider(minimum=0, maximum=23, value=0, step=1, label="Hour Min")
|
| 336 |
hour_max = gr.Slider(minimum=0, maximum=23, value=23, step=1, label="Hour Max")
|
| 337 |
vehicle = gr.Dropdown(choices=vehicles, value='All', label="Vehicle Type 1")
|
|
|
|
| 45 |
'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
|
| 46 |
'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED']
|
| 47 |
|
| 48 |
+
# Get unique values for dropdowns - convert to native Python types
|
| 49 |
+
boroughs = ['All'] + sorted([str(b) for b in df['BOROUGH'].dropna().unique() if str(b) != 'nan'])
|
| 50 |
+
years = ['All'] + [int(y) for y in sorted(df['CRASH_YEAR'].unique())]
|
| 51 |
+
months = ['All'] + [int(m) for m in sorted(df['CRASH_MONTH'].unique())]
|
| 52 |
vehicles = ['All'] + sorted(VALID_VEHICLE_TYPES + ['OTHER'])
|
| 53 |
+
person_types = ['All'] + sorted([str(s) for s in df['PERSON_TYPE'].dropna().unique() if str(s) != 'nan'])
|
| 54 |
+
injury_types = ['All'] + sorted([str(t) for t in df['PERSON_INJURY'].dropna().unique() if str(t) != 'nan'])
|
| 55 |
+
genders = ['All'] + sorted([str(g) for g in df['PERSON_SEX'].dropna().unique() if str(g) != 'nan'])
|
| 56 |
+
safety_equip = ['All'] + sorted([str(s) for s in df['SAFETY_EQUIPMENT'].dropna().unique()
|
| 57 |
if str(s) not in ['nan', 'NOT APPLICABLE', 'NOT REPORTED', 'DOES NOT APPLY']][:15])
|
| 58 |
|
| 59 |
|
|
|
|
| 160 |
if borough != 'All':
|
| 161 |
filtered_df = filtered_df[filtered_df['BOROUGH'] == borough]
|
| 162 |
if year != 'All':
|
| 163 |
+
filtered_df = filtered_df[filtered_df['CRASH_YEAR'] == int(year)]
|
| 164 |
if month != 'All':
|
| 165 |
+
filtered_df = filtered_df[filtered_df['CRASH_MONTH'] == int(month)]
|
| 166 |
+
if dow_list and len(dow_list) > 0:
|
| 167 |
+
# Convert to list of ints if needed
|
| 168 |
+
dow_ints = [int(d) if isinstance(d, str) else d for d in dow_list]
|
| 169 |
+
filtered_df = filtered_df[filtered_df['CRASH_DAYOFWEEK'].isin(dow_ints)]
|
| 170 |
+
filtered_df = filtered_df[(filtered_df['CRASH_HOUR'] >= int(hour_min)) &
|
| 171 |
+
(filtered_df['CRASH_HOUR'] <= int(hour_max))]
|
| 172 |
if vehicle != 'All':
|
| 173 |
filtered_df = filtered_df[filtered_df['VEHICLE TYPE CODE 1'] == vehicle]
|
| 174 |
if person_type != 'All':
|
|
|
|
| 188 |
|
| 189 |
# Summary Statistics
|
| 190 |
total_records = len(filtered_df)
|
| 191 |
+
total_injuries = int(filtered_df['NUMBER OF PERSONS INJURED'].sum())
|
| 192 |
+
total_fatalities = int(filtered_df['NUMBER OF PERSONS KILLED'].sum())
|
| 193 |
injury_rate = (total_injuries / total_records * 100) if total_records > 0 else 0
|
| 194 |
|
| 195 |
summary_text = f"""
|
| 196 |
+
📊 **Summary Statistics**
|
| 197 |
+
- **Total Records:** {total_records:,}
|
| 198 |
+
- **Total Injuries:** {total_injuries:,} ({injury_rate:.2f}%)
|
| 199 |
+
- **Total Fatalities:** {total_fatalities:,}
|
| 200 |
+
- **Pedestrian Injuries:** {int(filtered_df['NUMBER OF PEDESTRIANS INJURED'].sum()):,}
|
| 201 |
+
- **Cyclist Injuries:** {int(filtered_df['NUMBER OF CYCLIST INJURED'].sum()):,}
|
| 202 |
+
- **Motorist Injuries:** {int(filtered_df['NUMBER OF MOTORIST INJURED'].sum()):,}
|
| 203 |
+
- **Unique Crashes:** {len(filtered_df['COLLISION_ID'].unique()):,}
|
| 204 |
"""
|
| 205 |
|
| 206 |
# Chart 1: Trend Analysis
|
|
|
|
| 226 |
|
| 227 |
# Chart 3: Categorical Analysis
|
| 228 |
if c3_y == 'count':
|
| 229 |
+
chart3_data = filtered_df[c3_x].value_counts().head(int(c3_top))
|
| 230 |
y_label = 'Number of Records'
|
| 231 |
else:
|
| 232 |
+
chart3_data = filtered_df.groupby(c3_x)[c3_y].sum().sort_values(ascending=False).head(int(c3_top))
|
| 233 |
y_label = c3_y
|
| 234 |
|
| 235 |
fig3 = px.bar(x=chart3_data.index, y=chart3_data.values,
|
|
|
|
| 276 |
fig6 = go.Figure()
|
| 277 |
fig6.add_annotation(text="No secondary factors", xref="paper", yref="paper",
|
| 278 |
x=0.5, y=0.5, showarrow=False)
|
| 279 |
+
fig6.update_layout(height=400)
|
| 280 |
|
| 281 |
# Chart 7: Injury Rate Comparison
|
| 282 |
compare_data = filtered_df.groupby(compare_cat).agg({
|
|
|
|
| 312 |
title='Day × Hour Heatmap', template='plotly_white', height=500)
|
| 313 |
else:
|
| 314 |
fig8 = go.Figure()
|
| 315 |
+
fig8.update_layout(height=500)
|
| 316 |
|
| 317 |
return summary_text, fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8
|
| 318 |
|
|
|
|
| 336 |
month = gr.Dropdown(choices=months, value='All', label="Month")
|
| 337 |
dow = gr.CheckboxGroup(choices=[('Mon', 0), ('Tue', 1), ('Wed', 2), ('Thu', 3),
|
| 338 |
('Fri', 4), ('Sat', 5), ('Sun', 6)],
|
| 339 |
+
label="Day of Week", type="value")
|
| 340 |
hour_min = gr.Slider(minimum=0, maximum=23, value=0, step=1, label="Hour Min")
|
| 341 |
hour_max = gr.Slider(minimum=0, maximum=23, value=23, step=1, label="Hour Max")
|
| 342 |
vehicle = gr.Dropdown(choices=vehicles, value='All', label="Vehicle Type 1")
|