map 9d7aff
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import plotly.express as px
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
import pandas as pd
|
| 5 |
import warnings
|
| 6 |
-
|
| 7 |
|
| 8 |
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
| 9 |
|
|
@@ -13,7 +13,7 @@ df = pd.read_parquet('nyc_crashes_integrated_clean.parquet')
|
|
| 13 |
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])
|
| 14 |
print(f"Data loaded: {len(df):,} records")
|
| 15 |
|
| 16 |
-
# Clean vehicle types
|
| 17 |
VALID_VEHICLE_TYPES = [
|
| 18 |
'SEDAN', 'STATION WAGON/SPORT UTILITY VEHICLE', 'TAXI', 'PICK-UP TRUCK',
|
| 19 |
'BOX TRUCK', 'VAN', 'MOTORCYCLE', 'SCOOTER', 'MOPED', 'E-SCOOTER', 'E-BIKE',
|
|
@@ -24,7 +24,6 @@ VALID_VEHICLE_TYPES = [
|
|
| 24 |
'SPORT UTILITY / STATION WAGON', 'LIMOUSINE', 'UNKNOWN'
|
| 25 |
]
|
| 26 |
|
| 27 |
-
# Replace invalid vehicle types with 'OTHER'
|
| 28 |
df['VEHICLE TYPE CODE 1'] = df['VEHICLE TYPE CODE 1'].apply(
|
| 29 |
lambda x: x if x in VALID_VEHICLE_TYPES else 'OTHER'
|
| 30 |
)
|
|
@@ -34,7 +33,7 @@ df['VEHICLE TYPE CODE 2'] = df['VEHICLE TYPE CODE 2'].apply(
|
|
| 34 |
|
| 35 |
print(f"Cleaned vehicle types. Valid categories: {len(df['VEHICLE TYPE CODE 1'].unique())}")
|
| 36 |
|
| 37 |
-
# Define
|
| 38 |
TEMPORAL_COLS = ['CRASH_YEAR', 'CRASH_MONTH', 'CRASH_DAYOFWEEK', 'CRASH_HOUR']
|
| 39 |
CATEGORICAL_COLS = ['BOROUGH', 'PERSON_TYPE', 'PERSON_INJURY',
|
| 40 |
'CONTRIBUTING FACTOR VEHICLE 1', 'VEHICLE TYPE CODE 1',
|
|
@@ -45,42 +44,41 @@ NUMERIC_COLS = ['NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
|
|
| 45 |
'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
|
| 46 |
'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED']
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
boroughs = ['All'] + sorted([
|
| 50 |
-
years = ['All'] + [int(y) for y in
|
| 51 |
-
months = ['All'] +
|
| 52 |
vehicles = ['All'] + sorted(VALID_VEHICLE_TYPES + ['OTHER'])
|
| 53 |
-
person_types = ['All'] + sorted([
|
| 54 |
-
injury_types = ['All'] + sorted([
|
| 55 |
-
genders = ['All'
|
| 56 |
-
safety_equip = ['All'] + sorted([
|
| 57 |
-
|
| 58 |
|
| 59 |
|
| 60 |
def smart_search_parser(search_text):
|
| 61 |
-
"""Parse natural language search query
|
| 62 |
if not search_text:
|
| 63 |
return None
|
| 64 |
-
|
| 65 |
search_lower = search_text.lower()
|
| 66 |
filters = {}
|
| 67 |
applied_filters = []
|
| 68 |
-
|
| 69 |
# Borough detection
|
| 70 |
-
|
| 71 |
-
for b in
|
| 72 |
if b.lower() in search_lower:
|
| 73 |
filters['borough'] = b
|
| 74 |
applied_filters.append(f"Borough: {b}")
|
| 75 |
break
|
| 76 |
-
|
| 77 |
# Year detection
|
| 78 |
-
import re
|
| 79 |
years_found = re.findall(r'\b(20[1-2][0-9])\b', search_text)
|
| 80 |
if years_found:
|
| 81 |
filters['year'] = int(years_found[0])
|
| 82 |
applied_filters.append(f"Year: {years_found[0]}")
|
| 83 |
-
|
| 84 |
# Month detection
|
| 85 |
months_map = {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6,
|
| 86 |
'july': 7, 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12,
|
|
@@ -91,17 +89,18 @@ def smart_search_parser(search_text):
|
|
| 91 |
filters['month'] = m_num
|
| 92 |
applied_filters.append(f"Month: {m_name.capitalize()}")
|
| 93 |
break
|
| 94 |
-
|
| 95 |
# Day of week detection
|
| 96 |
days_map = {'monday': [0], 'tuesday': [1], 'wednesday': [2], 'thursday': [3],
|
| 97 |
'friday': [4], 'saturday': [5], 'sunday': [6],
|
|
|
|
| 98 |
'weekday': [0, 1, 2, 3, 4], 'weekend': [5, 6]}
|
| 99 |
for day_name, day_nums in days_map.items():
|
| 100 |
if day_name in search_lower:
|
| 101 |
filters['dow'] = day_nums
|
| 102 |
applied_filters.append(f"Day: {day_name.capitalize()}")
|
| 103 |
break
|
| 104 |
-
|
| 105 |
# Time of day detection
|
| 106 |
if 'morning' in search_lower:
|
| 107 |
filters['hour_range'] = (6, 10)
|
|
@@ -115,19 +114,23 @@ def smart_search_parser(search_text):
|
|
| 115 |
elif 'night' in search_lower:
|
| 116 |
filters['hour_range'] = (20, 23)
|
| 117 |
applied_filters.append("Time: Night (20-23)")
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
| 119 |
# Vehicle type detection
|
| 120 |
vehicle_keywords = {
|
| 121 |
'sedan': 'SEDAN', 'suv': 'STATION WAGON/SPORT UTILITY VEHICLE',
|
| 122 |
'taxi': 'TAXI', 'truck': 'PICK-UP TRUCK', 'bus': 'BUS',
|
| 123 |
-
'motorcycle': 'MOTORCYCLE', 'bike': 'BICYCLE', 'scooter': 'SCOOTER'
|
|
|
|
| 124 |
}
|
| 125 |
for keyword, vehicle_type in vehicle_keywords.items():
|
| 126 |
if keyword in search_lower:
|
| 127 |
filters['vehicle'] = vehicle_type
|
| 128 |
applied_filters.append(f"Vehicle: {keyword.capitalize()}")
|
| 129 |
break
|
| 130 |
-
|
| 131 |
# Person type detection
|
| 132 |
if 'pedestrian' in search_lower:
|
| 133 |
filters['person_type'] = 'PEDESTRIAN'
|
|
@@ -138,7 +141,7 @@ def smart_search_parser(search_text):
|
|
| 138 |
elif 'occupant' in search_lower or 'driver' in search_lower:
|
| 139 |
filters['person_type'] = 'OCCUPANT'
|
| 140 |
applied_filters.append("Person: Occupant")
|
| 141 |
-
|
| 142 |
# Injury type detection
|
| 143 |
if 'fatal' in search_lower or 'death' in search_lower or 'killed' in search_lower:
|
| 144 |
filters['injury'] = 'KILLED'
|
|
@@ -146,29 +149,35 @@ def smart_search_parser(search_text):
|
|
| 146 |
elif 'injured' in search_lower or 'injury' in search_lower:
|
| 147 |
filters['injury'] = 'INJURED'
|
| 148 |
applied_filters.append("Injury: Injured")
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
return filters, applied_filters
|
| 151 |
|
| 152 |
|
| 153 |
-
def generate_report(borough, year, month,
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
"""Generate
|
| 157 |
-
|
| 158 |
# Filter data
|
| 159 |
filtered_df = df.copy()
|
| 160 |
if borough != 'All':
|
| 161 |
filtered_df = filtered_df[filtered_df['BOROUGH'] == borough]
|
| 162 |
if year != 'All':
|
| 163 |
-
filtered_df = filtered_df[filtered_df['CRASH_YEAR'] ==
|
| 164 |
if month != 'All':
|
| 165 |
-
filtered_df = filtered_df[filtered_df['CRASH_MONTH'] ==
|
| 166 |
-
if
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
filtered_df = filtered_df[(filtered_df['CRASH_HOUR'] >= int(hour_min)) &
|
| 171 |
-
(filtered_df['CRASH_HOUR'] <= int(hour_max))]
|
| 172 |
if vehicle != 'All':
|
| 173 |
filtered_df = filtered_df[filtered_df['VEHICLE TYPE CODE 1'] == vehicle]
|
| 174 |
if person_type != 'All':
|
|
@@ -179,30 +188,35 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 179 |
filtered_df = filtered_df[filtered_df['PERSON_SEX'] == gender]
|
| 180 |
if safety != 'All':
|
| 181 |
filtered_df = filtered_df[filtered_df['SAFETY_EQUIPMENT'] == safety]
|
| 182 |
-
|
| 183 |
if len(filtered_df) == 0:
|
| 184 |
empty_fig = go.Figure()
|
| 185 |
empty_fig.add_annotation(text="No data found. Adjust filters.", xref="paper", yref="paper",
|
| 186 |
-
|
| 187 |
return "No data found", empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig
|
| 188 |
-
|
| 189 |
# Summary Statistics
|
| 190 |
total_records = len(filtered_df)
|
| 191 |
total_injuries = int(filtered_df['NUMBER OF PERSONS INJURED'].sum())
|
| 192 |
total_fatalities = int(filtered_df['NUMBER OF PERSONS KILLED'].sum())
|
| 193 |
injury_rate = (total_injuries / total_records * 100) if total_records > 0 else 0
|
| 194 |
-
|
|
|
|
| 195 |
summary_text = f"""
|
| 196 |
-
π
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
"""
|
| 205 |
-
|
| 206 |
# Chart 1: Trend Analysis
|
| 207 |
if c1_y == 'count':
|
| 208 |
chart1_data = filtered_df.groupby(c1_x).size().reset_index(name='count')
|
|
@@ -210,20 +224,20 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 210 |
else:
|
| 211 |
chart1_data = filtered_df.groupby(c1_x)[c1_y].sum().reset_index()
|
| 212 |
y_label = c1_y
|
| 213 |
-
|
| 214 |
fig1 = px.line(chart1_data, x=c1_x, y=chart1_data.columns[1],
|
| 215 |
labels={chart1_data.columns[1]: y_label, c1_x: c1_x},
|
| 216 |
title='Trend Analysis')
|
| 217 |
fig1.update_traces(line_color='#3498db', line_width=3)
|
| 218 |
fig1.update_layout(template='plotly_white', height=400)
|
| 219 |
-
|
| 220 |
-
# Chart 2: Person Type Distribution
|
| 221 |
person_type_data = filtered_df['PERSON_TYPE'].value_counts()
|
| 222 |
fig2 = px.pie(values=person_type_data.values, names=person_type_data.index,
|
| 223 |
title='Person Type Distribution',
|
| 224 |
color_discrete_sequence=['#2ecc71', '#f39c12', '#e74c3c', '#3498db'])
|
| 225 |
fig2.update_layout(height=400)
|
| 226 |
-
|
| 227 |
# Chart 3: Categorical Analysis
|
| 228 |
if c3_y == 'count':
|
| 229 |
chart3_data = filtered_df[c3_x].value_counts().head(int(c3_top))
|
|
@@ -231,13 +245,13 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 231 |
else:
|
| 232 |
chart3_data = filtered_df.groupby(c3_x)[c3_y].sum().sort_values(ascending=False).head(int(c3_top))
|
| 233 |
y_label = c3_y
|
| 234 |
-
|
| 235 |
fig3 = px.bar(x=chart3_data.index, y=chart3_data.values,
|
| 236 |
labels={'x': c3_x, 'y': y_label},
|
| 237 |
-
title='Categorical Analysis')
|
| 238 |
fig3.update_traces(marker_color='#3498db')
|
| 239 |
fig3.update_layout(template='plotly_white', height=400)
|
| 240 |
-
|
| 241 |
# Chart 4: Time Distribution
|
| 242 |
if c4_y == 'count':
|
| 243 |
chart4_data = filtered_df[c4_x].value_counts().sort_index()
|
|
@@ -245,39 +259,39 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 245 |
else:
|
| 246 |
chart4_data = filtered_df.groupby(c4_x)[c4_y].sum().sort_index()
|
| 247 |
y_label = c4_y
|
| 248 |
-
|
| 249 |
fig4 = px.bar(x=chart4_data.index, y=chart4_data.values,
|
| 250 |
labels={'x': c4_x, 'y': y_label},
|
| 251 |
title='Time Distribution')
|
| 252 |
fig4.update_traces(marker_color='#e67e22')
|
| 253 |
fig4.update_layout(template='plotly_white', height=400)
|
| 254 |
-
|
| 255 |
# Chart 5: Contributing Factor 1
|
| 256 |
factor1_data = filtered_df['CONTRIBUTING FACTOR VEHICLE 1'].value_counts().head(15)
|
| 257 |
factor1_data = factor1_data[factor1_data.index != 'UNSPECIFIED']
|
| 258 |
-
|
| 259 |
fig5 = px.bar(x=factor1_data.index, y=factor1_data.values,
|
| 260 |
labels={'x': 'Contributing Factor', 'y': 'Number of Crashes'},
|
| 261 |
title='Top Contributing Factors (Vehicle 1)')
|
| 262 |
fig5.update_traces(marker_color='#e74c3c')
|
| 263 |
fig5.update_layout(template='plotly_white', height=400, xaxis={'tickangle': -45})
|
| 264 |
-
|
| 265 |
# Chart 6: Contributing Factor 2
|
| 266 |
factor2_data = filtered_df['CONTRIBUTING FACTOR VEHICLE 2'].value_counts().head(15)
|
| 267 |
factor2_data = factor2_data[~factor2_data.index.isin(['UNSPECIFIED', 'NO SECOND VEHICLE'])]
|
| 268 |
-
|
| 269 |
if len(factor2_data) > 0:
|
| 270 |
fig6 = px.bar(x=factor2_data.index, y=factor2_data.values,
|
| 271 |
-
|
| 272 |
-
|
| 273 |
fig6.update_traces(marker_color='#f39c12')
|
| 274 |
fig6.update_layout(template='plotly_white', height=400, xaxis={'tickangle': -45})
|
| 275 |
else:
|
| 276 |
fig6 = go.Figure()
|
| 277 |
fig6.add_annotation(text="No secondary factors", xref="paper", yref="paper",
|
| 278 |
-
|
| 279 |
-
fig6.update_layout(height=400)
|
| 280 |
-
|
| 281 |
# Chart 7: Injury Rate Comparison
|
| 282 |
compare_data = filtered_df.groupby(compare_cat).agg({
|
| 283 |
'COLLISION_ID': 'count',
|
|
@@ -288,16 +302,16 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 288 |
compare_data['Injury_Rate'] = (compare_data['Total_Injuries'] / compare_data['Total_Records'] * 100)
|
| 289 |
compare_data['Fatality_Rate'] = (compare_data['Total_Fatalities'] / compare_data['Total_Records'] * 100)
|
| 290 |
compare_data = compare_data.sort_values('Injury_Rate', ascending=False).head(15)
|
| 291 |
-
|
| 292 |
fig7 = go.Figure()
|
| 293 |
fig7.add_trace(go.Bar(x=compare_data[compare_cat], y=compare_data['Injury_Rate'],
|
| 294 |
-
|
| 295 |
fig7.add_trace(go.Bar(x=compare_data[compare_cat], y=compare_data['Fatality_Rate'],
|
| 296 |
-
|
| 297 |
fig7.update_layout(barmode='group', template='plotly_white', height=400,
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
# Chart 8: Heatmap
|
| 302 |
heatmap_data = filtered_df.groupby(['CRASH_DAYOFWEEK', 'CRASH_HOUR']).size().reset_index(name='count')
|
| 303 |
if len(heatmap_data) > 0:
|
|
@@ -309,148 +323,200 @@ def generate_report(borough, year, month, dow_list, hour_min, hour_max, vehicle,
|
|
| 309 |
colorscale='YlOrRd'
|
| 310 |
))
|
| 311 |
fig8.update_layout(xaxis_title='Hour of Day', yaxis_title='Day of Week',
|
| 312 |
-
|
| 313 |
else:
|
| 314 |
fig8 = go.Figure()
|
| 315 |
-
fig8.update_layout(height=500)
|
| 316 |
-
|
| 317 |
-
# Chart 9: Geographic Map
|
| 318 |
-
map_sample = filtered_df[(filtered_df['LATITUDE'].notna()) &
|
| 319 |
(filtered_df['LATITUDE'] != 0) &
|
| 320 |
-
(filtered_df['LATITUDE'] > 40) &
|
| 321 |
(filtered_df['LATITUDE'] < 41)]
|
|
|
|
| 322 |
if len(map_sample) > 0:
|
|
|
|
| 323 |
map_sample = map_sample.sample(n=min(3000, len(map_sample)), random_state=42)
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
)
|
|
|
|
| 333 |
else:
|
| 334 |
fig9 = go.Figure()
|
| 335 |
-
fig9.add_annotation(
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
return summary_text, fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8, fig9
|
| 340 |
|
| 341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
# Create Gradio Interface
|
| 343 |
with gr.Blocks(title="NYC Motor Vehicle Crashes Dashboard") as demo:
|
| 344 |
gr.Markdown("# π NYC Motor Vehicle Crashes Dashboard - Enhanced Analytics")
|
| 345 |
-
gr.Markdown("Comprehensive analysis with 5.7M+ crash records")
|
| 346 |
-
|
| 347 |
-
with gr.Accordion("
|
| 348 |
-
gr.Markdown("Type natural language queries like:
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
with gr.Row():
|
| 354 |
with gr.Column(scale=1):
|
| 355 |
-
gr.Markdown("### Filters")
|
| 356 |
borough = gr.Dropdown(choices=boroughs, value='All', label="Borough")
|
| 357 |
year = gr.Dropdown(choices=years, value='All', label="Year")
|
| 358 |
month = gr.Dropdown(choices=months, value='All', label="Month")
|
| 359 |
-
dow = gr.CheckboxGroup(
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
|
|
|
|
|
|
|
|
|
| 364 |
vehicle = gr.Dropdown(choices=vehicles, value='All', label="Vehicle Type 1")
|
| 365 |
person_type = gr.Dropdown(choices=person_types, value='All', label="Person Type")
|
| 366 |
person_injury = gr.Dropdown(choices=injury_types, value='All', label="Person Injury")
|
| 367 |
gender = gr.Dropdown(choices=genders, value='All', label="Gender")
|
| 368 |
safety = gr.Dropdown(choices=safety_equip, value='All', label="Safety Equipment")
|
| 369 |
-
|
| 370 |
with gr.Column(scale=1):
|
| 371 |
-
gr.Markdown("### Chart Settings")
|
| 372 |
-
c1_x = gr.Dropdown(choices=TEMPORAL_COLS, value='CRASH_YEAR', label="Chart 1 X-axis")
|
| 373 |
c1_y = gr.Dropdown(choices=['count'] + NUMERIC_COLS, value='count', label="Chart 1 Y-axis")
|
| 374 |
c3_x = gr.Dropdown(choices=CATEGORICAL_COLS, value='BOROUGH', label="Chart 3 Category")
|
| 375 |
c3_y = gr.Dropdown(choices=['count'] + NUMERIC_COLS, value='count', label="Chart 3 Y-axis")
|
| 376 |
c3_top = gr.Slider(minimum=5, maximum=20, value=10, step=1, label="Chart 3 Top N")
|
| 377 |
-
c4_x = gr.Dropdown(choices=TEMPORAL_COLS, value='CRASH_HOUR', label="Chart 4 X-axis")
|
| 378 |
c4_y = gr.Dropdown(choices=['count'] + NUMERIC_COLS, value='count', label="Chart 4 Y-axis")
|
| 379 |
-
compare_cat = gr.Dropdown(
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
| 383 |
with gr.Row():
|
| 384 |
-
generate_btn = gr.Button("π Generate Report", variant="primary", size="lg")
|
| 385 |
-
reset_btn = gr.Button("π Reset Filters", variant="secondary", size="lg")
|
| 386 |
-
|
| 387 |
# Outputs
|
| 388 |
summary_output = gr.Markdown(label="Summary Statistics")
|
| 389 |
-
|
| 390 |
with gr.Row():
|
| 391 |
-
chart1_output = gr.Plot(label="Trend Analysis")
|
| 392 |
-
chart2_output = gr.Plot(label="Person Type Distribution")
|
| 393 |
-
|
| 394 |
with gr.Row():
|
| 395 |
-
chart3_output = gr.Plot(label="Categorical Analysis")
|
| 396 |
-
chart4_output = gr.Plot(label="Time Distribution")
|
| 397 |
-
|
| 398 |
with gr.Row():
|
| 399 |
-
chart5_output = gr.Plot(label="Contributing Factor 1")
|
| 400 |
-
chart6_output = gr.Plot(label="Contributing Factor 2")
|
| 401 |
-
|
| 402 |
-
chart7_output = gr.Plot(label="Injury Rate Comparison")
|
| 403 |
-
chart8_output = gr.Plot(label="Day Γ Hour Heatmap")
|
| 404 |
-
chart9_output = gr.Plot(label="Geographic Distribution")
|
| 405 |
-
|
| 406 |
-
|
| 407 |
# Event handlers
|
| 408 |
generate_btn.click(
|
| 409 |
fn=generate_report,
|
| 410 |
inputs=[borough, year, month, dow, hour_min, hour_max, vehicle, person_type,
|
| 411 |
-
|
| 412 |
-
|
| 413 |
outputs=[summary_output, chart1_output, chart2_output, chart3_output, chart4_output,
|
| 414 |
-
|
| 415 |
)
|
| 416 |
-
|
| 417 |
def reset_all():
|
| 418 |
-
return ('All', 'All', 'All', [], 0, 23, 'All', 'All', 'All', 'All', 'All')
|
| 419 |
-
|
| 420 |
reset_btn.click(
|
| 421 |
fn=reset_all,
|
| 422 |
outputs=[borough, year, month, dow, hour_min, hour_max, vehicle, person_type,
|
| 423 |
-
|
| 424 |
)
|
| 425 |
-
|
| 426 |
-
def apply_smart_search(search_text):
|
| 427 |
-
result = smart_search_parser(search_text)
|
| 428 |
-
if result is None:
|
| 429 |
-
return ['All'] * 11
|
| 430 |
-
|
| 431 |
-
filters, applied = result
|
| 432 |
-
return (
|
| 433 |
-
filters.get('borough', 'All'),
|
| 434 |
-
filters.get('year', 'All'),
|
| 435 |
-
filters.get('month', 'All'),
|
| 436 |
-
filters.get('dow', []),
|
| 437 |
-
filters.get('hour_range', (0, 23))[0],
|
| 438 |
-
filters.get('hour_range', (0, 23))[1],
|
| 439 |
-
filters.get('vehicle', 'All'),
|
| 440 |
-
filters.get('person_type', 'All'),
|
| 441 |
-
filters.get('injury', 'All'),
|
| 442 |
-
'All', # gender
|
| 443 |
-
'All' # safety
|
| 444 |
-
)
|
| 445 |
-
|
| 446 |
search_btn.click(
|
| 447 |
fn=apply_smart_search,
|
| 448 |
inputs=[search_input],
|
| 449 |
outputs=[borough, year, month, dow, hour_min, hour_max, vehicle, person_type,
|
| 450 |
-
|
| 451 |
)
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
|
|
|
| 456 |
|
|
|
|
|
|
|
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
import pandas as pd
|
| 5 |
import warnings
|
| 6 |
+
import re
|
| 7 |
|
| 8 |
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
| 9 |
|
|
|
|
| 13 |
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])
|
| 14 |
print(f"Data loaded: {len(df):,} records")
|
| 15 |
|
| 16 |
+
# Clean vehicle types
|
| 17 |
VALID_VEHICLE_TYPES = [
|
| 18 |
'SEDAN', 'STATION WAGON/SPORT UTILITY VEHICLE', 'TAXI', 'PICK-UP TRUCK',
|
| 19 |
'BOX TRUCK', 'VAN', 'MOTORCYCLE', 'SCOOTER', 'MOPED', 'E-SCOOTER', 'E-BIKE',
|
|
|
|
| 24 |
'SPORT UTILITY / STATION WAGON', 'LIMOUSINE', 'UNKNOWN'
|
| 25 |
]
|
| 26 |
|
|
|
|
| 27 |
df['VEHICLE TYPE CODE 1'] = df['VEHICLE TYPE CODE 1'].apply(
|
| 28 |
lambda x: x if x in VALID_VEHICLE_TYPES else 'OTHER'
|
| 29 |
)
|
|
|
|
| 33 |
|
| 34 |
print(f"Cleaned vehicle types. Valid categories: {len(df['VEHICLE TYPE CODE 1'].unique())}")
|
| 35 |
|
| 36 |
+
# Define column groups
|
| 37 |
TEMPORAL_COLS = ['CRASH_YEAR', 'CRASH_MONTH', 'CRASH_DAYOFWEEK', 'CRASH_HOUR']
|
| 38 |
CATEGORICAL_COLS = ['BOROUGH', 'PERSON_TYPE', 'PERSON_INJURY',
|
| 39 |
'CONTRIBUTING FACTOR VEHICLE 1', 'VEHICLE TYPE CODE 1',
|
|
|
|
| 44 |
'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
|
| 45 |
'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED']
|
| 46 |
|
| 47 |
+
# Prepare dropdown options
|
| 48 |
+
boroughs = ['All'] + sorted([b for b in df['BOROUGH'].dropna().unique() if str(b) != 'nan'])
|
| 49 |
+
years = ['All'] + sorted([int(y) for y in df['CRASH_YEAR'].unique()])
|
| 50 |
+
months = ['All'] + list(range(1, 13))
|
| 51 |
vehicles = ['All'] + sorted(VALID_VEHICLE_TYPES + ['OTHER'])
|
| 52 |
+
person_types = ['All'] + sorted([p for p in df['PERSON_TYPE'].dropna().unique() if str(p) != 'nan'])
|
| 53 |
+
injury_types = ['All'] + sorted([i for i in df['PERSON_INJURY'].dropna().unique() if str(i) != 'nan'])
|
| 54 |
+
genders = ['All', 'M', 'F', 'U']
|
| 55 |
+
safety_equip = ['All'] + sorted([s for s in df['SAFETY_EQUIPMENT'].dropna().unique()
|
| 56 |
+
if str(s) not in ['nan', 'NOT APPLICABLE', 'NOT REPORTED', 'DOES NOT APPLY']][:15])
|
| 57 |
|
| 58 |
|
| 59 |
def smart_search_parser(search_text):
|
| 60 |
+
"""Parse natural language search query into filters"""
|
| 61 |
if not search_text:
|
| 62 |
return None
|
| 63 |
+
|
| 64 |
search_lower = search_text.lower()
|
| 65 |
filters = {}
|
| 66 |
applied_filters = []
|
| 67 |
+
|
| 68 |
# Borough detection
|
| 69 |
+
boroughs_map = ['BROOKLYN', 'MANHATTAN', 'QUEENS', 'BRONX', 'STATEN ISLAND']
|
| 70 |
+
for b in boroughs_map:
|
| 71 |
if b.lower() in search_lower:
|
| 72 |
filters['borough'] = b
|
| 73 |
applied_filters.append(f"Borough: {b}")
|
| 74 |
break
|
| 75 |
+
|
| 76 |
# Year detection
|
|
|
|
| 77 |
years_found = re.findall(r'\b(20[1-2][0-9])\b', search_text)
|
| 78 |
if years_found:
|
| 79 |
filters['year'] = int(years_found[0])
|
| 80 |
applied_filters.append(f"Year: {years_found[0]}")
|
| 81 |
+
|
| 82 |
# Month detection
|
| 83 |
months_map = {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6,
|
| 84 |
'july': 7, 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12,
|
|
|
|
| 89 |
filters['month'] = m_num
|
| 90 |
applied_filters.append(f"Month: {m_name.capitalize()}")
|
| 91 |
break
|
| 92 |
+
|
| 93 |
# Day of week detection
|
| 94 |
days_map = {'monday': [0], 'tuesday': [1], 'wednesday': [2], 'thursday': [3],
|
| 95 |
'friday': [4], 'saturday': [5], 'sunday': [6],
|
| 96 |
+
'mon': [0], 'tue': [1], 'wed': [2], 'thu': [3], 'fri': [4], 'sat': [5], 'sun': [6],
|
| 97 |
'weekday': [0, 1, 2, 3, 4], 'weekend': [5, 6]}
|
| 98 |
for day_name, day_nums in days_map.items():
|
| 99 |
if day_name in search_lower:
|
| 100 |
filters['dow'] = day_nums
|
| 101 |
applied_filters.append(f"Day: {day_name.capitalize()}")
|
| 102 |
break
|
| 103 |
+
|
| 104 |
# Time of day detection
|
| 105 |
if 'morning' in search_lower:
|
| 106 |
filters['hour_range'] = (6, 10)
|
|
|
|
| 114 |
elif 'night' in search_lower:
|
| 115 |
filters['hour_range'] = (20, 23)
|
| 116 |
applied_filters.append("Time: Night (20-23)")
|
| 117 |
+
elif 'late night' in search_lower or 'midnight' in search_lower:
|
| 118 |
+
filters['hour_range'] = (0, 5)
|
| 119 |
+
applied_filters.append("Time: Late Night (0-5)")
|
| 120 |
+
|
| 121 |
# Vehicle type detection
|
| 122 |
vehicle_keywords = {
|
| 123 |
'sedan': 'SEDAN', 'suv': 'STATION WAGON/SPORT UTILITY VEHICLE',
|
| 124 |
'taxi': 'TAXI', 'truck': 'PICK-UP TRUCK', 'bus': 'BUS',
|
| 125 |
+
'motorcycle': 'MOTORCYCLE', 'bike': 'BICYCLE', 'scooter': 'SCOOTER',
|
| 126 |
+
'van': 'VAN', 'ambulance': 'AMBULANCE', 'moped': 'MOPED'
|
| 127 |
}
|
| 128 |
for keyword, vehicle_type in vehicle_keywords.items():
|
| 129 |
if keyword in search_lower:
|
| 130 |
filters['vehicle'] = vehicle_type
|
| 131 |
applied_filters.append(f"Vehicle: {keyword.capitalize()}")
|
| 132 |
break
|
| 133 |
+
|
| 134 |
# Person type detection
|
| 135 |
if 'pedestrian' in search_lower:
|
| 136 |
filters['person_type'] = 'PEDESTRIAN'
|
|
|
|
| 141 |
elif 'occupant' in search_lower or 'driver' in search_lower:
|
| 142 |
filters['person_type'] = 'OCCUPANT'
|
| 143 |
applied_filters.append("Person: Occupant")
|
| 144 |
+
|
| 145 |
# Injury type detection
|
| 146 |
if 'fatal' in search_lower or 'death' in search_lower or 'killed' in search_lower:
|
| 147 |
filters['injury'] = 'KILLED'
|
|
|
|
| 149 |
elif 'injured' in search_lower or 'injury' in search_lower:
|
| 150 |
filters['injury'] = 'INJURED'
|
| 151 |
applied_filters.append("Injury: Injured")
|
| 152 |
+
|
| 153 |
+
# Gender detection
|
| 154 |
+
if 'male' in search_lower and 'female' not in search_lower:
|
| 155 |
+
filters['gender'] = 'M'
|
| 156 |
+
applied_filters.append("Gender: Male")
|
| 157 |
+
elif 'female' in search_lower:
|
| 158 |
+
filters['gender'] = 'F'
|
| 159 |
+
applied_filters.append("Gender: Female")
|
| 160 |
+
|
| 161 |
return filters, applied_filters
|
| 162 |
|
| 163 |
|
| 164 |
+
def generate_report(borough, year, month, dow, hour_min, hour_max, vehicle, person_type,
|
| 165 |
+
person_injury, gender, safety, c1_x, c1_y, c3_x, c3_y, c3_top,
|
| 166 |
+
c4_x, c4_y, compare_cat):
|
| 167 |
+
"""Generate all visualizations based on filters"""
|
| 168 |
+
|
| 169 |
# Filter data
|
| 170 |
filtered_df = df.copy()
|
| 171 |
if borough != 'All':
|
| 172 |
filtered_df = filtered_df[filtered_df['BOROUGH'] == borough]
|
| 173 |
if year != 'All':
|
| 174 |
+
filtered_df = filtered_df[filtered_df['CRASH_YEAR'] == year]
|
| 175 |
if month != 'All':
|
| 176 |
+
filtered_df = filtered_df[filtered_df['CRASH_MONTH'] == month]
|
| 177 |
+
if dow:
|
| 178 |
+
filtered_df = filtered_df[filtered_df['CRASH_DAYOFWEEK'].isin(dow)]
|
| 179 |
+
filtered_df = filtered_df[(filtered_df['CRASH_HOUR'] >= hour_min) &
|
| 180 |
+
(filtered_df['CRASH_HOUR'] <= hour_max)]
|
|
|
|
|
|
|
| 181 |
if vehicle != 'All':
|
| 182 |
filtered_df = filtered_df[filtered_df['VEHICLE TYPE CODE 1'] == vehicle]
|
| 183 |
if person_type != 'All':
|
|
|
|
| 188 |
filtered_df = filtered_df[filtered_df['PERSON_SEX'] == gender]
|
| 189 |
if safety != 'All':
|
| 190 |
filtered_df = filtered_df[filtered_df['SAFETY_EQUIPMENT'] == safety]
|
| 191 |
+
|
| 192 |
if len(filtered_df) == 0:
|
| 193 |
empty_fig = go.Figure()
|
| 194 |
empty_fig.add_annotation(text="No data found. Adjust filters.", xref="paper", yref="paper",
|
| 195 |
+
x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray"))
|
| 196 |
return "No data found", empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig
|
| 197 |
+
|
| 198 |
# Summary Statistics
|
| 199 |
total_records = len(filtered_df)
|
| 200 |
total_injuries = int(filtered_df['NUMBER OF PERSONS INJURED'].sum())
|
| 201 |
total_fatalities = int(filtered_df['NUMBER OF PERSONS KILLED'].sum())
|
| 202 |
injury_rate = (total_injuries / total_records * 100) if total_records > 0 else 0
|
| 203 |
+
fatality_rate = (total_fatalities / total_records * 100) if total_records > 0 else 0
|
| 204 |
+
|
| 205 |
summary_text = f"""
|
| 206 |
+
## π Summary Statistics
|
| 207 |
+
|
| 208 |
+
| Metric | Value |
|
| 209 |
+
|--------|-------|
|
| 210 |
+
| **Total Records** | {total_records:,} |
|
| 211 |
+
| **Total Injuries** | {total_injuries:,} ({injury_rate:.2f}%) |
|
| 212 |
+
| **Total Fatalities** | {total_fatalities:,} ({fatality_rate:.2f}%) |
|
| 213 |
+
| **Pedestrian Injuries** | {int(filtered_df['NUMBER OF PEDESTRIANS INJURED'].sum()):,} |
|
| 214 |
+
| **Cyclist Injuries** | {int(filtered_df['NUMBER OF CYCLIST INJURED'].sum()):,} |
|
| 215 |
+
| **Motorist Injuries** | {int(filtered_df['NUMBER OF MOTORIST INJURED'].sum()):,} |
|
| 216 |
+
| **Unique Crashes** | {len(filtered_df['COLLISION_ID'].unique()):,} |
|
| 217 |
+
| **Avg Persons/Crash** | {(total_records / len(filtered_df['COLLISION_ID'].unique())):.1f} |
|
| 218 |
"""
|
| 219 |
+
|
| 220 |
# Chart 1: Trend Analysis
|
| 221 |
if c1_y == 'count':
|
| 222 |
chart1_data = filtered_df.groupby(c1_x).size().reset_index(name='count')
|
|
|
|
| 224 |
else:
|
| 225 |
chart1_data = filtered_df.groupby(c1_x)[c1_y].sum().reset_index()
|
| 226 |
y_label = c1_y
|
| 227 |
+
|
| 228 |
fig1 = px.line(chart1_data, x=c1_x, y=chart1_data.columns[1],
|
| 229 |
labels={chart1_data.columns[1]: y_label, c1_x: c1_x},
|
| 230 |
title='Trend Analysis')
|
| 231 |
fig1.update_traces(line_color='#3498db', line_width=3)
|
| 232 |
fig1.update_layout(template='plotly_white', height=400)
|
| 233 |
+
|
| 234 |
+
# Chart 2: Person Type Distribution
|
| 235 |
person_type_data = filtered_df['PERSON_TYPE'].value_counts()
|
| 236 |
fig2 = px.pie(values=person_type_data.values, names=person_type_data.index,
|
| 237 |
title='Person Type Distribution',
|
| 238 |
color_discrete_sequence=['#2ecc71', '#f39c12', '#e74c3c', '#3498db'])
|
| 239 |
fig2.update_layout(height=400)
|
| 240 |
+
|
| 241 |
# Chart 3: Categorical Analysis
|
| 242 |
if c3_y == 'count':
|
| 243 |
chart3_data = filtered_df[c3_x].value_counts().head(int(c3_top))
|
|
|
|
| 245 |
else:
|
| 246 |
chart3_data = filtered_df.groupby(c3_x)[c3_y].sum().sort_values(ascending=False).head(int(c3_top))
|
| 247 |
y_label = c3_y
|
| 248 |
+
|
| 249 |
fig3 = px.bar(x=chart3_data.index, y=chart3_data.values,
|
| 250 |
labels={'x': c3_x, 'y': y_label},
|
| 251 |
+
title=f'Categorical Analysis - Top {int(c3_top)}')
|
| 252 |
fig3.update_traces(marker_color='#3498db')
|
| 253 |
fig3.update_layout(template='plotly_white', height=400)
|
| 254 |
+
|
| 255 |
# Chart 4: Time Distribution
|
| 256 |
if c4_y == 'count':
|
| 257 |
chart4_data = filtered_df[c4_x].value_counts().sort_index()
|
|
|
|
| 259 |
else:
|
| 260 |
chart4_data = filtered_df.groupby(c4_x)[c4_y].sum().sort_index()
|
| 261 |
y_label = c4_y
|
| 262 |
+
|
| 263 |
fig4 = px.bar(x=chart4_data.index, y=chart4_data.values,
|
| 264 |
labels={'x': c4_x, 'y': y_label},
|
| 265 |
title='Time Distribution')
|
| 266 |
fig4.update_traces(marker_color='#e67e22')
|
| 267 |
fig4.update_layout(template='plotly_white', height=400)
|
| 268 |
+
|
| 269 |
# Chart 5: Contributing Factor 1
|
| 270 |
factor1_data = filtered_df['CONTRIBUTING FACTOR VEHICLE 1'].value_counts().head(15)
|
| 271 |
factor1_data = factor1_data[factor1_data.index != 'UNSPECIFIED']
|
| 272 |
+
|
| 273 |
fig5 = px.bar(x=factor1_data.index, y=factor1_data.values,
|
| 274 |
labels={'x': 'Contributing Factor', 'y': 'Number of Crashes'},
|
| 275 |
title='Top Contributing Factors (Vehicle 1)')
|
| 276 |
fig5.update_traces(marker_color='#e74c3c')
|
| 277 |
fig5.update_layout(template='plotly_white', height=400, xaxis={'tickangle': -45})
|
| 278 |
+
|
| 279 |
# Chart 6: Contributing Factor 2
|
| 280 |
factor2_data = filtered_df['CONTRIBUTING FACTOR VEHICLE 2'].value_counts().head(15)
|
| 281 |
factor2_data = factor2_data[~factor2_data.index.isin(['UNSPECIFIED', 'NO SECOND VEHICLE'])]
|
| 282 |
+
|
| 283 |
if len(factor2_data) > 0:
|
| 284 |
fig6 = px.bar(x=factor2_data.index, y=factor2_data.values,
|
| 285 |
+
labels={'x': 'Secondary Contributing Factor', 'y': 'Number of Crashes'},
|
| 286 |
+
title='Top Contributing Factors (Vehicle 2)')
|
| 287 |
fig6.update_traces(marker_color='#f39c12')
|
| 288 |
fig6.update_layout(template='plotly_white', height=400, xaxis={'tickangle': -45})
|
| 289 |
else:
|
| 290 |
fig6 = go.Figure()
|
| 291 |
fig6.add_annotation(text="No secondary factors", xref="paper", yref="paper",
|
| 292 |
+
x=0.5, y=0.5, showarrow=False)
|
| 293 |
+
fig6.update_layout(height=400, title='Top Contributing Factors (Vehicle 2)')
|
| 294 |
+
|
| 295 |
# Chart 7: Injury Rate Comparison
|
| 296 |
compare_data = filtered_df.groupby(compare_cat).agg({
|
| 297 |
'COLLISION_ID': 'count',
|
|
|
|
| 302 |
compare_data['Injury_Rate'] = (compare_data['Total_Injuries'] / compare_data['Total_Records'] * 100)
|
| 303 |
compare_data['Fatality_Rate'] = (compare_data['Total_Fatalities'] / compare_data['Total_Records'] * 100)
|
| 304 |
compare_data = compare_data.sort_values('Injury_Rate', ascending=False).head(15)
|
| 305 |
+
|
| 306 |
fig7 = go.Figure()
|
| 307 |
fig7.add_trace(go.Bar(x=compare_data[compare_cat], y=compare_data['Injury_Rate'],
|
| 308 |
+
name='Injury Rate (%)', marker_color='#f39c12'))
|
| 309 |
fig7.add_trace(go.Bar(x=compare_data[compare_cat], y=compare_data['Fatality_Rate'],
|
| 310 |
+
name='Fatality Rate (%)', marker_color='#e74c3c'))
|
| 311 |
fig7.update_layout(barmode='group', template='plotly_white', height=400,
|
| 312 |
+
title='Injury Rate Comparison',
|
| 313 |
+
xaxis_title=compare_cat, yaxis_title='Rate (%)')
|
| 314 |
+
|
| 315 |
# Chart 8: Heatmap
|
| 316 |
heatmap_data = filtered_df.groupby(['CRASH_DAYOFWEEK', 'CRASH_HOUR']).size().reset_index(name='count')
|
| 317 |
if len(heatmap_data) > 0:
|
|
|
|
| 323 |
colorscale='YlOrRd'
|
| 324 |
))
|
| 325 |
fig8.update_layout(xaxis_title='Hour of Day', yaxis_title='Day of Week',
|
| 326 |
+
title='Day Γ Hour Heatmap', template='plotly_white', height=500)
|
| 327 |
else:
|
| 328 |
fig8 = go.Figure()
|
| 329 |
+
fig8.update_layout(height=500, title='Day Γ Hour Heatmap')
|
| 330 |
+
|
| 331 |
+
# Chart 9: Geographic Map (NEW!)
|
| 332 |
+
map_sample = filtered_df[(filtered_df['LATITUDE'].notna()) &
|
| 333 |
(filtered_df['LATITUDE'] != 0) &
|
| 334 |
+
(filtered_df['LATITUDE'] > 40) &
|
| 335 |
(filtered_df['LATITUDE'] < 41)]
|
| 336 |
+
|
| 337 |
if len(map_sample) > 0:
|
| 338 |
+
# Sample for performance
|
| 339 |
map_sample = map_sample.sample(n=min(3000, len(map_sample)), random_state=42)
|
| 340 |
+
|
| 341 |
+
# Create severity category combining injury and fatality
|
| 342 |
+
def categorize_severity(row):
|
| 343 |
+
if row['NUMBER OF PERSONS KILLED'] > 0:
|
| 344 |
+
return 'Fatal'
|
| 345 |
+
elif row['NUMBER OF PERSONS INJURED'] > 0:
|
| 346 |
+
return 'Injury'
|
| 347 |
+
else:
|
| 348 |
+
return 'Property Damage Only'
|
| 349 |
+
|
| 350 |
+
map_sample['SEVERITY_CATEGORY'] = map_sample.apply(categorize_severity, axis=1)
|
| 351 |
+
|
| 352 |
+
# Color mapping
|
| 353 |
+
color_map = {
|
| 354 |
+
'Fatal': '#e74c3c',
|
| 355 |
+
'Injury': '#f39c12',
|
| 356 |
+
'Property Damage Only': '#9d7aff'
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
fig9 = px.scatter_map(
|
| 360 |
+
map_sample,
|
| 361 |
+
lat='LATITUDE',
|
| 362 |
+
lon='LONGITUDE',
|
| 363 |
+
color='SEVERITY_CATEGORY',
|
| 364 |
+
color_discrete_map=color_map,
|
| 365 |
+
title=f'Geographic Distribution (Sample of {len(map_sample):,} locations)',
|
| 366 |
+
zoom=10,
|
| 367 |
+
height=600,
|
| 368 |
+
hover_data={
|
| 369 |
+
'LATITUDE': False,
|
| 370 |
+
'LONGITUDE': False,
|
| 371 |
+
'SEVERITY_CATEGORY': True,
|
| 372 |
+
'NUMBER OF PERSONS INJURED': True,
|
| 373 |
+
'NUMBER OF PERSONS KILLED': True,
|
| 374 |
+
'BOROUGH': True,
|
| 375 |
+
'VEHICLE TYPE CODE 1': True
|
| 376 |
+
}
|
| 377 |
)
|
| 378 |
+
fig9.update_layout(map_style="open-street-map")
|
| 379 |
else:
|
| 380 |
fig9 = go.Figure()
|
| 381 |
+
fig9.add_annotation(
|
| 382 |
+
text="No location data available",
|
| 383 |
+
xref="paper", yref="paper",
|
| 384 |
+
x=0.5, y=0.5, showarrow=False,
|
| 385 |
+
font=dict(size=20, color="gray")
|
| 386 |
+
)
|
| 387 |
+
fig9.update_layout(height=600, title='Geographic Distribution')
|
| 388 |
+
|
| 389 |
return summary_text, fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8, fig9
|
| 390 |
|
| 391 |
|
| 392 |
+
def apply_smart_search(search_text):
    """Turn a free-text query into values for the filter widgets.

    Passes ``search_text`` to ``smart_search_parser`` and lays the result out
    in the order the search button's outputs are wired: borough, year, month,
    day-of-week selection, hour min, hour max, vehicle, person type, person
    injury, gender, safety equipment, feedback message.

    Args:
        search_text: Raw query string taken from the search box.

    Returns:
        A 12-item sequence: eleven widget values followed by the feedback
        text. When the parser returns ``None`` every widget receives its
        neutral value and the feedback is a hint on how to phrase a query.
    """
    # Neutral value per widget, in output order. The day-of-week checkbox
    # group takes a list and the two hour sliders take numbers in 0-23, so
    # 'All' is only a valid value for the dropdowns (same values reset_all
    # uses).
    neutral = ['All', 'All', 'All', [], 0, 23, 'All', 'All', 'All', 'All', 'All']

    result = smart_search_parser(search_text)
    if result is None:
        return neutral + ["⚠️ No filters detected. Try: 'Brooklyn 2022 pedestrian crashes'"]

    filters, applied = result
    feedback = ("✅ Filters Applied: " + ", ".join(applied)
                + "\n\nClick 'Generate Report' to see results.")

    # Look the range up once; a missing key means "whole day".
    hour_range = filters.get('hour_range', (0, 23))

    return (
        filters.get('borough', 'All'),
        filters.get('year', 'All'),
        filters.get('month', 'All'),
        filters.get('dow', []),
        hour_range[0],
        hour_range[1],
        filters.get('vehicle', 'All'),
        filters.get('person_type', 'All'),
        filters.get('injury', 'All'),
        filters.get('gender', 'All'),
        filters.get('safety', 'All'),
        feedback,
    )
|
| 415 |
+
|
| 416 |
+
|
| 417 |
# Create Gradio Interface
#
# Layout, top to bottom: title, smart-search panel, a row with the filter
# column and the chart-settings column, the action buttons, the summary, and
# the nine chart outputs. Event wiring follows the layout.
#
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# the widget semantics because indentation was lost in this copy of the file;
# confirm against the repository.
# NOTE(review): some labels start with a mis-encoded glyph ("π", "β", "ποΈ")
# whose original emoji could not be recovered here; they are kept as found.
# Restore them from the repository.
with gr.Blocks(title="NYC Motor Vehicle Crashes Dashboard") as demo:
    gr.Markdown("# π NYC Motor Vehicle Crashes Dashboard - Enhanced Analytics")
    gr.Markdown("### Comprehensive analysis with 5.7M+ crash records")

    with gr.Accordion("π Smart Search", open=True):
        gr.Markdown("**Type natural language queries** like: `Brooklyn 2022 pedestrian crashes` or `Manhattan weekend taxi injured`")
        with gr.Row():
            search_input = gr.Textbox(
                label="Search Query",
                placeholder="e.g., Queens Friday night motorcycle fatalities...",
                scale=3,
            )
            search_btn = gr.Button("π Apply Smart Search", variant="primary", scale=1)
            clear_search_btn = gr.Button("β Clear", variant="stop", scale=1)
        search_feedback = gr.Markdown(visible=True)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ποΈ Filters")
            borough = gr.Dropdown(choices=boroughs, value='All', label="Borough")
            year = gr.Dropdown(choices=years, value='All', label="Year")
            month = gr.Dropdown(choices=months, value='All', label="Month")
            # Choices are (label, value) pairs; handlers receive the integers.
            dow = gr.CheckboxGroup(
                choices=[('Mon', 0), ('Tue', 1), ('Wed', 2), ('Thu', 3),
                         ('Fri', 4), ('Sat', 5), ('Sun', 6)],
                label="Day of Week", type="value",
            )
            with gr.Row():
                hour_min = gr.Slider(minimum=0, maximum=23, value=0, step=1, label="Hour Min")
                hour_max = gr.Slider(minimum=0, maximum=23, value=23, step=1, label="Hour Max")
            vehicle = gr.Dropdown(choices=vehicles, value='All', label="Vehicle Type 1")
            person_type = gr.Dropdown(choices=person_types, value='All', label="Person Type")
            person_injury = gr.Dropdown(choices=injury_types, value='All', label="Person Injury")
            gender = gr.Dropdown(choices=genders, value='All', label="Gender")
            safety = gr.Dropdown(choices=safety_equip, value='All', label="Safety Equipment")

        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Chart Settings")
            c1_x = gr.Dropdown(choices=TEMPORAL_COLS, value='CRASH_YEAR', label="Chart 1 X-axis (Trend)")
            c1_y = gr.Dropdown(choices=['count'] + NUMERIC_COLS, value='count', label="Chart 1 Y-axis")
            c3_x = gr.Dropdown(choices=CATEGORICAL_COLS, value='BOROUGH', label="Chart 3 Category")
            c3_y = gr.Dropdown(choices=['count'] + NUMERIC_COLS, value='count', label="Chart 3 Y-axis")
            c3_top = gr.Slider(minimum=5, maximum=20, value=10, step=1, label="Chart 3 Top N")
            c4_x = gr.Dropdown(choices=TEMPORAL_COLS, value='CRASH_HOUR', label="Chart 4 X-axis (Time)")
            c4_y = gr.Dropdown(choices=['count'] + NUMERIC_COLS, value='count', label="Chart 4 Y-axis")
            compare_cat = gr.Dropdown(
                choices=['BOROUGH', 'VEHICLE TYPE CODE 1', 'PERSON_TYPE',
                         'SAFETY_EQUIPMENT', 'CRASH_HOUR', 'CRASH_DAYOFWEEK'],
                value='BOROUGH', label="Comparison Category",
            )

    with gr.Row():
        generate_btn = gr.Button("π Generate Report", variant="primary", size="lg", scale=2)
        reset_btn = gr.Button("π Reset All Filters", variant="secondary", size="lg", scale=1)

    # Outputs
    summary_output = gr.Markdown(label="Summary Statistics")

    with gr.Row():
        chart1_output = gr.Plot(label="Chart 1: Trend Analysis")
        chart2_output = gr.Plot(label="Chart 2: Person Type Distribution")

    with gr.Row():
        chart3_output = gr.Plot(label="Chart 3: Categorical Analysis")
        chart4_output = gr.Plot(label="Chart 4: Time Distribution")

    with gr.Row():
        chart5_output = gr.Plot(label="Chart 5: Contributing Factor 1")
        chart6_output = gr.Plot(label="Chart 6: Contributing Factor 2")

    chart7_output = gr.Plot(label="Chart 7: Injury Rate Comparison")
    chart8_output = gr.Plot(label="Chart 8: Day × Hour Heatmap")
    chart9_output = gr.Plot(label="Chart 9: Geographic Distribution Map")

    # Event handlers

    # The eleven filter widgets, in the positional order shared by
    # generate_report's inputs, reset_all's return value and
    # apply_smart_search's return value. Kept in one list so the three
    # wirings below cannot drift apart.
    filter_widgets = [borough, year, month, dow, hour_min, hour_max, vehicle,
                      person_type, person_injury, gender, safety]

    generate_btn.click(
        fn=generate_report,
        inputs=filter_widgets + [c1_x, c1_y, c3_x, c3_y, c3_top,
                                 c4_x, c4_y, compare_cat],
        outputs=[summary_output, chart1_output, chart2_output, chart3_output,
                 chart4_output, chart5_output, chart6_output, chart7_output,
                 chart8_output, chart9_output],
    )

    def reset_all():
        """Return the neutral value for every filter widget plus empty feedback."""
        return ('All', 'All', 'All', [], 0, 23, 'All', 'All', 'All', 'All', 'All', '')

    reset_btn.click(
        fn=reset_all,
        outputs=filter_widgets + [search_feedback],
    )

    search_btn.click(
        fn=apply_smart_search,
        inputs=[search_input],
        outputs=filter_widgets + [search_feedback],
    )

    # Clearing only empties the query box and the feedback line; the filter
    # widgets keep whatever values they currently hold.
    clear_search_btn.click(
        fn=lambda: ('', ''),
        outputs=[search_input, search_feedback],
    )

if __name__ == "__main__":
    demo.launch(share=False)
|