| import gradio as gr |
| import plotly.express as px |
| import plotly.graph_objects as go |
| import pandas as pd |
| import warnings |
| import re |
|
|
# Keep DeprecationWarning noise out of the console.
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

# Read the crash dataset once at import time into the module-level frame `df`
# and make sure the crash date column holds real datetimes.
print("Loading data...")
df = pd.read_parquet('nyc_crashes_integrated_clean.parquet')
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])
print(f"Data loaded: {len(df):,} records")
|
|
| |
# Vehicle type labels kept as-is; every other value is folded into 'OTHER'.
VALID_VEHICLE_TYPES = [
    'SEDAN', 'STATION WAGON/SPORT UTILITY VEHICLE', 'TAXI', 'PICK-UP TRUCK',
    'BOX TRUCK', 'VAN', 'MOTORCYCLE', 'SCOOTER', 'MOPED', 'E-SCOOTER', 'E-BIKE',
    'BICYCLE', 'BUS', 'AMBULANCE', 'FIRE TRUCK', 'TRACTOR TRUCK DIESEL',
    'TRACTOR TRUCK GASOLINE', 'DUMP', 'FLAT BED', 'GARBAGE OR REFUSE',
    'CONCRETE MIXER', 'REFRIGERATED VAN', 'TRUCK', 'LIVERY VEHICLE',
    'PASSENGER VEHICLE', '2 DR SEDAN', '4 DR SEDAN', 'CONVERTIBLE',
    'SPORT UTILITY / STATION WAGON', 'LIMOUSINE', 'UNKNOWN',
]

# Normalise both vehicle columns with a vectorised membership test instead of a
# per-row Python lambda. Missing values fail the membership test and therefore
# become 'OTHER', exactly as before. The second-vehicle column additionally
# keeps its 'NO SECOND VEHICLE' placeholder.
for _column, _allowed in (
    ('VEHICLE TYPE CODE 1', VALID_VEHICLE_TYPES),
    ('VEHICLE TYPE CODE 2', VALID_VEHICLE_TYPES + ['NO SECOND VEHICLE']),
):
    # Cast to object first so that writing 'OTHER' cannot fail on a
    # categorical column that lacks that category.
    _raw = df[_column].astype(object)
    df[_column] = _raw.where(_raw.isin(_allowed), 'OTHER')

print(f"Cleaned vehicle types. Valid categories: {len(df['VEHICLE TYPE CODE 1'].unique())}")
|
|
| |
# Column groups offered in the chart-setting dropdowns.
TEMPORAL_COLS = ['CRASH_YEAR', 'CRASH_MONTH', 'CRASH_DAYOFWEEK', 'CRASH_HOUR']
CATEGORICAL_COLS = [
    'BOROUGH', 'PERSON_TYPE', 'PERSON_INJURY',
    'CONTRIBUTING FACTOR VEHICLE 1', 'VEHICLE TYPE CODE 1',
    'PERSON_SEX', 'SAFETY_EQUIPMENT', 'POSITION_IN_VEHICLE',
    'EJECTION', 'EMOTIONAL_STATUS',
]
NUMERIC_COLS = [
    'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
    'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED',
    'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
    'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED',
]


def _dropdown_choices(column, excluded=('nan',), limit=None):
    """Return ['All'] plus the sorted distinct non-null values of df[column].

    Values whose string form is listed in `excluded` are skipped. When `limit`
    is given, only the first `limit` surviving values (in order of first
    appearance) are kept, and the cut happens before sorting.
    """
    kept = [value for value in df[column].dropna().unique()
            if str(value) not in excluded]
    return ['All'] + sorted(kept[:limit])


# Choice lists for the filter widgets; 'All' always comes first.
boroughs = _dropdown_choices('BOROUGH')
years = ['All'] + sorted(int(y) for y in df['CRASH_YEAR'].unique())
months = ['All', *range(1, 13)]
vehicles = ['All'] + sorted([*VALID_VEHICLE_TYPES, 'OTHER'])
person_types = _dropdown_choices('PERSON_TYPE')
injury_types = _dropdown_choices('PERSON_INJURY')
genders = ['All', 'M', 'F', 'U']
safety_equip = _dropdown_choices(
    'SAFETY_EQUIPMENT',
    excluded=('nan', 'NOT APPLICABLE', 'NOT REPORTED', 'DOES NOT APPLY'),
    limit=15,
)
|
|
|
|
def smart_search_parser(search_text):
    """Parse a natural-language search query into dashboard filters.

    Matching is case-insensitive. Month and day names (including their
    abbreviations) must appear as whole words, so "month" is not read as
    Monday and "summary" is not read as March. All other keywords are matched
    as substrings, which lets plurals such as "taxis" or "fatalities" hit.

    Args:
        search_text: Free-text query, e.g. "Brooklyn 2022 pedestrian crashes".

    Returns:
        None when `search_text` is empty. Otherwise a tuple
        `(filters, applied_filters)` where `filters` maps any of the keys
        'borough', 'year', 'month', 'dow', 'hour_range', 'vehicle',
        'person_type', 'injury', 'gender' to the detected value, and
        `applied_filters` is a list of human-readable labels. Both are empty
        when nothing was recognised.
    """
    if not search_text:
        return None

    search_lower = search_text.lower()
    filters = {}
    applied_filters = []

    def has_word(word, allow_plural=False):
        """Return True when `word` occurs in the query as a whole word."""
        suffix = 's?' if allow_plural else ''
        return re.search(rf'\b{re.escape(word)}{suffix}\b', search_lower) is not None

    # Borough: first listed borough mentioned anywhere in the query.
    for b in ('BROOKLYN', 'MANHATTAN', 'QUEENS', 'BRONX', 'STATEN ISLAND'):
        if b.lower() in search_lower:
            filters['borough'] = b
            applied_filters.append(f"Borough: {b}")
            break

    # Year: first standalone four-digit number in 2010-2029.
    years_found = re.findall(r'\b(20[1-2][0-9])\b', search_text)
    if years_found:
        filters['year'] = int(years_found[0])
        applied_filters.append(f"Year: {years_found[0]}")

    # Month: whole-word match only; the first map entry that matches wins.
    months_map = {
        'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6,
        'july': 7, 'august': 8, 'september': 9, 'october': 10, 'november': 11,
        'december': 12,
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'jun': 6, 'jul': 7, 'aug': 8,
        'sep': 9, 'sept': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }
    for m_name, m_num in months_map.items():
        if has_word(m_name):
            filters['month'] = m_num
            applied_filters.append(f"Month: {m_name.capitalize()}")
            break

    # Day of week: whole-word match, optional plural ("mondays", "weekends").
    days_map = {
        'monday': [0], 'tuesday': [1], 'wednesday': [2], 'thursday': [3],
        'friday': [4], 'saturday': [5], 'sunday': [6],
        'mon': [0], 'tue': [1], 'tues': [1], 'wed': [2], 'thu': [3],
        'thur': [3], 'thurs': [3], 'fri': [4], 'sat': [5], 'sun': [6],
        'weekday': [0, 1, 2, 3, 4], 'weekend': [5, 6],
    }
    for day_name, day_nums in days_map.items():
        if has_word(day_name, allow_plural=True):
            filters['dow'] = day_nums
            applied_filters.append(f"Day: {day_name.capitalize()}")
            break

    # Time of day. "late night" and "midnight" both contain "night", so they
    # must be tested before the plain "night" entry to be reachable.
    time_windows = (
        (('morning',), (6, 10), "Time: Morning (6-10)"),
        (('afternoon',), (12, 17), "Time: Afternoon (12-17)"),
        (('evening',), (17, 20), "Time: Evening (17-20)"),
        (('late night', 'midnight'), (0, 5), "Time: Late Night (0-5)"),
        (('night',), (20, 23), "Time: Night (20-23)"),
    )
    for phrases, hour_range, label in time_windows:
        if any(phrase in search_lower for phrase in phrases):
            filters['hour_range'] = hour_range
            applied_filters.append(label)
            break

    # Vehicle type: first keyword (in this order) found in the query.
    vehicle_keywords = {
        'sedan': 'SEDAN', 'suv': 'STATION WAGON/SPORT UTILITY VEHICLE',
        'taxi': 'TAXI', 'truck': 'PICK-UP TRUCK', 'bus': 'BUS',
        'motorcycle': 'MOTORCYCLE', 'bike': 'BICYCLE', 'scooter': 'SCOOTER',
        'van': 'VAN', 'ambulance': 'AMBULANCE', 'moped': 'MOPED',
    }
    for keyword, vehicle_type in vehicle_keywords.items():
        if keyword in search_lower:
            filters['vehicle'] = vehicle_type
            applied_filters.append(f"Vehicle: {keyword.capitalize()}")
            break

    # Person type.
    if 'pedestrian' in search_lower:
        filters['person_type'] = 'PEDESTRIAN'
        applied_filters.append("Person: Pedestrian")
    elif 'cyclist' in search_lower:
        filters['person_type'] = 'CYCLIST'
        applied_filters.append("Person: Cyclist")
    elif 'occupant' in search_lower or 'driver' in search_lower:
        filters['person_type'] = 'OCCUPANT'
        applied_filters.append("Person: Occupant")

    # Injury severity: fatal wording takes precedence over injury wording.
    if any(word in search_lower for word in ('fatal', 'death', 'killed')):
        filters['injury'] = 'KILLED'
        applied_filters.append("Injury: Fatal")
    elif 'injured' in search_lower or 'injury' in search_lower:
        filters['injury'] = 'INJURED'
        applied_filters.append("Injury: Injured")

    # Gender: "female" contains "male", so it has to be tested first.
    if 'female' in search_lower:
        filters['gender'] = 'F'
        applied_filters.append("Gender: Female")
    elif 'male' in search_lower:
        filters['gender'] = 'M'
        applied_filters.append("Gender: Male")

    return filters, applied_filters
|
|
|
|
def generate_report(borough, year, month, dow, hour_min, hour_max, vehicle, person_type,
                    person_injury, gender, safety, c1_x, c1_y, c3_x, c3_y, c3_top,
                    c4_x, c4_y, compare_cat):
    """Build the summary, nine figures and nine insight strings for the filters.

    Args:
        borough, year, month, vehicle, person_type, person_injury, gender, safety:
            Exact-match filter values; the string 'All' disables the filter.
        dow: Collection of CRASH_DAYOFWEEK values to keep; empty keeps all days.
        hour_min, hour_max: Inclusive CRASH_HOUR bounds.
        c1_x, c1_y: Chart 1 grouping column and metric ('count' or a column to sum).
        c3_x, c3_y, c3_top: Chart 3 category column, metric, and number of bars.
        c4_x, c4_y: Chart 4 grouping column and metric.
        compare_cat: Column whose groups are compared by injury/fatality rate.

    Returns:
        A 19-tuple `(summary_markdown, fig1, insight1, ..., fig9, insight9)`.
        When no record matches, the summary is "No data found", every figure
        is the same placeholder and every insight is an empty string.
    """
    # --- Filtering -------------------------------------------------------
    # Combine everything into one boolean mask so the full frame is never
    # copied; indexing with the mask already yields a new frame.
    mask = (df['CRASH_HOUR'] >= hour_min) & (df['CRASH_HOUR'] <= hour_max)
    exact_filters = (
        ('BOROUGH', borough),
        ('CRASH_YEAR', year),
        ('CRASH_MONTH', month),
        ('VEHICLE TYPE CODE 1', vehicle),
        ('PERSON_TYPE', person_type),
        ('PERSON_INJURY', person_injury),
        ('PERSON_SEX', gender),
        ('SAFETY_EQUIPMENT', safety),
    )
    for column, wanted in exact_filters:
        if wanted != 'All':
            mask &= df[column] == wanted
    if dow:
        mask &= df['CRASH_DAYOFWEEK'].isin(dow)
    filtered_df = df[mask]

    if filtered_df.empty:
        empty_fig = go.Figure()
        empty_fig.add_annotation(text="No data found. Adjust filters.", xref="paper", yref="paper",
                                 x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray"))
        # One summary slot followed by nine (figure, insight) pairs.
        return ("No data found",) + (empty_fig, "") * 9

    # --- Summary statistics ----------------------------------------------
    total_records = len(filtered_df)
    total_injuries = int(filtered_df['NUMBER OF PERSONS INJURED'].sum())
    total_fatalities = int(filtered_df['NUMBER OF PERSONS KILLED'].sum())
    injury_rate = total_injuries / total_records * 100
    fatality_rate = total_fatalities / total_records * 100
    unique_crashes = len(filtered_df['COLLISION_ID'].unique())

    summary_text = "\n".join([
        "",
        "## π Summary Statistics",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| **Total Records** | {total_records:,} |",
        f"| **Total Injuries** | {total_injuries:,} ({injury_rate:.2f}%) |",
        f"| **Total Fatalities** | {total_fatalities:,} ({fatality_rate:.2f}%) |",
        f"| **Pedestrian Injuries** | {int(filtered_df['NUMBER OF PEDESTRIANS INJURED'].sum()):,} |",
        f"| **Cyclist Injuries** | {int(filtered_df['NUMBER OF CYCLIST INJURED'].sum()):,} |",
        f"| **Motorist Injuries** | {int(filtered_df['NUMBER OF MOTORIST INJURED'].sum()):,} |",
        f"| **Unique Crashes** | {unique_crashes:,} |",
        f"| **Avg Persons/Crash** | {(total_records / unique_crashes):.1f} |",
        "",
    ])

    # --- Chart 1: trend line ---------------------------------------------
    if c1_y == 'count':
        chart1_data = filtered_df.groupby(c1_x).size().reset_index(name='count')
        y_label = 'Number of Records'
    else:
        chart1_data = filtered_df.groupby(c1_x)[c1_y].sum().reset_index()
        y_label = c1_y
    value_col = chart1_data.columns[1]

    fig1 = px.line(chart1_data, x=c1_x, y=value_col,
                   labels={value_col: y_label, c1_x: c1_x},
                   title='Trend Analysis')
    fig1.update_traces(line_color='#3498db', line_width=3)
    fig1.update_layout(template='plotly_white', height=400)

    max_val = chart1_data[value_col].max()
    min_val = chart1_data[value_col].min()
    max_cat = chart1_data.loc[chart1_data[value_col].idxmax(), c1_x]
    min_cat = chart1_data.loc[chart1_data[value_col].idxmin(), c1_x]
    insight1 = f"π **Insight:** Peak at {max_cat} ({max_val:,.0f}), lowest at {min_cat} ({min_val:,.0f})"

    # --- Chart 2: person type pie ----------------------------------------
    person_type_data = filtered_df['PERSON_TYPE'].value_counts()
    fig2 = px.pie(values=person_type_data.values, names=person_type_data.index,
                  title='Person Type Distribution',
                  color_discrete_sequence=['#2ecc71', '#f39c12', '#e74c3c', '#3498db'])
    fig2.update_layout(height=400)

    most_common = person_type_data.idxmax()
    pct = person_type_data.max() / person_type_data.sum() * 100
    insight2 = f"π₯§ **Insight:** Most common person type: {most_common} ({pct:.1f}% of records)"

    # --- Chart 3: top-N categories ---------------------------------------
    top_n = int(c3_top)
    if c3_y == 'count':
        chart3_data = filtered_df[c3_x].value_counts().head(top_n)
        y_label = 'Number of Records'
    else:
        chart3_data = filtered_df.groupby(c3_x)[c3_y].sum().sort_values(ascending=False).head(top_n)
        y_label = c3_y

    fig3 = px.bar(x=chart3_data.index, y=chart3_data.values,
                  labels={'x': c3_x, 'y': y_label},
                  title=f'Categorical Analysis - Top {top_n}')
    fig3.update_traces(marker_color='#3498db')
    fig3.update_layout(template='plotly_white', height=400)

    max_cat3 = chart3_data.idxmax()
    min_cat3 = chart3_data.idxmin()
    insight3 = f"π **Insight:** Highest: {max_cat3} ({chart3_data.max():,.0f}), Lowest: {min_cat3} ({chart3_data.min():,.0f})"

    # --- Chart 4: time distribution --------------------------------------
    if c4_y == 'count':
        chart4_data = filtered_df[c4_x].value_counts().sort_index()
        y_label = 'Number of Records'
    else:
        chart4_data = filtered_df.groupby(c4_x)[c4_y].sum().sort_index()
        y_label = c4_y

    fig4 = px.bar(x=chart4_data.index, y=chart4_data.values,
                  labels={'x': c4_x, 'y': y_label},
                  title='Time Distribution')
    fig4.update_traces(marker_color='#e67e22')
    fig4.update_layout(template='plotly_white', height=400)

    max_cat4 = chart4_data.idxmax()
    min_cat4 = chart4_data.idxmin()
    insight4 = f"β° **Insight:** Peak time: {max_cat4} ({chart4_data.max():,.0f}), Quietest: {min_cat4} ({chart4_data.min():,.0f})"

    # --- Chart 5: contributing factors, vehicle 1 ------------------------
    factor1_data = filtered_df['CONTRIBUTING FACTOR VEHICLE 1'].value_counts().head(15)
    factor1_data = factor1_data[factor1_data.index != 'UNSPECIFIED']

    fig5 = px.bar(x=factor1_data.index, y=factor1_data.values,
                  labels={'x': 'Contributing Factor', 'y': 'Number of Crashes'},
                  title='Top Contributing Factors (Vehicle 1)')
    fig5.update_traces(marker_color='#e74c3c')
    fig5.update_layout(template='plotly_white', height=400, xaxis={'tickangle': -45})

    # Guard every statistic: the series is empty when only 'UNSPECIFIED'
    # factors are present, and max() of an empty series is NaN.
    if len(factor1_data) > 0:
        top_factor1 = factor1_data.idxmax()
        top_factor1_count = int(factor1_data.max())
        top_factor1_pct = top_factor1_count / total_records * 100
    else:
        top_factor1, top_factor1_count, top_factor1_pct = "N/A", 0, 0
    insight5 = f"π¨ **Insight:** Top cause: {top_factor1} ({top_factor1_count:,} crashes, {top_factor1_pct:.1f}%)"

    # --- Chart 6: contributing factors, vehicle 2 ------------------------
    factor2_data = filtered_df['CONTRIBUTING FACTOR VEHICLE 2'].value_counts().head(15)
    factor2_data = factor2_data[~factor2_data.index.isin(['UNSPECIFIED', 'NO SECOND VEHICLE'])]

    if len(factor2_data) > 0:
        fig6 = px.bar(x=factor2_data.index, y=factor2_data.values,
                      labels={'x': 'Secondary Contributing Factor', 'y': 'Number of Crashes'},
                      title='Top Contributing Factors (Vehicle 2)')
        fig6.update_traces(marker_color='#f39c12')
        fig6.update_layout(template='plotly_white', height=400, xaxis={'tickangle': -45})

        top_factor2 = factor2_data.idxmax()
        top_factor2_pct = factor2_data.max() / total_records * 100
        insight6 = f"π¨ **Insight:** Top secondary cause: {top_factor2} ({factor2_data.max():,} crashes, {top_factor2_pct:.1f}%)"
    else:
        fig6 = go.Figure()
        fig6.add_annotation(text="No secondary factors", xref="paper", yref="paper",
                            x=0.5, y=0.5, showarrow=False)
        fig6.update_layout(height=400, title='Top Contributing Factors (Vehicle 2)')
        insight6 = "βΉοΈ **Note:** Most crashes involve only one vehicle or have unspecified secondary factors"

    # --- Chart 7: injury / fatality rate comparison ----------------------
    compare_data = filtered_df.groupby(compare_cat).agg({
        'COLLISION_ID': 'count',
        'NUMBER OF PERSONS INJURED': 'sum',
        'NUMBER OF PERSONS KILLED': 'sum'
    }).reset_index()
    compare_data.columns = [compare_cat, 'Total_Records', 'Total_Injuries', 'Total_Fatalities']
    compare_data['Injury_Rate'] = compare_data['Total_Injuries'] / compare_data['Total_Records'] * 100
    compare_data['Fatality_Rate'] = compare_data['Total_Fatalities'] / compare_data['Total_Records'] * 100

    if compare_cat == 'CRASH_DAYOFWEEK':
        # Show weekday names in calendar order rather than sorting by rate.
        day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        day_mapping = dict(enumerate(day_order))
        compare_data[compare_cat] = compare_data[compare_cat].map(day_mapping)
        compare_data[compare_cat] = pd.Categorical(compare_data[compare_cat], categories=day_order, ordered=True)
        compare_data = compare_data.sort_values(compare_cat)
    else:
        compare_data = compare_data.sort_values('Injury_Rate', ascending=False).head(15)

    fig7 = go.Figure()
    fig7.add_trace(go.Bar(x=compare_data[compare_cat], y=compare_data['Injury_Rate'],
                          name='Injury Rate (%)', marker_color='#f39c12'))
    fig7.add_trace(go.Bar(x=compare_data[compare_cat], y=compare_data['Fatality_Rate'],
                          name='Fatality Rate (%)', marker_color='#e74c3c'))
    fig7.update_layout(barmode='group', template='plotly_white', height=400,
                       title='Injury Rate Comparison',
                       xaxis_title=compare_cat, yaxis_title='Rate (%)')

    highest_injury = compare_data.loc[compare_data['Injury_Rate'].idxmax()]
    highest_fatal = compare_data.loc[compare_data['Fatality_Rate'].idxmax()]
    insight7 = f"β οΈ **Insight:** Highest injury rate: {highest_injury[compare_cat]} ({highest_injury['Injury_Rate']:.2f}%), Highest fatality rate: {highest_fatal[compare_cat]} ({highest_fatal['Fatality_Rate']:.2f}%)"

    # --- Chart 8: day x hour heatmap -------------------------------------
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    heatmap_data = filtered_df.groupby(['CRASH_DAYOFWEEK', 'CRASH_HOUR']).size().reset_index(name='count')
    if len(heatmap_data) > 0:
        heatmap_pivot = heatmap_data.pivot(index='CRASH_DAYOFWEEK', columns='CRASH_HOUR', values='count')
        # Label only the days actually present in the pivot; a day filter
        # leaves fewer than seven rows and a fixed label list would mislabel them.
        fig8 = go.Figure(data=go.Heatmap(
            z=heatmap_pivot.values,
            x=heatmap_pivot.columns,
            y=[day_names[int(day)] for day in heatmap_pivot.index],
            colorscale='YlOrRd'
        ))
        fig8.update_layout(xaxis_title='Hour of Day', yaxis_title='Day of Week',
                           title='Day Γ Hour Heatmap', template='plotly_white', height=500)

        max_day = heatmap_pivot.sum(axis=1).idxmax()
        max_hour = heatmap_pivot.sum(axis=0).idxmax()
        insight8 = f"ποΈ **Insight:** Busiest day: {day_names[int(max_day)]}, Busiest hour: {int(max_hour)}:00"
    else:
        fig8 = go.Figure()
        fig8.update_layout(height=500, title='Day Γ Hour Heatmap')
        insight8 = ""

    # --- Chart 9: map ----------------------------------------------------
    # Keep only coordinates inside a rough NYC bounding box. The comparisons
    # are False for NaN and 0, so missing or zeroed coordinates drop out.
    lat = filtered_df['LATITUDE']
    lon = filtered_df['LONGITUDE']
    map_sample = filtered_df[(lat > 40) & (lat < 41) & (lon > -75) & (lon < -73)]

    if len(map_sample) > 0:
        # Cap the number of plotted points; the fixed seed keeps the sample
        # stable between calls. Copy so the new column is written to an
        # independent frame.
        map_sample = map_sample.sample(n=min(3000, len(map_sample)), random_state=42).copy()

        # Severity: fatal outranks injury, which outranks property damage.
        map_sample['SEVERITY_CATEGORY'] = 'Property Damage Only'
        map_sample.loc[map_sample['NUMBER OF PERSONS INJURED'] > 0, 'SEVERITY_CATEGORY'] = 'Injury'
        map_sample.loc[map_sample['NUMBER OF PERSONS KILLED'] > 0, 'SEVERITY_CATEGORY'] = 'Fatal'

        color_map = {
            'Fatal': '#e74c3c',
            'Injury': '#f39c12',
            'Property Damage Only': '#9d7aff'
        }

        fig9 = px.scatter_map(
            map_sample,
            lat='LATITUDE',
            lon='LONGITUDE',
            color='SEVERITY_CATEGORY',
            color_discrete_map=color_map,
            title=f'Geographic Distribution (Sample of {len(map_sample):,} locations)',
            zoom=10,
            height=600,
            hover_data={
                'LATITUDE': False,
                'LONGITUDE': False,
                'SEVERITY_CATEGORY': True,
                'NUMBER OF PERSONS INJURED': True,
                'NUMBER OF PERSONS KILLED': True,
                'BOROUGH': True,
                'VEHICLE TYPE CODE 1': True
            }
        )
        fig9.update_layout(map_style="open-street-map")

        severity_counts = map_sample['SEVERITY_CATEGORY'].value_counts()
        top_severity = severity_counts.idxmax() if len(severity_counts) > 0 else "N/A"
        insight9 = f"πΊοΈ **Insight:** Showing {len(map_sample):,} locations, most common severity: {top_severity}"
    else:
        fig9 = go.Figure()
        fig9.add_annotation(
            text="No location data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color="gray")
        )
        fig9.update_layout(height=600, title='Geographic Distribution')
        insight9 = ""

    return (summary_text, fig1, insight1, fig2, insight2, fig3, insight3, fig4, insight4,
            fig5, insight5, fig6, insight6, fig7, insight7, fig8, insight8, fig9, insight9)
|
|
|
|
def apply_smart_search(search_text):
    """Translate a smart-search query into values for the filter widgets.

    Args:
        search_text: Free-text query typed into the search box.

    Returns:
        A 12-tuple in the order borough, year, month, day-of-week list,
        hour min, hour max, vehicle, person type, person injury, gender,
        safety equipment, feedback markdown. Filters the query did not mention
        fall back to their neutral defaults ('All', an empty day list, hours
        0-23). When nothing is recognised, all defaults are returned together
        with a warning message.
    """
    result = smart_search_parser(search_text)
    filters, applied = result if result is not None else ({}, [])

    if not applied:
        # Each widget needs a default of its own type: the day-of-week group
        # takes a list and the hour sliders take numbers, so 'All' cannot be
        # used for every slot.
        return ('All', 'All', 'All', [], 0, 23, 'All', 'All', 'All', 'All', 'All',
                "⚠️ No filters detected. Try: 'Brooklyn 2022 pedestrian crashes'")

    feedback = "✅ Filters Applied: " + ", ".join(applied) + "\n\nClick 'Generate Report' to see results."
    hour_min, hour_max = filters.get('hour_range', (0, 23))

    return (
        filters.get('borough', 'All'),
        filters.get('year', 'All'),
        filters.get('month', 'All'),
        filters.get('dow', []),
        hour_min,
        hour_max,
        filters.get('vehicle', 'All'),
        filters.get('person_type', 'All'),
        filters.get('injury', 'All'),
        filters.get('gender', 'All'),
        filters.get('safety', 'All'),
        feedback,
    )
|
|
|
|
| |
# Dashboard layout and event wiring. Components are created in display order.
with gr.Blocks(title="NYC Motor Vehicle Crashes Dashboard") as demo:
    # Page header.
    gr.Markdown("# π NYC Motor Vehicle Crashes Dashboard - Enhanced Analytics")
    gr.Markdown("### Comprehensive analysis with 5.7M+ crash records")

    # Free-text search that pre-fills the filter widgets.
    with gr.Accordion("π Smart Search", open=True):
        gr.Markdown("**Type natural language queries** like: `Brooklyn 2022 pedestrian crashes` or `Manhattan weekend taxi injured`")
        with gr.Row():
            search_input = gr.Textbox(
                label="Search Query",
                placeholder="e.g., Queens Friday night motorcycle fatalities...",
                scale=3,
            )
            search_btn = gr.Button("π Apply Smart Search", variant="primary", scale=1)
            clear_search_btn = gr.Button("β Clear", variant="stop", scale=1)
        search_feedback = gr.Markdown(visible=True)

    with gr.Row():
        # Left column: record filters.
        with gr.Column(scale=1):
            gr.Markdown("### ποΈ Filters")
            borough = gr.Dropdown(choices=boroughs, value='All', label="Borough")
            year = gr.Dropdown(choices=years, value='All', label="Year")
            month = gr.Dropdown(choices=months, value='All', label="Month")
            dow = gr.CheckboxGroup(
                choices=list(zip(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], range(7))),
                label="Day of Week",
                type="value",
            )
            with gr.Row():
                hour_min = gr.Slider(minimum=0, maximum=23, value=0, step=1, label="Hour Min")
                hour_max = gr.Slider(minimum=0, maximum=23, value=23, step=1, label="Hour Max")
            vehicle = gr.Dropdown(choices=vehicles, value='All', label="Vehicle Type 1")
            person_type = gr.Dropdown(choices=person_types, value='All', label="Person Type")
            person_injury = gr.Dropdown(choices=injury_types, value='All', label="Person Injury")
            gender = gr.Dropdown(choices=genders, value='All', label="Gender")
            safety = gr.Dropdown(choices=safety_equip, value='All', label="Safety Equipment")

        # Right column: chart axes and metrics.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Chart Settings")
            metric_choices = ['count'] + NUMERIC_COLS
            c1_x = gr.Dropdown(choices=TEMPORAL_COLS, value='CRASH_YEAR', label="Chart 1 X-axis (Trend)")
            c1_y = gr.Dropdown(choices=metric_choices, value='count', label="Chart 1 Y-axis")
            c3_x = gr.Dropdown(choices=CATEGORICAL_COLS, value='BOROUGH', label="Chart 3 Category")
            c3_y = gr.Dropdown(choices=metric_choices, value='count', label="Chart 3 Y-axis")
            c3_top = gr.Slider(minimum=5, maximum=20, value=10, step=1, label="Chart 3 Top N")
            c4_x = gr.Dropdown(choices=TEMPORAL_COLS, value='CRASH_HOUR', label="Chart 4 X-axis (Time)")
            c4_y = gr.Dropdown(choices=metric_choices, value='count', label="Chart 4 Y-axis")
            compare_cat = gr.Dropdown(
                choices=[
                    'BOROUGH', 'VEHICLE TYPE CODE 1', 'PERSON_TYPE',
                    'SAFETY_EQUIPMENT', 'CRASH_HOUR', 'CRASH_DAYOFWEEK',
                    'CRASH_MONTH', 'CRASH_YEAR', 'POSITION_IN_VEHICLE', 'PERSON_SEX',
                ],
                value='BOROUGH',
                label="Comparison Category",
            )

    with gr.Row():
        generate_btn = gr.Button("π Generate Report", variant="primary", size="lg", scale=2)
        reset_btn = gr.Button("π Reset All Filters", variant="secondary", size="lg", scale=1)

    # Report area: summary first, then each chart with its insight line.
    summary_output = gr.Markdown(label="Summary Statistics")

    with gr.Row():
        with gr.Column():
            chart1_output = gr.Plot(label="Chart 1: Trend Analysis")
            insight1_output = gr.Markdown(label="Insight")
        with gr.Column():
            chart2_output = gr.Plot(label="Chart 2: Person Type Distribution")
            insight2_output = gr.Markdown(label="Insight")

    with gr.Row():
        with gr.Column():
            chart3_output = gr.Plot(label="Chart 3: Categorical Analysis")
            insight3_output = gr.Markdown(label="Insight")
        with gr.Column():
            chart4_output = gr.Plot(label="Chart 4: Time Distribution")
            insight4_output = gr.Markdown(label="Insight")

    with gr.Row():
        with gr.Column():
            chart5_output = gr.Plot(label="Chart 5: Contributing Factor 1")
            insight5_output = gr.Markdown(label="Insight")
        with gr.Column():
            chart6_output = gr.Plot(label="Chart 6: Contributing Factor 2")
            insight6_output = gr.Markdown(label="Insight")

    chart7_output = gr.Plot(label="Chart 7: Injury Rate Comparison")
    insight7_output = gr.Markdown(label="Insight")

    chart8_output = gr.Plot(label="Chart 8: Day Γ Hour Heatmap")
    insight8_output = gr.Markdown(label="Insight")

    chart9_output = gr.Plot(label="Chart 9: Geographic Distribution Map")
    insight9_output = gr.Markdown(label="Insight")

    # Component groups shared by the event handlers below. Their order must
    # match the parameter/return order of the callback functions.
    filter_controls = [borough, year, month, dow, hour_min, hour_max, vehicle,
                       person_type, person_injury, gender, safety]
    chart_controls = [c1_x, c1_y, c3_x, c3_y, c3_top, c4_x, c4_y, compare_cat]
    report_outputs = [
        summary_output,
        chart1_output, insight1_output,
        chart2_output, insight2_output,
        chart3_output, insight3_output,
        chart4_output, insight4_output,
        chart5_output, insight5_output,
        chart6_output, insight6_output,
        chart7_output, insight7_output,
        chart8_output, insight8_output,
        chart9_output, insight9_output,
    ]

    generate_btn.click(
        fn=generate_report,
        inputs=filter_controls + chart_controls,
        outputs=report_outputs,
    )

    def reset_all():
        """Return neutral values for every filter widget and clear the feedback text."""
        return ('All', 'All', 'All', [], 0, 23, 'All', 'All', 'All', 'All', 'All', '')

    reset_btn.click(
        fn=reset_all,
        outputs=filter_controls + [search_feedback],
    )

    search_btn.click(
        fn=apply_smart_search,
        inputs=[search_input],
        outputs=filter_controls + [search_feedback],
    )

    # Clearing empties both the query box and the feedback message.
    clear_search_btn.click(
        fn=lambda: ('', ''),
        outputs=[search_input, search_feedback],
    )


# Start the local server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch(share=False)