# Urban Traffic Flow Dashboard.
#
# Streamlit app that loads "urban_traffic_flow_with_target.csv" from the
# directory containing this script, lets the user filter it from the sidebar,
# and renders KPIs, Plotly charts and a short list of generated insights.

import os
from datetime import datetime

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Name of the CSV file expected next to this script.
DATA_FILENAME = "urban_traffic_flow_with_target.csv"

# Calendar order for the day-of-week filter (alphabetical order is confusing).
DAY_ORDER = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Must run before any other Streamlit command.
st.set_page_config(
    page_title="Urban Traffic Flow Dashboard",
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)


@st.cache_data
def load_data() -> pd.DataFrame:
    """Load the traffic CSV and add derived time columns.

    The file is read from the directory containing this script. The
    ``Timestamp`` column is parsed to datetimes and four columns are derived
    from it: ``Hour``, ``DayOfWeek`` (day name), ``Date`` and ``IsWeekend``
    (True for Saturday/Sunday).

    Returns:
        The loaded DataFrame including the derived columns.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(script_dir, DATA_FILENAME)

    df = pd.read_csv(csv_path)
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])
    df["Hour"] = df["Timestamp"].dt.hour
    df["DayOfWeek"] = df["Timestamp"].dt.day_name()
    df["Date"] = df["Timestamp"].dt.date
    df["IsWeekend"] = df["DayOfWeek"].isin(["Saturday", "Sunday"])
    return df


def _render_sidebar_filters(df: pd.DataFrame) -> dict:
    """Draw the sidebar filter widgets and return the user's selections."""
    with st.sidebar:
        st.header("🔍 Filters")

        min_date = df["Timestamp"].min().date()
        max_date = df["Timestamp"].max().date()
        date_range = st.date_input(
            "Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

        location_options = sorted(df["Location"].unique())
        selected_locations = st.multiselect(
            "Select Locations",
            options=location_options,
            default=location_options,
        )

        peak_options = sorted(df["Peak_Off_Peak"].unique())
        peak_filter = st.multiselect(
            "Peak/Off-Peak",
            options=peak_options,
            default=peak_options,
        )

        # Offer only the days present in the data, in calendar order.
        days_present = set(df["DayOfWeek"].unique())
        day_options = [day for day in DAY_ORDER if day in days_present]
        day_filter = st.multiselect(
            "Day of Week",
            options=day_options,
            default=day_options,
        )

        congestion_filter = st.slider(
            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
        )

    return {
        "date_range": date_range,
        "locations": selected_locations,
        "peak": peak_filter,
        "days": day_filter,
        "min_congestion": congestion_filter,
    }


def _apply_filters(df: pd.DataFrame, filters: dict) -> pd.DataFrame:
    """Return the rows of ``df`` matching the sidebar selections.

    An empty multiselect is treated as "no restriction" for that field. The
    date filter is only applied once both ends of the range are chosen (the
    date widget yields fewer than two values while a range is being picked).
    """
    result = df

    date_range = filters["date_range"]
    if len(date_range) == 2:
        start_date, end_date = date_range
        # Reuse the precomputed "Date" column instead of re-deriving it.
        result = result[
            (result["Date"] >= start_date) & (result["Date"] <= end_date)
        ]

    if filters["locations"]:
        result = result[result["Location"].isin(filters["locations"])]
    if filters["peak"]:
        result = result[result["Peak_Off_Peak"].isin(filters["peak"])]
    if filters["days"]:
        result = result[result["DayOfWeek"].isin(filters["days"])]

    return result[result["Congestion_Level"] >= filters["min_congestion"]]


def _render_kpis(filtered_df: pd.DataFrame) -> None:
    """Render the four headline metrics; expects a non-empty frame."""
    st.subheader("📊 Key Performance Indicators")

    total_vehicles = filtered_df["Vehicle_Count"].sum()
    mean_speed = filtered_df["Vehicle_Speed"].mean()
    mean_congestion = filtered_df["Congestion_Level"].mean()
    # Mean of a boolean column is the share of True values.
    peak_share_pct = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100

    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)
    with kpi_col1:
        st.metric(
            "Total Vehicle Count",
            f"{total_vehicles:,.0f}",
            help="Total number of vehicles recorded",
        )
    with kpi_col2:
        st.metric(
            "Avg Vehicle Speed",
            f"{mean_speed:.1f} km/h",
            help="Average speed across all locations",
        )
    with kpi_col3:
        st.metric(
            "Avg Congestion Level",
            f"{mean_congestion:.1f}",
            help="Average congestion level (0-5 scale)",
        )
    with kpi_col4:
        st.metric(
            "Peak Hours Ratio",
            f"{peak_share_pct:.1f}%",
            help="Percentage of peak hour observations",
        )


def _render_temporal_tab(filtered_df: pd.DataFrame) -> None:
    """Render the hourly dual-axis chart and the per-location time series."""
    st.subheader("Hourly Traffic Patterns")
    hourly_avg = (
        filtered_df.groupby("Hour")
        .agg(
            {
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    fig_hourly = go.Figure()
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Count"],
            mode="lines+markers",
            name="Avg Vehicle Count",
            line=dict(color="#1f77b4", width=3),
            yaxis="y",
        )
    )
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Speed"],
            mode="lines+markers",
            name="Avg Speed (km/h)",
            line=dict(color="#2ca02c", width=3),
            yaxis="y2",
        )
    )
    fig_hourly.update_layout(
        title="Average Traffic by Hour of Day",
        xaxis_title="Hour",
        yaxis_title="Vehicle Count",
        # Second y-axis on the right so speed and count keep their own scales.
        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
        hovermode="x unified",
        template="plotly_white",
        height=500,
    )
    st.plotly_chart(fig_hourly, use_container_width=True)

    st.subheader("Traffic Evolution Over Time")
    time_series = (
        filtered_df.groupby(["Timestamp", "Location"])
        .agg({"Vehicle_Count": "sum", "Congestion_Level": "mean"})
        .reset_index()
    )
    fig_ts = px.line(
        time_series,
        x="Timestamp",
        y="Vehicle_Count",
        color="Location",
        title="Traffic Volume Over Time by Location",
        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
    )
    fig_ts.update_layout(
        hovermode="x unified", template="plotly_white", height=500
    )
    st.plotly_chart(fig_ts, use_container_width=True)


def _render_location_tab(filtered_df: pd.DataFrame) -> None:
    """Render per-location volume/speed bars and the hour-location heatmap."""
    location_stats = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Vehicle_Count": "sum",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Traffic by Location")
        fig_loc = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Count",
            title="Total Vehicle Count by Location",
            color="Vehicle_Count",
            color_continuous_scale="Blues",
            labels={"Vehicle_Count": "Total Count"},
        )
        fig_loc.update_layout(template="plotly_white", height=400)
        st.plotly_chart(fig_loc, use_container_width=True)

    with col2:
        st.subheader("Avg Speed by Location")
        fig_speed = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Speed",
            title="Average Speed by Location",
            color="Vehicle_Speed",
            color_continuous_scale="RdYlGn",
            labels={"Vehicle_Speed": "Speed (km/h)"},
        )
        fig_speed.update_layout(template="plotly_white", height=400)
        st.plotly_chart(fig_speed, use_container_width=True)

    st.subheader("Congestion Heatmap: Hour vs Location")
    heatmap_data = filtered_df.pivot_table(
        values="Congestion_Level",
        index="Hour",
        columns="Location",
        aggfunc="mean",
    )
    fig_heatmap = px.imshow(
        heatmap_data,
        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
        title="Average Congestion Level by Hour and Location",
        color_continuous_scale="RdYlGn_r",
        aspect="auto",
    )
    fig_heatmap.update_layout(template="plotly_white", height=500)
    st.plotly_chart(fig_heatmap, use_container_width=True)


def _render_distribution_tab(filtered_df: pd.DataFrame) -> None:
    """Render box plots and the congestion-level histogram."""
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Vehicle Count Distribution")
        fig_count_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Count",
            title="Vehicle Count Distribution by Location",
            color="Location",
        )
        fig_count_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )
        st.plotly_chart(fig_count_box, use_container_width=True)

    with col2:
        st.subheader("Speed Distribution")
        fig_speed_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Speed",
            title="Speed Distribution by Location",
            color="Location",
        )
        fig_speed_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )
        st.plotly_chart(fig_speed_box, use_container_width=True)

    st.subheader("Congestion Level Distribution")
    congestion_dist = (
        filtered_df["Congestion_Level"]
        .value_counts()
        .sort_index()
        .reset_index()
    )
    # Normalise column names; reset_index naming differs across pandas versions.
    congestion_dist.columns = ["Congestion_Level", "Count"]
    fig_congestion = px.bar(
        congestion_dist,
        x="Congestion_Level",
        y="Count",
        title="Distribution of Congestion Levels",
        color="Congestion_Level",
        color_continuous_scale="Reds",
        labels={
            "Count": "Number of Records",
            "Congestion_Level": "Congestion Level",
        },
    )
    fig_congestion.update_layout(template="plotly_white", height=400)
    st.plotly_chart(fig_congestion, use_container_width=True)

    st.subheader("Congestion by Peak/Off-Peak")
    fig_peak = px.box(
        filtered_df,
        x="Peak_Off_Peak",
        y="Congestion_Level",
        title="Congestion Level: Peak vs Off-Peak",
        color="Peak_Off_Peak",
    )
    fig_peak.update_layout(
        template="plotly_white", height=400, showlegend=False
    )
    st.plotly_chart(fig_peak, use_container_width=True)


def _render_top_zones_tab(filtered_df: pd.DataFrame) -> None:
    """Render the congestion ranking table and the top-5 bar charts."""
    st.subheader("Most Congested Locations")
    location_congestion = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Congestion_Level": "mean",
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
            }
        )
        .round(2)
        .reset_index()
    )
    # Descending so the most congested location is at the top of the table,
    # matching the section title.
    location_congestion = location_congestion.sort_values(
        "Congestion_Level", ascending=False
    )
    st.dataframe(
        location_congestion, use_container_width=True, hide_index=True
    )

    st.subheader("Top 5 Busiest Locations")
    top_locations = (
        filtered_df.groupby("Location")["Vehicle_Count"]
        .sum()
        .sort_values(ascending=False)
        .head(5)
        .reset_index()
    )
    fig_top = px.bar(
        top_locations,
        x="Vehicle_Count",
        y="Location",
        orientation="h",
        title="Top 5 Locations by Total Traffic Volume",
        color="Vehicle_Count",
        color_continuous_scale="Blues",
    )
    fig_top.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )
    st.plotly_chart(fig_top, use_container_width=True)

    st.subheader("Slowest Locations (Lowest Avg Speed)")
    slowest_locations = (
        filtered_df.groupby("Location")["Vehicle_Speed"]
        .mean()
        .sort_values()
        .head(5)
        .reset_index()
    )
    fig_slow = px.bar(
        slowest_locations,
        x="Vehicle_Speed",
        y="Location",
        orientation="h",
        title="Top 5 Slowest Locations",
        color="Vehicle_Speed",
        color_continuous_scale="Reds_r",
    )
    fig_slow.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )
    st.plotly_chart(fig_slow, use_container_width=True)


def _build_insights(filtered_df: pd.DataFrame) -> list:
    """Return markdown insight strings derived from the filtered data.

    Comparisons whose inputs are unavailable under the current filters (for
    example weekday vs weekend when no weekend rows remain) are omitted
    instead of being reported as ``nan`` or a misleading 0%.
    """
    if filtered_df.empty:
        return []

    insights = []

    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
    peak_hour = hourly_mean.idxmax()
    insights.append(
        f"🕐 **Peak traffic hour**: {peak_hour}:00 - {peak_hour + 1}:00 "
        f"with avg {hourly_mean.max():.0f} vehicles"
    )

    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
    busiest_loc = location_totals.idxmax()
    busiest_count = location_totals.max()
    insights.append(
        f"📍 **Busiest location**: {busiest_loc} "
        f"with {busiest_count:,.0f} total vehicles"
    )

    avg_congestion = filtered_df["Congestion_Level"].mean()
    if avg_congestion < 2:
        congestion_status = "Low"
    elif avg_congestion < 4:
        congestion_status = "Moderate"
    else:
        congestion_status = "High"
    insights.append(
        f"🚦 **Overall congestion**: {congestion_status} "
        f"(avg level: {avg_congestion:.1f}/5)"
    )

    is_weekend = filtered_df["IsWeekend"]
    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
    # Means are NaN when a group has no rows; only compare when both exist
    # and the weekend baseline is a valid divisor.
    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
        direction = "more" if diff_pct > 0 else "less"
        insights.append(
            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% "
            f"{direction} traffic on average"
        )

    peak_label = filtered_df["Peak_Off_Peak"]
    peak_congestion = filtered_df.loc[
        peak_label == "Peak", "Congestion_Level"
    ].mean()
    off_peak_congestion = filtered_df.loc[
        peak_label == "Off-Peak", "Congestion_Level"
    ].mean()
    if pd.notna(peak_congestion) and pd.notna(off_peak_congestion):
        delta = peak_congestion - off_peak_congestion
        # Word the direction explicitly so a negative delta is not reported
        # as "-0.3 levels higher".
        comparison = "higher" if delta >= 0 else "lower"
        insights.append(
            f"⏰ **Peak hours**: Congestion is {abs(delta):.1f} levels "
            f"{comparison} during peak hours"
        )

    return insights


def main() -> None:
    """Render the dashboard: filters, KPIs, chart tabs and insights."""
    st.title("🚗 Urban Traffic Flow Dashboard")
    st.markdown(
        "Explore urban traffic patterns, congestion levels, and temporal trends"
    )

    df = load_data()
    filters = _render_sidebar_filters(df)
    filtered_df = _apply_filters(df, filters)

    # With no rows the KPIs would show "nan" and the charts would be built
    # from empty frames, so stop early with an explanation instead.
    if filtered_df.empty:
        st.warning(
            "No data matches the selected filters. "
            "Adjust the filters in the sidebar to see results."
        )
        return

    _render_kpis(filtered_df)
    st.markdown("---")

    tab1, tab2, tab3, tab4 = st.tabs(
        [
            "📈 Temporal Trends",
            "📍 Location Analysis",
            "📊 Distribution",
            "🏆 Top Zones",
        ]
    )
    with tab1:
        _render_temporal_tab(filtered_df)
    with tab2:
        _render_location_tab(filtered_df)
    with tab3:
        _render_distribution_tab(filtered_df)
    with tab4:
        _render_top_zones_tab(filtered_df)

    st.markdown("---")
    st.subheader("💡 Automatic Insights")
    for insight in _build_insights(filtered_df):
        st.markdown(f"- {insight}")


if __name__ == "__main__":
    main()