# Page header captured together with this file (not part of the program):
#   Spaces: Docfile / lm  (status: Sleeping)
#   File size: 14,608 Bytes

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import os

# Page-wide Streamlit settings: browser-tab title and icon, a wide layout,
# and a sidebar that starts out expanded.
st.set_page_config(
    page_title="Urban Traffic Flow Dashboard",
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)


@st.cache_data
def load_data():
    """Load the traffic CSV stored next to this script and add time columns.

    The ``Timestamp`` column is parsed to datetimes, and four columns are
    derived from it: ``Hour``, ``DayOfWeek`` (day name), ``Date`` and
    ``IsWeekend`` (True for Saturday/Sunday). The result is cached through
    ``st.cache_data``.

    Returns:
        The enriched ``pandas.DataFrame``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    raw = pd.read_csv(os.path.join(here, "urban_traffic_flow_with_target.csv"))

    # Parse once and reuse the parsed series for every derived column.
    stamps = pd.to_datetime(raw["Timestamp"])
    weekday_names = stamps.dt.day_name()

    return raw.assign(
        Timestamp=stamps,
        Hour=stamps.dt.hour,
        DayOfWeek=weekday_names,
        Date=stamps.dt.date,
        IsWeekend=weekday_names.isin(["Saturday", "Sunday"]),
    )


def _render_sidebar_filters(df):
    """Draw the sidebar filter widgets and return the current selections.

    Args:
        df: Unfiltered traffic data; it supplies the date bounds and the
            option lists of the multiselect widgets.

    Returns:
        Tuple ``(date_range, locations, peak_values, days, min_congestion)``
        holding the values returned by the Streamlit widgets.
    """
    with st.sidebar:
        st.header("🔍 Filters")

        min_date = df["Timestamp"].min().date()
        max_date = df["Timestamp"].max().date()
        date_range = st.date_input(
            "Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

        location_options = sorted(df["Location"].unique())
        selected_locations = st.multiselect(
            "Select Locations",
            options=location_options,
            default=location_options,
        )

        peak_options = sorted(df["Peak_Off_Peak"].unique())
        peak_filter = st.multiselect(
            "Peak/Off-Peak",
            options=peak_options,
            default=peak_options,
        )

        day_options = sorted(df["DayOfWeek"].unique())
        day_filter = st.multiselect(
            "Day of Week",
            options=day_options,
            default=day_options,
        )

        congestion_filter = st.slider(
            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
        )

    return date_range, selected_locations, peak_filter, day_filter, congestion_filter


def _apply_filters(df, date_range, locations, peak_values, days, min_congestion):
    """Return the rows of ``df`` that satisfy every active filter.

    Args:
        df: Unfiltered traffic data.
        date_range: Value of the date widget; the date filter is applied only
            when it holds both a start and an end date.
        locations: Selected locations; an empty selection disables the filter.
        peak_values: Selected ``Peak_Off_Peak`` values; empty disables.
        days: Selected day names; empty disables.
        min_congestion: Lower bound (inclusive) on ``Congestion_Level``.

    Returns:
        A new DataFrame containing only the matching rows.
    """
    mask = df["Congestion_Level"] >= min_congestion

    # While the user is still picking a range the widget value is incomplete;
    # in that case the date filter is skipped.
    if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
        start_date, end_date = date_range
        dates = df["Timestamp"].dt.date
        mask &= (dates >= start_date) & (dates <= end_date)

    if locations:
        mask &= df["Location"].isin(locations)
    if peak_values:
        mask &= df["Peak_Off_Peak"].isin(peak_values)
    if days:
        mask &= df["DayOfWeek"].isin(days)

    return df[mask]


def _render_kpis(filtered_df):
    """Show the four headline metrics. Expects a non-empty ``filtered_df``."""
    st.subheader("📊 Key Performance Indicators")

    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)

    with kpi_col1:
        st.metric(
            "Total Vehicle Count",
            f"{filtered_df['Vehicle_Count'].sum():,.0f}",
            help="Total number of vehicles recorded",
        )

    with kpi_col2:
        st.metric(
            "Avg Vehicle Speed",
            f"{filtered_df['Vehicle_Speed'].mean():.1f} km/h",
            help="Average speed across all locations",
        )

    with kpi_col3:
        st.metric(
            "Avg Congestion Level",
            f"{filtered_df['Congestion_Level'].mean():.1f}",
            help="Average congestion level (0-5 scale)",
        )

    with kpi_col4:
        # Mean of a boolean column is the share of True values.
        peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100
        st.metric(
            "Peak Hours Ratio",
            f"{peak_share:.1f}%",
            help="Percentage of peak hour observations",
        )


def _render_temporal_tab(filtered_df):
    """Draw the hourly-average chart and the per-location time series."""
    st.subheader("Hourly Traffic Patterns")

    hourly_avg = (
        filtered_df.groupby("Hour")
        .agg(
            {
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    fig_hourly = go.Figure()

    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Count"],
            mode="lines+markers",
            name="Avg Vehicle Count",
            line=dict(color="#1f77b4", width=3),
            yaxis="y",
        )
    )

    # Speed is drawn against a second y-axis on the right-hand side.
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Speed"],
            mode="lines+markers",
            name="Avg Speed (km/h)",
            line=dict(color="#2ca02c", width=3),
            yaxis="y2",
        )
    )

    fig_hourly.update_layout(
        title="Average Traffic by Hour of Day",
        xaxis_title="Hour",
        yaxis_title="Vehicle Count",
        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
        hovermode="x unified",
        template="plotly_white",
        height=500,
    )

    st.plotly_chart(fig_hourly, use_container_width=True)

    st.subheader("Traffic Evolution Over Time")

    time_series = (
        filtered_df.groupby(["Timestamp", "Location"])
        .agg({"Vehicle_Count": "sum", "Congestion_Level": "mean"})
        .reset_index()
    )

    fig_ts = px.line(
        time_series,
        x="Timestamp",
        y="Vehicle_Count",
        color="Location",
        title="Traffic Volume Over Time by Location",
        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
    )

    fig_ts.update_layout(hovermode="x unified", template="plotly_white", height=500)

    st.plotly_chart(fig_ts, use_container_width=True)


def _render_location_tab(filtered_df):
    """Draw per-location volume and speed bars plus the hour/location heatmap."""
    location_stats = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Vehicle_Count": "sum",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Traffic by Location")

        fig_loc = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Count",
            title="Total Vehicle Count by Location",
            color="Vehicle_Count",
            color_continuous_scale="Blues",
            labels={"Vehicle_Count": "Total Count"},
        )

        fig_loc.update_layout(template="plotly_white", height=400)

        st.plotly_chart(fig_loc, use_container_width=True)

    with col2:
        st.subheader("Avg Speed by Location")

        fig_speed = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Speed",
            title="Average Speed by Location",
            color="Vehicle_Speed",
            color_continuous_scale="RdYlGn",
            labels={"Vehicle_Speed": "Speed (km/h)"},
        )

        fig_speed.update_layout(template="plotly_white", height=400)

        st.plotly_chart(fig_speed, use_container_width=True)

    st.subheader("Congestion Heatmap: Hour vs Location")

    heatmap_data = filtered_df.pivot_table(
        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
    )

    fig_heatmap = px.imshow(
        heatmap_data,
        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
        title="Average Congestion Level by Hour and Location",
        color_continuous_scale="RdYlGn_r",
        aspect="auto",
    )

    fig_heatmap.update_layout(template="plotly_white", height=500)

    st.plotly_chart(fig_heatmap, use_container_width=True)


def _render_distribution_tab(filtered_df):
    """Draw box plots and the congestion-level histogram."""
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Vehicle Count Distribution")

        fig_count_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Count",
            title="Vehicle Count Distribution by Location",
            color="Location",
        )

        fig_count_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )

        st.plotly_chart(fig_count_box, use_container_width=True)

    with col2:
        st.subheader("Speed Distribution")

        fig_speed_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Speed",
            title="Speed Distribution by Location",
            color="Location",
        )

        fig_speed_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )

        st.plotly_chart(fig_speed_box, use_container_width=True)

    st.subheader("Congestion Level Distribution")

    congestion_dist = (
        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
    )
    # Name the columns explicitly rather than relying on what reset_index()
    # happens to call them.
    congestion_dist.columns = ["Congestion_Level", "Count"]

    fig_congestion = px.bar(
        congestion_dist,
        x="Congestion_Level",
        y="Count",
        title="Distribution of Congestion Levels",
        color="Congestion_Level",
        color_continuous_scale="Reds",
        labels={
            "Count": "Number of Records",
            "Congestion_Level": "Congestion Level",
        },
    )

    fig_congestion.update_layout(template="plotly_white", height=400)

    st.plotly_chart(fig_congestion, use_container_width=True)

    st.subheader("Congestion by Peak/Off-Peak")

    fig_peak = px.box(
        filtered_df,
        x="Peak_Off_Peak",
        y="Congestion_Level",
        title="Congestion Level: Peak vs Off-Peak",
        color="Peak_Off_Peak",
    )

    fig_peak.update_layout(template="plotly_white", height=400, showlegend=False)

    st.plotly_chart(fig_peak, use_container_width=True)


def _render_top_zones_tab(filtered_df):
    """Draw the congestion ranking table and the busiest/slowest bar charts."""
    st.subheader("Most Congested Locations")

    location_congestion = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Congestion_Level": "mean",
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
            }
        )
        .round(2)
        .reset_index()
    )

    # Descending, so the most congested location is the first row, as the
    # section title promises.
    location_congestion = location_congestion.sort_values(
        "Congestion_Level", ascending=False
    )

    st.dataframe(location_congestion, use_container_width=True, hide_index=True)

    st.subheader("Top 5 Busiest Locations")

    top_locations = (
        filtered_df.groupby("Location")["Vehicle_Count"]
        .sum()
        .sort_values(ascending=False)
        .head(5)
        .reset_index()
    )

    fig_top = px.bar(
        top_locations,
        x="Vehicle_Count",
        y="Location",
        orientation="h",
        title="Top 5 Locations by Total Traffic Volume",
        color="Vehicle_Count",
        color_continuous_scale="Blues",
    )

    # Ascending category order puts the largest bar at the top of a
    # horizontal chart.
    fig_top.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )

    st.plotly_chart(fig_top, use_container_width=True)

    st.subheader("Slowest Locations (Lowest Avg Speed)")

    slowest_locations = (
        filtered_df.groupby("Location")["Vehicle_Speed"]
        .mean()
        .sort_values()
        .head(5)
        .reset_index()
    )

    fig_slow = px.bar(
        slowest_locations,
        x="Vehicle_Speed",
        y="Location",
        orientation="h",
        title="Top 5 Slowest Locations",
        color="Vehicle_Speed",
        color_continuous_scale="Reds_r",
    )

    # Descending category order puts the smallest bar (the slowest location)
    # at the top, so this ranking also reads from rank 1 downwards.
    fig_slow.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total descending"},
    )

    st.plotly_chart(fig_slow, use_container_width=True)


def _build_insights(filtered_df):
    """Return the markdown insight sentences for the filtered data.

    Args:
        filtered_df: Rows remaining after the sidebar filters.

    Returns:
        List of markdown strings. The list is empty for empty input, and an
        insight is left out when the data needed to compute it is missing
        (for example, no weekend rows).
    """
    if filtered_df.empty:
        return []

    insights = []

    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
    peak_hour = hourly_mean.idxmax()
    insights.append(
        f"🕐 **Peak traffic hour**: {peak_hour}:00 - {peak_hour + 1}:00 with avg {hourly_mean.max():.0f} vehicles"
    )

    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
    busiest_loc = location_totals.idxmax()
    busiest_count = location_totals.max()
    insights.append(
        f"📍 **Busiest location**: {busiest_loc} with {busiest_count:,.0f} total vehicles"
    )

    avg_congestion = filtered_df["Congestion_Level"].mean()
    if avg_congestion < 2:
        congestion_status = "Low"
    elif avg_congestion < 4:
        congestion_status = "Moderate"
    else:
        congestion_status = "High"
    insights.append(
        f"🚦 **Overall congestion**: {congestion_status} (avg level: {avg_congestion:.1f}/5)"
    )

    is_weekend = filtered_df["IsWeekend"]
    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
    # A mean over zero rows is NaN; only compare when both groups exist and
    # the weekend baseline is a usable denominator.
    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
        insights.append(
            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% {'more' if diff_pct > 0 else 'less'} traffic on average"
        )

    peak_congestion = filtered_df.loc[
        filtered_df["Peak_Off_Peak"] == "Peak", "Congestion_Level"
    ].mean()
    off_peak_congestion = filtered_df.loc[
        filtered_df["Peak_Off_Peak"] == "Off-Peak", "Congestion_Level"
    ].mean()
    if pd.notna(peak_congestion) and pd.notna(off_peak_congestion):
        delta = peak_congestion - off_peak_congestion
        # Report the direction instead of printing a negative "higher" value.
        direction = "higher" if delta >= 0 else "lower"
        insights.append(
            f"⏰ **Peak hours**: Congestion is {abs(delta):.1f} levels {direction} during peak hours"
        )

    return insights


def main():
    """Render the Urban Traffic Flow dashboard.

    Loads the data, applies the sidebar filters, then draws the KPI row, the
    four analysis tabs and the automatic insights. When no row matches the
    filters a warning is shown and nothing else is rendered, so the KPIs and
    charts never operate on an empty frame.
    """
    st.title("🚗 Urban Traffic Flow Dashboard")
    st.markdown(
        "Explore urban traffic patterns, congestion levels, and temporal trends"
    )

    df = load_data()

    selections = _render_sidebar_filters(df)
    filtered_df = _apply_filters(df, *selections)

    if filtered_df.empty:
        st.warning(
            "No data matches the current filters. Adjust the filters in the sidebar."
        )
        return

    _render_kpis(filtered_df)

    st.markdown("---")

    tab1, tab2, tab3, tab4 = st.tabs(
        [
            "📈 Temporal Trends",
            "📍 Location Analysis",
            "📊 Distribution",
            "🏆 Top Zones",
        ]
    )

    with tab1:
        _render_temporal_tab(filtered_df)

    with tab2:
        _render_location_tab(filtered_df)

    with tab3:
        _render_distribution_tab(filtered_df)

    with tab4:
        _render_top_zones_tab(filtered_df)

    st.markdown("---")
    st.subheader("💡 Automatic Insights")

    for insight in _build_insights(filtered_df):
        st.markdown(f"- {insight}")


# Build the dashboard when this file is executed as a script.
if __name__ == "__main__":
    main()