Spaces:

Docfile
/

lm

Sleeping

App Files Files Community

Docfile committed on 28 days ago

Commit

85c35ef

verified ·

1 Parent(s): 7b63a84

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +474 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,476 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from datetime import datetime
+# Page-level Streamlit settings: browser tab title and icon, wide layout,
+# and a sidebar that starts expanded (the sidebar hosts the filter widgets).
+st.set_page_config(
+    page_title="Urban Traffic Flow Dashboard",
+    page_icon="🚗",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+@st.cache_data
+def load_data():
+    """Read the traffic CSV and attach the time-derived helper columns.
+
+    The file "urban_traffic_flow_with_target.csv" is read relative to the
+    current working directory. Its "Timestamp" column is parsed to datetimes
+    and four columns are derived from it: "Hour", "DayOfWeek" (English day
+    name), "Date" and "IsWeekend" (True for Saturday/Sunday).
+
+    Returns:
+        The loaded DataFrame with the derived columns appended.
+    """
+    frame = pd.read_csv("urban_traffic_flow_with_target.csv")
+    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])
+    stamps = frame["Timestamp"].dt
+    day_names = stamps.day_name()
+    # Keyword order fixes the order in which the new columns are appended.
+    return frame.assign(
+        Hour=stamps.hour,
+        DayOfWeek=day_names,
+        Date=stamps.date,
+        IsWeekend=day_names.isin(["Saturday", "Sunday"]),
+    )
+def _select_filters(df):
+    """Render the sidebar filter widgets and return the user's selections.
+
+    Args:
+        df: Full traffic DataFrame. Its "Timestamp", "Location",
+            "Peak_Off_Peak" and "DayOfWeek" columns supply the widget options.
+
+    Returns:
+        Tuple ``(date_range, locations, peaks, days, min_congestion)``, in the
+        positional order accepted by ``_apply_filters``.
+    """
+    with st.sidebar:
+        st.header("🔍 Filters")
+        min_date = df["Timestamp"].min().date()
+        max_date = df["Timestamp"].max().date()
+        date_range = st.date_input(
+            "Date Range",
+            value=(min_date, max_date),
+            min_value=min_date,
+            max_value=max_date,
+        )
+        location_options = sorted(df["Location"].unique())
+        selected_locations = st.multiselect(
+            "Select Locations",
+            options=location_options,
+            default=location_options,
+        )
+        peak_options = sorted(df["Peak_Off_Peak"].unique())
+        peak_filter = st.multiselect(
+            "Peak/Off-Peak",
+            options=peak_options,
+            default=peak_options,
+        )
+        day_options = sorted(df["DayOfWeek"].unique())
+        day_filter = st.multiselect(
+            "Day of Week",
+            options=day_options,
+            default=day_options,
+        )
+        congestion_filter = st.slider(
+            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
+        )
+    return date_range, selected_locations, peak_filter, day_filter, congestion_filter
+
+
+def _apply_filters(df, date_range, locations, peaks, days, min_congestion):
+    """Return the rows of ``df`` that match the sidebar selections.
+
+    An empty multiselect means "no restriction" for that dimension, and a
+    date selection that is not a (start, end) pair is ignored.
+
+    Args:
+        df: Full traffic DataFrame.
+        date_range: Sequence of selected dates from the date widget.
+        locations: Selected "Location" values.
+        peaks: Selected "Peak_Off_Peak" values.
+        days: Selected "DayOfWeek" values.
+        min_congestion: Minimum "Congestion_Level" to keep (inclusive).
+
+    Returns:
+        A new DataFrame containing only the matching rows.
+    """
+    mask = df["Congestion_Level"] >= min_congestion
+    if len(date_range) == 2:
+        start_date, end_date = date_range
+        # Extract the calendar dates once instead of once per comparison.
+        dates = df["Timestamp"].dt.date
+        mask &= (dates >= start_date) & (dates <= end_date)
+    if locations:
+        mask &= df["Location"].isin(locations)
+    if peaks:
+        mask &= df["Peak_Off_Peak"].isin(peaks)
+    if days:
+        mask &= df["DayOfWeek"].isin(days)
+    return df[mask]
+
+
+def _render_kpis(filtered_df):
+    """Show the four headline metrics for the (non-empty) filtered data."""
+    st.subheader("📊 Key Performance Indicators")
+    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)
+    with kpi_col1:
+        st.metric(
+            "Total Vehicle Count",
+            f"{filtered_df['Vehicle_Count'].sum():,.0f}",
+            help="Total number of vehicles recorded",
+        )
+    with kpi_col2:
+        st.metric(
+            "Avg Vehicle Speed",
+            f"{filtered_df['Vehicle_Speed'].mean():.1f} km/h",
+            help="Average speed across all locations",
+        )
+    with kpi_col3:
+        st.metric(
+            "Avg Congestion Level",
+            f"{filtered_df['Congestion_Level'].mean():.1f}",
+            help="Average congestion level (0-5 scale)",
+        )
+    with kpi_col4:
+        # Mean of a boolean column is the fraction of True values.
+        peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100
+        st.metric(
+            "Peak Hours Ratio",
+            f"{peak_share:.1f}%",
+            help="Percentage of peak hour observations",
+        )
+
+
+def _render_temporal_tab(filtered_df):
+    """Draw the hourly dual-axis chart and the per-location time series."""
+    st.subheader("Hourly Traffic Patterns")
+    hourly_avg = (
+        filtered_df.groupby("Hour")
+        .agg(
+            {
+                "Vehicle_Count": "mean",
+                "Vehicle_Speed": "mean",
+                "Congestion_Level": "mean",
+            }
+        )
+        .reset_index()
+    )
+    fig_hourly = go.Figure()
+    fig_hourly.add_trace(
+        go.Scatter(
+            x=hourly_avg["Hour"],
+            y=hourly_avg["Vehicle_Count"],
+            mode="lines+markers",
+            name="Avg Vehicle Count",
+            line=dict(color="#1f77b4", width=3),
+            yaxis="y",
+        )
+    )
+    fig_hourly.add_trace(
+        go.Scatter(
+            x=hourly_avg["Hour"],
+            y=hourly_avg["Vehicle_Speed"],
+            mode="lines+markers",
+            name="Avg Speed (km/h)",
+            line=dict(color="#2ca02c", width=3),
+            yaxis="y2",
+        )
+    )
+    fig_hourly.update_layout(
+        title="Average Traffic by Hour of Day",
+        xaxis_title="Hour",
+        yaxis_title="Vehicle Count",
+        # Speed gets its own right-hand axis overlaid on the count axis.
+        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
+        hovermode="x unified",
+        template="plotly_white",
+        height=500,
+    )
+    st.plotly_chart(fig_hourly, use_container_width=True)
+    st.subheader("Traffic Evolution Over Time")
+    time_series = (
+        filtered_df.groupby(["Timestamp", "Location"])
+        .agg({"Vehicle_Count": "sum", "Congestion_Level": "mean"})
+        .reset_index()
+    )
+    fig_ts = px.line(
+        time_series,
+        x="Timestamp",
+        y="Vehicle_Count",
+        color="Location",
+        title="Traffic Volume Over Time by Location",
+        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
+    )
+    fig_ts.update_layout(hovermode="x unified", template="plotly_white", height=500)
+    st.plotly_chart(fig_ts, use_container_width=True)
+
+
+def _render_location_tab(filtered_df):
+    """Draw per-location volume and speed bars plus the hour/location heatmap."""
+    # Shared by both bar charts, so compute it before splitting into columns.
+    location_stats = (
+        filtered_df.groupby("Location")
+        .agg(
+            {
+                "Vehicle_Count": "sum",
+                "Vehicle_Speed": "mean",
+                "Congestion_Level": "mean",
+            }
+        )
+        .reset_index()
+    )
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("Traffic by Location")
+        fig_loc = px.bar(
+            location_stats,
+            x="Location",
+            y="Vehicle_Count",
+            title="Total Vehicle Count by Location",
+            color="Vehicle_Count",
+            color_continuous_scale="Blues",
+            labels={"Vehicle_Count": "Total Count"},
+        )
+        fig_loc.update_layout(template="plotly_white", height=400)
+        st.plotly_chart(fig_loc, use_container_width=True)
+    with col2:
+        st.subheader("Avg Speed by Location")
+        fig_speed = px.bar(
+            location_stats,
+            x="Location",
+            y="Vehicle_Speed",
+            title="Average Speed by Location",
+            color="Vehicle_Speed",
+            color_continuous_scale="RdYlGn",
+            labels={"Vehicle_Speed": "Speed (km/h)"},
+        )
+        fig_speed.update_layout(template="plotly_white", height=400)
+        st.plotly_chart(fig_speed, use_container_width=True)
+    st.subheader("Congestion Heatmap: Hour vs Location")
+    heatmap_data = filtered_df.pivot_table(
+        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
+    )
+    fig_heatmap = px.imshow(
+        heatmap_data,
+        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
+        title="Average Congestion Level by Hour and Location",
+        color_continuous_scale="RdYlGn_r",
+        aspect="auto",
+    )
+    fig_heatmap.update_layout(template="plotly_white", height=500)
+    st.plotly_chart(fig_heatmap, use_container_width=True)
+
+
+def _render_distribution_tab(filtered_df):
+    """Draw box plots and the congestion-level histogram."""
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("Vehicle Count Distribution")
+        fig_count_box = px.box(
+            filtered_df,
+            x="Location",
+            y="Vehicle_Count",
+            title="Vehicle Count Distribution by Location",
+            color="Location",
+        )
+        fig_count_box.update_layout(
+            template="plotly_white", height=400, showlegend=False
+        )
+        st.plotly_chart(fig_count_box, use_container_width=True)
+    with col2:
+        st.subheader("Speed Distribution")
+        fig_speed_box = px.box(
+            filtered_df,
+            x="Location",
+            y="Vehicle_Speed",
+            title="Speed Distribution by Location",
+            color="Location",
+        )
+        fig_speed_box.update_layout(
+            template="plotly_white", height=400, showlegend=False
+        )
+        st.plotly_chart(fig_speed_box, use_container_width=True)
+    st.subheader("Congestion Level Distribution")
+    congestion_dist = (
+        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
+    )
+    congestion_dist.columns = ["Congestion_Level", "Count"]
+    fig_congestion = px.bar(
+        congestion_dist,
+        x="Congestion_Level",
+        y="Count",
+        title="Distribution of Congestion Levels",
+        color="Congestion_Level",
+        color_continuous_scale="Reds",
+        labels={
+            "Count": "Number of Records",
+            "Congestion_Level": "Congestion Level",
+        },
+    )
+    fig_congestion.update_layout(template="plotly_white", height=400)
+    st.plotly_chart(fig_congestion, use_container_width=True)
+    st.subheader("Congestion by Peak/Off-Peak")
+    fig_peak = px.box(
+        filtered_df,
+        x="Peak_Off_Peak",
+        y="Congestion_Level",
+        title="Congestion Level: Peak vs Off-Peak",
+        color="Peak_Off_Peak",
+    )
+    fig_peak.update_layout(template="plotly_white", height=400, showlegend=False)
+    st.plotly_chart(fig_peak, use_container_width=True)
+
+
+def _render_top_zones_tab(filtered_df):
+    """Show the congestion ranking table and the busiest/slowest bar charts."""
+    st.subheader("Most Congested Locations")
+    location_congestion = (
+        filtered_df.groupby("Location")
+        .agg(
+            {
+                "Congestion_Level": "mean",
+                "Vehicle_Count": "mean",
+                "Vehicle_Speed": "mean",
+            }
+        )
+        .round(2)
+        .reset_index()
+    )
+    # Descending so the most congested location is the first row, as the
+    # heading promises.
+    location_congestion = location_congestion.sort_values(
+        "Congestion_Level", ascending=False
+    )
+    st.dataframe(location_congestion, use_container_width=True, hide_index=True)
+    st.subheader("Top 5 Busiest Locations")
+    top_locations = (
+        filtered_df.groupby("Location")["Vehicle_Count"]
+        .sum()
+        .sort_values(ascending=False)
+        .head(5)
+        .reset_index()
+    )
+    fig_top = px.bar(
+        top_locations,
+        x="Vehicle_Count",
+        y="Location",
+        orientation="h",
+        title="Top 5 Locations by Total Traffic Volume",
+        color="Vehicle_Count",
+        color_continuous_scale="Blues",
+    )
+    fig_top.update_layout(
+        template="plotly_white",
+        height=400,
+        # On a horizontal bar chart this puts the largest bar at the top.
+        yaxis={"categoryorder": "total ascending"},
+    )
+    st.plotly_chart(fig_top, use_container_width=True)
+    st.subheader("Slowest Locations (Lowest Avg Speed)")
+    slowest_locations = (
+        filtered_df.groupby("Location")["Vehicle_Speed"]
+        .mean()
+        .sort_values()
+        .head(5)
+        .reset_index()
+    )
+    fig_slow = px.bar(
+        slowest_locations,
+        x="Vehicle_Speed",
+        y="Location",
+        orientation="h",
+        title="Top 5 Slowest Locations",
+        color="Vehicle_Speed",
+        color_continuous_scale="Reds_r",
+    )
+    fig_slow.update_layout(
+        template="plotly_white",
+        height=400,
+        # Descending puts the smallest bar (the slowest location) at the top,
+        # so the ranking reads top-down like the busiest chart.
+        yaxis={"categoryorder": "total descending"},
+    )
+    st.plotly_chart(fig_slow, use_container_width=True)
+
+
+def _build_insights(filtered_df):
+    """Return markdown bullet texts summarising the (non-empty) filtered data.
+
+    Comparisons whose inputs are missing (for example no weekend rows, or
+    only one of the "Peak"/"Off-Peak" labels present) are left out instead of
+    being reported as NaN or a misleading 0%.
+    """
+    insights = []
+    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
+    if hourly_mean.notna().any():
+        peak_hour = hourly_mean.idxmax()
+        # Wrap so that hour 23 reads "23:00 - 0:00" rather than "24:00".
+        next_hour = (peak_hour + 1) % 24
+        insights.append(
+            f"🕐 **Peak traffic hour**: {peak_hour}:00 - {next_hour}:00 with avg {hourly_mean.max():.0f} vehicles"
+        )
+    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
+    busiest_loc = location_totals.idxmax()
+    busiest_count = location_totals.max()
+    insights.append(
+        f"📍 **Busiest location**: {busiest_loc} with {busiest_count:,.0f} total vehicles"
+    )
+    avg_congestion = filtered_df["Congestion_Level"].mean()
+    if avg_congestion < 2:
+        congestion_status = "Low"
+    elif avg_congestion < 4:
+        congestion_status = "Moderate"
+    else:
+        congestion_status = "High"
+    insights.append(
+        f"🚦 **Overall congestion**: {congestion_status} (avg level: {avg_congestion:.1f}/5)"
+    )
+    is_weekend = filtered_df["IsWeekend"]
+    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
+    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
+    # The mean of an empty selection is NaN; only compare when both sides
+    # exist and the weekend baseline is a valid divisor.
+    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
+        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
+        insights.append(
+            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% {'more' if diff_pct > 0 else 'less'} traffic on average"
+        )
+    period = filtered_df["Peak_Off_Peak"]
+    peak_level = filtered_df.loc[period == "Peak", "Congestion_Level"].mean()
+    off_peak_level = filtered_df.loc[period == "Off-Peak", "Congestion_Level"].mean()
+    if pd.notna(peak_level) and pd.notna(off_peak_level):
+        gap = peak_level - off_peak_level
+        direction = "higher" if gap >= 0 else "lower"
+        insights.append(
+            f"⏰ **Peak hours**: Congestion is {abs(gap):.1f} levels {direction} during peak hours"
+        )
+    return insights
+
+
+def main():
+    """Build the dashboard: filters, KPIs, four chart tabs and text insights.
+
+    Loads the data via ``load_data``, applies the sidebar filters, and renders
+    every section from the filtered rows. When no rows match, a warning is
+    shown and nothing else is rendered.
+    """
+    st.title("🚗 Urban Traffic Flow Dashboard")
+    st.markdown(
+        "Explore urban traffic patterns, congestion levels, and temporal trends"
+    )
+    df = load_data()
+    selections = _select_filters(df)
+    filtered_df = _apply_filters(df, *selections)
+    if filtered_df.empty:
+        # Without this guard the KPIs show "nan" and the charts get empty input.
+        st.warning(
+            "No data matches the current filters. Adjust the sidebar selections to see results."
+        )
+        return
+    _render_kpis(filtered_df)
+    st.markdown("---")
+    tab1, tab2, tab3, tab4 = st.tabs(
+        [
+            "📈 Temporal Trends",
+            "📍 Location Analysis",
+            "📊 Distribution",
+            "🏆 Top Zones",
+        ]
+    )
+    with tab1:
+        _render_temporal_tab(filtered_df)
+    with tab2:
+        _render_location_tab(filtered_df)
+    with tab3:
+        _render_distribution_tab(filtered_df)
+    with tab4:
+        _render_top_zones_tab(filtered_df)
+    st.markdown("---")
+    st.subheader("💡 Automatic Insights")
+    for insight in _build_insights(filtered_df):
+        st.markdown(f"- {insight}")
+# Run the dashboard only when this file is executed as the main script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()