# lm / src /streamlit_app.py
# Youssouf ⚜️
# ih
# 6b75cbe
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import os
# Configure the browser tab (title and icon) and the page layout.
st.set_page_config(
    page_title="Urban Traffic Flow Dashboard",
    # The icon was stored as mis-decoded bytes ("πŸš—"); restored to the
    # car emoji those bytes (F0 9F 9A 97) encode.
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)
@st.cache_data
def load_data():
    """Load the traffic CSV located next to this script and derive time columns.

    Returns:
        The CSV contents as a DataFrame with ``Timestamp`` parsed to
        datetimes and four derived columns: ``Hour``, ``DayOfWeek``
        (day name), ``Date`` and ``IsWeekend`` (True for Saturday/Sunday).
    """
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(here, "urban_traffic_flow_with_target.csv"))

    stamps = pd.to_datetime(frame["Timestamp"])
    day_names = stamps.dt.day_name()

    # assign() replaces Timestamp in place and appends the derived columns
    # in the order given.
    return frame.assign(
        Timestamp=stamps,
        Hour=stamps.dt.hour,
        DayOfWeek=day_names,
        Date=stamps.dt.date,
        IsWeekend=day_names.isin(["Saturday", "Sunday"]),
    )
# Calendar order for the day-of-week filter (alphabetical order is confusing).
_WEEKDAY_ORDER = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _sidebar_filters(df):
    """Render the sidebar filter widgets and return the user's selections.

    Returns:
        Tuple ``(date_range, locations, peak_values, days, min_congestion)``
        in the order expected by ``_apply_filters``.
    """
    with st.sidebar:
        st.header("🔍 Filters")

        min_date = df["Timestamp"].min().date()
        max_date = df["Timestamp"].max().date()
        date_range = st.date_input(
            "Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

        location_options = sorted(df["Location"].unique())
        selected_locations = st.multiselect(
            "Select Locations",
            options=location_options,
            default=location_options,
        )

        peak_options = sorted(df["Peak_Off_Peak"].unique())
        peak_filter = st.multiselect(
            "Peak/Off-Peak",
            options=peak_options,
            default=peak_options,
        )

        # List only the days present in the data, Monday first.
        present_days = set(df["DayOfWeek"].unique())
        day_options = [day for day in _WEEKDAY_ORDER if day in present_days]
        day_filter = st.multiselect(
            "Day of Week",
            options=day_options,
            default=day_options,
        )

        congestion_filter = st.slider(
            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
        )

    return date_range, selected_locations, peak_filter, day_filter, congestion_filter


def _apply_filters(df, date_range, locations, peak_values, days, min_congestion):
    """Return the rows of ``df`` that match the sidebar selections.

    An empty multiselect (no locations / peak values / days chosen) is
    treated as "no restriction", and the date filter is applied only when
    both ends of the range have been picked.
    """
    mask = df["Congestion_Level"] >= min_congestion

    if len(date_range) == 2:
        start_date, end_date = date_range
        dates = df["Timestamp"].dt.date
        mask &= (dates >= start_date) & (dates <= end_date)
    if locations:
        mask &= df["Location"].isin(locations)
    if peak_values:
        mask &= df["Peak_Off_Peak"].isin(peak_values)
    if days:
        mask &= df["DayOfWeek"].isin(days)

    return df[mask]


def _render_kpis(filtered_df):
    """Show the four headline metrics for the (non-empty) filtered data."""
    st.subheader("📊 Key Performance Indicators")
    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)

    with kpi_col1:
        st.metric(
            "Total Vehicle Count",
            f"{filtered_df['Vehicle_Count'].sum():,.0f}",
            help="Total number of vehicles recorded",
        )
    with kpi_col2:
        st.metric(
            "Avg Vehicle Speed",
            f"{filtered_df['Vehicle_Speed'].mean():.1f} km/h",
            help="Average speed across all locations",
        )
    with kpi_col3:
        st.metric(
            "Avg Congestion Level",
            f"{filtered_df['Congestion_Level'].mean():.1f}",
            help="Average congestion level (0-5 scale)",
        )
    with kpi_col4:
        # Mean of a boolean Series is the share of True values.
        peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100
        st.metric(
            "Peak Hours Ratio",
            f"{peak_share:.1f}%",
            help="Percentage of peak hour observations",
        )


def _render_temporal_tab(filtered_df):
    """Draw the hourly dual-axis chart and the per-location time series."""
    st.subheader("Hourly Traffic Patterns")
    hourly_avg = (
        filtered_df.groupby("Hour")
        .agg(
            {
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    fig_hourly = go.Figure()
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Count"],
            mode="lines+markers",
            name="Avg Vehicle Count",
            line=dict(color="#1f77b4", width=3),
            yaxis="y",
        )
    )
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Speed"],
            mode="lines+markers",
            name="Avg Speed (km/h)",
            line=dict(color="#2ca02c", width=3),
            yaxis="y2",
        )
    )
    fig_hourly.update_layout(
        title="Average Traffic by Hour of Day",
        xaxis_title="Hour",
        yaxis_title="Vehicle Count",
        # Speed gets its own right-hand axis overlaid on the count axis.
        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
        hovermode="x unified",
        template="plotly_white",
        height=500,
    )
    st.plotly_chart(fig_hourly, use_container_width=True)

    st.subheader("Traffic Evolution Over Time")
    time_series = (
        filtered_df.groupby(["Timestamp", "Location"])
        .agg({"Vehicle_Count": "sum", "Congestion_Level": "mean"})
        .reset_index()
    )
    fig_ts = px.line(
        time_series,
        x="Timestamp",
        y="Vehicle_Count",
        color="Location",
        title="Traffic Volume Over Time by Location",
        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
    )
    fig_ts.update_layout(hovermode="x unified", template="plotly_white", height=500)
    st.plotly_chart(fig_ts, use_container_width=True)


def _render_location_tab(filtered_df):
    """Draw per-location volume/speed bars and the hour-by-location heatmap."""
    location_stats = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Vehicle_Count": "sum",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Traffic by Location")
        fig_loc = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Count",
            title="Total Vehicle Count by Location",
            color="Vehicle_Count",
            color_continuous_scale="Blues",
            labels={"Vehicle_Count": "Total Count"},
        )
        fig_loc.update_layout(template="plotly_white", height=400)
        st.plotly_chart(fig_loc, use_container_width=True)
    with col2:
        st.subheader("Avg Speed by Location")
        fig_speed = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Speed",
            title="Average Speed by Location",
            color="Vehicle_Speed",
            color_continuous_scale="RdYlGn",
            labels={"Vehicle_Speed": "Speed (km/h)"},
        )
        fig_speed.update_layout(template="plotly_white", height=400)
        st.plotly_chart(fig_speed, use_container_width=True)

    st.subheader("Congestion Heatmap: Hour vs Location")
    heatmap_data = filtered_df.pivot_table(
        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
    )
    fig_heatmap = px.imshow(
        heatmap_data,
        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
        title="Average Congestion Level by Hour and Location",
        color_continuous_scale="RdYlGn_r",
        aspect="auto",
    )
    fig_heatmap.update_layout(template="plotly_white", height=500)
    st.plotly_chart(fig_heatmap, use_container_width=True)


def _render_distribution_tab(filtered_df):
    """Draw box plots and the congestion-level histogram."""
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Vehicle Count Distribution")
        fig_count_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Count",
            title="Vehicle Count Distribution by Location",
            color="Location",
        )
        fig_count_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )
        st.plotly_chart(fig_count_box, use_container_width=True)
    with col2:
        st.subheader("Speed Distribution")
        fig_speed_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Speed",
            title="Speed Distribution by Location",
            color="Location",
        )
        fig_speed_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )
        st.plotly_chart(fig_speed_box, use_container_width=True)

    st.subheader("Congestion Level Distribution")
    congestion_dist = (
        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
    )
    # Normalise the column names regardless of what reset_index() produced.
    congestion_dist.columns = ["Congestion_Level", "Count"]
    fig_congestion = px.bar(
        congestion_dist,
        x="Congestion_Level",
        y="Count",
        title="Distribution of Congestion Levels",
        color="Congestion_Level",
        color_continuous_scale="Reds",
        labels={
            "Count": "Number of Records",
            "Congestion_Level": "Congestion Level",
        },
    )
    fig_congestion.update_layout(template="plotly_white", height=400)
    st.plotly_chart(fig_congestion, use_container_width=True)

    st.subheader("Congestion by Peak/Off-Peak")
    fig_peak = px.box(
        filtered_df,
        x="Peak_Off_Peak",
        y="Congestion_Level",
        title="Congestion Level: Peak vs Off-Peak",
        color="Peak_Off_Peak",
    )
    fig_peak.update_layout(template="plotly_white", height=400, showlegend=False)
    st.plotly_chart(fig_peak, use_container_width=True)


def _render_top_zones_tab(filtered_df):
    """Show the congestion ranking table and the busiest/slowest bar charts."""
    st.subheader("Most Congested Locations")
    location_congestion = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Congestion_Level": "mean",
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
            }
        )
        .round(2)
        .reset_index()
        # Descending so the most congested location is listed first, as the
        # heading promises.
        .sort_values("Congestion_Level", ascending=False)
    )
    st.dataframe(location_congestion, use_container_width=True, hide_index=True)

    st.subheader("Top 5 Busiest Locations")
    top_locations = (
        filtered_df.groupby("Location")["Vehicle_Count"]
        .sum()
        .sort_values(ascending=False)
        .head(5)
        .reset_index()
    )
    fig_top = px.bar(
        top_locations,
        x="Vehicle_Count",
        y="Location",
        orientation="h",
        title="Top 5 Locations by Total Traffic Volume",
        color="Vehicle_Count",
        color_continuous_scale="Blues",
    )
    fig_top.update_layout(
        template="plotly_white",
        height=400,
        # Largest total at the top of the horizontal chart.
        yaxis={"categoryorder": "total ascending"},
    )
    st.plotly_chart(fig_top, use_container_width=True)

    st.subheader("Slowest Locations (Lowest Avg Speed)")
    slowest_locations = (
        filtered_df.groupby("Location")["Vehicle_Speed"]
        .mean()
        .sort_values()
        .head(5)
        .reset_index()
    )
    fig_slow = px.bar(
        slowest_locations,
        x="Vehicle_Speed",
        y="Location",
        orientation="h",
        title="Top 5 Slowest Locations",
        color="Vehicle_Speed",
        color_continuous_scale="Reds_r",
    )
    fig_slow.update_layout(
        template="plotly_white",
        height=400,
        # Smallest speed at the top so the slowest location leads the ranking.
        yaxis={"categoryorder": "total descending"},
    )
    st.plotly_chart(fig_slow, use_container_width=True)


def _build_insights(filtered_df):
    """Return a list of markdown insight strings for the filtered data.

    Comparisons (weekday vs weekend, peak vs off-peak) are included only
    when both sides of the comparison have data, so no "nan" or misleading
    "0.0%" statements are produced. Returns an empty list for empty input.
    """
    if filtered_df.empty:
        return []

    insights = []

    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
    peak_hour = hourly_mean.idxmax()
    # Wrap the end of the interval so hour 23 reads "23:00 - 0:00".
    next_hour = (peak_hour + 1) % 24
    insights.append(
        f"🕐 **Peak traffic hour**: {peak_hour}:00 - {next_hour}:00 "
        f"with avg {hourly_mean.max():.0f} vehicles"
    )

    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
    busiest_loc = location_totals.idxmax()
    busiest_count = location_totals.max()
    insights.append(
        f"📍 **Busiest location**: {busiest_loc} with {busiest_count:,.0f} total vehicles"
    )

    avg_congestion = filtered_df["Congestion_Level"].mean()
    if avg_congestion < 2:
        congestion_status = "Low"
    elif avg_congestion < 4:
        congestion_status = "Moderate"
    else:
        congestion_status = "High"
    insights.append(
        f"🚦 **Overall congestion**: {congestion_status} (avg level: {avg_congestion:.1f}/5)"
    )

    is_weekend = filtered_df["IsWeekend"]
    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
    # mean() of an empty selection is NaN; skip the comparison in that case
    # (and when the weekend baseline is zero, where a percentage is undefined).
    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
        more_or_less = "more" if diff_pct > 0 else "less"
        insights.append(
            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% "
            f"{more_or_less} traffic on average"
        )

    peak_label = filtered_df["Peak_Off_Peak"]
    peak_mean = filtered_df.loc[peak_label == "Peak", "Congestion_Level"].mean()
    off_peak_mean = filtered_df.loc[peak_label == "Off-Peak", "Congestion_Level"].mean()
    if pd.notna(peak_mean) and pd.notna(off_peak_mean):
        gap = peak_mean - off_peak_mean
        direction = "higher" if gap >= 0 else "lower"
        insights.append(
            f"⏰ **Peak hours**: Congestion is {abs(gap):.1f} levels "
            f"{direction} during peak hours"
        )

    return insights


def main():
    """Render the Urban Traffic Flow dashboard.

    Loads the data, applies the sidebar filters, then draws the KPI row,
    the four analysis tabs and the automatic insights. If the filters
    leave no rows, a warning is shown and nothing else is rendered.
    """
    st.title("🚗 Urban Traffic Flow Dashboard")
    st.markdown(
        "Explore urban traffic patterns, congestion levels, and temporal trends"
    )

    df = load_data()
    selections = _sidebar_filters(df)
    filtered_df = _apply_filters(df, *selections)

    # Every section below aggregates filtered_df; with no rows the KPIs
    # would show "nan" and the charts would be fed empty frames.
    if filtered_df.empty:
        st.warning(
            "No records match the selected filters. "
            "Adjust the filters in the sidebar."
        )
        return

    _render_kpis(filtered_df)
    st.markdown("---")

    tab1, tab2, tab3, tab4 = st.tabs(
        [
            "📈 Temporal Trends",
            "📍 Location Analysis",
            "📊 Distribution",
            "🏆 Top Zones",
        ]
    )
    with tab1:
        _render_temporal_tab(filtered_df)
    with tab2:
        _render_location_tab(filtered_df)
    with tab3:
        _render_distribution_tab(filtered_df)
    with tab4:
        _render_top_zones_tab(filtered_df)

    st.markdown("---")
    st.subheader("💡 Automatic Insights")
    for insight in _build_insights(filtered_df):
        st.markdown(f"- {insight}")
# Entry point: build the dashboard when this module is executed as a script.
if __name__ == "__main__":
    main()