|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import plotly.express as px |
|
|
import plotly.graph_objects as go |
|
|
from datetime import datetime |
|
|
import os |
|
|
|
|
|
# Page-level configuration: browser tab title/icon, wide layout, and an
# initially expanded sidebar.
st.set_page_config(
    page_title="Urban Traffic Flow Dashboard",
    # A single emoji character is used as the page icon.
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
|
|
|
|
|
|
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the traffic CSV stored next to this script and add time columns.

    The ``Timestamp`` column is parsed with ``pd.to_datetime`` and four
    derived columns are appended, in this order:

    * ``Hour`` - hour component of the timestamp
    * ``DayOfWeek`` - day name of the timestamp
    * ``Date`` - date component of the timestamp
    * ``IsWeekend`` - True where ``DayOfWeek`` is Saturday or Sunday

    Returns:
        The loaded frame with the columns described above.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(here, "urban_traffic_flow_with_target.csv"))

    # Parse once and reuse the parsed series for every derived column.
    stamps = pd.to_datetime(frame["Timestamp"])
    day_names = stamps.dt.day_name()

    frame["Timestamp"] = stamps
    frame["Hour"] = stamps.dt.hour
    frame["DayOfWeek"] = day_names
    frame["Date"] = stamps.dt.date
    frame["IsWeekend"] = day_names.isin(["Saturday", "Sunday"])
    return frame
|
|
|
|
|
|
|
|
# Calendar order used for the day-of-week filter options.
_WEEKDAY_ORDER = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _sorted_unique(series):
    """Return the distinct non-null values of *series* in ascending order."""
    return sorted(series.dropna().unique())


def _show_chart(fig):
    """Render a Plotly figure stretched to the width of its container."""
    st.plotly_chart(fig, use_container_width=True)


def _render_sidebar_filters(df):
    """Draw the sidebar widgets and return the user's current selections.

    Returns:
        A tuple ``(date_range, locations, periods, days, min_congestion)``.
    """
    with st.sidebar:
        st.header("🔍 Filters")

        min_date = df["Timestamp"].min().date()
        max_date = df["Timestamp"].max().date()
        date_range = st.date_input(
            "Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

        location_options = _sorted_unique(df["Location"])
        selected_locations = st.multiselect(
            "Select Locations",
            options=location_options,
            default=location_options,
        )

        period_options = _sorted_unique(df["Peak_Off_Peak"])
        peak_filter = st.multiselect(
            "Peak/Off-Peak",
            options=period_options,
            default=period_options,
        )

        # List days Monday..Sunday instead of alphabetically; only days that
        # actually occur in the data are offered.
        present_days = set(df["DayOfWeek"].dropna().unique())
        day_options = [day for day in _WEEKDAY_ORDER if day in present_days]
        day_filter = st.multiselect(
            "Day of Week",
            options=day_options,
            default=day_options,
        )

        congestion_filter = st.slider(
            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
        )

    return date_range, selected_locations, peak_filter, day_filter, congestion_filter


def _apply_filters(df, date_range, locations, periods, days, min_congestion):
    """Return the rows of *df* that satisfy every active sidebar filter.

    An empty multiselect means "no restriction" for that dimension. The date
    restriction is applied only when *date_range* holds both a start and an
    end date; any other shape (for example a half-finished range selection)
    leaves the dates unrestricted.
    """
    mask = df["Congestion_Level"] >= min_congestion

    if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
        start_date, end_date = date_range
        dates = df["Timestamp"].dt.date
        mask &= (dates >= start_date) & (dates <= end_date)

    if locations:
        mask &= df["Location"].isin(locations)
    if periods:
        mask &= df["Peak_Off_Peak"].isin(periods)
    if days:
        mask &= df["DayOfWeek"].isin(days)

    return df[mask]


def _render_kpis(filtered_df):
    """Show the four headline metrics for the filtered rows."""
    st.subheader("📊 Key Performance Indicators")

    total_count = filtered_df["Vehicle_Count"].sum()
    mean_speed = filtered_df["Vehicle_Speed"].mean()
    mean_congestion = filtered_df["Congestion_Level"].mean()
    # Mean of a boolean series is the share of True values.
    peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100

    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)
    with kpi_col1:
        st.metric(
            "Total Vehicle Count",
            f"{total_count:,.0f}",
            help="Total number of vehicles recorded",
        )
    with kpi_col2:
        st.metric(
            "Avg Vehicle Speed",
            f"{mean_speed:.1f} km/h",
            help="Average speed across all locations",
        )
    with kpi_col3:
        st.metric(
            "Avg Congestion Level",
            f"{mean_congestion:.1f}",
            help="Average congestion level (0-5 scale)",
        )
    with kpi_col4:
        st.metric(
            "Peak Hours Ratio",
            f"{peak_share:.1f}%",
            help="Percentage of peak hour observations",
        )


def _render_temporal_tab(filtered_df):
    """Draw the hourly dual-axis chart and the per-location time series."""
    st.subheader("Hourly Traffic Patterns")

    hourly_avg = (
        filtered_df.groupby("Hour")[["Vehicle_Count", "Vehicle_Speed"]]
        .mean()
        .reset_index()
    )

    fig_hourly = go.Figure()
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Count"],
            mode="lines+markers",
            name="Avg Vehicle Count",
            line=dict(color="#1f77b4", width=3),
            yaxis="y",
        )
    )
    # Speed is plotted against a secondary axis on the right-hand side.
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Speed"],
            mode="lines+markers",
            name="Avg Speed (km/h)",
            line=dict(color="#2ca02c", width=3),
            yaxis="y2",
        )
    )
    fig_hourly.update_layout(
        title="Average Traffic by Hour of Day",
        xaxis_title="Hour",
        yaxis_title="Vehicle Count",
        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
        hovermode="x unified",
        template="plotly_white",
        height=500,
    )
    _show_chart(fig_hourly)

    st.subheader("Traffic Evolution Over Time")

    time_series = (
        filtered_df.groupby(["Timestamp", "Location"])["Vehicle_Count"]
        .sum()
        .reset_index()
    )
    fig_ts = px.line(
        time_series,
        x="Timestamp",
        y="Vehicle_Count",
        color="Location",
        title="Traffic Volume Over Time by Location",
        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
    )
    fig_ts.update_layout(hovermode="x unified", template="plotly_white", height=500)
    _show_chart(fig_ts)


def _render_location_tab(filtered_df):
    """Draw per-location volume and speed bars plus the congestion heatmap."""
    location_stats = (
        filtered_df.groupby("Location")
        .agg({"Vehicle_Count": "sum", "Vehicle_Speed": "mean"})
        .reset_index()
    )

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Traffic by Location")
        fig_loc = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Count",
            title="Total Vehicle Count by Location",
            color="Vehicle_Count",
            color_continuous_scale="Blues",
            labels={"Vehicle_Count": "Total Count"},
        )
        fig_loc.update_layout(template="plotly_white", height=400)
        _show_chart(fig_loc)

    with col2:
        st.subheader("Avg Speed by Location")
        fig_speed = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Speed",
            title="Average Speed by Location",
            color="Vehicle_Speed",
            color_continuous_scale="RdYlGn",
            labels={"Vehicle_Speed": "Speed (km/h)"},
        )
        fig_speed.update_layout(template="plotly_white", height=400)
        _show_chart(fig_speed)

    st.subheader("Congestion Heatmap: Hour vs Location")

    heatmap_data = filtered_df.pivot_table(
        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
    )
    fig_heatmap = px.imshow(
        heatmap_data,
        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
        title="Average Congestion Level by Hour and Location",
        color_continuous_scale="RdYlGn_r",
        aspect="auto",
    )
    fig_heatmap.update_layout(template="plotly_white", height=500)
    _show_chart(fig_heatmap)


def _render_distribution_tab(filtered_df):
    """Draw box plots and the congestion-level histogram."""
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Vehicle Count Distribution")
        fig_count_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Count",
            title="Vehicle Count Distribution by Location",
            color="Location",
        )
        fig_count_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )
        _show_chart(fig_count_box)

    with col2:
        st.subheader("Speed Distribution")
        fig_speed_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Speed",
            title="Speed Distribution by Location",
            color="Location",
        )
        fig_speed_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )
        _show_chart(fig_speed_box)

    st.subheader("Congestion Level Distribution")

    congestion_dist = (
        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
    )
    # Name the columns explicitly; the defaults produced by
    # value_counts().reset_index() differ between pandas versions.
    congestion_dist.columns = ["Congestion_Level", "Count"]

    fig_congestion = px.bar(
        congestion_dist,
        x="Congestion_Level",
        y="Count",
        title="Distribution of Congestion Levels",
        color="Congestion_Level",
        color_continuous_scale="Reds",
        labels={
            "Count": "Number of Records",
            "Congestion_Level": "Congestion Level",
        },
    )
    fig_congestion.update_layout(template="plotly_white", height=400)
    _show_chart(fig_congestion)

    st.subheader("Congestion by Peak/Off-Peak")

    fig_peak = px.box(
        filtered_df,
        x="Peak_Off_Peak",
        y="Congestion_Level",
        title="Congestion Level: Peak vs Off-Peak",
        color="Peak_Off_Peak",
    )
    fig_peak.update_layout(template="plotly_white", height=400, showlegend=False)
    _show_chart(fig_peak)


def _render_top_zones_tab(filtered_df):
    """Draw the congestion ranking table and the busiest/slowest bar charts."""
    st.subheader("Most Congested Locations")

    location_congestion = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Congestion_Level": "mean",
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
            }
        )
        .round(2)
        .reset_index()
        # Highest congestion first, so the table matches its heading.
        .sort_values("Congestion_Level", ascending=False)
    )
    st.dataframe(location_congestion, use_container_width=True, hide_index=True)

    st.subheader("Top 5 Busiest Locations")

    top_locations = (
        filtered_df.groupby("Location")["Vehicle_Count"]
        .sum()
        .sort_values(ascending=False)
        .head(5)
        .reset_index()
    )
    fig_top = px.bar(
        top_locations,
        x="Vehicle_Count",
        y="Location",
        orientation="h",
        title="Top 5 Locations by Total Traffic Volume",
        color="Vehicle_Count",
        color_continuous_scale="Blues",
    )
    fig_top.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )
    _show_chart(fig_top)

    st.subheader("Slowest Locations (Lowest Avg Speed)")

    slowest_locations = (
        filtered_df.groupby("Location")["Vehicle_Speed"]
        .mean()
        .sort_values()
        .head(5)
        .reset_index()
    )
    fig_slow = px.bar(
        slowest_locations,
        x="Vehicle_Speed",
        y="Location",
        orientation="h",
        title="Top 5 Slowest Locations",
        color="Vehicle_Speed",
        color_continuous_scale="Reds_r",
    )
    fig_slow.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )
    _show_chart(fig_slow)


def _build_insights(filtered_df):
    """Return markdown-formatted insight strings for the filtered rows.

    Comparison insights (weekday vs weekend, peak vs off-peak) are left out
    when one side of the comparison has no rows, because no meaningful
    difference can be computed in that case.
    """
    if filtered_df.empty:
        return []

    insights = []

    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
    peak_hour = hourly_mean.idxmax()
    insights.append(
        f"🕐 **Peak traffic hour**: {peak_hour}:00 - {peak_hour + 1}:00 "
        f"with avg {hourly_mean.max():.0f} vehicles"
    )

    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
    busiest_loc = location_totals.idxmax()
    busiest_count = location_totals.max()
    insights.append(
        f"📍 **Busiest location**: {busiest_loc} "
        f"with {busiest_count:,.0f} total vehicles"
    )

    avg_congestion = filtered_df["Congestion_Level"].mean()
    if avg_congestion < 2:
        congestion_status = "Low"
    elif avg_congestion < 4:
        congestion_status = "Moderate"
    else:
        congestion_status = "High"
    insights.append(
        f"🚦 **Overall congestion**: {congestion_status} "
        f"(avg level: {avg_congestion:.1f}/5)"
    )

    is_weekend = filtered_df["IsWeekend"]
    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
    # A mean over zero rows is NaN; a zero weekend mean cannot be a divisor.
    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
        direction = "more" if diff_pct > 0 else "less"
        insights.append(
            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% "
            f"{direction} traffic on average"
        )

    period = filtered_df["Peak_Off_Peak"]
    peak_mean = filtered_df.loc[period == "Peak", "Congestion_Level"].mean()
    off_peak_mean = filtered_df.loc[period == "Off-Peak", "Congestion_Level"].mean()
    if pd.notna(peak_mean) and pd.notna(off_peak_mean):
        gap = peak_mean - off_peak_mean
        direction = "higher" if gap >= 0 else "lower"
        insights.append(
            f"⏰ **Peak hours**: Congestion is {abs(gap):.1f} levels "
            f"{direction} during peak hours"
        )

    return insights


def main():
    """Render the dashboard: sidebar filters, KPIs, chart tabs, and insights.

    The data is loaded through ``load_data()``, narrowed by the sidebar
    selections, and then passed to each section. When no rows survive the
    filters a warning is shown and the remaining sections are skipped.
    """
    st.title("🚗 Urban Traffic Flow Dashboard")
    st.markdown(
        "Explore urban traffic patterns, congestion levels, and temporal trends"
    )

    df = load_data()

    (
        date_range,
        selected_locations,
        peak_filter,
        day_filter,
        congestion_filter,
    ) = _render_sidebar_filters(df)

    filtered_df = _apply_filters(
        df,
        date_range,
        selected_locations,
        peak_filter,
        day_filter,
        congestion_filter,
    )

    # Every metric and chart below needs at least one row; stop here rather
    # than show NaN metrics and empty figures.
    if filtered_df.empty:
        st.warning(
            "No records match the selected filters. "
            "Adjust the filters in the sidebar to see data."
        )
        return

    _render_kpis(filtered_df)

    st.markdown("---")

    tab1, tab2, tab3, tab4 = st.tabs(
        [
            "📈 Temporal Trends",
            "📍 Location Analysis",
            "📊 Distribution",
            "🔝 Top Zones",
        ]
    )
    with tab1:
        _render_temporal_tab(filtered_df)
    with tab2:
        _render_location_tab(filtered_df)
    with tab3:
        _render_distribution_tab(filtered_df)
    with tab4:
        _render_top_zones_tab(filtered_df)

    st.markdown("---")
    st.subheader("💡 Automatic Insights")

    for insight in _build_insights(filtered_df):
        st.markdown(f"- {insight}")
|
|
|
|
|
|
|
|
# Render the dashboard only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|