Spaces:

Docfile
/

lm

Sleeping

lm/src/streamlit_app.py

Youssouf ⚜️

6b75cbe 27 days ago

14.6 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from datetime import datetime
	import os

	# Page-level settings: title, icon, wide layout, sidebar expanded on load.
	st.set_page_config(
	    layout="wide",
	    initial_sidebar_state="expanded",
	    page_title="Urban Traffic Flow Dashboard",
	    page_icon="🚗",
	)


	@st.cache_data
	def load_data():
	    """Read the traffic CSV stored beside this script and add calendar columns.

	    Returns:
	        The CSV contents as a DataFrame with ``Timestamp`` converted by
	        ``pd.to_datetime`` and the derived columns ``Hour``, ``DayOfWeek``,
	        ``Date`` and ``IsWeekend`` appended in that order.
	    """
	    here = os.path.dirname(os.path.abspath(__file__))
	    frame = pd.read_csv(os.path.join(here, "urban_traffic_flow_with_target.csv"))

	    # Parse once and reuse the parsed series for every derived column.
	    stamps = pd.to_datetime(frame["Timestamp"])
	    day_names = stamps.dt.day_name()

	    frame["Timestamp"] = stamps
	    frame["Hour"] = stamps.dt.hour
	    frame["DayOfWeek"] = day_names
	    frame["Date"] = stamps.dt.date
	    frame["IsWeekend"] = day_names.isin(["Saturday", "Sunday"])
	    return frame


	def _render_sidebar_filters(df: pd.DataFrame) -> dict:
	    """Draw the sidebar filter widgets and return the current selections.

	    Args:
	        df: The full data set; widget options and date bounds come from it.

	    Returns:
	        A dict with the keys ``date_range``, ``locations``, ``peak_values``,
	        ``days`` and ``min_congestion``, matching the keyword parameters of
	        ``_apply_filters``.
	    """
	    min_date = df["Timestamp"].min().date()
	    max_date = df["Timestamp"].max().date()

	    # Each multiselect starts with all of its options selected.
	    location_options = sorted(df["Location"].unique())
	    peak_options = sorted(df["Peak_Off_Peak"].unique())
	    day_options = sorted(df["DayOfWeek"].unique())

	    with st.sidebar:
	        st.header("🔍 Filters")

	        date_range = st.date_input(
	            "Date Range",
	            value=(min_date, max_date),
	            min_value=min_date,
	            max_value=max_date,
	        )

	        locations = st.multiselect(
	            "Select Locations",
	            options=location_options,
	            default=location_options,
	        )

	        peak_values = st.multiselect(
	            "Peak/Off-Peak",
	            options=peak_options,
	            default=peak_options,
	        )

	        days = st.multiselect(
	            "Day of Week",
	            options=day_options,
	            default=day_options,
	        )

	        min_congestion = st.slider(
	            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
	        )

	    return {
	        "date_range": date_range,
	        "locations": locations,
	        "peak_values": peak_values,
	        "days": days,
	        "min_congestion": min_congestion,
	    }


	def _apply_filters(
	    df: pd.DataFrame,
	    date_range,
	    locations,
	    peak_values,
	    days,
	    min_congestion,
	) -> pd.DataFrame:
	    """Return the rows of ``df`` that satisfy every sidebar selection.

	    An empty multiselect is treated as "no restriction" for that column, and
	    the date filter is only applied once both ends of the range are chosen.
	    """
	    mask = df["Congestion_Level"] >= min_congestion

	    # While the user is still picking the second date the widget yields fewer
	    # than two values; skip the date filter until the range is complete.
	    if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
	        start_date, end_date = date_range
	        record_dates = df["Timestamp"].dt.date
	        mask = mask & (record_dates >= start_date) & (record_dates <= end_date)

	    if locations:
	        mask = mask & df["Location"].isin(locations)

	    if peak_values:
	        mask = mask & df["Peak_Off_Peak"].isin(peak_values)

	    if days:
	        mask = mask & df["DayOfWeek"].isin(days)

	    return df[mask]


	def _render_kpis(filtered_df: pd.DataFrame) -> None:
	    """Show the four headline metrics; expects a non-empty selection."""
	    # The mean of a boolean Series is the fraction of True values.
	    peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100

	    st.subheader("📊 Key Performance Indicators")

	    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)

	    with kpi_col1:
	        st.metric(
	            "Total Vehicle Count",
	            f"{filtered_df['Vehicle_Count'].sum():,.0f}",
	            help="Total number of vehicles recorded",
	        )

	    with kpi_col2:
	        st.metric(
	            "Avg Vehicle Speed",
	            f"{filtered_df['Vehicle_Speed'].mean():.1f} km/h",
	            help="Average speed across all locations",
	        )

	    with kpi_col3:
	        st.metric(
	            "Avg Congestion Level",
	            f"{filtered_df['Congestion_Level'].mean():.1f}",
	            help="Average congestion level (0-5 scale)",
	        )

	    with kpi_col4:
	        st.metric(
	            "Peak Hours Ratio",
	            f"{peak_share:.1f}%",
	            help="Percentage of peak hour observations",
	        )


	def _render_temporal_tab(filtered_df: pd.DataFrame) -> None:
	    """Plot hourly averages (count and speed) and per-location volume over time."""
	    st.subheader("Hourly Traffic Patterns")

	    hourly_avg = (
	        filtered_df.groupby("Hour")
	        .agg(
	            {
	                "Vehicle_Count": "mean",
	                "Vehicle_Speed": "mean",
	                "Congestion_Level": "mean",
	            }
	        )
	        .reset_index()
	    )

	    fig_hourly = go.Figure()

	    fig_hourly.add_trace(
	        go.Scatter(
	            x=hourly_avg["Hour"],
	            y=hourly_avg["Vehicle_Count"],
	            mode="lines+markers",
	            name="Avg Vehicle Count",
	            line=dict(color="#1f77b4", width=3),
	            yaxis="y",
	        )
	    )

	    # Speed is drawn against a second y-axis on the right-hand side.
	    fig_hourly.add_trace(
	        go.Scatter(
	            x=hourly_avg["Hour"],
	            y=hourly_avg["Vehicle_Speed"],
	            mode="lines+markers",
	            name="Avg Speed (km/h)",
	            line=dict(color="#2ca02c", width=3),
	            yaxis="y2",
	        )
	    )

	    fig_hourly.update_layout(
	        title="Average Traffic by Hour of Day",
	        xaxis_title="Hour",
	        yaxis_title="Vehicle Count",
	        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
	        hovermode="x unified",
	        template="plotly_white",
	        height=500,
	    )

	    st.plotly_chart(fig_hourly, use_container_width=True)

	    st.subheader("Traffic Evolution Over Time")

	    time_series = (
	        filtered_df.groupby(["Timestamp", "Location"])
	        .agg({"Vehicle_Count": "sum", "Congestion_Level": "mean"})
	        .reset_index()
	    )

	    fig_ts = px.line(
	        time_series,
	        x="Timestamp",
	        y="Vehicle_Count",
	        color="Location",
	        title="Traffic Volume Over Time by Location",
	        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
	    )

	    fig_ts.update_layout(hovermode="x unified", template="plotly_white", height=500)

	    st.plotly_chart(fig_ts, use_container_width=True)


	def _render_location_tab(filtered_df: pd.DataFrame) -> None:
	    """Plot per-location totals and speeds, plus an hour-by-location heatmap."""
	    location_stats = (
	        filtered_df.groupby("Location")
	        .agg(
	            {
	                "Vehicle_Count": "sum",
	                "Vehicle_Speed": "mean",
	                "Congestion_Level": "mean",
	            }
	        )
	        .reset_index()
	    )

	    col1, col2 = st.columns(2)

	    with col1:
	        st.subheader("Traffic by Location")

	        fig_loc = px.bar(
	            location_stats,
	            x="Location",
	            y="Vehicle_Count",
	            title="Total Vehicle Count by Location",
	            color="Vehicle_Count",
	            color_continuous_scale="Blues",
	            labels={"Vehicle_Count": "Total Count"},
	        )

	        fig_loc.update_layout(template="plotly_white", height=400)

	        st.plotly_chart(fig_loc, use_container_width=True)

	    with col2:
	        st.subheader("Avg Speed by Location")

	        fig_speed = px.bar(
	            location_stats,
	            x="Location",
	            y="Vehicle_Speed",
	            title="Average Speed by Location",
	            color="Vehicle_Speed",
	            color_continuous_scale="RdYlGn",
	            labels={"Vehicle_Speed": "Speed (km/h)"},
	        )

	        fig_speed.update_layout(template="plotly_white", height=400)

	        st.plotly_chart(fig_speed, use_container_width=True)

	    st.subheader("Congestion Heatmap: Hour vs Location")

	    heatmap_data = filtered_df.pivot_table(
	        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
	    )

	    fig_heatmap = px.imshow(
	        heatmap_data,
	        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
	        title="Average Congestion Level by Hour and Location",
	        color_continuous_scale="RdYlGn_r",
	        aspect="auto",
	    )

	    fig_heatmap.update_layout(template="plotly_white", height=500)

	    st.plotly_chart(fig_heatmap, use_container_width=True)


	def _render_distribution_tab(filtered_df: pd.DataFrame) -> None:
	    """Plot box plots and a histogram-style bar chart of the filtered records."""
	    col1, col2 = st.columns(2)

	    with col1:
	        st.subheader("Vehicle Count Distribution")

	        fig_count_box = px.box(
	            filtered_df,
	            x="Location",
	            y="Vehicle_Count",
	            title="Vehicle Count Distribution by Location",
	            color="Location",
	        )

	        fig_count_box.update_layout(
	            template="plotly_white", height=400, showlegend=False
	        )

	        st.plotly_chart(fig_count_box, use_container_width=True)

	    with col2:
	        st.subheader("Speed Distribution")

	        fig_speed_box = px.box(
	            filtered_df,
	            x="Location",
	            y="Vehicle_Speed",
	            title="Speed Distribution by Location",
	            color="Location",
	        )

	        fig_speed_box.update_layout(
	            template="plotly_white", height=400, showlegend=False
	        )

	        st.plotly_chart(fig_speed_box, use_container_width=True)

	    st.subheader("Congestion Level Distribution")

	    congestion_dist = (
	        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
	    )
	    # Name the columns explicitly rather than relying on reset_index defaults.
	    congestion_dist.columns = ["Congestion_Level", "Count"]

	    fig_congestion = px.bar(
	        congestion_dist,
	        x="Congestion_Level",
	        y="Count",
	        title="Distribution of Congestion Levels",
	        color="Congestion_Level",
	        color_continuous_scale="Reds",
	        labels={
	            "Count": "Number of Records",
	            "Congestion_Level": "Congestion Level",
	        },
	    )

	    fig_congestion.update_layout(template="plotly_white", height=400)

	    st.plotly_chart(fig_congestion, use_container_width=True)

	    st.subheader("Congestion by Peak/Off-Peak")

	    fig_peak = px.box(
	        filtered_df,
	        x="Peak_Off_Peak",
	        y="Congestion_Level",
	        title="Congestion Level: Peak vs Off-Peak",
	        color="Peak_Off_Peak",
	    )

	    fig_peak.update_layout(template="plotly_white", height=400, showlegend=False)

	    st.plotly_chart(fig_peak, use_container_width=True)


	def _render_top_zones_tab(filtered_df: pd.DataFrame) -> None:
	    """Rank locations by congestion, total volume and lowest average speed."""
	    st.subheader("Most Congested Locations")

	    location_congestion = (
	        filtered_df.groupby("Location")
	        .agg(
	            {
	                "Congestion_Level": "mean",
	                "Vehicle_Count": "mean",
	                "Vehicle_Speed": "mean",
	            }
	        )
	        .round(2)
	        .reset_index()
	    )

	    # Descending order so the most congested location is the first row, as the
	    # section title promises.
	    location_congestion = location_congestion.sort_values(
	        "Congestion_Level", ascending=False
	    )

	    st.dataframe(location_congestion, use_container_width=True, hide_index=True)

	    st.subheader("Top 5 Busiest Locations")

	    top_locations = (
	        filtered_df.groupby("Location")["Vehicle_Count"]
	        .sum()
	        .sort_values(ascending=False)
	        .head(5)
	        .reset_index()
	    )

	    fig_top = px.bar(
	        top_locations,
	        x="Vehicle_Count",
	        y="Location",
	        orientation="h",
	        title="Top 5 Locations by Total Traffic Volume",
	        color="Vehicle_Count",
	        color_continuous_scale="Blues",
	    )

	    fig_top.update_layout(
	        template="plotly_white",
	        height=400,
	        yaxis={"categoryorder": "total ascending"},
	    )

	    st.plotly_chart(fig_top, use_container_width=True)

	    st.subheader("Slowest Locations (Lowest Avg Speed)")

	    slowest_locations = (
	        filtered_df.groupby("Location")["Vehicle_Speed"]
	        .mean()
	        .sort_values()
	        .head(5)
	        .reset_index()
	    )

	    fig_slow = px.bar(
	        slowest_locations,
	        x="Vehicle_Speed",
	        y="Location",
	        orientation="h",
	        title="Top 5 Slowest Locations",
	        color="Vehicle_Speed",
	        color_continuous_scale="Reds_r",
	    )

	    fig_slow.update_layout(
	        template="plotly_white",
	        height=400,
	        yaxis={"categoryorder": "total ascending"},
	    )

	    st.plotly_chart(fig_slow, use_container_width=True)


	def _build_insights(filtered_df: pd.DataFrame) -> list:
	    """Return the bullet-point insight strings for a non-empty selection.

	    Comparisons that need two groups (weekday/weekend, peak/off-peak) are
	    left out when one of the groups has no rows, instead of reporting a
	    NaN or a made-up 0% difference.
	    """
	    insights = []

	    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
	    peak_hour = hourly_mean.idxmax()
	    insights.append(
	        f"🕐 Peak traffic hour: {peak_hour}:00 - {peak_hour + 1}:00 with avg {hourly_mean.max():.0f} vehicles"
	    )

	    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
	    busiest_loc = location_totals.idxmax()
	    busiest_count = location_totals.max()
	    insights.append(
	        f"📍 Busiest location: {busiest_loc} with {busiest_count:,.0f} total vehicles"
	    )

	    avg_congestion = filtered_df["Congestion_Level"].mean()
	    if avg_congestion < 2:
	        congestion_status = "Low"
	    elif avg_congestion < 4:
	        congestion_status = "Moderate"
	    else:
	        congestion_status = "High"
	    insights.append(
	        f"🚦 Overall congestion: {congestion_status} (avg level: {avg_congestion:.1f}/5)"
	    )

	    is_weekend = filtered_df["IsWeekend"]
	    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
	    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
	    # mean() of an empty group is NaN; a percentage also needs a positive base.
	    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
	        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
	        insights.append(
	            f"📅 Weekday vs Weekend: Weekdays have {abs(diff_pct):.1f}% {'more' if diff_pct > 0 else 'less'} traffic on average"
	        )

	    peak_rows = filtered_df["Peak_Off_Peak"] == "Peak"
	    off_peak_rows = filtered_df["Peak_Off_Peak"] == "Off-Peak"
	    peak_congestion = filtered_df.loc[peak_rows, "Congestion_Level"].mean()
	    off_peak_congestion = filtered_df.loc[off_peak_rows, "Congestion_Level"].mean()
	    if pd.notna(peak_congestion) and pd.notna(off_peak_congestion):
	        gap = peak_congestion - off_peak_congestion
	        # Word the direction explicitly so a negative gap is not called "higher".
	        direction = "higher" if gap >= 0 else "lower"
	        insights.append(
	            f"⏰ Peak hours: Congestion is {abs(gap):.1f} levels {direction} during peak hours"
	        )

	    return insights


	def main() -> None:
	    """Render the dashboard: sidebar filters, KPIs, chart tabs and insights.

	    When the filters leave no rows, a warning is shown and the rest of the
	    page is skipped, because the metrics and charts would only show NaN or
	    empty figures.
	    """
	    st.title("🚗 Urban Traffic Flow Dashboard")
	    st.markdown(
	        "Explore urban traffic patterns, congestion levels, and temporal trends"
	    )

	    df = load_data()

	    selections = _render_sidebar_filters(df)
	    filtered_df = _apply_filters(df, **selections)

	    if filtered_df.empty:
	        st.warning(
	            "No data matches the current filters. Adjust the sidebar selections to see results."
	        )
	        return

	    _render_kpis(filtered_df)

	    st.markdown("---")

	    tab1, tab2, tab3, tab4 = st.tabs(
	        [
	            "📈 Temporal Trends",
	            "📍 Location Analysis",
	            "📊 Distribution",
	            "🏆 Top Zones",
	        ]
	    )

	    with tab1:
	        _render_temporal_tab(filtered_df)

	    with tab2:
	        _render_location_tab(filtered_df)

	    with tab3:
	        _render_distribution_tab(filtered_df)

	    with tab4:
	        _render_top_zones_tab(filtered_df)

	    st.markdown("---")
	    st.subheader("💡 Automatic Insights")

	    for insight in _build_insights(filtered_df):
	        st.markdown(f"- {insight}")


	# Build the dashboard only when this file is executed as the main script.
	if __name__ == "__main__":
	    main()