Docfile committed on
Commit
85c35ef
·
verified ·
1 Parent(s): 7b63a84

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +474 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,476 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from datetime import datetime
6
+
7
# Page-level Streamlit settings: browser-tab title and icon, full-width
# layout, and a sidebar that starts expanded.
st.set_page_config(
    page_title="Urban Traffic Flow Dashboard",
    # The icon was stored as a mis-decoded byte sequence; this is the car
    # emoji those bytes encode.
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)
13
+
14
+
15
@st.cache_data
def load_data():
    """Load the traffic CSV and attach derived time columns.

    Reads ``urban_traffic_flow_with_target.csv`` (a relative path, so it is
    resolved against the process working directory), parses ``Timestamp``
    into datetimes and adds four columns derived from it:

    * ``Hour``      - hour of day of the timestamp
    * ``DayOfWeek`` - day name of the timestamp
    * ``Date``      - calendar date of the timestamp
    * ``IsWeekend`` - True where ``DayOfWeek`` is Saturday or Sunday

    Returns:
        The resulting ``pandas.DataFrame``. The function is wrapped in
        ``st.cache_data``.
    """
    frame = pd.read_csv("urban_traffic_flow_with_target.csv")

    # Parse once and derive every time-based column from the same series.
    stamps = pd.to_datetime(frame["Timestamp"])
    day_names = stamps.dt.day_name()

    frame["Timestamp"] = stamps
    frame["Hour"] = stamps.dt.hour
    frame["DayOfWeek"] = day_names
    frame["Date"] = stamps.dt.date
    frame["IsWeekend"] = day_names.isin(["Saturday", "Sunday"])
    return frame
24
+
25
+
26
# Calendar order for the day-of-week filter; plain alphabetical sorting
# would scramble the week (Friday, Monday, Saturday, ...).
_WEEKDAY_ORDER = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _render_sidebar_filters(df):
    """Draw the sidebar filter widgets and return the current selections.

    Args:
        df: Full traffic DataFrame as returned by ``load_data``.

    Returns:
        A dict with the raw widget values under the keys ``date_range``,
        ``locations``, ``peak``, ``days`` and ``min_congestion``.
    """
    with st.sidebar:
        st.header("🔍 Filters")

        min_date = df["Timestamp"].min().date()
        max_date = df["Timestamp"].max().date()
        date_range = st.date_input(
            "Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

        location_options = sorted(df["Location"].unique())
        selected_locations = st.multiselect(
            "Select Locations",
            options=location_options,
            default=location_options,
        )

        peak_options = sorted(df["Peak_Off_Peak"].unique())
        peak_filter = st.multiselect(
            "Peak/Off-Peak",
            options=peak_options,
            default=peak_options,
        )

        days_present = set(df["DayOfWeek"].dropna().unique())
        day_options = [day for day in _WEEKDAY_ORDER if day in days_present]
        day_filter = st.multiselect(
            "Day of Week",
            options=day_options,
            default=day_options,
        )

        # NOTE(review): the 0-5 bounds are hard-coded; confirm they match the
        # range of Congestion_Level in the dataset.
        congestion_filter = st.slider(
            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
        )

    return {
        "date_range": date_range,
        "locations": selected_locations,
        "peak": peak_filter,
        "days": day_filter,
        "min_congestion": congestion_filter,
    }


def _apply_filters(df, selections):
    """Return the rows of ``df`` that satisfy the sidebar selections.

    An empty multiselect means "no restriction" for that dimension, and the
    date filter is applied only once both ends of the range are chosen.
    """
    keep = pd.Series(True, index=df.index)

    date_range = selections["date_range"]
    # While the user is still picking the end date the widget yields fewer
    # than two values; skip the date filter until the range is complete.
    if len(date_range) == 2:
        start_date, end_date = date_range
        row_dates = df["Timestamp"].dt.date
        keep &= (row_dates >= start_date) & (row_dates <= end_date)

    if selections["locations"]:
        keep &= df["Location"].isin(selections["locations"])

    if selections["peak"]:
        keep &= df["Peak_Off_Peak"].isin(selections["peak"])

    if selections["days"]:
        keep &= df["DayOfWeek"].isin(selections["days"])

    keep &= df["Congestion_Level"] >= selections["min_congestion"]

    # Boolean indexing returns a new frame, so the loaded data is not mutated.
    return df[keep]


def _render_kpis(filtered_df):
    """Show the four headline metrics for the filtered data."""
    st.subheader("📊 Key Performance Indicators")

    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)

    with kpi_col1:
        st.metric(
            "Total Vehicle Count",
            f"{filtered_df['Vehicle_Count'].sum():,.0f}",
            help="Total number of vehicles recorded",
        )

    with kpi_col2:
        st.metric(
            "Avg Vehicle Speed",
            f"{filtered_df['Vehicle_Speed'].mean():.1f} km/h",
            help="Average speed across all locations",
        )

    with kpi_col3:
        st.metric(
            "Avg Congestion Level",
            f"{filtered_df['Congestion_Level'].mean():.1f}",
            help="Average congestion level (0-5 scale)",
        )

    with kpi_col4:
        # Mean of a boolean column is the share of True rows.
        peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100
        st.metric(
            "Peak Hours Ratio",
            f"{peak_share:.1f}%",
            help="Percentage of peak hour observations",
        )


def _render_temporal_tab(filtered_df):
    """Render the hourly dual-axis chart and the per-location time series."""
    st.subheader("Hourly Traffic Patterns")

    hourly_avg = (
        filtered_df.groupby("Hour")
        .agg(
            {
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    fig_hourly = go.Figure()

    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Count"],
            mode="lines+markers",
            name="Avg Vehicle Count",
            line=dict(color="#1f77b4", width=3),
            yaxis="y",
        )
    )

    # Speed goes on a secondary axis (y2) overlaid on the right-hand side.
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Speed"],
            mode="lines+markers",
            name="Avg Speed (km/h)",
            line=dict(color="#2ca02c", width=3),
            yaxis="y2",
        )
    )

    fig_hourly.update_layout(
        title="Average Traffic by Hour of Day",
        xaxis_title="Hour",
        yaxis_title="Vehicle Count",
        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
        hovermode="x unified",
        template="plotly_white",
        height=500,
    )

    st.plotly_chart(fig_hourly, use_container_width=True)

    st.subheader("Traffic Evolution Over Time")

    time_series = (
        filtered_df.groupby(["Timestamp", "Location"])
        .agg({"Vehicle_Count": "sum", "Congestion_Level": "mean"})
        .reset_index()
    )

    fig_ts = px.line(
        time_series,
        x="Timestamp",
        y="Vehicle_Count",
        color="Location",
        title="Traffic Volume Over Time by Location",
        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
    )

    fig_ts.update_layout(hovermode="x unified", template="plotly_white", height=500)

    st.plotly_chart(fig_ts, use_container_width=True)


def _render_location_tab(filtered_df):
    """Render per-location volume and speed bars plus the congestion heatmap."""
    location_stats = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Vehicle_Count": "sum",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Traffic by Location")

        fig_loc = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Count",
            title="Total Vehicle Count by Location",
            color="Vehicle_Count",
            color_continuous_scale="Blues",
            labels={"Vehicle_Count": "Total Count"},
        )

        fig_loc.update_layout(template="plotly_white", height=400)

        st.plotly_chart(fig_loc, use_container_width=True)

    with col2:
        st.subheader("Avg Speed by Location")

        fig_speed = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Speed",
            title="Average Speed by Location",
            color="Vehicle_Speed",
            color_continuous_scale="RdYlGn",
            labels={"Vehicle_Speed": "Speed (km/h)"},
        )

        fig_speed.update_layout(template="plotly_white", height=400)

        st.plotly_chart(fig_speed, use_container_width=True)

    # NOTE(review): rendered at full tab width below the two columns; the
    # original nesting level could not be recovered - confirm the layout.
    st.subheader("Congestion Heatmap: Hour vs Location")

    heatmap_data = filtered_df.pivot_table(
        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
    )

    fig_heatmap = px.imshow(
        heatmap_data,
        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
        title="Average Congestion Level by Hour and Location",
        color_continuous_scale="RdYlGn_r",
        aspect="auto",
    )

    fig_heatmap.update_layout(template="plotly_white", height=500)

    st.plotly_chart(fig_heatmap, use_container_width=True)


def _render_distribution_tab(filtered_df):
    """Render box plots and bar charts describing the value distributions."""
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Vehicle Count Distribution")

        fig_count_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Count",
            title="Vehicle Count Distribution by Location",
            color="Location",
        )

        fig_count_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )

        st.plotly_chart(fig_count_box, use_container_width=True)

    with col2:
        st.subheader("Speed Distribution")

        fig_speed_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Speed",
            title="Speed Distribution by Location",
            color="Location",
        )

        fig_speed_box.update_layout(
            template="plotly_white", height=400, showlegend=False
        )

        st.plotly_chart(fig_speed_box, use_container_width=True)

    # NOTE(review): the two charts below are rendered at full tab width; the
    # original nesting level could not be recovered - confirm the layout.
    st.subheader("Congestion Level Distribution")

    congestion_dist = (
        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
    )
    congestion_dist.columns = ["Congestion_Level", "Count"]

    fig_congestion = px.bar(
        congestion_dist,
        x="Congestion_Level",
        y="Count",
        title="Distribution of Congestion Levels",
        color="Congestion_Level",
        color_continuous_scale="Reds",
        labels={
            "Count": "Number of Records",
            "Congestion_Level": "Congestion Level",
        },
    )

    fig_congestion.update_layout(template="plotly_white", height=400)

    st.plotly_chart(fig_congestion, use_container_width=True)

    st.subheader("Congestion by Peak/Off-Peak")

    fig_peak = px.box(
        filtered_df,
        x="Peak_Off_Peak",
        y="Congestion_Level",
        title="Congestion Level: Peak vs Off-Peak",
        color="Peak_Off_Peak",
    )

    fig_peak.update_layout(template="plotly_white", height=400, showlegend=False)

    st.plotly_chart(fig_peak, use_container_width=True)


def _render_top_zones_tab(filtered_df):
    """Render the congestion ranking table and the busiest/slowest charts."""
    st.subheader("Most Congested Locations")

    location_congestion = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Congestion_Level": "mean",
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
            }
        )
        .round(2)
        .reset_index()
    )

    # Descending, so the most congested location is the first row, matching
    # the section title.
    location_congestion = location_congestion.sort_values(
        "Congestion_Level", ascending=False
    )

    st.dataframe(location_congestion, use_container_width=True, hide_index=True)

    st.subheader("Top 5 Busiest Locations")

    top_locations = (
        filtered_df.groupby("Location")["Vehicle_Count"]
        .sum()
        .sort_values(ascending=False)
        .head(5)
        .reset_index()
    )

    fig_top = px.bar(
        top_locations,
        x="Vehicle_Count",
        y="Location",
        orientation="h",
        title="Top 5 Locations by Total Traffic Volume",
        color="Vehicle_Count",
        color_continuous_scale="Blues",
    )

    fig_top.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )

    st.plotly_chart(fig_top, use_container_width=True)

    st.subheader("Slowest Locations (Lowest Avg Speed)")

    slowest_locations = (
        filtered_df.groupby("Location")["Vehicle_Speed"]
        .mean()
        .sort_values()
        .head(5)
        .reset_index()
    )

    fig_slow = px.bar(
        slowest_locations,
        x="Vehicle_Speed",
        y="Location",
        orientation="h",
        title="Top 5 Slowest Locations",
        color="Vehicle_Speed",
        color_continuous_scale="Reds_r",
    )

    fig_slow.update_layout(
        template="plotly_white",
        height=400,
        yaxis={"categoryorder": "total ascending"},
    )

    st.plotly_chart(fig_slow, use_container_width=True)


def _build_insights(filtered_df):
    """Return a list of Markdown insight strings for the filtered data.

    An insight that compares two groups (weekday vs weekend, peak vs
    off-peak) is omitted when either group has no rows, since the comparison
    would otherwise be reported as ``nan`` or as a misleading 0%.
    """
    insights = []
    if filtered_df.empty:
        return insights

    # Each aggregate is computed once and reused for both label and value.
    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean()
    peak_hour = hourly_mean.idxmax()
    insights.append(
        f"🕐 **Peak traffic hour**: {peak_hour}:00 - {peak_hour + 1}:00 with avg {hourly_mean.max():.0f} vehicles"
    )

    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
    busiest_loc = location_totals.idxmax()
    busiest_count = location_totals.max()
    insights.append(
        f"📍 **Busiest location**: {busiest_loc} with {busiest_count:,.0f} total vehicles"
    )

    avg_congestion = filtered_df["Congestion_Level"].mean()
    if avg_congestion < 2:
        congestion_status = "Low"
    elif avg_congestion < 4:
        congestion_status = "Moderate"
    else:
        congestion_status = "High"
    insights.append(
        f"🚦 **Overall congestion**: {congestion_status} (avg level: {avg_congestion:.1f}/5)"
    )

    is_weekend = filtered_df["IsWeekend"]
    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
    # The mean of an empty selection is NaN; only compare when both sides
    # exist and the denominator is positive.
    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
        insights.append(
            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% {'more' if diff_pct > 0 else 'less'} traffic on average"
        )

    # NOTE(review): assumes the Peak_Off_Peak column uses exactly the labels
    # "Peak" and "Off-Peak" - confirm against the dataset.
    peak_level = filtered_df.loc[
        filtered_df["Peak_Off_Peak"] == "Peak", "Congestion_Level"
    ].mean()
    off_peak_level = filtered_df.loc[
        filtered_df["Peak_Off_Peak"] == "Off-Peak", "Congestion_Level"
    ].mean()
    if pd.notna(peak_level) and pd.notna(off_peak_level):
        delta = peak_level - off_peak_level
        # Word the direction explicitly instead of printing a negative
        # number followed by "higher".
        direction = "higher" if delta >= 0 else "lower"
        insights.append(
            f"⏰ **Peak hours**: Congestion is {abs(delta):.1f} levels {direction} during peak hours"
        )

    return insights


def main():
    """Render the Urban Traffic Flow dashboard.

    Loads the dataset, draws the sidebar filters, applies them, and then
    renders the KPI row, the four analysis tabs and the automatic insights.
    When the filters exclude every row, a warning is shown and nothing else
    is rendered.
    """
    st.title("🚗 Urban Traffic Flow Dashboard")
    st.markdown(
        "Explore urban traffic patterns, congestion levels, and temporal trends"
    )

    df = load_data()

    selections = _render_sidebar_filters(df)
    filtered_df = _apply_filters(df, selections)

    # With no rows the KPIs would read "nan" and every chart would be built
    # from an empty frame, so stop early with an explanation.
    if filtered_df.empty:
        st.warning(
            "No data matches the current filters. Adjust the filters in the sidebar."
        )
        return

    _render_kpis(filtered_df)

    st.markdown("---")

    tab1, tab2, tab3, tab4 = st.tabs(
        [
            "📈 Temporal Trends",
            "📍 Location Analysis",
            "📊 Distribution",
            "🏆 Top Zones",
        ]
    )

    with tab1:
        _render_temporal_tab(filtered_df)

    with tab2:
        _render_location_tab(filtered_df)

    with tab3:
        _render_distribution_tab(filtered_df)

    with tab4:
        _render_top_zones_tab(filtered_df)

    st.markdown("---")
    st.subheader("💡 Automatic Insights")

    for insight in _build_insights(filtered_df):
        st.markdown(f"- {insight}")
473
+
474
 
475
# Build the dashboard only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()