IS445_Group2_FP

Sleeping

App Files Files Community

IS445_Group2_FP / app.py

Aniruddha8699

Update app.py

343370c verified 11 months ago

raw

history blame contribute delete

11.1 kB

	# app.py — San José Crash Safety Explorer
	import os
	import streamlit as st
	import pandas as pd
	import requests
	import altair as alt
	import pydeck as pdk
	from datetime import datetime, timedelta

	### If the Crash Map does not render on your local system, uncomment the Mapbox demo token below

	# if not os.getenv("MAPBOX_API_KEY"):
	# pdk.settings.mapbox_api_key = (
	# "pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4Nm44NTA4emYycXBndHRqbnB2N3gifQ."
	# "w5iJj8yOTFoNUG0GI_LhwA"
	# )

	### CONSTANTS & DATA HELPERS
	# Identifier sent as the "resource_id" query parameter on every API request.
	RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b"
	# Endpoint that the data-fetching helper requests page by page.
	BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search"
	# Sent as the "limit" query parameter, i.e. the page size asked for per request.
	PAGE_LIMIT = 1_000

	@st.cache_data(ttl=3_600, show_spinner=False)
	def _fetch_all_records() -> pd.DataFrame:
	    """Download every record of the crash resource, one page at a time.

	    Raises instead of returning a placeholder on failure, so that a
	    transient outage is never stored in the one-hour cache.

	    Returns:
	        A DataFrame built from all records returned by the API.

	    Raises:
	        requests.RequestException: network failure or non-2xx HTTP status.
	        KeyError, TypeError, ValueError: the response body is not the
	            expected ``{"result": {"records": [...]}}`` JSON document.
	    """
	    records, offset = [], 0
	    while True:
	        params = {"resource_id": RESOURCE_ID, "limit": PAGE_LIMIT, "offset": offset}
	        response = requests.get(BASE_URL, params=params, timeout=20)
	        response.raise_for_status()
	        batch = response.json()["result"]["records"]
	        if not batch:
	            break
	        records.extend(batch)
	        # Advance by what was actually received so no rows are skipped if the
	        # server returns fewer rows than PAGE_LIMIT per page.
	        offset += len(batch)
	    return pd.DataFrame.from_records(records)


	def fetch_data() -> pd.DataFrame:
	    """Return all crash records, or an empty frame if the API is unavailable.

	    The successful download is cached by ``_fetch_all_records``; failures are
	    reported to the user on every run and are not cached, so the next rerun
	    retries the request.

	    Returns:
	        The downloaded records, or an empty DataFrame with a single
	        ``CrashDateTime`` column when the download fails.
	    """
	    try:
	        return _fetch_all_records()
	    except (requests.RequestException, KeyError, TypeError, ValueError):
	        st.error("Cannot reach the San José Open‑Data API.")
	        return pd.DataFrame(columns=["CrashDateTime"])

	def preprocess(df: pd.DataFrame) -> pd.DataFrame:
	    """Return a cleaned copy of the raw crash records.

	    The input frame is left untouched. In the returned copy:

	    * ``CrashDateTime`` is parsed to datetimes; rows that fail to parse are dropped.
	    * ``Latitude`` / ``Longitude`` are numeric (unparseable values become NaN).
	    * The four injury-count columns are numeric with missing values set to 0.
	    * ``SpeedingFlag``, ``Weather`` and ``RoadwaySurface`` have missing values
	      replaced by ``"Unknown"``.
	    * ``CrashTime`` holds the timestamp formatted as ``YYYY-MM-DD HH:MM``.

	    Args:
	        df: Raw records; an empty frame is returned as-is.

	    Returns:
	        The cleaned DataFrame.
	    """
	    if df.empty:
	        return df

	    # Work on a copy so the caller's frame is never mutated.
	    out = df.copy()
	    out["CrashDateTime"] = pd.to_datetime(out["CrashDateTime"], errors="coerce")
	    for col in ("Latitude", "Longitude"):
	        out[col] = pd.to_numeric(out[col], errors="coerce")
	    for col in ("MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"):
	        out[col] = pd.to_numeric(out[col], errors="coerce").fillna(0)
	    for col in ("SpeedingFlag", "Weather", "RoadwaySurface"):
	        out[col] = out[col].fillna("Unknown")
	    # Plain ASCII hyphens: the previous format string used U+2011
	    # (non-breaking hyphen), producing timestamps that are not plain ASCII.
	    out["CrashTime"] = out["CrashDateTime"].dt.strftime("%Y-%m-%d %H:%M")
	    return out.dropna(subset=["CrashDateTime"]).copy()

	def time_filter(df: pd.DataFrame, months: str) -> pd.DataFrame:
	    """Keep only crashes from the last ``months`` calendar months.

	    Args:
	        df: Frame with a datetime ``CrashDateTime`` column.
	        months: ``"All"`` to disable filtering, otherwise a whole number of
	            months given as a string (e.g. ``"6"``).

	    Returns:
	        ``df`` itself when ``months == "All"`` or ``df`` is empty; otherwise an
	        independent copy of the rows whose ``CrashDateTime`` lies in
	        ``[now - months, now]`` (rows with a missing timestamp are excluded).
	    """
	    if months == "All" or df.empty:
	        return df

	    # Match the column's timezone (None for naive data) so the comparison
	    # never mixes naive and aware timestamps.
	    tz = getattr(df["CrashDateTime"].dtype, "tz", None)
	    end = pd.Timestamp.now(tz=tz)
	    # Real calendar months rather than 30-day blocks: "12" now spans a full
	    # year instead of 360 days.
	    start = end - pd.DateOffset(months=int(months))
	    in_window = df["CrashDateTime"].between(start, end)
	    # Copy so callers can add columns without a SettingWithCopyWarning.
	    return df.loc[in_window].copy()

	### PAGE HEADER, MOTIVATION, DATASET INFO, REFRESH
	# Page-level configuration comes first, then the title and intro copy.
	st.set_page_config(page_title="San José Crash Dashboard", page_icon="🚦", layout="wide")

	st.title("🚦 San José Crash Safety Explorer")

	motivation_md = """
	### Motivation
	San José’s ambitious Vision Zero program aims to eliminate traffic fatalities and serious injuries.
	Real‑time insights into when, where, and why crashes occur help planners, engineers, and residents target the most effective counter‑measures.
	"""
	st.markdown(motivation_md)

	dataset_md = """
	Dataset: [City of San José Crashes Data](https://data.sanjoseca.gov/)
	Updated nightly · 60 + attributes per crash (location, weather, speeding flag, injury counts, etc.)
	"""
	# Wide column for the dataset blurb, narrow column for the refresh button.
	left, right = st.columns([5, 1])
	with left:
	    st.markdown(dataset_md)
	with right:
	    refresh_clicked = st.button("🔁 Refresh")
	    if refresh_clicked:
	        # Drop every cached result so the data is downloaded again on this run.
	        st.cache_data.clear()

	### FILTERS
	def _range_label(m):
	    """Human-readable label for a time-range option."""
	    return "All time" if m == "All" else f"{m} months"


	range_options = ["6", "9", "12", "All"]
	# "All" (the last option) is pre-selected.
	months = st.selectbox(
	    "Select time range to explore",
	    range_options,
	    index=3,
	    format_func=_range_label,
	)
	st.divider()

	### DATA LOAD
	# Download -> clean -> restrict to the selected time range.
	raw_df = fetch_data()
	clean_df = preprocess(raw_df)
	df = time_filter(clean_df, months)
	if df.empty:
	    st.warning("No data available for the selected period.")
	    st.stop()

	# Derived columns used by the tabs below.
	injury_columns = ["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"]
	df["year"] = df["CrashDateTime"].dt.year
	df["TotalInjuries"] = df[injury_columns].sum(axis=1)

	def two_col_layout(chart, narrative_md):
	    """Show an Altair chart next to its Markdown narrative.

	    Args:
	        chart: Altair chart rendered in the wider (left) column at container width.
	        narrative_md: Markdown text rendered in the narrower (right) column;
	            embedded HTML is allowed.
	    """
	    chart_col, text_col = st.columns([3, 2], gap="large")
	    chart_col.altair_chart(chart, use_container_width=True)
	    text_col.markdown(narrative_md, unsafe_allow_html=True)

	### NAVIGATION TABS
	# Order matters: the sections below address the tabs by position.
	tab_titles = [
	    "Weather & Injuries",
	    "Speeding Trend",
	    "Crash Map",
	    "Road Surface",
	    "Injury Correlation",
	]
	tabs = st.tabs(tab_titles)

	# ───────── TAB 1 — WEATHER × INJURIES ─────────
	with tabs[0]:
	    # Source column -> short label shown in the legend.
	    severity_labels = {
	        "MinorInjuries": "Minor",
	        "ModerateInjuries": "Moderate",
	        "SevereInjuries": "Severe",
	        "FatalInjuries": "Fatal",
	    }
	    # Injury totals per weather condition, one column per severity.
	    w_df = df.groupby("Weather")[list(severity_labels)].sum().reset_index()
	    w_df = w_df.rename(columns=severity_labels)
	    # Long form: one row per (Weather, InjuryType) pair.
	    w_long = w_df.melt("Weather", var_name="InjuryType", value_name="Count")

	    # Legend click selects an injury type; clicking a bar selects a weather condition.
	    sel_type = alt.selection_point(fields=["InjuryType"], bind="legend")
	    sel_weather = alt.selection_point(fields=["Weather"], toggle="event")
	    # Bars matching both selections stay opaque, the rest are dimmed.
	    bar_opacity = alt.condition(sel_type & sel_weather, alt.value(1), alt.value(0.25))

	    w_chart = alt.Chart(w_long).mark_bar().encode(
	        x="Weather:N",
	        y="Count:Q",
	        color="InjuryType:N",
	        opacity=bar_opacity,
	        tooltip=["Weather", "InjuryType", "Count"],
	    )
	    w_chart = w_chart.add_params(sel_type, sel_weather).properties(height=380)

	    weather_story = """
	### Weather‑Linked Injury Patterns
	Rain & drizzle elevate moderate injuries, while fatal crashes stay steady—indicating speed, not just weather, drives lethality.

	Use for seasonal safety messaging, wet‑weather friction treatments, and enforcement surge planning.
	"""
	    two_col_layout(w_chart, weather_story)

	# ───────── TAB 2 — SPEEDING TREND ─────────
	with tabs[1]:
	    # Crash count per (year, speeding flag) pair.
	    s_df = df.groupby(["year", "SpeedingFlag"]).size().reset_index(name="Count")

	    # Legend click highlights one speeding-flag value and dims the others.
	    sel_flag = alt.selection_point(fields=["SpeedingFlag"], bind="legend")
	    flag_opacity = alt.condition(sel_flag, alt.value(1), alt.value(0.25))

	    s_chart = alt.Chart(s_df).mark_bar(size=22).encode(
	        x="year:O",
	        y="Count:Q",
	        color="SpeedingFlag:N",
	        opacity=flag_opacity,
	        tooltip=["year", "SpeedingFlag", "Count"],
	    )
	    s_chart = s_chart.add_params(sel_flag).properties(height=380)

	    speeding_story = """
	### Speeding‑Related Collisions Over Time
	Red bars show crashes flagged for speeding. A mild downward trend since 2020 suggests progress from speed‑limit reductions and targeted enforcement.

	Actionable insight – focus radar enforcement on outlier years or sudden upticks.
	"""
	    two_col_layout(s_chart, speeding_story)

	# ───────── TAB 3 — CRASH MAP ─────────
	with tabs[2]:
	    # st.header("Crash Locations in San José")
	    # st.markdown("Zoom in to spot areas with high crash density.")

	    # Only crashes with both coordinates can be plotted.
	    map_df = df.dropna(subset=["Latitude", "Longitude"])
	    if map_df.empty:
	        st.info("No geocoded crashes for this period.")
	    else:
	        # Ship only the columns the layer and tooltip actually read; sending
	        # the full frame serialises every attribute of every crash to the
	        # browser for no visible benefit.
	        layer_df = map_df[["Longitude", "Latitude", "CrashTime", "TotalInjuries"]]
	        deck = pdk.Deck(
	            map_style="mapbox://styles/mapbox/light-v9",
	            # Centre the initial view on the mean crash position.
	            initial_view_state=pdk.ViewState(
	                latitude=map_df["Latitude"].mean(),
	                longitude=map_df["Longitude"].mean(),
	                zoom=11,
	                pitch=50,
	            ),
	            layers=[
	                pdk.Layer(
	                    "ScatterplotLayer",
	                    data=layer_df,
	                    get_position="[Longitude, Latitude]",
	                    get_radius=40,
	                    get_fill_color="[200, 30, 0, 160]",
	                    pickable=True,  # required for the hover tooltip
	                )
	            ],
	            tooltip={"html": "<b>📍 Crash</b><br/>{CrashTime}<br/>Total injuries: {TotalInjuries}"}
	        )
	        vis, story = st.columns([3, 2], gap="large")
	        with vis:
	            st.pydeck_chart(deck, use_container_width=True)
	        with story:
	            st.markdown("""
	### City‑wide Crash Hotspots
	Pins cluster along freeway ramps and busy arterials.
	Hover to reveal timestamp and injury tally; zoom for intersection‑scale detail.

	Guides infrastructure fixes – protected turns, lane re‑striping, signal timing.
	""")

	# ───────── TAB 4 — ROAD SURFACE ─────────
	with tabs[3]:
	    # Crash count per roadway surface value.
	    surf_df = df.groupby("RoadwaySurface").size().reset_index(name="Count")

	    # Legend click highlights one surface and dims the others.
	    sel_surf = alt.selection_point(fields=["RoadwaySurface"], bind="legend")
	    surf_opacity = alt.condition(sel_surf, alt.value(1), alt.value(0.25))

	    # Horizontal bars, longest first.
	    surf_chart = alt.Chart(surf_df).mark_bar().encode(
	        x="Count:Q",
	        y=alt.Y("RoadwaySurface:N", sort="-x"),
	        color="RoadwaySurface:N",
	        opacity=surf_opacity,
	        tooltip=["RoadwaySurface", "Count"],
	    )
	    surf_chart = surf_chart.add_params(sel_surf).properties(height=380)

	    surface_story = """
	### Surface Condition & Crash Frequency
	While asphalt dominates totals, wet/oily pavement poses disproportionate risk.

	Maintenance cue – schedule surface re‑texturing or drainage upgrades right after early‑season rains.
	"""
	    two_col_layout(surf_chart, surface_story)

	# ───────── TAB 5 — INJURY CORRELATION ─────────
	with tabs[4]:
	    # st.header("Correlation Among Injury Stats")
	    # st.markdown("Numeric relationships between injury types and year.")

	    # Pairwise correlation of the injury counts, reshaped to one row per
	    # (index, variable) cell for the heat-map.
	    # NOTE(review): the narrative below mentions "year", but year is not part
	    # of this matrix — confirm which of the two is intended.
	    num_df = df[["MinorInjuries", "ModerateInjuries", "SevereInjuries",
	                 "FatalInjuries", "TotalInjuries"]]
	    corr = num_df.corr().round(2)
	    corr_df = corr.reset_index().melt(id_vars="index")

	    # The "redblue" scheme runs red (low) -> blue (high). Reversing it and
	    # pinning the domain to the full correlation range makes the colours
	    # match the legend text below: red = positive, blue = negative, and
	    # white sits exactly at zero instead of at the middle of the data range.
	    corr_scale = alt.Scale(scheme="redblue", reverse=True, domain=[-1, 1])

	    heatmap = (
	        alt.Chart(corr_df)
	        .mark_rect()
	        .encode(
	            x=alt.X("index:N", title=None),
	            y=alt.Y("variable:N", title=None),
	            color=alt.Color("value:Q", scale=corr_scale),
	            tooltip=["index", "variable", "value"]
	        )
	        .properties(height=450)
	    )

	    two_col_layout(
	        heatmap,
	        """
	### Injury Metric Inter‑Relationships
	Heat‑map reveals how injury categories co‑vary with each other and with year.

	* Strong red = positive correlation (metrics rise together).
	* Strong blue = negative correlation (one rises as the other falls).
	* Near‑white = little or no relationship.

	Use this matrix to spot redundant metrics or unexpected links worth deeper investigation.
	"""
	    )

	### CONCLUSION & FOOTER
	conclusion_md = """
	## Conclusion
	Leveraging open data and lightweight web visualisation, this dashboard pinpoints where, when, and under what conditions San José crashes occur.
	Insights support:

	* Vision Zero engineering – identify corridors for protected turns, lane diets, and friction treatment.
	* Targeted enforcement – deploy speed emphasis patrols where speeding spikes.
	* Public outreach – craft seasonal safety messages (e.g., first‑rain reminders).

	Continued monitoring keeps stakeholders aligned on progress toward the city’s goal of zero traffic deaths.
	"""
	footer_text = "© City of San José Open‑Data • Dashboard for IS 445 / Group2 - Aniruddha Jenifer Radha Sanidhya Smruti"

	# Closing section: separator, summary, then the credit line.
	st.divider()
	st.markdown(conclusion_md)
	st.caption(footer_text)