IS445_Group2_FP / app.py
Aniruddha8699's picture
Update app.py
343370c verified
# app.py — San José Crash Safety Explorer
import os
import streamlit as st
import pandas as pd
import requests
import altair as alt
import pydeck as pdk
from datetime import datetime, timedelta
### If the Crash Map does not render on a local system, uncomment the Mapbox demo token below
# if not os.getenv("MAPBOX_API_KEY"):
# pdk.settings.mapbox_api_key = (
# "pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4Nm44NTA4emYycXBndHRqbnB2N3gifQ."
# "w5iJj8yOTFoNUG0GI_LhwA"
# )
### CONSTANTS & DATA HELPERS
# Identifier of the crash dataset; sent as the `resource_id` query parameter.
RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b"
# Endpoint that fetch_data() queries once per page of records.
BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search"
# Rows requested per call; also the step by which the paging offset advances.
PAGE_LIMIT = 1_000
@st.cache_data(ttl=3_600, show_spinner=False)
def _fetch_all_records() -> pd.DataFrame:
    """Download every crash record from the datastore, one page at a time.

    The result is cached for one hour. Any network, HTTP or payload problem
    is raised rather than swallowed, so a failed download is never stored in
    the cache (only successful return values are cached).

    Returns:
        A DataFrame with one row per record returned by the API.

    Raises:
        requests.RequestException: on connection, timeout or HTTP errors.
        KeyError, TypeError, ValueError: when the payload is not the expected
            ``{"result": {"records": [...]}}`` JSON structure.
    """
    records, offset = [], 0
    while True:
        params = {"resource_id": RESOURCE_ID, "limit": PAGE_LIMIT, "offset": offset}
        response = requests.get(BASE_URL, params=params, timeout=20)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        batch = response.json()["result"]["records"]
        if not batch:
            # An empty page means the offset has moved past the last record.
            break
        records.extend(batch)
        offset += PAGE_LIMIT
    return pd.DataFrame.from_records(records)


def fetch_data() -> pd.DataFrame:
    """Return all crash records, or an empty frame if the API is unavailable.

    The download itself is delegated to the cached ``_fetch_all_records``.
    Handling the failure here, outside the cached function, means an outage
    is retried on the next rerun instead of being served from the cache
    until the TTL expires.

    Returns:
        The full crash DataFrame on success; on failure an empty DataFrame
        with a single ``CrashDateTime`` column, after showing an error
        message in the app.
    """
    try:
        return _fetch_all_records()
    except (requests.RequestException, KeyError, TypeError, ValueError):
        st.error("Cannot reach the San José Open‑Data API.")
        return pd.DataFrame(columns=["CrashDateTime"])
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, typed copy of the raw crash records.

    The input frame is never modified. In the returned frame:

    * ``CrashDateTime`` is parsed to datetimes; rows that fail to parse are
      dropped.
    * ``Latitude`` / ``Longitude`` are numeric (unparseable values -> NaN).
    * The four injury-count columns are numeric with missing values as 0.
    * ``SpeedingFlag``, ``Weather`` and ``RoadwaySurface`` have missing
      values replaced by ``"Unknown"``.
    * ``CrashTime`` holds the timestamp formatted as ``YYYY-MM-DD HH:MM``.

    Args:
        df: Raw records; may be empty.

    Returns:
        The cleaned frame, or ``df`` itself when it is empty.
    """
    if df.empty:
        return df

    # Work on a copy so the caller's frame is left untouched.
    clean = df.copy()
    clean["CrashDateTime"] = pd.to_datetime(clean["CrashDateTime"], errors="coerce")

    for col in ("Latitude", "Longitude"):
        clean[col] = pd.to_numeric(clean[col], errors="coerce")

    for col in ("MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"):
        clean[col] = pd.to_numeric(clean[col], errors="coerce").fillna(0)

    for col in ("SpeedingFlag", "Weather", "RoadwaySurface"):
        clean[col] = clean[col].fillna("Unknown")

    # Plain ASCII hyphens: the previous pattern used U+2011 non-breaking
    # hyphens, which leaked non-ASCII characters into every timestamp label.
    clean["CrashTime"] = clean["CrashDateTime"].dt.strftime("%Y-%m-%d %H:%M")

    return clean.dropna(subset=["CrashDateTime"]).copy()
def time_filter(df: pd.DataFrame, months: str) -> pd.DataFrame:
    """Keep only the crashes from the last ``months`` calendar months.

    Args:
        df: Frame with a datetime ``CrashDateTime`` column.
        months: ``"All"`` to disable filtering, otherwise a whole number of
            months given as a string (e.g. ``"6"``).

    Returns:
        ``df`` itself when ``months`` is ``"All"`` or ``df`` is empty;
        otherwise an independent copy of the rows whose ``CrashDateTime``
        lies between "now minus N months" and now, both ends inclusive.

    Raises:
        ValueError: if ``months`` is neither ``"All"`` nor an integer string.
    """
    if months == "All" or df.empty:
        return df

    # NOTE(review): compares against the naive local clock, so this assumes
    # CrashDateTime holds timezone-naive values — confirm against the feed.
    end = pd.Timestamp.now()
    # Real calendar months instead of a 30-day approximation, so "12 months"
    # spans a full year rather than 360 days.
    start = end - pd.DateOffset(months=int(months))

    in_window = df["CrashDateTime"].between(start, end)
    # Copy so callers can add columns without chained-assignment warnings.
    return df.loc[in_window].copy()
### PAGE HEADER, MOTIVATION, DATASET INFO, REFRESH
# Page chrome: browser-tab title, wide layout and the main heading.
st.set_page_config("San José Crash Dashboard", layout="wide", page_icon="🚦")
st.title("🚦 San José Crash Safety Explorer")

# Short statement of why the dashboard exists.
st.markdown("""
### Motivation
San José’s ambitious **Vision Zero** program aims to eliminate traffic fatalities and serious injuries.
Real‑time insights into *when, where, and why* crashes occur help planners, engineers, and residents target the most effective counter‑measures.
""")

# Wide column for the dataset blurb, narrow one for the refresh button.
left, right = st.columns([5, 1])
left.markdown("""
**Dataset**: [City of San José Crashes Data](https://data.sanjoseca.gov/)
Updated nightly · 60 + attributes per crash (location, weather, speeding flag, injury counts, etc.)
""")
if right.button("🔁 Refresh"):
    # Drop cached results so the data load further down fetches again.
    st.cache_data.clear()
### FILTERS
def _format_month_option(option):
    """Return the label shown in the time-range selector for ``option``."""
    if option == "All":
        return "All time"
    return f"{option} months"


# Look-back window choices; index 3 makes "All" the default selection.
_month_options = ["6", "9", "12", "All"]
months = st.selectbox(
    "Select time range to explore",
    _month_options,
    index=3,
    format_func=_format_month_option,
)
st.divider()
### DATA LOAD
# Fetch -> clean -> restrict to the selected look-back window.
df = time_filter(preprocess(fetch_data()), months)
if df.empty:
    # Nothing to chart: tell the user and end this script run here.
    st.warning("No data available for the selected period.")
    st.stop()

# Derived columns used by the charts below. `assign` builds a new frame, so
# nothing is written into the filtered selection returned by time_filter
# (in-place assignment there raises SettingWithCopyWarning on pandas < 3).
_injury_cols = ["MinorInjuries", "ModerateInjuries",
                "SevereInjuries", "FatalInjuries"]
df = df.assign(
    year=df["CrashDateTime"].dt.year,
    TotalInjuries=df[_injury_cols].sum(axis=1),
)
def two_col_layout(chart, narrative_md):
    """Show an Altair chart beside its narrative in a 3:2 column split.

    Args:
        chart: Altair chart rendered in the wider, left-hand column.
        narrative_md: Markdown text rendered in the right-hand column
            (HTML inside it is allowed).
    """
    chart_col, text_col = st.columns([3, 2], gap="large")
    chart_col.altair_chart(chart, use_container_width=True)
    text_col.markdown(narrative_md, unsafe_allow_html=True)
### NAVIGATION TABS
# One tab per analysis; the sections below address them by position.
_tab_titles = [
    "Weather & Injuries",
    "Speeding Trend",
    "Crash Map",
    "Road Surface",
    "Injury Correlation",
]
tabs = st.tabs(_tab_titles)
# ───────── TAB 1 — WEATHER × INJURIES ─────────
with tabs[0]:
    # Source column -> short label shown in the legend.
    injury_labels = {
        "MinorInjuries": "Minor",
        "ModerateInjuries": "Moderate",
        "SevereInjuries": "Severe",
        "FatalInjuries": "Fatal",
    }

    # Injury totals per weather condition, then one row per (weather, type).
    w_df = (
        df.groupby("Weather")[list(injury_labels)]
        .sum()
        .reset_index()
        .rename(columns=injury_labels)
    )
    w_long = w_df.melt(id_vars="Weather", var_name="InjuryType", value_name="Count")

    # Legend click selects an injury type; clicking a bar selects a weather.
    sel_type = alt.selection_point(fields=["InjuryType"], bind="legend")
    sel_weather = alt.selection_point(fields=["Weather"], toggle="event")
    # Bars matching both selections stay opaque; the rest are dimmed.
    bar_opacity = alt.condition(sel_type & sel_weather, alt.value(1), alt.value(0.25))

    w_chart = (
        alt.Chart(w_long)
        .mark_bar()
        .encode(
            x="Weather:N",
            y="Count:Q",
            color="InjuryType:N",
            opacity=bar_opacity,
            tooltip=["Weather", "InjuryType", "Count"],
        )
        .add_params(sel_type, sel_weather)
        .properties(height=380)
    )

    weather_story = """
### Weather‑Linked Injury Patterns
*Rain & drizzle* elevate **moderate injuries**, while fatal crashes stay steady—indicating speed, not just weather, drives lethality.
**Use for** seasonal safety messaging, wet‑weather friction treatments, and enforcement surge planning.
"""
    two_col_layout(w_chart, weather_story)
# ───────── TAB 2 — SPEEDING TREND ─────────
with tabs[1]:
    # Crash count for every (year, speeding flag) combination.
    s_df = (
        df.groupby(["year", "SpeedingFlag"])
        .size()
        .reset_index(name="Count")
    )

    # Legend click highlights one flag value and dims the others.
    sel_flag = alt.selection_point(fields=["SpeedingFlag"], bind="legend")
    flag_opacity = alt.condition(sel_flag, alt.value(1), alt.value(0.25))

    s_chart = (
        alt.Chart(s_df)
        .mark_bar(size=22)
        .encode(
            x="year:O",
            y="Count:Q",
            color="SpeedingFlag:N",
            opacity=flag_opacity,
            tooltip=["year", "SpeedingFlag", "Count"],
        )
        .add_params(sel_flag)
        .properties(height=380)
    )

    speeding_story = """
### Speeding‑Related Collisions Over Time
Red bars show crashes **flagged for speeding**. A mild downward trend since 2020 suggests progress from speed‑limit reductions and targeted enforcement.
**Actionable insight** – focus radar enforcement on outlier years or sudden upticks.
"""
    two_col_layout(s_chart, speeding_story)
# ───────── TAB 3 — CRASH MAP ─────────
with tabs[2]:
    # st.header("Crash Locations in San José")
    # st.markdown("Zoom in to spot areas with high crash density.")

    # Only crashes with coordinates can be plotted. Of the many record
    # attributes, the layer accessors and the tooltip reference just these
    # four columns, so the frame is trimmed to them before being handed to
    # the layer instead of passing every attribute along as layer data.
    map_df = df.dropna(subset=["Latitude", "Longitude"])[
        ["Longitude", "Latitude", "CrashTime", "TotalInjuries"]
    ]

    if map_df.empty:
        st.info("No geocoded crashes for this period.")
    else:
        # Centre the initial view on the mean crash position.
        view_state = pdk.ViewState(
            latitude=map_df["Latitude"].mean(),
            longitude=map_df["Longitude"].mean(),
            zoom=11,
            pitch=50,
        )
        # One translucent red dot per crash; pickable so the tooltip works.
        crash_layer = pdk.Layer(
            "ScatterplotLayer",
            data=map_df,
            get_position="[Longitude, Latitude]",
            get_radius=40,
            get_fill_color="[200, 30, 0, 160]",
            pickable=True,
        )
        deck = pdk.Deck(
            map_style="mapbox://styles/mapbox/light-v9",
            initial_view_state=view_state,
            layers=[crash_layer],
            tooltip={"html": "<b>📍 Crash</b><br/>{CrashTime}<br/>Total injuries: {TotalInjuries}"},
        )

        vis, story = st.columns([3, 2], gap="large")
        with vis:
            st.pydeck_chart(deck, use_container_width=True)
        with story:
            st.markdown("""
### City‑wide Crash Hotspots
Pins cluster along freeway ramps and busy arterials.
Hover to reveal timestamp and injury tally; zoom for intersection‑scale detail.
*Guides infrastructure fixes – protected turns, lane re‑striping, signal timing.*
""")
# ───────── TAB 4 — ROAD SURFACE ─────────
with tabs[3]:
    # Number of crashes recorded for each roadway surface value.
    surf_df = (
        df.groupby("RoadwaySurface")
        .size()
        .reset_index(name="Count")
    )

    # Legend click highlights one surface and dims the others.
    sel_surf = alt.selection_point(fields=["RoadwaySurface"], bind="legend")
    surf_opacity = alt.condition(sel_surf, alt.value(1), alt.value(0.25))

    surf_chart = (
        alt.Chart(surf_df)
        .mark_bar()
        .encode(
            x="Count:Q",
            # Horizontal bars ordered by descending count.
            y=alt.Y("RoadwaySurface:N", sort="-x"),
            color="RoadwaySurface:N",
            opacity=surf_opacity,
            tooltip=["RoadwaySurface", "Count"],
        )
        .add_params(sel_surf)
        .properties(height=380)
    )

    surface_story = """
### Surface Condition & Crash Frequency
While asphalt dominates totals, **wet/oily** pavement poses disproportionate risk.
**Maintenance cue** – schedule surface re‑texturing or drainage upgrades right after early‑season rains.
"""
    two_col_layout(surf_chart, surface_story)
# ───────── TAB 5 — INJURY CORRELATION ─────────
with tabs[4]:
    # st.header("Correlation Among Injury Stats")
    # st.markdown("Numeric relationships between injury types and year.")

    # The narrative below describes correlations among the injury metrics
    # *and* with year, so `year` is part of the matrix.
    num_df = df[["MinorInjuries", "ModerateInjuries", "SevereInjuries",
                 "FatalInjuries", "TotalInjuries", "year"]]
    corr = num_df.corr().round(2)
    # Long form: one row per (index, variable) pair with its coefficient.
    corr_df = corr.reset_index().melt(id_vars="index")

    # The "redblue" scheme runs red (low) -> blue (high). The narrative
    # promises red = positive, blue = negative and near-white = none, so the
    # scheme is reversed and the domain pinned to the full [-1, 1] range,
    # which puts white at a coefficient of 0.
    corr_scale = alt.Scale(scheme="redblue", reverse=True, domain=[-1, 1])

    heatmap = (
        alt.Chart(corr_df)
        .mark_rect()
        .encode(
            x=alt.X("index:N", title=None),
            y=alt.Y("variable:N", title=None),
            color=alt.Color("value:Q", scale=corr_scale),
            tooltip=["index", "variable", "value"],
        )
        .properties(height=450)
    )
    two_col_layout(
        heatmap,
        """
### Injury Metric Inter‑Relationships
Heat‑map reveals how injury categories co‑vary with each other and with **year**.
* Strong red = positive correlation (metrics rise together).
* Strong blue = negative correlation (one rises as the other falls).
* Near‑white = little or no relationship.
Use this matrix to spot redundant metrics or unexpected links worth deeper investigation.
"""
    )
### CONCLUSION & FOOTER
# Closing summary shown beneath the tabs.
_conclusion_md = """
## Conclusion
Leveraging open data and lightweight web visualisation, this dashboard pinpoints **where**, **when**, and **under what conditions** San José crashes occur.
Insights support:
* **Vision Zero engineering** – identify corridors for protected turns, lane diets, and friction treatment.
* **Targeted enforcement** – deploy speed emphasis patrols where speeding spikes.
* **Public outreach** – craft seasonal safety messages (e.g., first‑rain reminders).
Continued monitoring keeps stakeholders aligned on progress toward the city’s goal of *zero traffic deaths*.
"""
# Attribution line rendered as a small caption.
_footer_text = "© City of San José Open‑Data • Dashboard for IS 445 / Group2 - Aniruddha Jenifer Radha Sanidhya Smruti"

st.divider()
st.markdown(_conclusion_md)
st.caption(_footer_text)