"""Streamlit dashboard for exploring San José crash records.

The script downloads crash records from the San José Open Data Portal,
normalises the columns the charts rely on, and renders one of several
pages (charts or a map) chosen from the sidebar.
"""

from datetime import datetime, timedelta

import altair as alt
import pandas as pd
import pydeck as pdk
import requests
import streamlit as st

# -----------------------------
# CONFIG & TITLE
# -----------------------------
st.set_page_config(page_title="San José Crash Dashboard", layout="wide")
st.title("San José Real-Time Crash Dashboard")
st.markdown("Explore live crash data from the San José Open Data Portal using interactive charts and maps.")

# -----------------------------
# Constants
# -----------------------------
RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b"
BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search"

PAGE_SIZE = 1000                  # rows requested per API call
REQUEST_TIMEOUT = 30              # seconds; prevents a stalled request from hanging the app
CACHE_TTL = timedelta(hours=1)    # how long downloaded records are reused

# Raw injury column -> short label shown in the weather chart.
INJURY_LABELS = {
    "MinorInjuries": "Minor",
    "ModerateInjuries": "Moderate",
    "SevereInjuries": "Severe",
    "FatalInjuries": "Fatal",
}
CATEGORY_COLUMNS = ("SpeedingFlag", "Weather", "RoadwaySurface")


# -----------------------------
# Helper Functions
# -----------------------------
@st.cache_data(ttl=CACHE_TTL)
def fetch_data():
    """Download every record of the crash resource, page by page.

    Returns:
        pandas.DataFrame with one row per record returned by the API
        (empty when the resource has no records).

    Raises:
        requests.RequestException: on network errors, timeouts, or a
            non-success HTTP status.
        ValueError: when the response body is not the expected JSON shape.
    """
    records = []
    offset = 0
    with requests.Session() as session:
        while True:
            params = {"resource_id": RESOURCE_ID, "limit": PAGE_SIZE, "offset": offset}
            response = session.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
            # NOTE(review): the endpoint looks like a CKAN action API, whose
            # responses carry a "success" flag — confirm against the portal.
            if isinstance(payload, dict) and payload.get("success") is False:
                raise ValueError(f"API reported failure: {payload.get('error')}")
            try:
                batch = payload["result"]["records"]
            except (KeyError, TypeError) as err:
                raise ValueError("Unexpected response shape from the data portal") from err
            if not batch:
                break
            records.extend(batch)
            # Advance by what was actually received so that a server-side
            # page cap smaller than PAGE_SIZE cannot cause skipped rows.
            offset += len(batch)
    return pd.DataFrame(records)


def preprocess(df):
    """Return a cleaned copy of the raw crash records.

    Parses ``CrashDateTime``, coerces coordinates and injury counts to
    numbers (missing injury counts become 0), fills missing categorical
    values with ``"Unknown"``, and adds ``TotalInjuries`` and ``year``.
    Rows whose crash date cannot be parsed are dropped.

    Args:
        df: raw records as returned by ``fetch_data``; it is not modified.

    Returns:
        A new pandas.DataFrame with the cleaned and derived columns.
    """
    df = df.copy()  # leave the caller's frame untouched
    df["CrashDateTime"] = pd.to_datetime(df["CrashDateTime"], errors="coerce")
    for column in ("Latitude", "Longitude"):
        df[column] = pd.to_numeric(df[column], errors="coerce")
    for column in INJURY_LABELS:
        df[column] = pd.to_numeric(df[column], errors="coerce").fillna(0)
    for column in CATEGORY_COLUMNS:
        df[column] = df[column].fillna("Unknown")
    df["TotalInjuries"] = df[list(INJURY_LABELS)].sum(axis=1)
    # Derive the year only after unparseable dates are gone, so the column
    # stays integer-typed instead of being upcast to float by NaT rows.
    return df.dropna(subset=["CrashDateTime"]).assign(
        year=lambda frame: frame["CrashDateTime"].dt.year
    )


def filter_by_date(df, months):
    """Keep crashes from the last ``months`` calendar months.

    Args:
        df: preprocessed records with a datetime ``CrashDateTime`` column.
        months: number of months as a string or int, or ``"All"`` to
            disable filtering.

    Returns:
        The filtered DataFrame (``df`` itself when ``months == "All"``).
    """
    if months == "All":
        return df
    # NOTE(review): assumes CrashDateTime parses to timezone-naive
    # timestamps, matching the naive "now" used here — confirm.
    end_date = pd.Timestamp(datetime.now())
    # Calendar months rather than 30-day blocks, so "12 Months" is a full year.
    start_date = end_date - pd.DateOffset(months=int(months))
    return df[df["CrashDateTime"].between(start_date, end_date)]


# -----------------------------
# Page renderers
# -----------------------------
def _render_weather(df):
    """Stacked bar chart of injury counts by weather condition."""
    st.header("Weather Conditions vs Injury Severity")
    st.markdown("See how different weather patterns affect the severity of crash injuries in San José.")
    weather_df = (
        df.groupby("Weather")[list(INJURY_LABELS)]
        .sum()
        .reset_index()
        .rename(columns=INJURY_LABELS)
    )
    severity_labels = list(INJURY_LABELS.values())
    chart = alt.Chart(weather_df).transform_fold(
        severity_labels,
        as_=["Injury_Type", "Count"]
    ).mark_bar().encode(
        x=alt.X("Weather:N", title="Weather"),
        y=alt.Y("Count:Q"),
        color=alt.Color("Injury_Type:N"),
        tooltip=["Weather", alt.Tooltip("Injury_Type:N"), alt.Tooltip("Count:Q")]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)


def _render_speeding(df):
    """Bar chart of yearly crash counts split by the speeding flag."""
    st.header("Speeding-Related Crash Trends")
    st.markdown("How often is speeding involved in crashes over the years?")
    trend_df = df.groupby(["year", "SpeedingFlag"]).size().reset_index(name="Count")
    chart = alt.Chart(trend_df).mark_bar().encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("Count:Q"),
        color=alt.Color("SpeedingFlag:N"),
        tooltip=["year", "SpeedingFlag", "Count"]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)


def _render_map(df):
    """Scatter map of crash locations that have valid coordinates."""
    st.header("Crash Locations in San José")
    st.markdown("Zoom in to spot areas with high crash density.")
    # The layer only reads the two position columns, so send nothing else.
    map_df = df.dropna(subset=["Latitude", "Longitude"])[["Latitude", "Longitude"]]
    if map_df.empty:
        # Without points the mean coordinates would be NaN and the view
        # state would be invalid.
        st.info("No crashes with valid coordinates in the selected time range.")
        return
    st.pydeck_chart(pdk.Deck(
        map_style='mapbox://styles/mapbox/light-v9',
        initial_view_state=pdk.ViewState(
            latitude=map_df["Latitude"].mean(),
            longitude=map_df["Longitude"].mean(),
            zoom=11,
            pitch=50,
        ),
        layers=[
            pdk.Layer(
                "ScatterplotLayer",
                data=map_df,
                get_position="[Longitude, Latitude]",
                get_color="[200, 30, 0, 160]",
                get_radius=40,
            ),
        ],
    ))


def _render_road_surface(df):
    """Horizontal bar chart of crash counts per road surface condition."""
    # TODO: the current visual can be improved.
    st.header("Crash Count by Road Surface Condition")
    st.markdown("Are certain road surface types more accident-prone?")
    surface_df = df.groupby("RoadwaySurface").size().reset_index(name="Count")
    chart = alt.Chart(surface_df).mark_bar().encode(
        x=alt.X("Count:Q", title="Crash Count"),
        y=alt.Y("RoadwaySurface:N", sort='-x', title="Road Surface"),
        tooltip=["RoadwaySurface", "Count"]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)


def _render_correlation(df):
    """Heatmap of pairwise correlations between injury counts and year."""
    st.header("Correlation Among Injury Stats")
    st.markdown("Numeric relationships between injury types and year.")
    num_df = df[[*INJURY_LABELS, "TotalInjuries", "year"]]
    corr = num_df.corr().round(2)
    # Long format: one row per (index, variable) pair for the rect marks.
    corr_df = corr.reset_index().melt(id_vars="index")
    heatmap = alt.Chart(corr_df).mark_rect().encode(
        x=alt.X("index:N", title=None),
        y=alt.Y("variable:N", title=None),
        color=alt.Color("value:Q", scale=alt.Scale(scheme="redblue")),
        tooltip=["index", "variable", "value"]
    ).properties(height=450)
    st.altair_chart(heatmap, use_container_width=True)


# Sidebar label -> renderer; insertion order is the order shown in the radio.
PAGES = {
    "Weather & Injuries": _render_weather,
    "Speeding Trends": _render_speeding,
    "Crash Map": _render_map,
    "Road Surface Analysis": _render_road_surface,
    "Injury Correlation": _render_correlation,
}

# -----------------------------
# Navigation sidebar
# -----------------------------
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", list(PAGES))

months = st.sidebar.selectbox(
    "Select Time Range",
    ["3", "6", "9", "12", "All"],
    index=3,
    format_func=lambda x: f"{x} Months" if x != "All" else "All Time",
)

if st.sidebar.button("🔁 Refresh Data"):
    st.cache_data.clear()

# -----------------------------
# Load, clean and filter the data
# -----------------------------
try:
    raw_df = fetch_data()
except (requests.RequestException, ValueError) as exc:
    st.error(f"Could not load crash data from the San José Open Data Portal: {exc}")
    st.stop()

if raw_df.empty:
    # preprocess() expects the crash columns to exist; an empty download has none.
    st.warning("The data portal returned no crash records.")
    st.stop()

df = preprocess(raw_df)
df = filter_by_date(df, months)

if df.empty:
    st.warning("No crash records found for the selected time range.")
    st.stop()

# -----------------------------
# Render the selected page
# -----------------------------
PAGES[page](df)