# Source: Hugging Face Space "Group2DataVizSp25 / SJCrashes" (status: Sleeping)
# File size: 7,019 Bytes


import streamlit as st
import pandas as pd
import requests
import altair as alt
import pydeck as pdk
from datetime import datetime, timedelta

# -----------------------------
# CONFIG & TITLE
# -----------------------------
# Page-level settings first, then the heading and intro blurb shown on every page.
st.set_page_config(
    page_title="San José Crash Dashboard",
    layout="wide",
)
st.title("San José Real-Time Crash Dashboard")
st.markdown(
    "Explore live crash data from the San José Open Data Portal "
    "using interactive charts and maps."
)

# -----------------------------
# Constants
# -----------------------------
# Sent as the "resource_id" query parameter on every request made by fetch_data().
RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b"
# datastore_search endpoint that fetch_data() pages through with limit/offset.
BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search"

# -----------------------------
# Helper Functions
# -----------------------------
@st.cache_data(ttl=3600)
def fetch_data():
    """Download every record of the crash resource into a DataFrame.

    The endpoint is read page by page (``limit``/``offset``) until it
    returns an empty batch. The result is cached by Streamlit for one hour.

    Returns:
        pandas.DataFrame with one row per record returned by the API
        (empty if the resource has no records).

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    records = []
    limit = 1000
    offset = 0
    # One session for all pages so the HTTP connection is reused.
    with requests.Session() as session:
        while True:
            params = {"resource_id": RESOURCE_ID, "limit": limit, "offset": offset}
            # Without a timeout a stalled server would block the app forever.
            response = session.get(BASE_URL, params=params, timeout=30)
            # Fail with a clear HTTP error instead of a KeyError further down.
            response.raise_for_status()
            batch = response.json()["result"]["records"]
            if not batch:
                break
            records.extend(batch)
            offset += limit
    return pd.DataFrame(records)

def preprocess(df):
    """Return a cleaned copy of the raw crash records.

    The input frame is left untouched. In the returned frame:

    * ``CrashDateTime`` is parsed to datetimes; rows that fail to parse
      are dropped.
    * ``Latitude`` / ``Longitude`` are numeric (unparseable values -> NaN).
    * The four injury columns are numeric with missing values set to 0,
      and ``TotalInjuries`` is their sum.
    * ``SpeedingFlag``, ``Weather`` and ``RoadwaySurface`` have missing
      values replaced by ``"Unknown"``.
    * ``year`` holds the calendar year of ``CrashDateTime``.

    Args:
        df: DataFrame containing the columns listed above.

    Returns:
        A new DataFrame; ``df`` itself is not modified.

    Raises:
        KeyError: if any of the expected columns is missing.
    """
    injury_cols = ["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"]

    # Work on a copy so the caller's frame is never mutated.
    df = df.copy()

    df["CrashDateTime"] = pd.to_datetime(df["CrashDateTime"], errors="coerce")
    for col in ("Latitude", "Longitude"):
        df[col] = pd.to_numeric(df[col], errors="coerce")
    for col in injury_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
    for col in ("SpeedingFlag", "Weather", "RoadwaySurface"):
        df[col] = df[col].fillna("Unknown")
    df["TotalInjuries"] = df[injury_cols].sum(axis=1)

    # Drop unparseable timestamps *before* deriving the year so the year
    # column stays an integer instead of being widened to float by NaN.
    df = df.dropna(subset=["CrashDateTime"])
    return df.assign(year=df["CrashDateTime"].dt.year)

def filter_by_date(df, months):
    """Keep only crashes from the last ``months`` calendar months.

    Args:
        df: DataFrame with a datetime ``CrashDateTime`` column.
        months: ``"All"`` to disable filtering, otherwise a number of
            months (int or numeric string) to look back from now.

    Returns:
        ``df`` itself when ``months == "All"``; otherwise the rows whose
        ``CrashDateTime`` lies between (now - months) and now, inclusive.

    Raises:
        ValueError: if ``months`` is neither ``"All"`` nor convertible to int.
    """
    if months == "All":
        return df
    stamps = df["CrashDateTime"]
    # Match the column's timezone (None for naive data) so the comparison
    # below never mixes naive and aware timestamps.
    end_date = pd.Timestamp.now(tz=stamps.dt.tz)
    # Real calendar months rather than a 30-day approximation, so that
    # "12 months" spans a full year.
    start_date = end_date - pd.DateOffset(months=int(months))
    return df[(stamps >= start_date) & (stamps <= end_date)]

# -----------------------------
# Navigation sidebar
# -----------------------------
st.sidebar.title("Navigation")

# Pages offered in the sidebar; the selected label drives the rendering below.
_page_names = [
    "Weather & Injuries",
    "Speeding Trends",
    "Crash Map",
    "Road Surface Analysis",
    "Injury Correlation",
]
page = st.sidebar.radio("Go to", _page_names)


def _range_label(x):
    """Human-readable label for a time-range option."""
    if x == "All":
        return "All Time"
    return f"{x} Months"


# Look-back window in months; index 3 preselects "12".
_range_options = ["3", "6", "9", "12", "All"]
months = st.sidebar.selectbox(
    "Select Time Range",
    _range_options,
    index=3,
    format_func=_range_label,
)

# Clearing the cache makes the fetch below hit the API again.
if st.sidebar.button("🔁 Refresh Data"):
    st.cache_data.clear()

# Load, clean, then restrict to the chosen time range.
df = filter_by_date(preprocess(fetch_data()), months)

# Render pages dynamically
if page == "Weather & Injuries":
    st.header("Weather Conditions vs Injury Severity")
    st.markdown("See how different weather patterns affect the severity of crash injuries in San José.")

    # Total injuries of each severity per weather condition. Named
    # aggregation sets the short column names directly, so nothing depends
    # on column order, and the sums are already numeric.
    weather_df = df.groupby("Weather", as_index=False).agg(
        Minor=("MinorInjuries", "sum"),
        Moderate=("ModerateInjuries", "sum"),
        Severe=("SevereInjuries", "sum"),
        Fatal=("FatalInjuries", "sum"),
    )

    # Fold the four severity columns into (Injury_Type, Count) pairs so
    # each weather condition gets one bar coloured by severity.
    chart = alt.Chart(weather_df).transform_fold(
        ["Minor", "Moderate", "Severe", "Fatal"],
        as_=["Injury_Type", "Count"]
    ).mark_bar().encode(
        x=alt.X("Weather:N", title="Weather"),
        y=alt.Y("Count:Q"),
        color=alt.Color("Injury_Type:N"),
        tooltip=["Weather", alt.Tooltip("Injury_Type:N"), alt.Tooltip("Count:Q")]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)


# -----------------------------
# Speeding related crashes
# -----------------------------
elif page == "Speeding Trends":
    st.header("Speeding-Related Crash Trends")
    st.markdown("How often is speeding involved in crashes over the years?")

    trend_df = df.groupby(["year", "SpeedingFlag"]).size().reset_index(name="Count")
    chart = alt.Chart(trend_df).mark_bar().encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("Count:Q"),
        color=alt.Color("SpeedingFlag:N"),
        tooltip=["year", "SpeedingFlag", "Count"]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)


# -----------------------------
# Crash map (interactive map of crash locations)
# -----------------------------

elif page == "Crash Map":
    st.header("Crash Locations in San José")
    st.markdown("Zoom in to spot areas with high crash density.")

    map_df = df.dropna(subset=["Latitude", "Longitude"])
    st.pydeck_chart(pdk.Deck(
        map_style='mapbox://styles/mapbox/light-v9',
        initial_view_state=pdk.ViewState(
            latitude=map_df["Latitude"].mean(),
            longitude=map_df["Longitude"].mean(),
            zoom=11,
            pitch=50,
        ),
        layers=[
            pdk.Layer(
                "ScatterplotLayer",
                data=map_df,
                get_position="[Longitude, Latitude]",
                get_color="[200, 30, 0, 160]",
                get_radius=40,
            ),
        ],
    ))

# -----------------------------
# Road surface chart (TODO: improve this visualization)
# -----------------------------

elif page == "Road Surface Analysis":
    st.header("Crash Count by Road Surface Condition")
    st.markdown("Are certain road surface types more accident-prone?")

    surface_df = df.groupby("RoadwaySurface").size().reset_index(name="Count")
    chart = alt.Chart(surface_df).mark_bar().encode(
        x=alt.X("Count:Q", title="Crash Count"),
        y=alt.Y("RoadwaySurface:N", sort='-x', title="Road Surface"),
        tooltip=["RoadwaySurface", "Count"]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)


# -----------------------------
# Correlation
# -----------------------------

elif page == "Injury Correlation":
    st.header("Correlation Among Injury Stats")
    st.markdown("Numeric relationships between injury types and year.")

    num_df = df[["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries", "TotalInjuries", "year"]]
    corr = num_df.corr().round(2)
    corr_df = corr.reset_index().melt(id_vars="index")

    heatmap = alt.Chart(corr_df).mark_rect().encode(
        x=alt.X("index:N", title=None),
        y=alt.Y("variable:N", title=None),
        color=alt.Color("value:Q", scale=alt.Scale(scheme="redblue")),
        tooltip=["index", "variable", "value"]
    ).properties(height=450)
    st.altair_chart(heatmap, use_container_width=True)