# SJCrashes / app.py
# SANIDHYAG's picture
# Update app.py
# 9d4a575 verified
import streamlit as st
import pandas as pd
import requests
import altair as alt
import pydeck as pdk
from datetime import datetime, timedelta
# -----------------------------
# CONFIG & TITLE
# -----------------------------
# Page chrome: browser-tab title, wide layout, then the visible heading and
# a one-line introduction.
st.set_page_config(
    layout="wide",
    page_title="San José Crash Dashboard",
)
st.title("San José Real-Time Crash Dashboard")
st.markdown(
    "Explore live crash data from the San José Open Data Portal using interactive charts and maps."
)

# -----------------------------
# Constants
# -----------------------------
# Endpoint queried by fetch_data().
BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search"
# Sent as the ``resource_id`` query parameter on every request to BASE_URL.
RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b"
# -----------------------------
# Helper Functions
# -----------------------------
@st.cache_data(ttl=3600)
def fetch_data():
    """Download every record of the crash resource and return it as a DataFrame.

    The endpoint is queried page by page (``limit`` rows per request, advancing
    ``offset`` each time) until a page comes back with no records. The result
    is cached by Streamlit for one hour (``ttl=3600``).

    Returns:
        pandas.DataFrame: one row per record returned by the API; empty if the
        first page has no records.

    Raises:
        requests.HTTPError: if any page request returns an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    records = []
    limit = 1000
    offset = 0
    while True:
        params = {"resource_id": RESOURCE_ID, "limit": limit, "offset": offset}
        # Without a timeout a stalled server would hang the whole app run.
        response = requests.get(BASE_URL, params=params, timeout=30)
        # Fail loudly on HTTP errors instead of a confusing KeyError below.
        response.raise_for_status()
        batch = response.json()["result"]["records"]
        if not batch:
            break
        records.extend(batch)
        offset += limit
    return pd.DataFrame(records)
def preprocess(df):
    """Return a cleaned copy of the raw crash records.

    The input frame is left untouched. In the copy:

    * ``CrashDateTime`` is parsed to datetimes; rows whose value cannot be
      parsed are dropped.
    * ``Latitude`` / ``Longitude`` are converted to numbers (invalid -> NaN).
    * The four injury-count columns are converted to numbers with invalid or
      missing values treated as 0, and summed into ``TotalInjuries``.
    * Missing ``SpeedingFlag`` / ``Weather`` / ``RoadwaySurface`` values become
      ``"Unknown"``.
    * ``year`` holds the integer calendar year of ``CrashDateTime``.

    Args:
        df: DataFrame containing the columns named above.

    Returns:
        pandas.DataFrame: the cleaned frame.

    Raises:
        KeyError: if one of the expected columns is absent.
    """
    injury_cols = ["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"]

    # Work on a copy so the caller's frame is never modified.
    df = df.copy()
    df["CrashDateTime"] = pd.to_datetime(df["CrashDateTime"], errors="coerce")
    for col in ("Latitude", "Longitude"):
        df[col] = pd.to_numeric(df[col], errors="coerce")
    for col in injury_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
    for col in ("SpeedingFlag", "Weather", "RoadwaySurface"):
        df[col] = df[col].fillna("Unknown")
    df["TotalInjuries"] = df[injury_cols].sum(axis=1)
    df["year"] = df["CrashDateTime"].dt.year

    df = df.dropna(subset=["CrashDateTime"])
    # With the unparseable dates gone no NaN years remain, so the column can be
    # a true integer (2023) rather than a float (2023.0).
    return df.astype({"year": "int64"})
def filter_by_date(df, months):
    """Keep only crashes from the last ``months`` calendar months.

    Args:
        df: DataFrame with a datetime ``CrashDateTime`` column.
        months: ``"All"`` to skip filtering, otherwise a value convertible to
            ``int`` giving the number of months to look back from now.

    Returns:
        pandas.DataFrame: ``df`` itself when ``months == "All"``; otherwise the
        rows whose ``CrashDateTime`` lies between the start of the window and
        the current time, both ends inclusive.

    Raises:
        ValueError: if ``months`` is neither ``"All"`` nor an integer string.
    """
    if months == "All":
        return df
    crash_times = df["CrashDateTime"]
    # Use the column's own timezone (None for naive data) so the comparison
    # never mixes naive and aware timestamps.
    tz = getattr(crash_times.dtype, "tz", None)
    end_date = pd.Timestamp.now(tz=tz)
    # Real calendar months: "12" means one full year, not 12 * 30 = 360 days.
    start_date = end_date - pd.DateOffset(months=int(months))
    return df[crash_times.between(start_date, end_date)]
# -----------------------------
# Navigation sidebar
# -----------------------------
def _format_time_range(x):
    """Turn a time-range option into the label shown in the select box."""
    if x == "All":
        return "All Time"
    return f"{x} Months"


# Sidebar widgets, in display order: page selector, time range, cache reset.
st.sidebar.title("Navigation")
_page_names = [
    "Weather & Injuries",
    "Speeding Trends",
    "Crash Map",
    "Road Surface Analysis",
    "Injury Correlation",
]
page = st.sidebar.radio("Go to", _page_names)
months = st.sidebar.selectbox(
    "Select Time Range",
    ["3", "6", "9", "12", "All"],
    index=3,  # "12" is preselected
    format_func=_format_time_range,
)

# Clearing the cache makes the fetch_data() call below hit the API again.
if st.sidebar.button("🔁 Refresh Data"):
    st.cache_data.clear()

# Load, clean, then narrow to the chosen time range.
df = filter_by_date(preprocess(fetch_data()), months)
# Render pages dynamically
# Each branch below renders one sidebar page from the already filtered `df`.
if page == "Weather & Injuries":
    st.header("Weather Conditions vs Injury Severity")
    st.markdown("See how different weather patterns affect the severity of crash injuries in San José.")
    # Total each injury severity per weather condition.
    weather_df = df.groupby("Weather").agg({
        "MinorInjuries": "sum",
        "ModerateInjuries": "sum",
        "SevereInjuries": "sum",
        "FatalInjuries": "sum",
    }).reset_index()
    weather_df.columns = ["Weather", "Minor", "Moderate", "Severe", "Fatal"]
    for col in ["Minor", "Moderate", "Severe", "Fatal"]:
        weather_df[col] = pd.to_numeric(weather_df[col], errors="coerce").fillna(0)
    # transform_fold reshapes the four severity columns into
    # (Injury_Type, Count) pairs so one bar chart can colour by severity.
    chart = alt.Chart(weather_df).transform_fold(
        ["Minor", "Moderate", "Severe", "Fatal"],
        as_=["Injury_Type", "Count"],
    ).mark_bar().encode(
        x=alt.X("Weather:N", title="Weather"),
        y=alt.Y("Count:Q"),
        # The ":N" shorthand already declares the field nominal.
        color=alt.Color("Injury_Type:N"),
        tooltip=["Weather", alt.Tooltip("Injury_Type:N"), alt.Tooltip("Count:Q")],
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)
# -----------------------------
# Speeding-related crashes
# -----------------------------
elif page == "Speeding Trends":
    st.header("Speeding-Related Crash Trends")
    st.markdown("How often is speeding involved in crashes over the years?")
    # Number of crashes for every (year, SpeedingFlag) combination.
    trend_df = df.groupby(["year", "SpeedingFlag"]).size().reset_index(name="Count")
    chart = alt.Chart(trend_df).mark_bar().encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("Count:Q"),
        color=alt.Color("SpeedingFlag:N"),
        tooltip=["year", "SpeedingFlag", "Count"],
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)
# -----------------------------
# Crash map
# -----------------------------
elif page == "Crash Map":
    st.header("Crash Locations in San José")
    st.markdown("Zoom in to spot areas with high crash density.")
    # The layer only reads the two coordinate columns. Passing just those keeps
    # the data sent to the browser small and keeps timestamp columns, which
    # pydeck may fail to serialise, out of the payload.
    map_df = df.dropna(subset=["Latitude", "Longitude"])[["Latitude", "Longitude"]]
    if map_df.empty:
        # The mean of an empty column is NaN, which is not a usable map centre.
        st.info("No crashes with valid coordinates in the selected time range.")
    else:
        st.pydeck_chart(pdk.Deck(
            map_style='mapbox://styles/mapbox/light-v9',
            # Centre the initial view on the average crash position.
            initial_view_state=pdk.ViewState(
                latitude=map_df["Latitude"].mean(),
                longitude=map_df["Longitude"].mean(),
                zoom=11,
                pitch=50,
            ),
            layers=[
                pdk.Layer(
                    "ScatterplotLayer",
                    data=map_df,
                    get_position="[Longitude, Latitude]",
                    get_color="[200, 30, 0, 160]",
                    get_radius=40,
                ),
            ],
        ))
# -----------------------------
# Road surface chart (we need to do more work on this one, the current visual can be improved)
# -----------------------------
elif page == "Road Surface Analysis":
    st.header("Crash Count by Road Surface Condition")
    st.markdown("Are certain road surface types more accident-prone?")
    # Crash count per road-surface value, drawn as horizontal bars sorted by
    # descending count.
    surface_df = df.groupby("RoadwaySurface").size().reset_index(name="Count")
    chart = alt.Chart(surface_df).mark_bar().encode(
        x=alt.X("Count:Q", title="Crash Count"),
        y=alt.Y("RoadwaySurface:N", sort='-x', title="Road Surface"),
        tooltip=["RoadwaySurface", "Count"],
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)
# -----------------------------
# Correlation
# -----------------------------
elif page == "Injury Correlation":
    st.header("Correlation Among Injury Stats")
    st.markdown("Numeric relationships between injury types and year.")
    num_df = df[["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries", "TotalInjuries", "year"]]
    corr = num_df.corr().round(2)
    # Melt the square correlation matrix into (index, variable, value) rows,
    # one per heatmap cell.
    corr_df = corr.reset_index().melt(id_vars="index")
    heatmap = alt.Chart(corr_df).mark_rect().encode(
        x=alt.X("index:N", title=None),
        y=alt.Y("variable:N", title=None),
        color=alt.Color("value:Q", scale=alt.Scale(scheme="redblue")),
        tooltip=["index", "variable", "value"],
    ).properties(height=450)
    st.altair_chart(heatmap, use_container_width=True)