Spaces:
Sleeping
Sleeping
File size: 7,019 Bytes
385dea0 72640ea 45d714f 72640ea 385dea0 72640ea 385dea0 72640ea 385dea0 72640ea 385dea0 72640ea 385dea0 72640ea 45d714f 72640ea 45d714f 72640ea 385dea0 72640ea 45d714f 72640ea 3abce8b 72640ea 3abce8b 45d714f 72640ea 3abce8b 45d714f 72640ea 3abce8b 45d714f 72640ea 9d4a575 72640ea | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import streamlit as st
import pandas as pd
import requests
import altair as alt
import pydeck as pdk
from datetime import datetime, timedelta
# -----------------------------
# CONFIG & TITLE
# -----------------------------
# Page-level Streamlit settings, followed by the dashboard heading and intro.
st.set_page_config(
    layout="wide",
    page_title="San José Crash Dashboard",
)
st.title("San José Real-Time Crash Dashboard")
st.markdown(
    "Explore live crash data from the San José Open Data Portal using interactive charts and maps."
)
# -----------------------------
# Constants
# -----------------------------
# Datastore search endpoint that fetch_data() pages through.
BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search"
# Identifier of the datastore resource sent as ``resource_id`` on each request.
RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b"
# -----------------------------
# Helper Functions
# -----------------------------
@st.cache_data(ttl=3600)
def fetch_data():
    """Download every record of the crash resource into a DataFrame.

    Pages through the datastore search endpoint, requesting up to 1000 rows
    per call, until the server returns an empty batch. Streamlit caches the
    result for one hour (``ttl=3600``).

    Returns:
        pandas.DataFrame: one row per record returned by the API (empty when
        the resource holds no records).

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        ValueError: if the body is not valid JSON or the API reports
            ``success: false``.
        KeyError: if the payload lacks ``result``/``records``.
    """
    records = []
    limit = 1000
    offset = 0
    while True:
        params = {"resource_id": RESOURCE_ID, "limit": limit, "offset": offset}
        # A timeout keeps a stalled server from hanging the whole app.
        response = requests.get(BASE_URL, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()
        if not payload.get("success", True):
            raise ValueError(
                f"Datastore search failed: {payload.get('error', 'unknown error')}"
            )
        batch = payload["result"]["records"]
        if not batch:
            break
        records.extend(batch)
        # Advance by what was actually received so no rows are skipped if the
        # server caps the page size below the requested limit.
        offset += len(batch)
    return pd.DataFrame(records)
def preprocess(df):
    """Return a cleaned copy of the raw crash records.

    The input frame is left untouched. On the copy:

    * ``CrashDateTime`` is parsed to datetimes; unparseable values become
      ``NaT`` and those rows are dropped from the result.
    * ``Latitude``/``Longitude`` are coerced to numbers (invalid -> NaN).
    * The four injury-count columns are coerced to numbers, with invalid or
      missing values counted as 0, and summed into ``TotalInjuries``.
    * Missing ``SpeedingFlag``, ``Weather`` and ``RoadwaySurface`` values
      are replaced by ``"Unknown"``.
    * ``year`` holds the crash year as an integer.

    Args:
        df: DataFrame containing the columns listed above.

    Returns:
        pandas.DataFrame: a new, cleaned frame with ``TotalInjuries`` and
        ``year`` appended.

    Raises:
        KeyError: if any expected column is absent.
    """
    # Work on a copy so the caller's frame is never modified.
    df = df.copy()

    df["CrashDateTime"] = pd.to_datetime(df["CrashDateTime"], errors="coerce")

    for column in ("Latitude", "Longitude"):
        df[column] = pd.to_numeric(df[column], errors="coerce")

    injury_columns = [
        "MinorInjuries",
        "ModerateInjuries",
        "SevereInjuries",
        "FatalInjuries",
    ]
    for column in injury_columns:
        df[column] = pd.to_numeric(df[column], errors="coerce").fillna(0)

    for column in ("SpeedingFlag", "Weather", "RoadwaySurface"):
        df[column] = df[column].fillna("Unknown")

    df["TotalInjuries"] = df[injury_columns].sum(axis=1)

    # Drop undated rows *before* deriving the year: with NaT present the
    # year column would be float (e.g. 2023.0) instead of integer.
    cleaned = df.dropna(subset=["CrashDateTime"])
    return cleaned.assign(year=cleaned["CrashDateTime"].dt.year)
def filter_by_date(df, months):
    """Keep only crashes from the last ``months`` calendar months.

    Args:
        df: DataFrame with a datetime ``CrashDateTime`` column.
        months: ``"All"`` to disable filtering, otherwise a value accepted
            by ``int()`` giving the number of months to look back.

    Returns:
        ``df`` itself when ``months == "All"``; otherwise the rows whose
        ``CrashDateTime`` lies between ``now - months`` and ``now``
        (both inclusive).
    """
    if months == "All":
        return df
    crash_times = df["CrashDateTime"]
    # Match the column's timezone (None for naive data) so the comparison
    # below does not raise on tz-aware timestamps.
    end_date = pd.Timestamp.now(tz=crash_times.dt.tz)
    # Real calendar months instead of a 30-day approximation, so that
    # "12 months" spans a full year rather than 360 days.
    start_date = end_date - pd.DateOffset(months=int(months))
    return df[(crash_times >= start_date) & (crash_times <= end_date)]
# -----------------------------
# Navigation sidebar
# -----------------------------
st.sidebar.title("Navigation")
# The selected label decides which page is rendered further down.
page = st.sidebar.radio(
    "Go to",
    [
        "Weather & Injuries",
        "Speeding Trends",
        "Crash Map",
        "Road Surface Analysis",
        "Injury Correlation",
    ],
)
# Look-back window; index=3 preselects "12".
months = st.sidebar.selectbox(
    "Select Time Range",
    ["3", "6", "9", "12", "All"],
    index=3,
    format_func=lambda x: f"{x} Months" if x != "All" else "All Time",
)
# Clearing the cache forces fetch_data() to hit the API again on this run.
if st.sidebar.button("🔁 Refresh Data"):
    st.cache_data.clear()

# Load and clean the data; show a readable message instead of a traceback
# when the download or the payload is broken.
try:
    df = preprocess(fetch_data())
except (requests.RequestException, KeyError, ValueError) as exc:
    st.error(f"Could not load crash data: {exc}")
    st.stop()

df = filter_by_date(df, months)
# Nothing to plot: stop here rather than rendering pages from empty data
# (e.g. the map would be centred on NaN coordinates).
if df.empty:
    st.warning("No crash records found for the selected time range.")
    st.stop()
# Render pages dynamically
# -----------------------------
# Weather vs injury severity
# -----------------------------
if page == "Weather & Injuries":
    st.header("Weather Conditions vs Injury Severity")
    st.markdown("See how different weather patterns affect the severity of crash injuries in San José.")
    # Map each source column to the short label shown in the legend.
    injury_labels = {
        "MinorInjuries": "Minor",
        "ModerateInjuries": "Moderate",
        "SevereInjuries": "Severe",
        "FatalInjuries": "Fatal",
    }
    # Rename by name rather than by position so the labels cannot get out of
    # step with the aggregated columns.
    weather_df = (
        df.groupby("Weather")[list(injury_labels)]
        .sum()
        .rename(columns=injury_labels)
        .reset_index()
    )
    # Fold the four severity columns into (Injury_Type, Count) pairs so a
    # single bar mark can be coloured by severity.
    chart = alt.Chart(weather_df).transform_fold(
        list(injury_labels.values()),
        as_=["Injury_Type", "Count"],
    ).mark_bar().encode(
        x=alt.X("Weather:N", title="Weather"),
        y=alt.Y("Count:Q"),
        color=alt.Color("Injury_Type:N"),
        tooltip=["Weather", alt.Tooltip("Injury_Type:N"), alt.Tooltip("Count:Q")],
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)
# -----------------------------
# Speeding-related crashes per year
# -----------------------------
elif page == "Speeding Trends":
    st.header("Speeding-Related Crash Trends")
    st.markdown("How often is speeding involved in crashes over the years?")
    # One row per (year, SpeedingFlag) pair with the number of crashes.
    trend_df = df.groupby(["year", "SpeedingFlag"]).size().reset_index(name="Count")
    chart = alt.Chart(trend_df).mark_bar().encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("Count:Q"),
        color=alt.Color("SpeedingFlag:N"),
        tooltip=["year", "SpeedingFlag", "Count"],
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)
# -----------------------------
# Crash location map
# -----------------------------
elif page == "Crash Map":
    st.header("Crash Locations in San José")
    st.markdown("Zoom in to spot areas with high crash density.")
    # The layer only reads the coordinates, so pass just those two columns:
    # this keeps the payload small and keeps timestamp columns out of the
    # data pydeck has to serialise.
    map_df = df.dropna(subset=["Latitude", "Longitude"])[["Longitude", "Latitude"]]
    if map_df.empty:
        # Without points there is no meaningful centre for the view.
        st.info("No crashes with valid coordinates in the selected time range.")
    else:
        st.pydeck_chart(pdk.Deck(
            map_style='mapbox://styles/mapbox/light-v9',
            # Centre the initial view on the mean crash position.
            initial_view_state=pdk.ViewState(
                latitude=map_df["Latitude"].mean(),
                longitude=map_df["Longitude"].mean(),
                zoom=11,
                pitch=50,
            ),
            layers=[
                pdk.Layer(
                    "ScatterplotLayer",
                    data=map_df,
                    get_position="[Longitude, Latitude]",
                    get_color="[200, 30, 0, 160]",
                    get_radius=40,
                ),
            ],
        ))
# -----------------------------
# Crash count by road surface
# TODO: the current visual can be improved.
# -----------------------------
elif page == "Road Surface Analysis":
    st.header("Crash Count by Road Surface Condition")
    st.markdown("Are certain road surface types more accident-prone?")
    surface_df = df.groupby("RoadwaySurface").size().reset_index(name="Count")
    # Horizontal bars, longest first (sort='-x').
    chart = alt.Chart(surface_df).mark_bar().encode(
        x=alt.X("Count:Q", title="Crash Count"),
        y=alt.Y("RoadwaySurface:N", sort='-x', title="Road Surface"),
        tooltip=["RoadwaySurface", "Count"],
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)
# -----------------------------
# Correlation heatmap
# -----------------------------
elif page == "Injury Correlation":
    st.header("Correlation Among Injury Stats")
    st.markdown("Numeric relationships between injury types and year.")
    num_df = df[["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries", "TotalInjuries", "year"]]
    corr = num_df.corr().round(2)
    # Long format: one row per (index, variable) pair with its coefficient.
    corr_df = corr.reset_index().melt(id_vars="index")
    heatmap = alt.Chart(corr_df).mark_rect().encode(
        x=alt.X("index:N", title=None),
        y=alt.Y("variable:N", title=None),
        # Pin the diverging scale to the full correlation range so that the
        # neutral colour always means 0, whatever values happen to occur.
        color=alt.Color("value:Q", scale=alt.Scale(scheme="redblue", domain=[-1, 1])),
        tooltip=["index", "variable", "value"],
    ).properties(height=450)
    st.altair_chart(heatmap, use_container_width=True)
|