Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import requests | |
| import altair as alt | |
| import pydeck as pdk | |
| from datetime import datetime, timedelta | |
| # ----------------------------- | |
| # CONFIG & TITLE | |
| # ----------------------------- | |
| st.set_page_config(page_title="San José Crash Dashboard", layout="wide") | |
| st.title("San José Real-Time Crash Dashboard") | |
| st.markdown("Explore live crash data from the San José Open Data Portal using interactive charts and maps.") | |
| # ----------------------------- | |
| # Constants | |
| # ----------------------------- | |
| RESOURCE_ID = "15408d78-9734-4ea1-b3e5-a0f99568dd9b" | |
| BASE_URL = "https://data.sanjoseca.gov/api/3/action/datastore_search" | |
| # ----------------------------- | |
| # Helper Functions | |
| # ----------------------------- | |
@st.cache_data(ttl=600)
def fetch_data():
    """Download every crash record from the San José open-data datastore.

    The ``datastore_search`` endpoint is paged, so records are requested in
    batches until the server returns an empty batch.

    The result is cached with ``st.cache_data`` (10-minute TTL) so that the
    script reruns Streamlit performs on every widget interaction do not
    re-download the whole dataset.  Calling ``st.cache_data.clear()`` forces
    a fresh download on the next call.

    Returns:
        pandas.DataFrame: one row per record, with the columns delivered by
        the API (empty frame if the resource holds no records).

    Raises:
        requests.HTTPError: if the portal answers with an HTTP error status.
        requests.RequestException: on connection failures or timeouts.
    """
    page_size = 1000
    offset = 0
    records = []
    # One session so all pages share a single pooled connection.
    with requests.Session() as session:
        while True:
            response = session.get(
                BASE_URL,
                params={
                    "resource_id": RESOURCE_ID,
                    "limit": page_size,
                    "offset": offset,
                },
                timeout=30,  # never hang the app on a stalled request
            )
            response.raise_for_status()
            batch = response.json()["result"]["records"]
            if not batch:
                break
            records.extend(batch)
            # Advance by what was actually received: if the server caps the
            # page size below `page_size`, no records are skipped.
            offset += len(batch)
    return pd.DataFrame(records)
def preprocess(df):
    """Return a cleaned copy of the raw crash records.

    Cleaning steps:
      * ``CrashDateTime`` is parsed to datetimes; rows whose value cannot be
        parsed are dropped.
      * ``Latitude`` / ``Longitude`` are coerced to numbers (invalid -> NaN).
      * The four injury-count columns are coerced to numbers, with invalid or
        missing values counted as 0.
      * Missing ``SpeedingFlag`` / ``Weather`` / ``RoadwaySurface`` values
        become ``"Unknown"``.
      * ``TotalInjuries`` (sum of the four injury columns) and ``year``
        (calendar year of the crash) are added.

    The input frame is left untouched; all work happens on a copy.

    Args:
        df: raw records containing the columns named above.

    Returns:
        pandas.DataFrame: cleaned rows, keeping their original index labels.

    Raises:
        KeyError: if any of the expected columns is absent.
    """
    injury_cols = ["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"]
    category_cols = ["SpeedingFlag", "Weather", "RoadwaySurface"]

    # Drop undated rows first so that `year` below is computed on valid
    # datetimes only and therefore stays an integer column.
    crash_time = pd.to_datetime(df["CrashDateTime"], errors="coerce")
    has_date = crash_time.notna()
    cleaned = df.loc[has_date].copy()
    cleaned["CrashDateTime"] = crash_time[has_date]

    for col in ("Latitude", "Longitude"):
        cleaned[col] = pd.to_numeric(cleaned[col], errors="coerce")
    for col in injury_cols:
        cleaned[col] = pd.to_numeric(cleaned[col], errors="coerce").fillna(0)
    for col in category_cols:
        cleaned[col] = cleaned[col].fillna("Unknown")

    cleaned["TotalInjuries"] = cleaned[injury_cols].sum(axis=1)
    cleaned["year"] = cleaned["CrashDateTime"].dt.year
    return cleaned
def filter_by_date(df, months):
    """Keep only the crashes that happened within the last ``months`` months.

    Args:
        df: frame with a datetime ``CrashDateTime`` column.
        months: ``"All"`` to disable filtering, otherwise the number of
            months to look back (anything ``int()`` accepts, e.g. ``"12"``).

    Returns:
        ``df`` itself when ``months == "All"``; otherwise the rows whose
        ``CrashDateTime`` lies between "now minus ``months`` calendar months"
        and "now", both ends inclusive.
    """
    if months == "All":
        return df

    crash_time = df["CrashDateTime"]
    # Build the bounds in the column's own timezone (None for naive data) so
    # the comparison is valid for both naive and tz-aware columns.
    end_date = pd.Timestamp.now(tz=getattr(crash_time.dtype, "tz", None))
    # Calendar-month arithmetic: "12 months" is a full year, not 360 days.
    start_date = end_date - pd.DateOffset(months=int(months))
    return df[crash_time.between(start_date, end_date)]
| # ----------------------------- | |
| # Navigation sidebar | |
| # ----------------------------- | |
# Everything created inside this context is placed in the sidebar.
with st.sidebar:
    st.title("Navigation")

    # Which dashboard page to render.
    page = st.radio(
        "Go to",
        [
            "Weather & Injuries",
            "Speeding Trends",
            "Crash Map",
            "Road Surface Analysis",
            "Injury Correlation",
        ],
    )

    # Look-back window; defaults to the fourth option ("12").
    months = st.selectbox(
        "Select Time Range",
        ["3", "6", "9", "12", "All"],
        index=3,
        format_func=lambda option: "All Time" if option == "All" else f"{option} Months",
    )

    # Manual reload: drop anything held by st.cache_data.
    if st.button("🔁 Refresh Data"):
        st.cache_data.clear()

# Load, clean and restrict the records to the selected time range.
df = filter_by_date(preprocess(fetch_data()), months)
| # Render pages dynamically | |
# Render the page chosen in the sidebar.  `df` holds the cleaned records for
# the selected time range.
if df.empty:
    # Nothing to plot: say so instead of drawing blank charts.
    st.warning("No crash records found for the selected time range.")

elif page == "Weather & Injuries":
    st.header("Weather Conditions vs Injury Severity")
    st.markdown("See how different weather patterns affect the severity of crash injuries in San José.")

    # Injuries of each severity summed per weather condition; the columns are
    # renamed to short labels that double as legend entries.
    weather_df = (
        df.groupby("Weather")[
            ["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries"]
        ]
        .sum()
        .reset_index()
    )
    weather_df.columns = ["Weather", "Minor", "Moderate", "Severe", "Fatal"]

    # Fold the four severity columns into (Injury_Type, Count) pairs so each
    # weather condition gets one stacked bar coloured by severity.
    chart = alt.Chart(weather_df).transform_fold(
        ["Minor", "Moderate", "Severe", "Fatal"],
        as_=["Injury_Type", "Count"]
    ).mark_bar().encode(
        x=alt.X("Weather:N", title="Weather"),
        y=alt.Y("Count:Q"),
        color=alt.Color("Injury_Type:N"),
        tooltip=["Weather", alt.Tooltip("Injury_Type:N"), alt.Tooltip("Count:Q")]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)

elif page == "Speeding Trends":
    st.header("Speeding-Related Crash Trends")
    st.markdown("How often is speeding involved in crashes over the years?")

    # Number of crashes per (year, speeding flag) pair.
    trend_df = df.groupby(["year", "SpeedingFlag"]).size().reset_index(name="Count")
    chart = alt.Chart(trend_df).mark_bar().encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("Count:Q"),
        color=alt.Color("SpeedingFlag:N"),
        tooltip=["year", "SpeedingFlag", "Count"]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)

elif page == "Crash Map":
    st.header("Crash Locations in San José")
    st.markdown("Zoom in to spot areas with high crash density.")

    # Only crashes with usable coordinates can be plotted.
    map_df = df.dropna(subset=["Latitude", "Longitude"])
    if map_df.empty:
        # The mean of an empty column is NaN, which is not a valid view centre.
        st.info("No crashes with valid coordinates in the selected time range.")
    else:
        st.pydeck_chart(pdk.Deck(
            map_style='mapbox://styles/mapbox/light-v9',
            # Centre the initial view on the average crash position.
            initial_view_state=pdk.ViewState(
                latitude=map_df["Latitude"].mean(),
                longitude=map_df["Longitude"].mean(),
                zoom=11,
                pitch=50,
            ),
            layers=[
                pdk.Layer(
                    "ScatterplotLayer",
                    # The layer reads nothing but the position, so send only
                    # those two columns to the browser.
                    data=map_df[["Longitude", "Latitude"]],
                    get_position="[Longitude, Latitude]",
                    get_color="[200, 30, 0, 160]",
                    get_radius=40,
                ),
            ],
        ))

elif page == "Road Surface Analysis":
    # TODO: this visual needs more work; the current chart can be improved.
    st.header("Crash Count by Road Surface Condition")
    st.markdown("Are certain road surface types more accident-prone?")

    # Number of crashes per road-surface condition, largest bar first.
    surface_df = df.groupby("RoadwaySurface").size().reset_index(name="Count")
    chart = alt.Chart(surface_df).mark_bar().encode(
        x=alt.X("Count:Q", title="Crash Count"),
        y=alt.Y("RoadwaySurface:N", sort='-x', title="Road Surface"),
        tooltip=["RoadwaySurface", "Count"]
    ).properties(height=450)
    st.altair_chart(chart, use_container_width=True)

elif page == "Injury Correlation":
    st.header("Correlation Among Injury Stats")
    st.markdown("Numeric relationships between injury types and year.")

    num_df = df[["MinorInjuries", "ModerateInjuries", "SevereInjuries", "FatalInjuries", "TotalInjuries", "year"]]
    corr = num_df.corr().round(2)
    # Long form for Altair: one row per (index, variable, value) cell.
    corr_df = corr.reset_index().melt(id_vars="index")
    heatmap = alt.Chart(corr_df).mark_rect().encode(
        x=alt.X("index:N", title=None),
        y=alt.Y("variable:N", title=None),
        # Pin the domain to the full correlation range so the midpoint of the
        # diverging scheme always means "no correlation".
        color=alt.Color("value:Q", scale=alt.Scale(scheme="redblue", domain=[-1, 1])),
        tooltip=["index", "variable", "value"]
    ).properties(height=450)
    st.altair_chart(heatmap, use_container_width=True)