Spaces:

is445fall2025
/

project3.1

Sleeping

App Files Files Community

jkhare2 committed on Dec 8, 2025

Commit

068ea38

verified ·

1 Parent(s): 7d7ee3c

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +139 -162

src/streamlit_app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 # streamlit_app.py
 """
-Chicago Parks in Motion — Streamlit app
-Author: Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)
 Primary dataset: Chicago Park District Activities
-Data source (CSV endpoint): https://data.cityofchicago.org/resource/tn7v-6rnw.csv
 """
 import streamlit as st
@@ -11,113 +11,112 @@ import pandas as pd
 import numpy as np
 import plotly.express as px
 st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
-# -------------------------
-# Helper: Load & preprocess
-# -------------------------
 @st.cache_data(ttl=3600)
 def load_data():
-    csv_url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
-    try:
-        df = pd.read_csv(csv_url, dtype=str)
-    except Exception as e:
-        st.error("Could not load dataset from the City of Chicago portal.")
-        raise e
     df.columns = [c.strip() for c in df.columns]
     if "fee" in df.columns:
         df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
     # Extract lat/lon
-    def extract_latlon(val):
-        if pd.isna(val):
-            return (np.nan, np.nan)
-        sval = str(val)
-        if "POINT" in sval:
-            try:
-                inside = sval.split("(", 1)[1].rstrip(")")
-                lon, lat = map(float, inside.split())
-                return lat, lon
-            except:
-                return (np.nan, np.nan)
-        return (np.nan, np.nan)
     if "location" in df.columns:
-        latlon = df["location"].map(extract_latlon)
         df["latitude"] = latlon.map(lambda x: x[0])
         df["longitude"] = latlon.map(lambda x: x[1])
-    else:
-        df["latitude"] = np.nan
-        df["longitude"] = np.nan
     # Dates
     for c in ["start_date", "end_date"]:
         if c in df.columns:
-            df[c] = pd.to_datetime(df[c], errors="coerce")
-    # Activity type clean
     if "activity_type" in df.columns:
         df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
     else:
         df["activity_type_clean"] = "Unknown"
-    # Park name
-    possible_names = ["park_name", "park", "location_facility", "location_name", "site_name"]
-    park_col = next((col for col in possible_names if col in df.columns), None)
-    if park_col:
-        df["park_name"] = df[park_col].astype(str).replace(["", "nan", "None"], "Unknown Park")
-    else:
         df["park_name"] = "Unknown Park"
-    return df
 df = load_data()
-# -------------------------
-# Title
-# -------------------------
 st.title("Chicago Parks in Motion: How Our City Plays")
 st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
-# -------------------------
-# Sidebar filters
-# -------------------------
-st.sidebar.header("Filters & Settings")
-categories = sorted(df["activity_type_clean"].dropna().unique())
-chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
-# Season detection
-def season_from_date(dt):
-    if pd.isna(dt): return "Unknown"
-    m = dt.month
-    if m in [12,1,2]: return "Winter"
-    if m in [3,4,5]: return "Spring"
-    if m in [6,7,8]: return "Summer"
-    return "Fall"
-df["season"] = df["start_date"].map(season_from_date)
 seasons = sorted(df["season"].unique())
 chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
 if "fee" in df.columns:
-    max_fee = float(df["fee"].fillna(0).max())
-    fee_limit = st.sidebar.slider("Maximum fee (USD)", 0.0, max_fee, max_fee)
 else:
     fee_limit = None
-park_search = st.sidebar.text_input("Search park name (partial)")
-# Accessibility hint
-st.sidebar.caption("Filters help beginners explore the dataset easily without technical skills.")
-# -------------------------
-# Filtering logic
-# -------------------------
 filtered = df.copy()
 if chosen_category != "All":
     filtered = filtered[filtered["activity_type_clean"] == chosen_category]
@@ -125,141 +124,119 @@ if chosen_season != "All":
     filtered = filtered[filtered["season"] == chosen_season]
 if fee_limit is not None:
     filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
-if park_search:
-    filtered = filtered[filtered["park_name"].str.contains(park_search, case=False)]
-st.sidebar.write(f"Programs shown: **{len(filtered):,}**")
-# -------------------------
-# CENTRAL VISUALIZATION
-# -------------------------
-st.header("Central Interactive Visualization — Programs by Park")
-view = st.radio("Choose a view:", ["Map (recommended)", "Bar chart"], horizontal=True)
-if view.startswith("Map"):
-    # Aggregate for map
-    agg = (
-        filtered.groupby(["park_name", "latitude", "longitude"], dropna=True)
-        .size().reset_index(name="count")
-    )
-    if agg.dropna().shape[0] > 0:
-        fig_map = px.scatter_mapbox(
-            agg,
-            lat="latitude",
-            lon="longitude",
-            size="count",
-            color="count",
-            color_continuous_scale="Bluered",
-            size_max=28,
-            zoom=10,
-            hover_name="park_name",
-            hover_data={"count": True},
-            height=600,
-        )
-        fig_map.update_layout(mapbox_style="open-street-map", margin=dict(l=0,r=0,b=0,t=0))
-        st.plotly_chart(fig_map, use_container_width=True)
-    else:
-        st.warning("No geographic coordinates available for this filtered view.")
-else:
-    agg = filtered.groupby("park_name").size().reset_index(name="count")
-    agg = agg.sort_values("count", ascending=False).head(20)
-    fig_bar = px.bar(
         agg,
-        x="count",
-        y="park_name",
-        orientation="h",
         color="count",
         color_continuous_scale="Cividis",
-        height=600,
     )
-    fig_bar.update_layout(yaxis={'categoryorder':'total ascending'})
-    st.plotly_chart(fig_bar, use_container_width=True)
-# Explanation under central viz
-st.markdown("""
-**What this visualization shows:**
-This is our main visualization because it helps readers understand where activities are happening across Chicago’s parks.
-The map shows each park as a circle, where larger and darker circles represent locations with more programs.
-This makes it easy to see which areas are activity hubs and which are quieter. The filters allow anyone to explore patterns by season,
-category, price, or park—without needing technical experience.
-""")
-# -------------------------
-# CONTEXTUAL VISUALIZATION 1
-# -------------------------
-st.header("Contextual Visualization 1 — Activity Category Breakdown")
 cat_counts = df["activity_type_clean"].value_counts().reset_index()
-cat_counts.columns = ["activity_type", "count"]
 fig_cat = px.pie(
     cat_counts,
-    names="activity_type",
-    values="count",
-    hole=0.35,
-    color_discrete_sequence=px.colors.sequential.RdBu
 )
 st.plotly_chart(fig_cat, use_container_width=True)
-st.markdown("""
-**Why this matters:**
-This chart shows what kinds of activities Chicago parks offer most often—such as sports, aquatics, arts, or youth programs.
-It helps readers understand the variety of programs available across the city.
-Using a simple color palette keeps the chart readable for people who may not be familiar with data visualization.
 """)
-# -------------------------
-# CONTEXTUAL VISUALIZATION 2
-# -------------------------
-st.header("Contextual Visualization 2 — Programs by Season")
 season_counts = df["season"].value_counts().reset_index()
-season_counts.columns = ["Season", "Program Count"]
 fig_season = px.bar(
     season_counts,
     x="Season",
     y="Program Count",
-    color="Program Count",
-    color_continuous_scale="Tealgrn",
     text="Program Count",
-    height=500,
 )
 fig_season.update_traces(textposition="outside")
 st.plotly_chart(fig_season, use_container_width=True)
-st.markdown("""
-**Why this is helpful:**
-This chart shows when programs are most active throughout the year.
-Comparing seasons helps readers see whether summer is the busiest time, or whether activities are spread evenly.
-This makes it easier for residents and planners to understand how weather, school schedules, and community needs
-shape the timing of park programs.
 """)
-# -------------------------
-# FINAL 3-PARAGRAPH EXPLANATION (as provided by you, unchanged)
-# -------------------------
-st.header("📝 What this data story is showing")
 st.markdown("""
-Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events. Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities. Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
-Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities, and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name. This makes the information easy to use even for someone with no data experience. For example, you can look for free programs, summer-only programs, or activities at a specific park in your neighborhood.
-This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity. By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support. Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
 """)
-# -------------------------
 # CITATIONS
-# -------------------------
 st.markdown("---")
 st.subheader("Citations & Data Sources")
 st.markdown("""
-**Primary dataset:**
-Chicago Park District Activities — City of Chicago Data Portal
-https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
 """)

 # streamlit_app.py
 """
+Chicago Parks in Motion — Streamlit App
+Authors: Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)
 Primary dataset: Chicago Park District Activities
+Source: https://data.cityofchicago.org/resource/tn7v-6rnw.csv
 """
 import streamlit as st
 import pandas as pd
 import numpy as np
 import plotly.express as px
 st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
+# ---------------------------------------------------
+# LOAD & CLEAN DATA
+# ---------------------------------------------------
 @st.cache_data(ttl=3600)
 def load_data():
+    """Download the Chicago Park District Activities CSV and add derived columns.
+
+    All columns are read as strings. When present, ``fee`` is converted to a
+    number and ``start_date`` / ``end_date`` to datetimes; values that cannot
+    be parsed become NaN / NaT. The returned frame always contains
+    ``latitude``, ``longitude``, ``activity_type_clean``, ``park_name`` and
+    ``season``, falling back to NaN / "Unknown" / "Unknown Park" when the
+    source column or value is missing.
+
+    Returns:
+        pandas.DataFrame: one row per CSV record. Streamlit caches the result
+        for an hour (``ttl=3600``).
+
+    Raises:
+        Whatever ``pd.read_csv`` raises when the download or parse fails; the
+        error is not caught here.
+    """
+    url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
+    df = pd.read_csv(url, dtype=str)
+    # Clean columns
     df.columns = [c.strip() for c in df.columns]
+    # Fee → numeric
     if "fee" in df.columns:
         df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
     # Extract lat/lon
     if "location" in df.columns:
+        def parse_location(val):
+            """Return ``(lat, lon)`` from a ``POINT (<lon> <lat>)`` string, else NaNs."""
+            if pd.isna(val):
+                return (np.nan, np.nan)
+            sval = str(val)
+            if sval.startswith("POINT"):
+                try:
+                    inside = sval.split("(", 1)[1].rstrip(")")
+                    lon, lat = map(float, inside.split())
+                    return lat, lon
+                except (IndexError, ValueError):
+                    # No "(" in the text, non-numeric parts, or not exactly two numbers.
+                    return (np.nan, np.nan)
+            return (np.nan, np.nan)
+        latlon = df["location"].map(parse_location)
         df["latitude"] = latlon.map(lambda x: x[0])
         df["longitude"] = latlon.map(lambda x: x[1])
+    else:
+        # Keep both columns present so code that selects them does not hit a KeyError.
+        df["latitude"] = np.nan
+        df["longitude"] = np.nan
     # Dates
     for c in ["start_date", "end_date"]:
         if c in df.columns:
+            # "coerce" turns bad values into NaT; errors="ignore" is deprecated and
+            # would leave the whole column as strings on a single bad value.
+            df[c] = pd.to_datetime(df[c], errors="coerce")
+    # Activity type
     if "activity_type" in df.columns:
         df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
+    elif "program_type" in df.columns:
+        df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
     else:
         df["activity_type_clean"] = "Unknown"
+    # Park names: use the first candidate column that exists
+    for col in ["park_name", "park", "site_name", "location_name"]:
+        if col in df.columns:
+            df["park_name"] = df[col].fillna("Unknown Park")
+            break
+    if "park_name" not in df.columns:
         df["park_name"] = "Unknown Park"
+    # Derive season
+    def season_from_date(dt):
+        """Return the season name for a date; "Unknown" when missing or unparseable."""
+        if isinstance(dt, str):
+            try:
+                dt = pd.to_datetime(dt)
+            except (ValueError, TypeError):
+                return "Unknown"
+        # NaT/NaN has no usable month; without this check it would fall through to "Fall".
+        if pd.isna(dt):
+            return "Unknown"
+        m = dt.month
+        if m in (12, 1, 2): return "Winter"
+        if m in (3, 4, 5): return "Spring"
+        if m in (6, 7, 8): return "Summer"
+        return "Fall"
+    if "start_date" in df.columns:
+        df["season"] = df["start_date"].map(season_from_date)
+    else:
+        df["season"] = "Unknown"
+    return df
 df = load_data()
+# ---------------------------------------------------
+# PAGE TITLE
+# ---------------------------------------------------
 st.title("Chicago Parks in Motion: How Our City Plays")
 st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
+st.info("This is our **central public-facing visualization project**, created for a data journalism assignment.")
+# ---------------------------------------------------
+# FILTERS
+# ---------------------------------------------------
+st.sidebar.header("Filters")
+categories = sorted(df["activity_type_clean"].unique())
+chosen_category = st.sidebar.selectbox("Activity Category", ["All"] + categories)
 seasons = sorted(df["season"].unique())
 chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
 if "fee" in df.columns:
+    # Missing fees count as 0 here and in the filter below, so those rows always pass.
+    max_fee = float(df["fee"].fillna(0).max())
+    fee_limit = st.sidebar.slider("Maximum Fee (USD)", 0.0, max_fee, max_fee)
 else:
     fee_limit = None
+search_park = st.sidebar.text_input("Search Park Name")
+st.sidebar.caption("Filters help readers explore the data easily without technical skills.")
+# Filter logic: each control narrows `filtered` only when it is set to a non-default value.
 filtered = df.copy()
 if chosen_category != "All":
     filtered = filtered[filtered["activity_type_clean"] == chosen_category]
 if chosen_season != "All":
     filtered = filtered[filtered["season"] == chosen_season]
 if fee_limit is not None:
     filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
+if search_park:
+    # regex=False: match the typed text literally, so input such as "(" cannot raise a regex error.
+    filtered = filtered[filtered["park_name"].str.contains(search_park, case=False, regex=False, na=False)]
+st.sidebar.markdown(f"**Programs Shown:** {len(filtered):,}")
+# ---------------------------------------------------
+# CENTRAL VISUALIZATION (MAP)
+# ---------------------------------------------------
+st.subheader("Central Visualization — Programs by Park (Interactive Map)")
+# Draw the map only when both coordinate columns exist and at least one row has both values.
+has_coords = (
+    {"latitude", "longitude"}.issubset(filtered.columns)
+    and not filtered[["latitude", "longitude"]].dropna().empty
+)
+if has_coords:
+    # One marker per (park, coordinate) pair; "count" is the number of rows for that pair.
+    agg = filtered.groupby(["park_name","latitude","longitude"]).size().reset_index(name="count")
+    fig_map = px.scatter_mapbox(
         agg,
+        lat="latitude",
+        lon="longitude",
+        size="count",
+        size_max=30,
         color="count",
         color_continuous_scale="Cividis",
+        # Title each tooltip with the park name so hovering identifies the park.
+        hover_name="park_name",
+        zoom=10,
+        height=550
     )
+    fig_map.update_layout(mapbox_style="open-street-map")
+    st.plotly_chart(fig_map, use_container_width=True)
+    st.write("""
+    **Explanation:**
+    This map shows which parks offer the most programs. Larger and darker circles represent parks with more activities.
+    The Cividis color scale makes it easy for all viewers—including those with color-vision differences—to understand intensity.
+    You can hover over any park to see its program count and use the filters to explore activity types, seasons, or fees.
+    """)
+else:
+    st.warning("No geographic coordinates available for mapping.")
+# ---------------------------------------------------
+# CONTEXTUAL VIS 1 — CATEGORY PIE CHART
+# ---------------------------------------------------
+st.subheader("Contextual Visualization 1 — Activity Category Breakdown")
+# Rows per activity category over the whole dataset (sidebar filters are not applied here).
+cat_counts = (
+    df["activity_type_clean"]
+    .value_counts()
+    .rename_axis("Category")
+    .reset_index(name="Count")
+)
+fig_cat = px.pie(
+    cat_counts,
+    values="Count",
+    names="Category",
+    color_discrete_sequence=px.colors.sequential.Cividis,
+)
 st.plotly_chart(fig_cat, use_container_width=True)
+st.write("""
+**Explanation:**
+This pie chart shows the distribution of all activity categories across Chicago parks.
+Some categories—like sports and youth programs—appear more often, while others are limited.
+Using the same Cividis-inspired palette keeps the visuals consistent for the public.
 """)
+# ---------------------------------------------------
+# CONTEXTUAL VIS 2 — PROGRAMS BY SEASON
+# ---------------------------------------------------
+st.subheader("Contextual Visualization 2 — Programs by Season")
+# Rows per season over the whole dataset (sidebar filters are not applied here).
+season_counts = (
+    df["season"]
+    .value_counts()
+    .rename_axis("Season")
+    .reset_index(name="Program Count")
+)
+fig_season = px.bar(
+    season_counts,
+    x="Season",
+    y="Program Count",
+    color="Program Count",
+    color_continuous_scale="Cividis",
+    text="Program Count",
+    height=450,
+)
+# Print each bar's count just above the bar.
 fig_season.update_traces(textposition="outside")
 st.plotly_chart(fig_season, use_container_width=True)
+st.write("""
+**Explanation:**
+This chart shows how program offerings change by season. Summer and fall tend to have more activities,
+while winter shows fewer options. The color scale helps highlight the variation clearly and keeps the look consistent.
 """)
+# ---------------------------------------------------
+# DATA STORY NARRATIVE (THREE PARAGRAPHS)
+# ---------------------------------------------------
+st.markdown("---")
+st.header("What This Data Story Is Showing")
+# Blank lines separate the paragraphs; Markdown joins consecutive lines into one paragraph.
 st.markdown("""
+Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events.
+Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities.
+Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
+
+Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities,
+and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name.
+This makes the information easy to use even for someone with no data experience. For example, you can look for free programs,
+summer-only programs, or activities at a specific park in your neighborhood.
+
+This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity.
+By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support.
+Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
 """)
+# ---------------------------------------------------
 # CITATIONS
+# ---------------------------------------------------
 st.markdown("---")
 st.subheader("Citations & Data Sources")
 st.markdown("""
+- Chicago Park District Activities — City of Chicago Data Portal
+  https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
+- All visualizations created by the authors using Streamlit & Plotly.
 """)