FP3.1

Sleeping

App Files Files Community

jkhare2 committed on Dec 8, 2025

Commit

7d7ee3c

verified ·

1 Parent(s): 1154062

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +131 -289

src/streamlit_app.py CHANGED Viewed

@@ -10,7 +10,6 @@ import streamlit as st
 import pandas as pd
 import numpy as np
 import plotly.express as px
-from urllib.parse import urlencode
 st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
@@ -27,118 +26,49 @@ def load_data():
         st.error("Could not load dataset from the City of Chicago portal.")
         raise e
-    # Clean column names
     df.columns = [c.strip() for c in df.columns]
-    # Fee as numeric if present
     if "fee" in df.columns:
         df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
-    # -------------------------
-    # Extract lat/lon from location or the_geom
-    # -------------------------
-    lat_col = None
-    lon_col = None
-    if "location" in df.columns:
-        def parse_lat_lon(val):
-            if pd.isna(val):
-                return (np.nan, np.nan)
-            sval = str(val)
-            # POINT (lon lat)
-            if sval.startswith("POINT"):
-                try:
-                    inside = sval.split("(", 1)[1].rstrip(")")
-                    lon, lat = map(float, inside.strip().split())
-                    return lat, lon
-                except:
-                    return (np.nan, np.nan)
-            # JSON-like with latitude / longitude
-            if "latitude" in sval and "longitude" in sval:
-                try:
-                    import json
-                    j = json.loads(sval)
-                    return float(j.get("latitude", np.nan)), float(j.get("longitude", np.nan))
-                except:
-                    return (np.nan, np.nan)
-            # Fallback: two floats
-            import re
-            nums = re.findall(r"-?\d+\.\d+", sval)
-            if len(nums) >= 2:
-                return float(nums[0]), float(nums[1])
             return (np.nan, np.nan)
-        latlon = df["location"].map(parse_lat_lon)
-        df["latitude"] = latlon.map(lambda x: x[0])
-        df["longitude"] = latlon.map(lambda x: x[1])
-        lat_col, lon_col = "latitude", "longitude"
-    if "the_geom" in df.columns and (lat_col is None or lon_col is None):
-        def parse_the_geom(val):
-            if pd.isna(val):
                 return (np.nan, np.nan)
-            sval = str(val)
-            if "POINT" in sval:
-                try:
-                    inside = sval.split("(", 1)[1].rstrip(")")
-                    lon, lat = map(float, inside.strip().split())
-                    return lat, lon
-                except:
-                    return (np.nan, np.nan)
-            return (np.nan, np.nan)
-        latlon = df["the_geom"].map(parse_the_geom)
         df["latitude"] = latlon.map(lambda x: x[0])
         df["longitude"] = latlon.map(lambda x: x[1])
-        lat_col, lon_col = "latitude", "longitude"
-    # -------------------------
-    # Parse dates
-    # -------------------------
     for c in ["start_date", "end_date"]:
         if c in df.columns:
             df[c] = pd.to_datetime(df[c], errors="coerce")
-    # -------------------------
-    # Activity type cleaning
-    # -------------------------
     if "activity_type" in df.columns:
         df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
     else:
-        if "program_type" in df.columns:
-            df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
-        elif "category" in df.columns:
-            df["activity_type_clean"] = df["category"].str.title().fillna("Unknown")
-        else:
-            df["activity_type_clean"] = "Unknown"
-    # -------------------------
-    # Park name extraction
-    # -------------------------
-    possible_park_cols = [
-        "park_name",
-        "park",
-        "location_facility",
-        "location_name",
-        "location",
-        "site_name"
-    ]
-    park_col = None
-    for col in possible_park_cols:
-        if col in df.columns:
-            park_col = col
-            break
-    if park_col is not None:
-        df["park_name"] = (
-            df[park_col]
-            .astype(str)
-            .replace(["", "nan", "None"], "Unknown Park")
-        )
     else:
         df["park_name"] = "Unknown Park"
@@ -148,16 +78,10 @@ def load_data():
 df = load_data()
 # -------------------------
-# Page header
 # -------------------------
 st.title("Chicago Parks in Motion: How Our City Plays")
-st.markdown("**Author:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
-# Explicit central vis label for rubric
-st.info(
-    "**Central Visualization:** The main map/bar chart of programs by park is our central interactive "
-    "visualization for this public-facing data story."
-)
 # -------------------------
 # Sidebar filters
@@ -165,51 +89,31 @@ st.info(
 st.sidebar.header("Filters & Settings")
 categories = sorted(df["activity_type_clean"].dropna().unique())
-categories = [c for c in categories if c != "nan"]
 chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
-# Season calculation helper
 def season_from_date(dt):
-    if pd.isna(dt):
-        return "Unknown"
     m = dt.month
-    if m in (12, 1, 2):
-        return "Winter"
-    if m in (3, 4, 5):
-        return "Spring"
-    if m in (6, 7, 8):
-        return "Summer"
     return "Fall"
-if "start_date" in df.columns:
-    df["season"] = df["start_date"].map(season_from_date)
-else:
-    df["season"] = "Unknown"
-seasons = sorted(df["season"].dropna().unique())
 chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
-# Price filter
-has_fee_col = "fee" in df.columns
-if has_fee_col:
-    max_fee = float(np.nanmax(df["fee"].fillna(0)))
-    fee_limit = st.sidebar.slider(
-        "Maximum fee (USD)",
-        0.0,
-        max(1.0, max_fee),
-        float(max_fee)
-    )
 else:
     fee_limit = None
-# Park name search
-park_query = st.sidebar.text_input("Search park name (partial)")
-# Accessibility note about filters
-st.sidebar.caption(
-    "Filters help novice users explore the dataset without needing technical skills, "
-    "making the app more accessible and intuitive."
-)
 # -------------------------
 # Filtering logic
@@ -219,110 +123,74 @@ if chosen_category != "All":
     filtered = filtered[filtered["activity_type_clean"] == chosen_category]
 if chosen_season != "All":
     filtered = filtered[filtered["season"] == chosen_season]
-if fee_limit is not None and "fee" in filtered.columns:
     filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
-if park_query:
-    filtered = filtered[
-        filtered["park_name"].str.contains(park_query, case=False, na=False)
-    ]
-st.sidebar.markdown(f"**Programs shown:** {len(filtered):,}")
-# ======================================================
-# CENTRAL VISUALIZATION (FULL WIDTH, TOP)
-# ======================================================
-st.subheader("Central Interactive Visualization — Programs by Park")
-view_type = st.radio(
-    "Choose how to view park activity:",
-    ["Map (recommended)", "Bar chart (count by park)"],
-    horizontal=True
-)
-if view_type.startswith("Map"):
-    if (
-        "latitude" in filtered.columns
-        and "longitude" in filtered.columns
-        and filtered[["latitude", "longitude"]].dropna().shape[0] > 0
-    ):
-        agg = (
-            filtered
-            .groupby(["park_name", "latitude", "longitude"], dropna=True)
-            .size()
-            .reset_index(name="count")
-        )
         fig_map = px.scatter_mapbox(
             agg,
             lat="latitude",
             lon="longitude",
             size="count",
-            size_max=32,
-            hover_name="park_name",
-            hover_data={"count": True},
             color="count",
-            # Dark sequential orange scale – strong contrast against map
-            color_continuous_scale=["#FFB366", "#CC5500"],
             zoom=10,
             height=600,
         )
-        fig_map.update_traces(
-            marker=dict(
-                opacity=0.92,
-                sizemode="area",
-            )
-        )
-        fig_map.update_layout(
-            mapbox_style="open-street-map",
-            margin={"r": 0, "t": 0, "l": 0, "b": 0},
-        )
         st.plotly_chart(fig_map, use_container_width=True)
-        st.caption(
-            "Each circle represents a park. Bigger and darker circles show parks with more programs. "
-            "We use a dark sequential colormap so parks stand out clearly against the map background."
-        )
     else:
-        st.warning(
-            "No geographic coordinates found in the dataset for the current filters. "
-            "Try switching to the bar chart view."
-        )
 else:
-    agg = (
-        filtered
-        .groupby("park_name")
-        .size()
-        .reset_index(name="count")
-        .sort_values("count", ascending=False)
-    )
-    top_n = 25
-    agg_top = agg.head(top_n)
     fig_bar = px.bar(
-        agg_top,
         x="count",
         y="park_name",
         orientation="h",
         color="count",
         color_continuous_scale="Cividis",
-        labels={"count": "Number of programs", "park_name": "Park"},
-        height=700,
     )
-    fig_bar.update_layout(yaxis={"categoryorder": "total ascending"})
     st.plotly_chart(fig_bar, use_container_width=True)
-    st.caption(
-        "This bar chart shows the parks with the most programs. "
-        "A sequential 'Cividis' colormap helps highlight which parks stand out, "
-        "while remaining friendly for viewers with color-vision differences."
-    )
-# Optional sample table
-if st.checkbox("Show program sample table (first 50 rows)"):
-    st.dataframe(filtered.head(50))
-# ======================================================
-# CONTEXTUAL VISUALIZATION 1 — Activity Category Breakdown
-# ======================================================
-st.markdown("---")
-st.subheader("Contextual Visualization 1 — What kinds of activities do parks offer?")
 cat_counts = df["activity_type_clean"].value_counts().reset_index()
 cat_counts.columns = ["activity_type", "count"]
@@ -332,92 +200,66 @@ fig_cat = px.pie(
     names="activity_type",
     values="count",
     hole=0.35,
-    color_discrete_sequence=px.colors.qualitative.Set3,
 )
 st.plotly_chart(fig_cat, use_container_width=True)
-st.caption(
-    "This chart shows how programs are split across activity categories, such as sports, aquatics, arts, "
-    "and youth programming. We use a qualitative color palette so each category has its own distinct color, "
-    "making it easier for readers to tell them apart at a glance."
-)
-# ======================================================
-# CONTEXTUAL VISUALIZATION 2 — Programs by Season
-# ======================================================
-st.markdown("---")
-st.subheader("Contextual Visualization 2 — When are most programs offered?")
-season_col = None
-for c in df.columns:
-    if "season" in c.lower():
-        season_col = c
-        break
-if season_col is None:
-    st.error("No season information found in the dataset.")
-else:
-    season_counts = df[season_col].dropna().value_counts().reset_index()
-    season_counts.columns = ["Season", "Program Count"]
-    fig_season = px.bar(
-        season_counts,
-        x="Season",
-        y="Program Count",
-        color="Program Count",
-        color_continuous_scale="Tealgrn",
-        text="Program Count",
-        title="Number of Programs Offered by Season",
-    )
-    fig_season.update_traces(textposition="outside")
-    st.plotly_chart(fig_season, use_container_width=True)
-    st.caption(
-        "This bar chart shows how many programs run in each season. "
-        "A sequential colormap emphasizes the difference between busy and quiet seasons without adding clutter, "
-        "which helps novice viewers focus on the main pattern."
-    )
-# ======================================================
-# DATA & NOTEBOOK INFO
-# ======================================================
-st.markdown("---")
-st.subheader("Data & Notebook")
 st.markdown("""
-**Primary dataset:** Chicago Park District Activities — City of Chicago Data Portal
-<https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw>
-Both contextual visualizations (the activity category breakdown and the seasonal program chart) were also created in our Jupyter Notebook
-as part of our original analysis, then migrated here into this Streamlit app for a more public-friendly, interactive experience.
 """)
-# ======================================================
-# WRITE-UP (3 simple paragraphs for the public)
-# ======================================================
 st.markdown("---")
-st.header("What this data story is showing")
 st.markdown("""
-Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events.
-Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities.
-Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
-Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities, and more visitors.
-With the filters on the left, anyone can explore the data by season, activity type, price, or park name.
-This makes the information easy to use even for someone with no data experience.
-For example, you can look for free programs, summer-only programs, or activities at a specific park in your neighborhood.
-This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity.
-By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support.
-Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
 """)
-# ======================================================
-# FOOTER
-# ======================================================
-st.markdown("---")
-st.markdown(
-    "**Acknowledgements & citations:** Data retrieved directly from the City of Chicago Data Portal (Socrata API). "
-    "All visualizations were created by the authors using Python and Streamlit."
-)

 import pandas as pd
 import numpy as np
 import plotly.express as px
 st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
         st.error("Could not load dataset from the City of Chicago portal.")
         raise e
     df.columns = [c.strip() for c in df.columns]
     if "fee" in df.columns:
         df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
+    # Extract lat/lon
+    def extract_latlon(val):
+        if pd.isna(val):
             return (np.nan, np.nan)
+        sval = str(val)
+        if "POINT" in sval:
+            try:
+                inside = sval.split("(", 1)[1].rstrip(")")
+                lon, lat = map(float, inside.split())
+                return lat, lon
+            except:
                 return (np.nan, np.nan)
+        return (np.nan, np.nan)
+    if "location" in df.columns:
+        latlon = df["location"].map(extract_latlon)
         df["latitude"] = latlon.map(lambda x: x[0])
         df["longitude"] = latlon.map(lambda x: x[1])
+    else:
+        df["latitude"] = np.nan
+        df["longitude"] = np.nan
+    # Dates
     for c in ["start_date", "end_date"]:
         if c in df.columns:
             df[c] = pd.to_datetime(df[c], errors="coerce")
+    # Activity type clean
     if "activity_type" in df.columns:
         df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
     else:
+        df["activity_type_clean"] = "Unknown"
+    # Park name
+    possible_names = ["park_name", "park", "location_facility", "location_name", "site_name"]
+    park_col = next((col for col in possible_names if col in df.columns), None)
+    if park_col:
+        df["park_name"] = df[park_col].astype(str).replace(["", "nan", "None"], "Unknown Park")
     else:
         df["park_name"] = "Unknown Park"
 df = load_data()
 # -------------------------
+# Title
 # -------------------------
 st.title("Chicago Parks in Motion: How Our City Plays")
+st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
 # -------------------------
 # Sidebar filters
 st.sidebar.header("Filters & Settings")
 categories = sorted(df["activity_type_clean"].dropna().unique())
 chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
+# Season detection
 def season_from_date(dt):
+    if pd.isna(dt): return "Unknown"
     m = dt.month
+    if m in [12,1,2]: return "Winter"
+    if m in [3,4,5]: return "Spring"
+    if m in [6,7,8]: return "Summer"
     return "Fall"
+df["season"] = df["start_date"].map(season_from_date)
+seasons = sorted(df["season"].unique())
 chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
+if "fee" in df.columns:
+    max_fee = float(df["fee"].fillna(0).max())
+    fee_limit = st.sidebar.slider("Maximum fee (USD)", 0.0, max_fee, max_fee)
 else:
     fee_limit = None
+park_search = st.sidebar.text_input("Search park name (partial)")
+# Accessibility hint
+st.sidebar.caption("Filters help beginners explore the dataset easily without technical skills.")
 # -------------------------
 # Filtering logic
     filtered = filtered[filtered["activity_type_clean"] == chosen_category]
 if chosen_season != "All":
     filtered = filtered[filtered["season"] == chosen_season]
+if fee_limit is not None:
     filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
+if park_search:
+    filtered = filtered[filtered["park_name"].str.contains(park_search, case=False)]
+st.sidebar.write(f"Programs shown: **{len(filtered):,}**")
+# -------------------------
+# CENTRAL VISUALIZATION
+# -------------------------
+st.header("Central Interactive Visualization — Programs by Park")
+view = st.radio("Choose a view:", ["Map (recommended)", "Bar chart"], horizontal=True)
+if view.startswith("Map"):
+    # Aggregate for map
+    agg = (
+        filtered.groupby(["park_name", "latitude", "longitude"], dropna=True)
+        .size().reset_index(name="count")
+    )
+    if agg.dropna().shape[0] > 0:
         fig_map = px.scatter_mapbox(
             agg,
             lat="latitude",
             lon="longitude",
             size="count",
             color="count",
+            color_continuous_scale="Bluered",
+            size_max=28,
             zoom=10,
+            hover_name="park_name",
+            hover_data={"count": True},
             height=600,
         )
+        fig_map.update_layout(mapbox_style="open-street-map", margin=dict(l=0,r=0,b=0,t=0))
         st.plotly_chart(fig_map, use_container_width=True)
     else:
+        st.warning("No geographic coordinates available for this filtered view.")
 else:
+    agg = filtered.groupby("park_name").size().reset_index(name="count")
+    agg = agg.sort_values("count", ascending=False).head(20)
     fig_bar = px.bar(
+        agg,
         x="count",
         y="park_name",
         orientation="h",
         color="count",
         color_continuous_scale="Cividis",
+        height=600,
     )
+    fig_bar.update_layout(yaxis={'categoryorder':'total ascending'})
     st.plotly_chart(fig_bar, use_container_width=True)
+# Explanation under central viz
+st.markdown("""
+**What this visualization shows:**
+This is our main visualization because it helps readers understand where activities are happening across Chicago’s parks.
+The map shows each park as a circle, where larger and darker circles represent locations with more programs.
+This makes it easy to see which areas are activity hubs and which are quieter. The filters allow anyone to explore patterns by season,
+category, price, or park—without needing technical experience.
+""")
+# -------------------------
+# CONTEXTUAL VISUALIZATION 1
+# -------------------------
+st.header("Contextual Visualization 1 — Activity Category Breakdown")
 cat_counts = df["activity_type_clean"].value_counts().reset_index()
 cat_counts.columns = ["activity_type", "count"]
     names="activity_type",
     values="count",
     hole=0.35,
+    color_discrete_sequence=px.colors.sequential.RdBu
 )
 st.plotly_chart(fig_cat, use_container_width=True)
+st.markdown("""
+**Why this matters:**
+This chart shows what kinds of activities Chicago parks offer most often—such as sports, aquatics, arts, or youth programs.
+It helps readers understand the variety of programs available across the city.
+Using a simple color palette keeps the chart readable for people who may not be familiar with data visualization.
+""")
+# -------------------------
+# CONTEXTUAL VISUALIZATION 2
+# -------------------------
+st.header("Contextual Visualization 2 — Programs by Season")
+season_counts = df["season"].value_counts().reset_index()
+season_counts.columns = ["Season", "Program Count"]
+fig_season = px.bar(
+    season_counts,
+    x="Season",
+    y="Program Count",
+    color="Program Count",
+    color_continuous_scale="Tealgrn",
+    text="Program Count",
+    height=500,
+)
+fig_season.update_traces(textposition="outside")
+st.plotly_chart(fig_season, use_container_width=True)
+st.markdown("""
+**Why this is helpful:**
+This chart shows when programs are most active throughout the year.
+Comparing seasons helps readers see whether summer is the busiest time, or whether activities are spread evenly.
+This makes it easier for residents and planners to understand how weather, school schedules, and community needs
+shape the timing of park programs.
+""")
+# -------------------------
+# FINAL 3-PARAGRAPH EXPLANATION
+# -------------------------
+st.header("📝 What this data story is showing")
 st.markdown("""
+Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events. Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities. Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
+Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities, and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name. This makes the information easy to use even for someone with no data experience. For example, you can look for free programs, summer-only programs, or activities at a specific park in your neighborhood.
+This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity. By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support. Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
 """)
+# -------------------------
+# CITATIONS
+# -------------------------
 st.markdown("---")
+st.subheader("Citations & Data Sources")
 st.markdown("""
+**Primary dataset:**
+Chicago Park District Activities — City of Chicago Data Portal
+https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
 """)