Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +253 -231
src/streamlit_app.py
CHANGED
|
@@ -1,80 +1,127 @@
|
|
| 1 |
# streamlit_app.py
|
| 2 |
"""
|
| 3 |
-
Chicago Parks in Motion — Streamlit
|
| 4 |
-
|
| 5 |
Primary dataset: Chicago Park District Activities
|
| 6 |
-
|
| 7 |
"""
|
| 8 |
|
| 9 |
import streamlit as st
|
| 10 |
import pandas as pd
|
| 11 |
import numpy as np
|
| 12 |
import plotly.express as px
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
-
# ------------------------------------------------
|
| 16 |
-
# Page configuration
|
| 17 |
-
# ------------------------------------------------
|
| 18 |
st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
|
| 19 |
|
| 20 |
-
# -------------------------
|
| 21 |
-
# Load
|
| 22 |
-
# -------------------------
|
| 23 |
@st.cache_data(ttl=3600)
|
| 24 |
def load_data():
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
# Clean columns
|
| 29 |
df.columns = [c.strip() for c in df.columns]
|
| 30 |
|
| 31 |
-
# Fee → numeric
|
| 32 |
if "fee" in df.columns:
|
| 33 |
df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
|
| 34 |
|
| 35 |
-
# Extract
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
return (np.nan, np.nan)
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
return (np.nan, np.nan)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
df["
|
| 56 |
-
|
| 57 |
|
| 58 |
# Parse dates
|
| 59 |
-
for
|
| 60 |
-
if
|
| 61 |
-
df[
|
| 62 |
|
| 63 |
-
#
|
| 64 |
if "activity_type" in df.columns:
|
| 65 |
df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
|
| 66 |
else:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
possible_park_cols = [
|
| 71 |
-
"park_name",
|
| 72 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
]
|
|
|
|
|
|
|
| 74 |
for col in possible_park_cols:
|
| 75 |
if col in df.columns:
|
| 76 |
-
|
| 77 |
break
|
|
|
|
|
|
|
|
|
|
| 78 |
else:
|
| 79 |
df["park_name"] = "Unknown Park"
|
| 80 |
|
|
@@ -83,52 +130,61 @@ def load_data():
|
|
| 83 |
|
| 84 |
df = load_data()
|
| 85 |
|
| 86 |
-
# -------------------------
|
| 87 |
-
#
|
| 88 |
-
# -------------------------
|
| 89 |
st.title("Chicago Parks in Motion: How Our City Plays")
|
| 90 |
-
st.markdown("**
|
| 91 |
-
st.info("**Central visualization below:** Explore how program availability varies across Chicago’s parks. This is the main interactive visualization required by the rubric.")
|
| 92 |
|
| 93 |
-
#
|
| 94 |
-
|
| 95 |
-
# ------------------------------------------------
|
| 96 |
-
st.sidebar.header("Filters")
|
| 97 |
|
| 98 |
-
#
|
| 99 |
-
|
| 100 |
-
|
|
|
|
| 101 |
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
def season_from_date(dt):
|
| 104 |
-
if pd.isna(dt):
|
|
|
|
| 105 |
m = dt.month
|
| 106 |
-
if m in
|
| 107 |
-
|
| 108 |
-
if m in
|
|
|
|
|
|
|
|
|
|
| 109 |
return "Fall"
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
|
| 114 |
|
| 115 |
# Price filter
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
| 119 |
else:
|
| 120 |
fee_limit = None
|
| 121 |
|
| 122 |
-
|
| 123 |
-
park_query = st.sidebar.text_input("Search Park Name")
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
)
|
| 128 |
|
| 129 |
-
# -------------------------
|
| 130 |
-
#
|
| 131 |
-
# -------------------------
|
| 132 |
filtered = df.copy()
|
| 133 |
if chosen_category != "All":
|
| 134 |
filtered = filtered[filtered["activity_type_clean"] == chosen_category]
|
|
@@ -141,182 +197,148 @@ if park_query:
|
|
| 141 |
|
| 142 |
st.sidebar.markdown(f"**Programs shown:** {len(filtered):,}")
|
| 143 |
|
| 144 |
-
# -------------------------
|
| 145 |
-
#
|
| 146 |
-
# -------------------------
|
| 147 |
-
st.
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
#
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
):
|
| 158 |
-
|
| 159 |
-
filtered.groupby(["park_name", "latitude", "longitude"], dropna=True)
|
| 160 |
-
.
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
| 180 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
)
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
margin=dict(r=0, t=0, l=0, b=0),
|
| 186 |
-
paper_bgcolor="white"
|
| 187 |
-
)
|
| 188 |
|
| 189 |
-
|
|
|
|
| 190 |
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
|
| 200 |
-
#
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
)
|
| 208 |
-
top_n = 25
|
| 209 |
-
agg_top = agg.head(top_n)
|
| 210 |
-
|
| 211 |
-
fig_bar = px.bar(
|
| 212 |
-
agg_top,
|
| 213 |
-
x="count",
|
| 214 |
-
y="park_name",
|
| 215 |
-
orientation="h",
|
| 216 |
-
color="count",
|
| 217 |
-
color_continuous_scale="Cividis",
|
| 218 |
-
height=650,
|
| 219 |
-
labels={"count": "Number of Programs", "park_name": "Park"}
|
| 220 |
)
|
|
|
|
| 221 |
|
| 222 |
-
|
| 223 |
-
marker=dict(line=dict(width=0.7, color="black")),
|
| 224 |
-
text=agg_top["count"],
|
| 225 |
-
textposition="outside"
|
| 226 |
-
)
|
| 227 |
|
| 228 |
-
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
"
|
| 233 |
-
|
|
|
|
| 234 |
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
color="Count",
|
| 252 |
-
color_continuous_scale="Cividis"
|
| 253 |
-
)
|
| 254 |
-
|
| 255 |
-
st.plotly_chart(fig_cat, use_container_width=True)
|
| 256 |
-
|
| 257 |
-
st.markdown(
|
| 258 |
-
"This chart shows the types of activities offered across Chicago parks. "
|
| 259 |
-
"Most programs fall under instruction-based activities, such as fitness or sports classes, "
|
| 260 |
-
"while other categories appear less frequently. "
|
| 261 |
-
"Using Cividis keeps the colors readable and consistent with the rest of the app."
|
| 262 |
-
)
|
| 263 |
-
|
| 264 |
-
# ------------------------------------------------
|
| 265 |
-
# CONTEXTUAL VISUALIZATION 2 – PROGRAMS BY SEASON
|
| 266 |
-
# ------------------------------------------------
|
| 267 |
-
st.header("Contextual Visualization 2 — Programs by Season")
|
| 268 |
-
|
| 269 |
-
season_counts = (
|
| 270 |
-
df["season"]
|
| 271 |
-
.value_counts()
|
| 272 |
-
.reset_index()
|
| 273 |
-
)
|
| 274 |
-
season_counts.columns = ["Season", "Program Count"]
|
| 275 |
-
|
| 276 |
-
fig_season = px.bar(
|
| 277 |
-
season_counts,
|
| 278 |
-
x="Season",
|
| 279 |
-
y="Program Count",
|
| 280 |
-
color="Program Count",
|
| 281 |
-
color_continuous_scale="Cividis",
|
| 282 |
-
text="Program Count",
|
| 283 |
-
height=500,
|
| 284 |
-
)
|
| 285 |
-
|
| 286 |
-
fig_season.update_traces(textposition="outside")
|
| 287 |
-
|
| 288 |
-
st.plotly_chart(fig_season, use_container_width=True)
|
| 289 |
-
|
| 290 |
-
st.markdown(
|
| 291 |
-
"This bar chart shows how program availability changes across seasons. "
|
| 292 |
-
"Summer has the highest number of activities, which matches outdoor recreation trends. "
|
| 293 |
-
"The same Cividis palette keeps the experience uniform and accessible."
|
| 294 |
-
)
|
| 295 |
-
|
| 296 |
-
# ------------------------------------------------
|
| 297 |
-
# REQUIRED FINAL NARRATIVE SECTION (Your Own Text)
|
| 298 |
-
# ------------------------------------------------
|
| 299 |
-
st.markdown("---")
|
| 300 |
-
st.header("What this data story is showing")
|
| 301 |
|
| 302 |
-
st.
|
| 303 |
-
|
| 304 |
|
| 305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
|
|
|
| 309 |
|
| 310 |
-
# -------------------------
|
| 311 |
-
#
|
| 312 |
-
# -------------------------
|
| 313 |
st.markdown("---")
|
| 314 |
-
st.
|
| 315 |
-
|
| 316 |
st.markdown("""
|
| 317 |
-
**
|
| 318 |
-
|
| 319 |
-
|
| 320 |
|
| 321 |
-
|
| 322 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# streamlit_app.py
|
| 2 |
"""
|
| 3 |
+
Chicago Parks in Motion — Streamlit app
|
| 4 |
+
Author: Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)
|
| 5 |
Primary dataset: Chicago Park District Activities
|
| 6 |
+
Data source (CSV endpoint): https://data.cityofchicago.org/resource/tn7v-6rnw.csv
|
| 7 |
"""
|
| 8 |
|
| 9 |
import streamlit as st
|
| 10 |
import pandas as pd
|
| 11 |
import numpy as np
|
| 12 |
import plotly.express as px
|
| 13 |
+
from urllib.parse import urlencode
|
| 14 |
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
|
| 17 |
|
| 18 |
+
# -------------------------
|
| 19 |
+
# Helper: Load & preprocess
|
| 20 |
+
# -------------------------
|
| 21 |
@st.cache_data(ttl=3600)
|
| 22 |
def load_data():
|
| 23 |
+
csv_url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
|
| 24 |
+
try:
|
| 25 |
+
df = pd.read_csv(csv_url, dtype=str)
|
| 26 |
+
except Exception as e:
|
| 27 |
+
st.error("Could not load dataset from the City of Chicago portal.")
|
| 28 |
+
raise e
|
| 29 |
|
|
|
|
| 30 |
df.columns = [c.strip() for c in df.columns]
|
| 31 |
|
|
|
|
| 32 |
if "fee" in df.columns:
|
| 33 |
df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
|
| 34 |
|
| 35 |
+
# Extract lat/lon from location or the_geom
|
| 36 |
+
lat_col = None
|
| 37 |
+
lon_col = None
|
| 38 |
+
|
| 39 |
+
if "location" in df.columns:
|
| 40 |
+
def parse_lat_lon(val):
|
| 41 |
+
if pd.isna(val):
|
| 42 |
+
return (np.nan, np.nan)
|
| 43 |
+
sval = str(val)
|
| 44 |
+
|
| 45 |
+
if sval.startswith("POINT"):
|
| 46 |
+
try:
|
| 47 |
+
inside = sval.split("(", 1)[1].rstrip(")")
|
| 48 |
+
lon, lat = map(float, inside.strip().split())
|
| 49 |
+
return lat, lon
|
| 50 |
+
except:
|
| 51 |
+
return (np.nan, np.nan)
|
| 52 |
+
|
| 53 |
+
if "latitude" in sval and "longitude" in sval:
|
| 54 |
+
try:
|
| 55 |
+
import json
|
| 56 |
+
j = json.loads(sval)
|
| 57 |
+
return float(j.get("latitude", np.nan)), float(j.get("longitude", np.nan))
|
| 58 |
+
except:
|
| 59 |
+
return (np.nan, np.nan)
|
| 60 |
+
|
| 61 |
+
import re
|
| 62 |
+
nums = re.findall(r"-?\d+\.\d+", sval)
|
| 63 |
+
if len(nums) >= 2:
|
| 64 |
+
return float(nums[0]), float(nums[1])
|
| 65 |
return (np.nan, np.nan)
|
| 66 |
+
|
| 67 |
+
latlon = df["location"].map(parse_lat_lon)
|
| 68 |
+
df["latitude"] = latlon.map(lambda x: x[0])
|
| 69 |
+
df["longitude"] = latlon.map(lambda x: x[1])
|
| 70 |
+
lat_col, lon_col = "latitude", "longitude"
|
| 71 |
+
|
| 72 |
+
if "the_geom" in df.columns and (lat_col is None or lon_col is None):
|
| 73 |
+
def parse_the_geom(val):
|
| 74 |
+
if pd.isna(val):
|
| 75 |
return (np.nan, np.nan)
|
| 76 |
+
sval = str(val)
|
| 77 |
+
if "POINT" in sval:
|
| 78 |
+
try:
|
| 79 |
+
inside = sval.split("(", 1)[1].rstrip(")")
|
| 80 |
+
lon, lat = map(float, inside.strip().split())
|
| 81 |
+
return lat, lon
|
| 82 |
+
except:
|
| 83 |
+
return (np.nan, np.nan)
|
| 84 |
+
return (np.nan, np.nan)
|
| 85 |
|
| 86 |
+
latlon = df["the_geom"].map(parse_the_geom)
|
| 87 |
+
df["latitude"] = latlon.map(lambda x: x[0])
|
| 88 |
+
df["longitude"] = latlon.map(lambda x: x[1])
|
| 89 |
+
lat_col, lon_col = "latitude", "longitude"
|
| 90 |
|
| 91 |
# Parse dates
|
| 92 |
+
for c in ["start_date", "end_date"]:
|
| 93 |
+
if c in df.columns:
|
| 94 |
+
df[c] = pd.to_datetime(df[c], errors="coerce")
|
| 95 |
|
| 96 |
+
# Activity type cleaning
|
| 97 |
if "activity_type" in df.columns:
|
| 98 |
df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
|
| 99 |
else:
|
| 100 |
+
if "program_type" in df.columns:
|
| 101 |
+
df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
|
| 102 |
+
elif "category" in df.columns:
|
| 103 |
+
df["activity_type_clean"] = df["category"].str.title().fillna("Unknown")
|
| 104 |
+
else:
|
| 105 |
+
df["activity_type_clean"] = "Unknown"
|
| 106 |
+
|
| 107 |
+
# Park name extraction
|
| 108 |
possible_park_cols = [
|
| 109 |
+
"park_name",
|
| 110 |
+
"park",
|
| 111 |
+
"location_facility",
|
| 112 |
+
"location_name",
|
| 113 |
+
"location",
|
| 114 |
+
"site_name"
|
| 115 |
]
|
| 116 |
+
|
| 117 |
+
park_col = None
|
| 118 |
for col in possible_park_cols:
|
| 119 |
if col in df.columns:
|
| 120 |
+
park_col = col
|
| 121 |
break
|
| 122 |
+
|
| 123 |
+
if park_col is not None:
|
| 124 |
+
df["park_name"] = df[park_col].astype(str).replace(["", "nan", "None"], "Unknown Park")
|
| 125 |
else:
|
| 126 |
df["park_name"] = "Unknown Park"
|
| 127 |
|
|
|
|
| 130 |
|
| 131 |
df = load_data()
|
| 132 |
|
| 133 |
+
# -------------------------
|
| 134 |
+
# Page header + important note for rubric
|
| 135 |
+
# -------------------------
|
| 136 |
st.title("Chicago Parks in Motion: How Our City Plays")
|
| 137 |
+
st.markdown("**Author:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
|
|
|
|
| 138 |
|
| 139 |
+
# Explicitly label central vis for rubric
|
| 140 |
+
st.info("**Central Visualization:** The main map/bar chart of programs by park is our central interactive visualization for the public-facing data story.")
|
|
|
|
|
|
|
| 141 |
|
| 142 |
+
# -------------------------
|
| 143 |
+
# Sidebar filters
|
| 144 |
+
# -------------------------
|
| 145 |
+
st.sidebar.header("Filters & Settings")
|
| 146 |
|
| 147 |
+
categories = sorted(df["activity_type_clean"].dropna().unique())
|
| 148 |
+
categories = [c for c in categories if c != "nan"]
|
| 149 |
+
chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
|
| 150 |
+
|
| 151 |
+
# Season calculation
|
| 152 |
def season_from_date(dt):
|
| 153 |
+
if pd.isna(dt):
|
| 154 |
+
return "Unknown"
|
| 155 |
m = dt.month
|
| 156 |
+
if m in (12, 1, 2):
|
| 157 |
+
return "Winter"
|
| 158 |
+
if m in (3, 4, 5):
|
| 159 |
+
return "Spring"
|
| 160 |
+
if m in (6, 7, 8):
|
| 161 |
+
return "Summer"
|
| 162 |
return "Fall"
|
| 163 |
|
| 164 |
+
if "start_date" in df.columns:
|
| 165 |
+
df["season"] = df["start_date"].map(season_from_date)
|
| 166 |
+
else:
|
| 167 |
+
df["season"] = "Unknown"
|
| 168 |
+
|
| 169 |
+
seasons = sorted(df["season"].dropna().unique())
|
| 170 |
chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
|
| 171 |
|
| 172 |
# Price filter
|
| 173 |
+
has_fee_col = "fee" in df.columns
|
| 174 |
+
if has_fee_col:
|
| 175 |
+
max_fee = float(np.nanmax(df["fee"].fillna(0)))
|
| 176 |
+
fee_limit = st.sidebar.slider("Maximum fee (USD)", 0.0, max(1.0, max_fee), float(max_fee))
|
| 177 |
else:
|
| 178 |
fee_limit = None
|
| 179 |
|
| 180 |
+
park_query = st.sidebar.text_input("Search park name (partial)")
|
|
|
|
| 181 |
|
| 182 |
+
# Mention filters help accessibility
|
| 183 |
+
st.sidebar.caption("Filters help novice users explore the dataset without needing technical skills, making the app more accessible and intuitive.")
|
|
|
|
| 184 |
|
| 185 |
+
# -------------------------
|
| 186 |
+
# Filtering logic
|
| 187 |
+
# -------------------------
|
| 188 |
filtered = df.copy()
|
| 189 |
if chosen_category != "All":
|
| 190 |
filtered = filtered[filtered["activity_type_clean"] == chosen_category]
|
|
|
|
| 197 |
|
| 198 |
st.sidebar.markdown(f"**Programs shown:** {len(filtered):,}")
|
| 199 |
|
| 200 |
+
# -------------------------
|
| 201 |
+
# Layout
|
| 202 |
+
# -------------------------
|
| 203 |
+
main_col, side_col = st.columns((2, 1))
|
| 204 |
+
|
| 205 |
+
# -------------------------
|
| 206 |
+
# CENTRAL VISUALIZATION
|
| 207 |
+
# -------------------------
|
| 208 |
+
with main_col:
|
| 209 |
+
st.subheader("Central Interactive Visualization — Programs by Park")
|
| 210 |
+
|
| 211 |
+
view_type = st.radio("View type", ["Map (recommended)", "Bar chart (count by park)"], horizontal=True)
|
| 212 |
+
|
| 213 |
+
if view_type.startswith("Map"):
|
| 214 |
+
if "latitude" in filtered.columns and "longitude" in filtered.columns and filtered[["latitude","longitude"]].dropna().shape[0] > 0:
|
| 215 |
+
agg = filtered.groupby(["park_name", "latitude", "longitude"], dropna=True).size().reset_index(name="count")
|
| 216 |
+
fig_map = px.scatter_mapbox(
|
| 217 |
+
agg,
|
| 218 |
+
lat="latitude",
|
| 219 |
+
lon="longitude",
|
| 220 |
+
size="count",
|
| 221 |
+
size_max=32,
|
| 222 |
+
hover_name="park_name",
|
| 223 |
+
hover_data={"count": True},
|
| 224 |
+
color="count",
|
| 225 |
+
# 🔥 Dark sequential orange scale – no white, no camouflage
|
| 226 |
+
color_continuous_scale=["#FFB366", "#CC5500"],
|
| 227 |
+
zoom=10,
|
| 228 |
+
height=600,
|
| 229 |
+
)
|
| 230 |
+
fig_map.update_traces(
|
| 231 |
+
marker=dict(
|
| 232 |
+
opacity=0.92,
|
| 233 |
+
sizemode="area",
|
| 234 |
+
)
|
| 235 |
+
)
|
| 236 |
+
fig_map.update_layout(
|
| 237 |
+
mapbox_style="open-street-map",
|
| 238 |
+
margin={"r": 0, "t": 0, "l": 0, "b": 0},
|
| 239 |
)
|
| 240 |
+
st.plotly_chart(fig_map, use_container_width=True)
|
| 241 |
+
st.caption("A dark sequential orange colormap highlights parks with higher program counts while remaining visible against the map background.")
|
| 242 |
+
else:
|
| 243 |
+
st.warning("No geographic coordinates found in dataset. Try bar chart view.")
|
| 244 |
+
else:
|
| 245 |
+
agg = filtered.groupby("park_name").size().reset_index(name="count").sort_values("count", ascending=False)
|
| 246 |
+
top_n = 25
|
| 247 |
+
agg_top = agg.head(top_n)
|
| 248 |
+
|
| 249 |
+
fig_bar = px.bar(
|
| 250 |
+
agg_top,
|
| 251 |
+
x="count",
|
| 252 |
+
y="park_name",
|
| 253 |
+
orientation="h",
|
| 254 |
+
color="count",
|
| 255 |
+
color_continuous_scale="Cividis",
|
| 256 |
+
labels={"count": "Number of programs", "park_name": "Park"},
|
| 257 |
+
height=700,
|
| 258 |
)
|
| 259 |
|
| 260 |
+
st.plotly_chart(fig_bar, use_container_width=True)
|
| 261 |
+
st.caption("A sequential 'Cividis' colormap supports accessibility and clearly shows which parks host more programs.")
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
+
if st.checkbox("Show program sample table (first 50 rows)"):
|
| 264 |
+
st.dataframe(filtered.head(50))
|
| 265 |
|
| 266 |
+
# -------------------------
|
| 267 |
+
# CONTEXTUAL VISUALIZATIONS
|
| 268 |
+
# -------------------------
|
| 269 |
+
with side_col:
|
| 270 |
+
st.subheader("Contextual Visual 1 — Activity category breakdown")
|
| 271 |
|
| 272 |
+
cat_counts = df["activity_type_clean"].value_counts().reset_index()
|
| 273 |
+
cat_counts.columns = ["activity_type", "count"]
|
| 274 |
|
| 275 |
+
# Use a categorical palette for categories (better for rubric)
|
| 276 |
+
fig_cat = px.pie(
|
| 277 |
+
cat_counts,
|
| 278 |
+
names="activity_type",
|
| 279 |
+
values="count",
|
| 280 |
+
hole=0.35,
|
| 281 |
+
color_discrete_sequence=px.colors.qualitative.Set3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
)
|
| 283 |
+
st.plotly_chart(fig_cat, use_container_width=True)
|
| 284 |
|
| 285 |
+
st.caption("A qualitative color palette is used here so that each activity category is clearly distinguishable for readers.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
+
# Contextual Visualization 2
|
| 288 |
+
st.subheader("Contextual Visual 2 — Programs by Season")
|
| 289 |
|
| 290 |
+
season_col = None
|
| 291 |
+
for c in df.columns:
|
| 292 |
+
if "season" in c.lower():
|
| 293 |
+
season_col = c
|
| 294 |
+
break
|
| 295 |
|
| 296 |
+
if season_col is None:
|
| 297 |
+
st.error("No season column found.")
|
| 298 |
+
else:
|
| 299 |
+
season_counts = df[season_col].dropna().value_counts().reset_index()
|
| 300 |
+
season_counts.columns = ["Season", "Program Count"]
|
| 301 |
+
|
| 302 |
+
fig_season = px.bar(
|
| 303 |
+
season_counts,
|
| 304 |
+
x="Season",
|
| 305 |
+
y="Program Count",
|
| 306 |
+
color="Program Count",
|
| 307 |
+
color_continuous_scale="Tealgrn",
|
| 308 |
+
text="Program Count",
|
| 309 |
+
title="Number of Programs Offered by Season",
|
| 310 |
+
)
|
| 311 |
+
fig_season.update_traces(textposition="outside")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
+
st.plotly_chart(fig_season, use_container_width=True)
|
| 314 |
+
st.caption("A sequential colormap emphasizes differences in program volume across seasons without overwhelming novice viewers.")
|
| 315 |
|
| 316 |
+
# Data & Notebook Link Section
|
| 317 |
+
st.markdown("---")
|
| 318 |
+
st.subheader("Data & Notebook")
|
| 319 |
+
st.markdown("""
|
| 320 |
+
**Primary dataset:** Chicago Park District Activities — City of Chicago Data Portal
|
| 321 |
+
https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
|
| 322 |
|
| 323 |
+
Both contextual visualizations (category breakdown and seasonal programs) were also created in our Jupyter Notebook
|
| 324 |
+
as part of our analysis, then migrated here for this public-facing Streamlit app.
|
| 325 |
+
""")
|
| 326 |
|
| 327 |
+
# -------------------------
|
| 328 |
+
# WRITE-UP
|
| 329 |
+
# -------------------------
|
| 330 |
st.markdown("---")
|
| 331 |
+
st.header("What this data story is showing")
|
|
|
|
| 332 |
st.markdown("""
|
| 333 |
+
**1)** Chicago’s parks host a wide range of programs: everything from aquatics and sports to arts, senior programming, and day camps. This app focuses on the *program-level* view: each row in the dataset corresponds to a specific program offering. The central visualization helps users quickly identify which parks are major activity hubs and which types of programs are most common.
|
| 334 |
+
|
| 335 |
+
**2)** Geographic context matters: parks in denser neighborhoods often offer more activities due to available facilities and community demand. The interactive map’s dark sequential colormap lets users see where program density is highest without getting lost in the basemap details. If a park hosts many programs but lacks variety in categories (e.g., arts or cultural programs), this may reveal meaningful gaps in the neighborhood's opportunities.
|
| 336 |
|
| 337 |
+
**3)** We intentionally designed the visualization to be accessible to novice users. Sequential, color-blind–friendly colormaps highlight differences without overwhelming the viewer, while tooltips provide detail only when needed. Filters allow users without technical backgrounds to interactively explore activity type, price, seasonality, or specific parks. This aims to turn public data into approachable insight for residents, researchers, and decision-makers.
|
| 338 |
""")
|
| 339 |
+
|
| 340 |
+
# -------------------------
|
| 341 |
+
# Footer
|
| 342 |
+
# -------------------------
|
| 343 |
+
st.markdown("---")
|
| 344 |
+
st.markdown("**Acknowledgements & citations:** Data retrieved directly from the City of Chicago Data Portal (Socrata API). All visualizations created by the authors.")
|