jkhare2 committed on
Commit
4b239eb
·
verified ·
1 Parent(s): 40f9641

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +72 -125
src/streamlit_app.py CHANGED
@@ -10,7 +10,6 @@ import streamlit as st
10
  import pandas as pd
11
  import numpy as np
12
  import plotly.express as px
13
- from urllib.parse import urlencode
14
 
15
 
16
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
@@ -32,16 +31,10 @@ def load_data():
32
  if "fee" in df.columns:
33
  df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
34
 
35
- # -------------------------
36
  # Extract Latitude / Longitude
37
- # -------------------------
38
- lat_col = None
39
- lon_col = None
40
-
41
  if "location" in df.columns:
42
  def parse_lat_lon(val):
43
- if pd.isna(val):
44
- return (np.nan, np.nan)
45
  sval = str(val)
46
  if sval.startswith("POINT"):
47
  try:
@@ -50,13 +43,6 @@ def load_data():
50
  return lat, lon
51
  except:
52
  return (np.nan, np.nan)
53
- if "latitude" in sval and "longitude" in sval:
54
- try:
55
- import json
56
- j = json.loads(sval)
57
- return float(j.get("latitude", np.nan)), float(j.get("longitude", np.nan))
58
- except:
59
- return (np.nan, np.nan)
60
  import re
61
  nums = re.findall(r"-?\d+\.\d+", sval)
62
  if len(nums) >= 2:
@@ -66,28 +52,8 @@ def load_data():
66
  latlon = df["location"].map(parse_lat_lon)
67
  df["latitude"] = latlon.map(lambda x: x[0])
68
  df["longitude"] = latlon.map(lambda x: x[1])
69
- lat_col, lon_col = "latitude", "longitude"
70
-
71
- if "the_geom" in df.columns and (lat_col is None or lon_col is None):
72
- def parse_the_geom(val):
73
- if pd.isna(val): return (np.nan, np.nan)
74
- sval = str(val)
75
- if "POINT" in sval:
76
- try:
77
- inside = sval.split("(", 1)[1].rstrip(")")
78
- lon, lat = map(float, inside.strip().split())
79
- return lat, lon
80
- except:
81
- return (np.nan, np.nan)
82
- return (np.nan, np.nan)
83
-
84
- latlon = df["the_geom"].map(parse_the_geom)
85
- df["latitude"] = latlon.map(lambda x: x[0])
86
- df["longitude"] = latlon.map(lambda x: x[1])
87
 
88
- # -------------------------
89
- # Clean categorical fields
90
- # -------------------------
91
  if "activity_type" in df.columns:
92
  df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
93
  elif "program_type" in df.columns:
@@ -97,32 +63,16 @@ def load_data():
97
  else:
98
  df["activity_type_clean"] = "Unknown"
99
 
100
- # -------------------------
101
- # Park Name extraction
102
- # -------------------------
103
- possible_park_cols = [
104
- "park_name",
105
- "park",
106
- "location_facility",
107
- "location_name",
108
- "location",
109
- "site_name"
110
- ]
111
-
112
- park_col = None
113
- for col in possible_park_cols:
114
- if col in df.columns:
115
- park_col = col
116
- break
117
 
118
  if park_col is not None:
119
  df["park_name"] = df[park_col].astype(str).replace(["", "nan", "None"], "Unknown Park")
120
  else:
121
  df["park_name"] = "Unknown Park"
122
 
123
- # -------------------------
124
- # Season extraction
125
- # -------------------------
126
  if "start_date" in df.columns:
127
  df["start_date"] = pd.to_datetime(df["start_date"], errors="coerce")
128
 
@@ -140,44 +90,42 @@ def load_data():
140
 
141
  return df
142
 
 
143
  df = load_data()
144
 
145
  # -------------------------
146
- # Page header
147
  # -------------------------
148
  st.title("Chicago Parks in Motion: How Our City Plays")
149
- st.markdown("**Author:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
150
 
151
  st.markdown("""
152
- **Central Visualization:**
153
- Our main interactive map and bar chart (below) serve as the central visualization of this project.
154
- They were first prototyped and tested in our associated Jupyter Notebook before being migrated to this Streamlit interface.
155
 
156
- We intentionally use large, clear layouts and high-contrast colors so that novice readers can explore the data without prior experience.
157
  """)
158
 
159
  # -------------------------
160
  # Sidebar filters
161
  # -------------------------
162
- st.sidebar.header("Filters & Settings")
163
 
164
  categories = sorted(df["activity_type_clean"].dropna().unique())
165
- categories = [c for c in categories if c != "nan"]
166
  chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
167
 
168
  seasons = sorted(df["season"].dropna().unique())
169
  chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
170
 
171
- has_fee_col = "fee" in df.columns
172
- if has_fee_col:
173
  max_fee = float(np.nanmax(df["fee"].fillna(0)))
174
- fee_limit = st.sidebar.slider("Maximum fee (USD)", 0.0, max(1.0, max_fee), float(max_fee))
175
  else:
176
  fee_limit = None
177
 
178
- park_query = st.sidebar.text_input("Search park name (partial)")
179
 
180
- # Apply filters
181
  filtered = df.copy()
182
  if chosen_category != "All":
183
  filtered = filtered[filtered["activity_type_clean"] == chosen_category]
@@ -188,11 +136,8 @@ if fee_limit is not None:
188
  if park_query:
189
  filtered = filtered[filtered["park_name"].str.contains(park_query, case=False, na=False)]
190
 
191
- st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
192
- st.sidebar.markdown("""
193
- Filters improve accessibility by helping users explore small slices of data
194
- without needing technical skills or scrolling through thousands of rows.
195
- """)
196
 
197
  # -------------------------
198
  # Layout
@@ -205,77 +150,69 @@ main_col, side_col = st.columns((2, 1))
205
  with main_col:
206
  st.subheader("Central Interactive Visualization — Programs by Park")
207
 
208
- view_type = st.radio("View type", ["Map (recommended)", "Bar chart (count by park)"], horizontal=True)
209
 
210
  if view_type.startswith("Map"):
211
- if "latitude" in filtered.columns and "longitude" in filtered.columns and filtered[["latitude", "longitude"]].dropna().shape[0] > 0:
212
 
213
- agg = filtered.groupby(["park_name", "latitude", "longitude"], dropna=True).size().reset_index(name="count")
214
 
215
- # ★★★ NEW COLORFUL, CLEAR BUBBLE MAP ★★★
216
  fig_map = px.scatter_mapbox(
217
  agg,
218
  lat="latitude",
219
  lon="longitude",
220
  size="count",
221
- size_max=28,
222
  hover_name="park_name",
223
  hover_data={"count": True},
224
  color="count",
225
- color_continuous_scale=["#FFE5CC", "#FF7F0E"], # bright orange sequential colormap
226
  zoom=10,
227
  height=600,
228
  )
229
 
230
- fig_map.update_traces(
231
- marker=dict(
232
- opacity=0.90,
233
- line=dict(width=0.7, color="#303030") # thin grey outline for contrast
234
- )
235
- )
236
 
237
- fig_map.update_layout(
238
- mapbox_style="open-street-map",
239
- margin={"r": 0, "t": 0, "l": 0, "b": 0},
240
- )
241
 
242
  st.plotly_chart(fig_map, use_container_width=True)
243
- st.caption("Sequential orange colormap chosen intentionally to highlight program density while ensuring visibility on OpenStreetMap backgrounds.")
 
244
  else:
245
- st.warning("No geographic coordinates found. Try using the bar chart view instead.")
246
 
247
  else:
248
  agg = filtered.groupby("park_name").size().reset_index(name="count").sort_values("count", ascending=False)
249
- top_n = 25
250
- agg_top = agg.head(top_n)
251
 
252
  fig_bar = px.bar(
253
- agg_top,
254
  x="count",
255
  y="park_name",
256
  orientation="h",
257
  color="count",
258
- color_continuous_scale="Blues", # sequential colormap requirement
259
- labels={"count": "Number of programs", "park_name": "Park"},
260
- height=700,
261
  )
262
 
263
- fig_bar.update_layout(
264
- yaxis={'categoryorder': 'total ascending'},
265
- margin={"r": 20, "t": 10, "l": 200, "b": 10},
266
- )
267
  st.plotly_chart(fig_bar, use_container_width=True)
268
- st.caption("Blues sequential colormap used to reinforce magnitude patterns.")
269
 
270
- if st.checkbox("Show program sample table (first 50 rows)"):
271
  st.dataframe(filtered.head(50))
272
 
273
-
274
  # -------------------------
275
  # CONTEXTUAL VISUALIZATIONS
276
  # -------------------------
277
  with side_col:
278
- st.subheader("Contextual Visual 1 — Activity Category Breakdown")
 
279
  cat_counts = df["activity_type_clean"].value_counts().reset_index()
280
  cat_counts.columns = ["activity_type", "count"]
281
 
@@ -285,16 +222,18 @@ with side_col:
285
  values="count",
286
  hole=0.35,
287
  height=300,
288
- color_discrete_sequence=px.colors.qualitative.Set3 # chosen for categorical contrast
289
  )
 
290
  st.plotly_chart(fig_cat, use_container_width=True)
291
  st.caption("""
292
- This visualization appears both in our Streamlit App and in our Jupyter Notebook.
293
- We chose a categorical palette to clearly differentiate activity types for readers.
294
  """)
295
 
296
- st.subheader("Contextual Visual 2 — Programs by Season")
297
- season_counts = df["season"].dropna().value_counts().reset_index()
 
298
  season_counts.columns = ["Season", "Program Count"]
299
 
300
  fig_season = px.bar(
@@ -303,27 +242,28 @@ with side_col:
303
  y="Program Count",
304
  color="Program Count",
305
  color_continuous_scale="Viridis",
306
- title="Number of Programs Offered by Season",
307
  text="Program Count",
 
308
  )
309
- fig_season.update_traces(textposition="outside")
310
- fig_season.update_layout(height=500)
311
 
 
312
  st.plotly_chart(fig_season, use_container_width=True)
 
313
  st.caption("""
314
- This contextual visualization also appears in our notebook.
315
- A sequential 'Viridis' colormap is used to ensure clear readability and accessibility.
316
  """)
317
 
318
  st.markdown("---")
319
- st.subheader("Data & Notebook")
 
320
  st.markdown("""
321
  **Primary dataset:**
322
  Chicago Park District Activities — City of Chicago Data Portal
323
  https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
324
 
325
- All contextual visualizations and cleaning steps were first created in our Jupyter Notebook
326
- and then migrated to this Streamlit app for public presentation.
327
  """)
328
 
329
  # -------------------------
@@ -333,11 +273,18 @@ st.markdown("---")
333
  st.header("What this data story is showing")
334
 
335
  st.markdown("""
336
- **1)** Chicago’s parks host a wide variety of recreational programs, including aquatics, sports, arts, and senior programming. Each row of the dataset represents a specific program offering. Our central visualization makes it easy to see which parks host the most activities and how program density varies across the city.
337
-
338
- **2)** The map visualization highlights geographic patterns, drawing attention to parks in denser neighborhoods where program availability tends to be higher. The use of sequential colormaps helps novice users differentiate high-activity areas without needing to interpret complex scales. If a park has many programs but lacks certain categories (e.g., cultural programs), this may indicate an opportunity for expanded community support.
339
-
340
- **3)** Access and equity are key themes. Filters allow users to explore free or low-cost programs, seasonal availability, and offerings in specific neighborhoods. This design choice improves accessibility, both in visual clarity and in helping users navigate a complex public dataset without technical expertise. The intention is to turn raw civic data into an approachable tool for residents, researchers, and policymakers.
 
 
 
 
 
 
 
341
  """)
342
 
343
  # -------------------------
@@ -347,5 +294,5 @@ st.markdown("---")
347
  st.markdown("""
348
  **Acknowledgements & Citations:**
349
  City of Chicago Data Portal — Chicago Park District Activities.
350
- Visualizations created using Plotly and Streamlit.
351
  """)
 
10
  import pandas as pd
11
  import numpy as np
12
  import plotly.express as px
 
13
 
14
 
15
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
 
31
  if "fee" in df.columns:
32
  df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
33
 
 
34
  # Extract Latitude / Longitude
 
 
 
 
35
  if "location" in df.columns:
36
  def parse_lat_lon(val):
37
+ if pd.isna(val): return (np.nan, np.nan)
 
38
  sval = str(val)
39
  if sval.startswith("POINT"):
40
  try:
 
43
  return lat, lon
44
  except:
45
  return (np.nan, np.nan)
 
 
 
 
 
 
 
46
  import re
47
  nums = re.findall(r"-?\d+\.\d+", sval)
48
  if len(nums) >= 2:
 
52
  latlon = df["location"].map(parse_lat_lon)
53
  df["latitude"] = latlon.map(lambda x: x[0])
54
  df["longitude"] = latlon.map(lambda x: x[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
+ # Activity category
 
 
57
  if "activity_type" in df.columns:
58
  df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
59
  elif "program_type" in df.columns:
 
63
  else:
64
  df["activity_type_clean"] = "Unknown"
65
 
66
+ # Park name extraction
67
+ possible_park_cols = ["park_name", "park", "location_facility", "location_name", "location", "site_name"]
68
+ park_col = next((col for col in possible_park_cols if col in df.columns), None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  if park_col is not None:
71
  df["park_name"] = df[park_col].astype(str).replace(["", "nan", "None"], "Unknown Park")
72
  else:
73
  df["park_name"] = "Unknown Park"
74
 
75
+ # Season
 
 
76
  if "start_date" in df.columns:
77
  df["start_date"] = pd.to_datetime(df["start_date"], errors="coerce")
78
 
 
90
 
91
  return df
92
 
93
+
94
  df = load_data()
95
 
96
  # -------------------------
97
+ # Header / Intro
98
  # -------------------------
99
  st.title("Chicago Parks in Motion: How Our City Plays")
100
+ st.markdown("**Authors:** Juhi Khare, Alisha Rawat, Sutthana Koo-Anupong")
101
 
102
  st.markdown("""
103
+ ### Central Visualization
104
+ Our main interactive map and bar chart (below) serve as the **central visualization** for this data-journalism-style article.
105
+ These were first prototyped in our Jupyter Notebook before being migrated and refined inside Streamlit.
106
 
107
+ We use large, high-contrast visuals, sequential colormaps, and clear explanatory text to ensure that the app remains accessible to novice viewers.
108
  """)
109
 
110
  # -------------------------
111
  # Sidebar filters
112
  # -------------------------
113
+ st.sidebar.header("Filters")
114
 
115
  categories = sorted(df["activity_type_clean"].dropna().unique())
 
116
  chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
117
 
118
  seasons = sorted(df["season"].dropna().unique())
119
  chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
120
 
121
+ if "fee" in df.columns:
 
122
  max_fee = float(np.nanmax(df["fee"].fillna(0)))
123
+ fee_limit = st.sidebar.slider("Maximum fee (USD)", 0.0, max_fee, max_fee)
124
  else:
125
  fee_limit = None
126
 
127
+ park_query = st.sidebar.text_input("Search park name (partial match)")
128
 
 
129
  filtered = df.copy()
130
  if chosen_category != "All":
131
  filtered = filtered[filtered["activity_type_clean"] == chosen_category]
 
136
  if park_query:
137
  filtered = filtered[filtered["park_name"].str.contains(park_query, case=False, na=False)]
138
 
139
+ st.sidebar.markdown(f"**Programs shown:** {len(filtered):,}")
140
+ st.sidebar.caption("Filters improve accessibility for non-technical readers by letting them explore only the parts of the dataset they care about.")
 
 
 
141
 
142
  # -------------------------
143
  # Layout
 
150
  with main_col:
151
  st.subheader("Central Interactive Visualization — Programs by Park")
152
 
153
+ view_type = st.radio("Choose view", ["Map (recommended)", "Bar chart (top parks)"], horizontal=True)
154
 
155
  if view_type.startswith("Map"):
156
+ if "latitude" in filtered and "longitude" in filtered and filtered[["latitude","longitude"]].dropna().shape[0] > 0:
157
 
158
+ agg = filtered.groupby(["park_name", "latitude", "longitude"]).size().reset_index(name="count")
159
 
160
+ # HIGH-VISIBILITY BUBBLE MAP (NO WHITE, NO ERRORS)
161
  fig_map = px.scatter_mapbox(
162
  agg,
163
  lat="latitude",
164
  lon="longitude",
165
  size="count",
166
+ size_max=30,
167
  hover_name="park_name",
168
  hover_data={"count": True},
169
  color="count",
170
+ color_continuous_scale=["#FFE5CC", "#FF7F0E"], # sequential orange (rubric requirement)
171
  zoom=10,
172
  height=600,
173
  )
174
 
175
+ fig_map.update_traces(marker=dict(opacity=0.92, sizemode="area"))
 
 
 
 
 
176
 
177
+ fig_map.update_layout(mapbox_style="open-street-map",
178
+ margin={"r":0,"t":0,"l":0,"b":0})
 
 
179
 
180
  st.plotly_chart(fig_map, use_container_width=True)
181
+ st.caption("Sequential orange colormap chosen to maximize visibility against OpenStreetMap backgrounds.")
182
+
183
  else:
184
+ st.warning("No geographic coordinates available in this dataset.")
185
 
186
  else:
187
  agg = filtered.groupby("park_name").size().reset_index(name="count").sort_values("count", ascending=False)
188
+ top_n = agg.head(25)
 
189
 
190
  fig_bar = px.bar(
191
+ top_n,
192
  x="count",
193
  y="park_name",
194
  orientation="h",
195
  color="count",
196
+ color_continuous_scale="Blues",
197
+ labels={"count":"Program Count","park_name":"Park"},
198
+ height=700
199
  )
200
 
201
+ fig_bar.update_layout(yaxis={'categoryorder':'total ascending'},
202
+ margin={"r":20,"t":10,"l":200,"b":10})
203
+
 
204
  st.plotly_chart(fig_bar, use_container_width=True)
205
+ st.caption("Blues sequential colormap used to emphasize differences in program volume.")
206
 
207
+ if st.checkbox("Show a small sample of the filtered table"):
208
  st.dataframe(filtered.head(50))
209
 
 
210
  # -------------------------
211
  # CONTEXTUAL VISUALIZATIONS
212
  # -------------------------
213
  with side_col:
214
+ st.subheader("Contextual Visual 1 — Activity Categories")
215
+
216
  cat_counts = df["activity_type_clean"].value_counts().reset_index()
217
  cat_counts.columns = ["activity_type", "count"]
218
 
 
222
  values="count",
223
  hole=0.35,
224
  height=300,
225
+ color_discrete_sequence=px.colors.qualitative.Set3
226
  )
227
+
228
  st.plotly_chart(fig_cat, use_container_width=True)
229
  st.caption("""
230
+ This contextual visualization also appears in our Jupyter Notebook.
231
+ A categorical palette (Set3) is used to ensure distinct, accessible color differences.
232
  """)
233
 
234
+ st.subheader("Contextual Visual 2 — Seasonal Patterns")
235
+
236
+ season_counts = df["season"].value_counts().reset_index()
237
  season_counts.columns = ["Season", "Program Count"]
238
 
239
  fig_season = px.bar(
 
242
  y="Program Count",
243
  color="Program Count",
244
  color_continuous_scale="Viridis",
 
245
  text="Program Count",
246
+ height=500
247
  )
 
 
248
 
249
+ fig_season.update_traces(textposition="outside")
250
  st.plotly_chart(fig_season, use_container_width=True)
251
+
252
  st.caption("""
253
+ This visualization is also included in our Notebook.
254
+ A sequential 'Viridis' scale was chosen for accessibility and clear magnitude comparison.
255
  """)
256
 
257
  st.markdown("---")
258
+ st.subheader("Dataset & Notebook")
259
+
260
  st.markdown("""
261
  **Primary dataset:**
262
  Chicago Park District Activities — City of Chicago Data Portal
263
  https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
264
 
265
+ All contextual visualizations and preprocessing steps were first implemented in our
266
+ **Python Jupyter Notebook**, then migrated to this Streamlit app for public communication.
267
  """)
268
 
269
  # -------------------------
 
273
  st.header("What this data story is showing")
274
 
275
  st.markdown("""
276
+ **1)** Chicago’s parks host thousands of programs that range from sports and aquatics to day camps and senior activities.
277
+ Each row represents a specific program offering. Our central visualization allows readers to immediately see where the
278
+ city’s recreational “hotspots” are located and which parks offer the highest variety or volume of programs.
279
+
280
+ **2)** Geographic and seasonal context help uncover patterns. Some neighborhoods — particularly those with larger
281
+ parks — have significantly more offerings. The map’s bright orange sequential colormap was chosen intentionally to help
282
+ novice viewers understand density without needing technical expertise. If a park has high overall activity but few
283
+ programs in certain categories, this may signal unmet community needs.
284
+
285
+ **3)** Accessibility and equity are major themes. Filters let readers explore affordability (via fee limits),
286
+ seasonal schedules, and specific types of programs. This design approach transforms a large, raw civic dataset
287
+ into an accessible storytelling tool for residents, city planners, and researchers alike.
288
  """)
289
 
290
  # -------------------------
 
294
  st.markdown("""
295
  **Acknowledgements & Citations:**
296
  City of Chicago Data Portal — Chicago Park District Activities.
297
+ Visualizations built with Streamlit and Plotly.
298
  """)