jkhare2 committed on
Commit
20dc96d
·
verified ·
1 Parent(s): 068ea38

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +218 -138
src/streamlit_app.py CHANGED
@@ -11,11 +11,15 @@ import pandas as pd
11
  import numpy as np
12
  import plotly.express as px
13
 
 
 
 
 
14
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
15
 
16
- # ---------------------------------------------------
17
- # LOAD & CLEAN DATA
18
- # ---------------------------------------------------
19
  @st.cache_data(ttl=3600)
20
  def load_data():
21
  url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
@@ -28,95 +32,103 @@ def load_data():
28
  if "fee" in df.columns:
29
  df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
30
 
31
- # Extract lat/lon
32
- if "location" in df.columns:
33
- def parse_location(val):
34
- if pd.isna(val):
35
- return (np.nan, np.nan)
36
- sval = str(val)
37
- if sval.startswith("POINT"):
38
- try:
39
- inside = sval.split("(",1)[1].rstrip(")")
40
- lon, lat = map(float, inside.split())
41
- return lat, lon
42
- except:
43
- return (np.nan, np.nan)
44
  return (np.nan, np.nan)
45
- latlon = df["location"].map(parse_location)
46
- df["latitude"] = latlon.map(lambda x: x[0])
47
- df["longitude"] = latlon.map(lambda x: x[1])
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Dates
50
- for c in ["start_date", "end_date"]:
51
- if c in df.columns:
52
- df[c] = pd.to_datetime(df[c], errors="ignore")
 
 
 
 
 
53
 
54
- # Activity type
55
  if "activity_type" in df.columns:
56
  df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
57
- elif "program_type" in df.columns:
58
- df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
59
  else:
60
  df["activity_type_clean"] = "Unknown"
61
 
62
- # Park names
63
- for col in ["park_name", "park", "site_name", "location_name"]:
 
 
 
 
64
  if col in df.columns:
65
- df["park_name"] = df[col].fillna("Unknown Park")
66
  break
67
- if "park_name" not in df.columns:
68
- df["park_name"] = "Unknown Park"
69
-
70
- # Derive season
71
- def season_from_date(dt):
72
- if isinstance(dt, str) or pd.isna(dt):
73
- try:
74
- dt = pd.to_datetime(dt)
75
- except:
76
- return "Unknown"
77
- m = dt.month
78
- if m in (12, 1, 2): return "Winter"
79
- if m in (3, 4, 5): return "Spring"
80
- if m in (6, 7, 8): return "Summer"
81
- return "Fall"
82
-
83
- if "start_date" in df.columns:
84
- df["season"] = df["start_date"].map(season_from_date)
85
  else:
86
- df["season"] = "Unknown"
87
 
88
  return df
89
 
 
90
  df = load_data()
91
 
92
- # ---------------------------------------------------
93
- # PAGE TITLE
94
- # ---------------------------------------------------
95
  st.title("Chicago Parks in Motion: How Our City Plays")
96
  st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
97
- st.info("This is our **central public-facing visualization project**, created for a data journalism assignment.")
98
 
99
- # ---------------------------------------------------
100
- # FILTERS
101
- # ---------------------------------------------------
102
  st.sidebar.header("Filters")
103
 
 
104
  categories = sorted(df["activity_type_clean"].unique())
105
  chosen_category = st.sidebar.selectbox("Activity Category", ["All"] + categories)
106
 
 
 
 
 
 
 
 
 
 
 
107
  seasons = sorted(df["season"].unique())
108
  chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
109
 
 
110
  if "fee" in df.columns:
111
- fee_limit = st.sidebar.slider("Maximum Fee (USD)", 0.0, float(df["fee"].fillna(0).max()), float(df["fee"].fillna(0).max()))
 
112
  else:
113
  fee_limit = None
114
 
115
- search_park = st.sidebar.text_input("Search Park Name")
 
116
 
117
- st.sidebar.caption("Filters help readers explore the data easily without technical skills.")
 
 
118
 
119
- # Filter logic
 
 
120
  filtered = df.copy()
121
  if chosen_category != "All":
122
  filtered = filtered[filtered["activity_type_clean"] == chosen_category]
@@ -124,119 +136,187 @@ if chosen_season != "All":
124
  filtered = filtered[filtered["season"] == chosen_season]
125
  if fee_limit is not None:
126
  filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
127
- if search_park:
128
- filtered = filtered[filtered["park_name"].str.contains(search_park, case=False, na=False)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- st.sidebar.markdown(f"**Programs Shown:** {len(filtered):,}")
131
-
132
- # ---------------------------------------------------
133
- # CENTRAL VISUALIZATION (MAP)
134
- # ---------------------------------------------------
135
- st.subheader("Central Visualization — Programs by Park (Interactive Map)")
136
-
137
- if filtered[["latitude","longitude"]].dropna().shape[0] > 0:
138
- agg = filtered.groupby(["park_name","latitude","longitude"]).size().reset_index(name="count")
139
 
140
- fig_map = px.scatter_mapbox(
141
- agg,
142
- lat="latitude",
143
- lon="longitude",
144
- size="count",
145
- size_max=30,
 
 
 
 
 
 
 
 
 
 
146
  color="count",
147
  color_continuous_scale="Cividis",
148
- zoom=10,
149
- height=550
 
 
 
 
 
 
150
  )
151
- fig_map.update_layout(mapbox_style="open-street-map")
152
- st.plotly_chart(fig_map, use_container_width=True)
153
-
154
- st.write("""
155
- **Explanation:**
156
- This map shows which parks offer the most programs. Larger and darker circles represent parks with more activities.
157
- The Cividis color scale makes it easy for all viewers—including those with color-vision differences—to understand intensity.
158
- You can hover over any park to see its program count and use the filters to explore activity types, seasons, or fees.
159
- """)
160
- else:
161
- st.warning("No geographic coordinates available for mapping.")
162
 
163
- # ---------------------------------------------------
164
- # CONTEXTUAL VIS 1 — CATEGORY PIE CHART
165
- # ---------------------------------------------------
166
- st.subheader("Contextual Visualization 1 — Activity Category Breakdown")
167
 
168
- cat_counts = df["activity_type_clean"].value_counts().reset_index()
169
- cat_counts.columns = ["Category","Count"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  fig_cat = px.pie(
172
  cat_counts,
173
  names="Category",
174
  values="Count",
175
- color_discrete_sequence=px.colors.sequential.Cividis
 
176
  )
 
177
  st.plotly_chart(fig_cat, use_container_width=True)
178
 
179
- st.write("""
180
- **Explanation:**
181
- This pie chart shows the distribution of all activity categories across Chicago parks.
182
- Some categories—like sports and youth programs—appear more often, while others are limited.
183
- Using the same Cividis-inspired palette keeps the visuals consistent for the public.
184
- """)
185
 
186
- # ---------------------------------------------------
187
- # CONTEXTUAL VIS 2 PROGRAMS BY SEASON
188
- # ---------------------------------------------------
189
- st.subheader("Contextual Visualization 2 — Programs by Season")
190
 
191
- season_counts = df["season"].value_counts().reset_index()
192
- season_counts.columns = ["Season","Program Count"]
 
 
 
 
193
 
194
  fig_season = px.bar(
195
  season_counts,
196
  x="Season",
197
  y="Program Count",
198
- text="Program Count",
199
  color="Program Count",
200
  color_continuous_scale="Cividis",
201
- height=450
 
202
  )
 
203
  fig_season.update_traces(textposition="outside")
 
204
  st.plotly_chart(fig_season, use_container_width=True)
205
 
206
- st.write("""
207
- **Explanation:**
208
- This chart shows how program offerings change by season. Summer and fall tend to have more activities,
209
- while winter shows fewer options. The color scale helps highlight the variation clearly and keeps the look consistent.
210
- """)
211
 
212
- # ---------------------------------------------------
213
- # FULL STORY SECTION (YOUR EXACT PARAGRAPHS)
214
- # ---------------------------------------------------
215
  st.markdown("---")
216
- st.header("What This Data Story Is Showing")
217
 
218
  st.markdown("""
219
- Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events.
220
- Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities.
221
- Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
222
-
223
- Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities,
224
- and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name.
225
- This makes the information easy to use even for someone with no data experience. For example, you can look for free programs,
226
- summer-only programs, or activities at a specific park in your neighborhood.
227
-
228
- This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity.
229
- By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support.
230
- Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
231
  """)
232
 
233
- # ---------------------------------------------------
234
- # CITATIONS
235
- # ---------------------------------------------------
236
  st.markdown("---")
237
- st.subheader("Citations & Data Sources")
 
238
  st.markdown("""
239
- - Chicago Park District Activities — City of Chicago Data Portal
240
- https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
241
- - All visualizations created by the authors using Streamlit & Plotly.
 
 
242
  """)
 
11
  import numpy as np
12
  import plotly.express as px
13
 
14
+
15
+ # ------------------------------------------------
16
+ # Page configuration
17
+ # ------------------------------------------------
18
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
19
 
20
+ # ------------------------------------------------
21
+ # Load Data
22
+ # ------------------------------------------------
23
  @st.cache_data(ttl=3600)
24
  def load_data():
25
  url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
 
32
  if "fee" in df.columns:
33
  df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
34
 
35
+ # Extract coordinates
36
def extract_latlon(val):
    """Parse a location cell into a ``(latitude, longitude)`` tuple.

    Two textual shapes are recognised:

    * WKT-style ``POINT (<lon> <lat>)``: longitude comes first in the text,
      so the pair is swapped before it is returned. Extra ordinates (for
      example a Z value) are ignored.
    * Any other string containing at least two decimal numbers: the first
      two are returned as ``(lat, lon)``.
      NOTE(review): that order is an assumption; confirm it against the
      dataset's actual ``location`` format.

    Args:
        val: Raw cell value; may be missing (``None`` / ``NaN``).

    Returns:
        ``(lat, lon)`` as floats, or ``(nan, nan)`` when nothing can be parsed.
    """
    # Function-scope import keeps this helper self-contained; the file's
    # top-level imports do not include ``re``.
    import re

    missing = (np.nan, np.nan)
    if pd.isna(val):
        return missing

    s = str(val)
    if s.startswith("POINT"):
        # The coordinates sit between the first "(" and the closing ")".
        inside = s.partition("(")[2].strip().rstrip(")")
        parts = inside.split()
        if len(parts) < 2:
            # e.g. "POINT EMPTY" or a truncated value.
            return missing
        try:
            lon, lat = float(parts[0]), float(parts[1])
        except ValueError:
            # Only a malformed number is expected here; anything else is a
            # real bug and should surface instead of being swallowed.
            return missing
        return lat, lon

    # Fallback for non-WKT text: pick out the first two decimal numbers.
    nums = re.findall(r"-?\d+\.\d+", s)
    if len(nums) >= 2:
        return float(nums[0]), float(nums[1])
    return missing
52
 
53
+ if "location" in df.columns:
54
+ coords = df["location"].map(extract_latlon)
55
+ df["latitude"] = coords.map(lambda x: x[0])
56
+ df["longitude"] = coords.map(lambda x: x[1])
57
+
58
+ # Parse dates
59
+ for col in ["start_date", "end_date"]:
60
+ if col in df.columns:
61
+ df[col] = pd.to_datetime(df[col], errors="coerce")
62
 
63
+ # Clean activity type
64
  if "activity_type" in df.columns:
65
  df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
 
 
66
  else:
67
  df["activity_type_clean"] = "Unknown"
68
 
69
+ # Park name
70
+ possible_park_cols = [
71
+ "park_name", "park", "location_facility",
72
+ "location_name", "location", "site_name"
73
+ ]
74
+ for col in possible_park_cols:
75
  if col in df.columns:
76
+ df["park_name"] = df[col].astype(str).replace(["", "nan", "None"], "Unknown Park")
77
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  else:
79
+ df["park_name"] = "Unknown Park"
80
 
81
  return df
82
 
83
+
84
  df = load_data()
85
 
86
+ # ------------------------------------------------
87
+ # Title + intro
88
+ # ------------------------------------------------
89
  st.title("Chicago Parks in Motion: How Our City Plays")
90
  st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
91
+ st.info("**Central visualization below:** Explore how program availability varies across Chicago’s parks. This is the main interactive visualization required by the rubric.")
92
 
93
+ # ------------------------------------------------
94
+ # Sidebar Filters
95
+ # ------------------------------------------------
96
  st.sidebar.header("Filters")
97
 
98
+ # Activity type
99
  categories = sorted(df["activity_type_clean"].unique())
100
  chosen_category = st.sidebar.selectbox("Activity Category", ["All"] + categories)
101
 
102
+ # Season
103
def season_from_date(dt):
    """Map a date to its meteorological season name.

    Args:
        dt: A datetime-like value (anything exposing ``.month``), a string
            that ``pd.to_datetime`` can parse, or a missing value.

    Returns:
        ``"Winter"`` (Dec-Feb), ``"Spring"`` (Mar-May), ``"Summer"``
        (Jun-Aug), ``"Fall"`` (Sep-Nov), or ``"Unknown"`` when the value is
        missing or cannot be parsed as a date.
    """
    if pd.isna(dt):
        return "Unknown"

    if not hasattr(dt, "month"):
        # Raw strings reach this point if the column was never parsed;
        # coerce instead of failing with AttributeError on ``.month``.
        dt = pd.to_datetime(dt, errors="coerce")
        if pd.isna(dt):
            return "Unknown"

    # month % 12 turns December into 0 so that Dec/Jan/Feb share one bucket;
    # integer division by 3 then yields the season index.
    season_names = ("Winter", "Spring", "Summer", "Fall")
    return season_names[(dt.month % 12) // 3]
110
+
111
# Derive the season column used by the sidebar filter and the season chart.
# Every other column is checked before use, so do the same for start_date
# instead of raising KeyError when the dataset lacks it.
if "start_date" in df.columns:
    df["season"] = df["start_date"].map(season_from_date)
else:
    df["season"] = "Unknown"

seasons = sorted(df["season"].unique())
chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
114
 
115
+ # Price filter
116
  if "fee" in df.columns:
117
+ max_fee = float(df["fee"].fillna(0).max())
118
+ fee_limit = st.sidebar.slider("Maximum Fee (USD)", 0.0, max_fee, max_fee)
119
  else:
120
  fee_limit = None
121
 
122
+ # Park name search
123
+ park_query = st.sidebar.text_input("Search Park Name")
124
 
125
+ st.sidebar.caption(
126
+ "Filters help all users — including novices — explore the dataset without needing technical skills."
127
+ )
128
 
129
+ # ------------------------------------------------
130
+ # Apply Filters
131
+ # ------------------------------------------------
132
  filtered = df.copy()
133
  if chosen_category != "All":
134
  filtered = filtered[filtered["activity_type_clean"] == chosen_category]
 
136
  filtered = filtered[filtered["season"] == chosen_season]
137
  if fee_limit is not None:
138
  filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
139
if park_query:
    # The search box holds free text typed by the reader. regex=False makes
    # the match literal, so characters such as "(" or "+" cannot raise
    # re.error or be interpreted as a pattern.
    filtered = filtered[
        filtered["park_name"].str.contains(
            park_query, case=False, na=False, regex=False
        )
    ]
141
+
142
+ st.sidebar.markdown(f"**Programs shown:** {len(filtered):,}")
143
+
144
+ # ------------------------------------------------
145
+ # CENTRAL VISUALIZATION — MAP / BAR
146
+ # ------------------------------------------------
147
+ st.header("Central Interactive Visualization — Programs by Park")
148
+
149
+ view_type = st.radio("Choose View", ["Map (recommended)", "Bar Chart"], horizontal=True)
150
+
151
+ # MAP VIEW
152
# Central visualization: the reader chooses between a bubble map of parks
# and a ranked bar chart of the parks with the most programs.
if view_type == "Map (recommended)":
    has_coords = (
        "latitude" in filtered.columns
        and "longitude" in filtered.columns
        and filtered[["latitude", "longitude"]].dropna().shape[0] > 0
    )

    if has_coords:
        # One row per park location, with the number of programs held there.
        agg = (
            filtered.groupby(["park_name", "latitude", "longitude"], dropna=True)
            .size()
            .reset_index(name="count")
        )

        fig_map = px.scatter_mapbox(
            agg,
            lat="latitude",
            lon="longitude",
            size="count",
            size_max=32,
            color="count",
            color_continuous_scale="Cividis",  # unified scheme
            hover_name="park_name",  # show which park a bubble belongs to
            zoom=10,
            height=550,
        )

        # Scattermapbox markers have no `line` (outline) property; passing one
        # raises ValueError, so only the opacity is set here.
        fig_map.update_traces(marker=dict(opacity=0.85))

        fig_map.update_layout(
            mapbox_style="carto-positron",  # high visibility
            margin=dict(r=0, t=0, l=0, b=0),
            paper_bgcolor="white",
        )

        st.plotly_chart(fig_map, use_container_width=True)

        st.markdown(
            "This map shows which parks offer the most programs. "
            "Darker and larger circles represent parks with more activities. "
            "Using a dark Cividis color scale ensures the bubbles stand out clearly on the light basemap."
        )
    else:
        st.warning("No map coordinates available. Switch to Bar Chart view.")

else:
    # Rank parks by program count and keep only the busiest ones.
    agg = (
        filtered.groupby("park_name")
        .size()
        .reset_index(name="count")
        .sort_values("count", ascending=False)
    )
    top_n = 25
    agg_top = agg.head(top_n)

    fig_bar = px.bar(
        agg_top,
        x="count",
        y="park_name",
        orientation="h",
        color="count",
        color_continuous_scale="Cividis",
        height=650,
        labels={"count": "Number of Programs", "park_name": "Park"},
    )

    fig_bar.update_traces(
        marker=dict(line=dict(width=0.7, color="black")),
        text=agg_top["count"],
        textposition="outside",
    )

    # Horizontal bars are drawn bottom-up; ordering the axis by total puts
    # the park with the most programs at the top of the chart.
    fig_bar.update_layout(yaxis=dict(categoryorder="total ascending"))

    st.plotly_chart(fig_bar, use_container_width=True)

    st.markdown(
        "This bar chart lists the parks with the most programs. "
        "The consistent Cividis color scheme highlights which locations have higher activity levels."
    )
234
+
235
+ # ------------------------------------------------
236
+ # CONTEXTUAL VISUALIZATION 1 – CATEGORY BREAKDOWN
237
+ # ------------------------------------------------
238
+ st.header("Contextual Visualization 1 — Activity Category Breakdown")
239
+
240
# Count programs per activity category across the whole (unfiltered) dataset.
cat_counts = df["activity_type_clean"].value_counts().reset_index()
cat_counts.columns = ["Category", "Count"]

# px.pie accepts only discrete color arguments; passing
# color_continuous_scale raises TypeError. The Cividis swatch list keeps the
# palette consistent with the other charts.
fig_cat = px.pie(
    cat_counts,
    names="Category",
    values="Count",
    color_discrete_sequence=px.colors.sequential.Cividis,
)

st.plotly_chart(fig_cat, use_container_width=True)
256
 
257
+ st.markdown(
258
+ "This chart shows the types of activities offered across Chicago parks. "
259
+ "Most programs fall under instruction-based activities, such as fitness or sports classes, "
260
+ "while other categories appear less frequently. "
261
+ "Using Cividis keeps the colors readable and consistent with the rest of the app."
262
+ )
263
 
264
+ # ------------------------------------------------
265
+ # CONTEXTUAL VISUALIZATION 2 PROGRAMS BY SEASON
266
+ # ------------------------------------------------
267
+ st.header("Contextual Visualization 2 — Programs by Season")
268
 
269
+ season_counts = (
270
+ df["season"]
271
+ .value_counts()
272
+ .reset_index()
273
+ )
274
+ season_counts.columns = ["Season", "Program Count"]
275
 
276
  fig_season = px.bar(
277
  season_counts,
278
  x="Season",
279
  y="Program Count",
 
280
  color="Program Count",
281
  color_continuous_scale="Cividis",
282
+ text="Program Count",
283
+ height=500,
284
  )
285
+
286
  fig_season.update_traces(textposition="outside")
287
+
288
  st.plotly_chart(fig_season, use_container_width=True)
289
 
290
+ st.markdown(
291
+ "This bar chart shows how program availability changes across seasons. "
292
+ "Summer has the highest number of activities, which matches outdoor recreation trends. "
293
+ "The same Cividis palette keeps the experience uniform and accessible."
294
+ )
295
 
296
+ # ------------------------------------------------
297
+ # REQUIRED FINAL NARRATIVE SECTION (Your Own Text)
298
+ # ------------------------------------------------
299
  st.markdown("---")
300
+ st.header("What this data story is showing")
301
 
302
  st.markdown("""
303
+ Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events. Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities. Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
304
+
305
+ Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities, and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name. This makes the information easy to use even for someone with no data experience. For example, you can look for free programs, summer-only programs, or activities at a specific park in your neighborhood.
306
+
307
+ This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity. By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support. Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
 
 
 
 
 
 
 
308
  """)
309
 
310
+ # ------------------------------------------------
311
+ # Citations
312
+ # ------------------------------------------------
313
  st.markdown("---")
314
+ st.subheader("Data Sources & Citations")
315
+
316
  st.markdown("""
317
+ **Primary dataset:**
318
+ Chicago Park District Activities — City of Chicago Data Portal
319
+ https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
320
+
321
+ All visualizations were created by the authors using Streamlit and Plotly.
322
  """)