jkhare2 committed on
Commit
068ea38
·
verified ·
1 Parent(s): 7d7ee3c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +139 -162
src/streamlit_app.py CHANGED
@@ -1,9 +1,9 @@
1
  # streamlit_app.py
2
  """
3
- Chicago Parks in Motion — Streamlit app
4
- Author: Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)
5
  Primary dataset: Chicago Park District Activities
6
- Data source (CSV endpoint): https://data.cityofchicago.org/resource/tn7v-6rnw.csv
7
  """
8
 
9
  import streamlit as st
@@ -11,113 +11,112 @@ import pandas as pd
11
  import numpy as np
12
  import plotly.express as px
13
 
14
-
15
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
16
 
17
- # -------------------------
18
- # Helper: Load & preprocess
19
- # -------------------------
20
  @st.cache_data(ttl=3600)
21
  def load_data():
22
- csv_url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
23
- try:
24
- df = pd.read_csv(csv_url, dtype=str)
25
- except Exception as e:
26
- st.error("Could not load dataset from the City of Chicago portal.")
27
- raise e
28
 
 
29
  df.columns = [c.strip() for c in df.columns]
30
 
 
31
  if "fee" in df.columns:
32
  df["fee"] = pd.to_numeric(df["fee"], errors="coerce")
33
 
34
  # Extract lat/lon
35
- def extract_latlon(val):
36
- if pd.isna(val):
37
- return (np.nan, np.nan)
38
- sval = str(val)
39
- if "POINT" in sval:
40
- try:
41
- inside = sval.split("(", 1)[1].rstrip(")")
42
- lon, lat = map(float, inside.split())
43
- return lat, lon
44
- except:
45
- return (np.nan, np.nan)
46
- return (np.nan, np.nan)
47
-
48
  if "location" in df.columns:
49
- latlon = df["location"].map(extract_latlon)
 
 
 
 
 
 
 
 
 
 
 
 
50
  df["latitude"] = latlon.map(lambda x: x[0])
51
  df["longitude"] = latlon.map(lambda x: x[1])
52
- else:
53
- df["latitude"] = np.nan
54
- df["longitude"] = np.nan
55
 
56
  # Dates
57
  for c in ["start_date", "end_date"]:
58
  if c in df.columns:
59
- df[c] = pd.to_datetime(df[c], errors="coerce")
60
 
61
- # Activity type clean
62
  if "activity_type" in df.columns:
63
  df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
 
 
64
  else:
65
  df["activity_type_clean"] = "Unknown"
66
 
67
- # Park name
68
- possible_names = ["park_name", "park", "location_facility", "location_name", "site_name"]
69
- park_col = next((col for col in possible_names if col in df.columns), None)
70
- if park_col:
71
- df["park_name"] = df[park_col].astype(str).replace(["", "nan", "None"], "Unknown Park")
72
- else:
73
  df["park_name"] = "Unknown Park"
74
 
75
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
 
77
 
78
  df = load_data()
79
 
80
- # -------------------------
81
- # Title
82
- # -------------------------
83
  st.title("Chicago Parks in Motion: How Our City Plays")
84
  st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
 
85
 
86
- # -------------------------
87
- # Sidebar filters
88
- # -------------------------
89
- st.sidebar.header("Filters & Settings")
90
 
91
- categories = sorted(df["activity_type_clean"].dropna().unique())
92
- chosen_category = st.sidebar.selectbox("Activity category", ["All"] + categories)
93
 
94
- # Season detection
95
- def season_from_date(dt):
96
- if pd.isna(dt): return "Unknown"
97
- m = dt.month
98
- if m in [12,1,2]: return "Winter"
99
- if m in [3,4,5]: return "Spring"
100
- if m in [6,7,8]: return "Summer"
101
- return "Fall"
102
-
103
- df["season"] = df["start_date"].map(season_from_date)
104
  seasons = sorted(df["season"].unique())
105
  chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)
106
 
107
  if "fee" in df.columns:
108
- max_fee = float(df["fee"].fillna(0).max())
109
- fee_limit = st.sidebar.slider("Maximum fee (USD)", 0.0, max_fee, max_fee)
110
  else:
111
  fee_limit = None
112
 
113
- park_search = st.sidebar.text_input("Search park name (partial)")
114
 
115
- # Accessibility hint
116
- st.sidebar.caption("Filters help beginners explore the dataset easily without technical skills.")
117
 
118
- # -------------------------
119
- # Filtering logic
120
- # -------------------------
121
  filtered = df.copy()
122
  if chosen_category != "All":
123
  filtered = filtered[filtered["activity_type_clean"] == chosen_category]
@@ -125,141 +124,119 @@ if chosen_season != "All":
125
  filtered = filtered[filtered["season"] == chosen_season]
126
  if fee_limit is not None:
127
  filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
128
- if park_search:
129
- filtered = filtered[filtered["park_name"].str.contains(park_search, case=False)]
130
 
131
- st.sidebar.write(f"Programs shown: **{len(filtered):,}**")
132
 
133
- # -------------------------
134
- # CENTRAL VISUALIZATION
135
- # -------------------------
136
- st.header("Central Interactive Visualization — Programs by Park")
137
 
138
- view = st.radio("Choose a view:", ["Map (recommended)", "Bar chart"], horizontal=True)
 
139
 
140
- if view.startswith("Map"):
141
- # Aggregate for map
142
- agg = (
143
- filtered.groupby(["park_name", "latitude", "longitude"], dropna=True)
144
- .size().reset_index(name="count")
145
- )
146
-
147
- if agg.dropna().shape[0] > 0:
148
- fig_map = px.scatter_mapbox(
149
- agg,
150
- lat="latitude",
151
- lon="longitude",
152
- size="count",
153
- color="count",
154
- color_continuous_scale="Bluered",
155
- size_max=28,
156
- zoom=10,
157
- hover_name="park_name",
158
- hover_data={"count": True},
159
- height=600,
160
- )
161
- fig_map.update_layout(mapbox_style="open-street-map", margin=dict(l=0,r=0,b=0,t=0))
162
- st.plotly_chart(fig_map, use_container_width=True)
163
- else:
164
- st.warning("No geographic coordinates available for this filtered view.")
165
- else:
166
- agg = filtered.groupby("park_name").size().reset_index(name="count")
167
- agg = agg.sort_values("count", ascending=False).head(20)
168
-
169
- fig_bar = px.bar(
170
  agg,
171
- x="count",
172
- y="park_name",
173
- orientation="h",
 
174
  color="count",
175
  color_continuous_scale="Cividis",
176
- height=600,
 
177
  )
178
- fig_bar.update_layout(yaxis={'categoryorder':'total ascending'})
179
- st.plotly_chart(fig_bar, use_container_width=True)
180
-
181
- # Explanation under central viz
182
- st.markdown("""
183
- **What this visualization shows:**
184
- This is our main visualization because it helps readers understand where activities are happening across Chicago’s parks.
185
- The map shows each park as a circle, where larger and darker circles represent locations with more programs.
186
- This makes it easy to see which areas are activity hubs and which are quieter. The filters allow anyone to explore patterns by season,
187
- category, price, or park—without needing technical experience.
188
- """)
189
 
190
- # -------------------------
191
- # CONTEXTUAL VISUALIZATION 1
192
- # -------------------------
193
- st.header("Contextual Visualization 1 — Activity Category Breakdown")
194
 
195
  cat_counts = df["activity_type_clean"].value_counts().reset_index()
196
- cat_counts.columns = ["activity_type", "count"]
197
 
198
  fig_cat = px.pie(
199
  cat_counts,
200
- names="activity_type",
201
- values="count",
202
- hole=0.35,
203
- color_discrete_sequence=px.colors.sequential.RdBu
204
  )
205
  st.plotly_chart(fig_cat, use_container_width=True)
206
 
207
- st.markdown("""
208
- **Why this matters:**
209
- This chart shows what kinds of activities Chicago parks offer most often—such as sports, aquatics, arts, or youth programs.
210
- It helps readers understand the variety of programs available across the city.
211
- Using a simple color palette keeps the chart readable for people who may not be familiar with data visualization.
212
  """)
213
 
214
- # -------------------------
215
- # CONTEXTUAL VISUALIZATION 2
216
- # -------------------------
217
- st.header("Contextual Visualization 2 — Programs by Season")
218
 
219
  season_counts = df["season"].value_counts().reset_index()
220
- season_counts.columns = ["Season", "Program Count"]
221
 
222
  fig_season = px.bar(
223
  season_counts,
224
  x="Season",
225
  y="Program Count",
226
- color="Program Count",
227
- color_continuous_scale="Tealgrn",
228
  text="Program Count",
229
- height=500,
 
 
230
  )
231
  fig_season.update_traces(textposition="outside")
232
-
233
  st.plotly_chart(fig_season, use_container_width=True)
234
 
235
- st.markdown("""
236
- **Why this is helpful:**
237
- This chart shows when programs are most active throughout the year.
238
- Comparing seasons helps readers see whether summer is the busiest time, or whether activities are spread evenly.
239
- This makes it easier for residents and planners to understand how weather, school schedules, and community needs
240
- shape the timing of park programs.
241
  """)
242
 
243
- # -------------------------
244
- # FINAL 3-PARAGRAPH EXPLANATION (as provided by you, unchanged)
245
- # -------------------------
246
- st.header("📝 What this data story is showing")
 
247
 
248
  st.markdown("""
249
- Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events. Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities. Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.
250
-
251
- Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities, and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name. This makes the information easy to use even for someone with no data experience. For example, you can look for free programs, summer-only programs, or activities at a specific park in your neighborhood.
252
-
253
- This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity. By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support. Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
 
 
 
 
 
 
 
254
  """)
255
 
256
- # -------------------------
257
  # CITATIONS
258
- # -------------------------
259
  st.markdown("---")
260
  st.subheader("Citations & Data Sources")
261
  st.markdown("""
262
- **Primary dataset:**
263
- Chicago Park District Activities — City of Chicago Data Portal
264
- https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
265
  """)
 
1
  # streamlit_app.py
2
  """
3
+ Chicago Parks in Motion — Streamlit App
4
+ Authors: Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)
5
  Primary dataset: Chicago Park District Activities
6
+ Source: https://data.cityofchicago.org/resource/tn7v-6rnw.csv
7
  """
8
 
9
  import streamlit as st
 
11
  import numpy as np
12
  import plotly.express as px
13
 
 
14
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
15
 
16
# ---------------------------------------------------
# LOAD & CLEAN DATA
# ---------------------------------------------------
# Month number -> season label used to build the "season" column.
_SEASON_BY_MONTH = {
    12: "Winter", 1: "Winter", 2: "Winter",
    3: "Spring", 4: "Spring", 5: "Spring",
    6: "Summer", 7: "Summer", 8: "Summer",
    9: "Fall", 10: "Fall", 11: "Fall",
}


def _parse_location(val):
    """Return ``(latitude, longitude)`` parsed from a ``POINT (lon lat)`` string.

    Missing values and anything not in that shape yield ``(nan, nan)``.
    """
    if pd.isna(val):
        return (np.nan, np.nan)
    text = str(val).strip()
    if not text.startswith("POINT"):
        return (np.nan, np.nan)
    try:
        inside = text.split("(", 1)[1].rstrip(")")
        # The text lists longitude first, then latitude.
        lon, lat = map(float, inside.split())
    except (IndexError, ValueError):
        return (np.nan, np.nan)
    return (lat, lon)


def _season_from_date(dt):
    """Map a date-like value to a season name, or "Unknown" if it is not a date."""
    ts = pd.to_datetime(dt, errors="coerce")
    # NaT must be caught explicitly: NaT.month is NaN, which would otherwise
    # fall through every month check and be mislabelled as "Fall".
    if pd.isna(ts):
        return "Unknown"
    return _SEASON_BY_MONTH[ts.month]


@st.cache_data(ttl=3600)
def load_data():
    """Download the Chicago Park District Activities CSV and normalise it.

    The result is cached by Streamlit for one hour (``ttl=3600``).

    Returns:
        A DataFrame with every source column (read as strings) plus:
        ``fee`` converted to numeric when present, ``latitude`` /
        ``longitude`` (NaN when unavailable), ``start_date`` / ``end_date``
        parsed to datetimes when present, ``activity_type_clean``,
        ``park_name`` and ``season``. The derived columns always exist, so
        downstream code can use them without checking.

    NOTE(review): Socrata CSV endpoints are paged and may return only the
    first page of rows by default -- confirm whether a ``$limit`` query
    parameter is needed to load the full dataset.
    """
    url = "https://data.cityofchicago.org/resource/tn7v-6rnw.csv"
    df = pd.read_csv(url, dtype=str)

    # Clean columns
    df.columns = [c.strip() for c in df.columns]

    # Fee -> numeric (unparseable values become NaN)
    if "fee" in df.columns:
        df["fee"] = pd.to_numeric(df["fee"], errors="coerce")

    # Extract lat/lon
    if "location" in df.columns:
        coords = df["location"].map(_parse_location)
        df["latitude"] = coords.map(lambda pair: pair[0])
        df["longitude"] = coords.map(lambda pair: pair[1])
    else:
        # Keep the coordinate columns defined so the map section can rely
        # on them; reuse existing columns if the dataset already has them.
        for axis in ("latitude", "longitude"):
            if axis in df.columns:
                df[axis] = pd.to_numeric(df[axis], errors="coerce")
            else:
                df[axis] = np.nan

    # Dates: coerce so bad values become NaT instead of staying as strings
    for c in ["start_date", "end_date"]:
        if c in df.columns:
            df[c] = pd.to_datetime(df[c], errors="coerce")

    # Activity type
    for col in ("activity_type", "program_type"):
        if col in df.columns:
            df["activity_type_clean"] = df[col].str.title().fillna("Unknown")
            break
    else:
        df["activity_type_clean"] = "Unknown"

    # Park names: first matching candidate column wins
    for col in ["park_name", "park", "site_name", "location_name"]:
        if col in df.columns:
            df["park_name"] = df[col].fillna("Unknown Park")
            break
    if "park_name" not in df.columns:
        df["park_name"] = "Unknown Park"

    # Derive season from the program start date
    if "start_date" in df.columns:
        df["season"] = df["start_date"].map(_season_from_date)
    else:
        df["season"] = "Unknown"

    return df
89
 
90
  df = load_data()
91
 
92
# ---------------------------------------------------
# PAGE TITLE
# ---------------------------------------------------
st.title("Chicago Parks in Motion: How Our City Plays")
st.markdown("**Authors:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
st.info("This is our **central public-facing visualization project**, created for a data journalism assignment.")

# ---------------------------------------------------
# FILTERS
# ---------------------------------------------------
st.sidebar.header("Filters")

categories = sorted(df["activity_type_clean"].unique())
chosen_category = st.sidebar.selectbox("Activity Category", ["All"] + categories)

seasons = sorted(df["season"].unique())
chosen_season = st.sidebar.selectbox("Season", ["All"] + seasons)

# Only offer the fee slider when there is a real range to slide over.
# An empty dataset (max is NaN) or an all-free dataset (max is 0) would
# otherwise produce a degenerate 0.0..0.0 / NaN slider.
fee_limit = None
if "fee" in df.columns:
    max_fee = float(df["fee"].fillna(0).max())
    if max_fee > 0:  # False for NaN as well
        fee_limit = st.sidebar.slider("Maximum Fee (USD)", 0.0, max_fee, max_fee)

search_park = st.sidebar.text_input("Search Park Name").strip()

st.sidebar.caption("Filters help readers explore the data easily without technical skills.")

# Filter logic: each active filter narrows the previous result.
filtered = df.copy()
if chosen_category != "All":
    filtered = filtered[filtered["activity_type_clean"] == chosen_category]
if chosen_season != "All":
    filtered = filtered[filtered["season"] == chosen_season]
if fee_limit is not None:
    # Programs with no listed fee are treated as free (0).
    filtered = filtered[filtered["fee"].fillna(0) <= fee_limit]
if search_park:
    # regex=False: the search box is free text, so characters such as "("
    # or "+" must match literally instead of raising a regex error.
    filtered = filtered[
        filtered["park_name"].str.contains(search_park, case=False, na=False, regex=False)
    ]

st.sidebar.markdown(f"**Programs Shown:** {len(filtered):,}")
131
 
132
# ---------------------------------------------------
# CENTRAL VISUALIZATION (MAP)
# ---------------------------------------------------
st.subheader("Central Visualization — Programs by Park (Interactive Map)")

# Keep only rows that have both coordinates. If the coordinate columns are
# missing altogether, fall back to an empty frame so the warning is shown
# instead of a KeyError.
coord_cols = ["latitude", "longitude"]
if all(col in filtered.columns for col in coord_cols):
    located = filtered.dropna(subset=coord_cols)
else:
    located = filtered.iloc[0:0]

if not located.empty:
    # One marker per park location, sized and coloured by program count.
    agg = (
        located.groupby(["park_name", "latitude", "longitude"])
        .size()
        .reset_index(name="count")
    )

    fig_map = px.scatter_mapbox(
        agg,
        lat="latitude",
        lon="longitude",
        size="count",
        size_max=30,
        color="count",
        color_continuous_scale="Cividis",
        hover_name="park_name",  # show which park a marker is when hovering
        zoom=10,
        height=550,
    )
    fig_map.update_layout(mapbox_style="open-street-map")
    st.plotly_chart(fig_map, use_container_width=True)

    st.write("""
**Explanation:**
This map shows which parks offer the most programs. Larger and darker circles represent parks with more activities.
The Cividis color scale makes it easy for all viewers—including those with color-vision differences—to understand intensity.
You can hover over any park to see its program count and use the filters to explore activity types, seasons, or fees.
""")
else:
    st.warning("No geographic coordinates available for mapping.")
162
 
163
# ---------------------------------------------------
# CONTEXTUAL VIS 1 — CATEGORY PIE CHART
# ---------------------------------------------------
st.subheader("Contextual Visualization 1 — Activity Category Breakdown")

# Count programs per category across the whole (unfiltered) dataset and
# label the two resulting columns for the chart.
cat_counts = (
    df["activity_type_clean"]
    .value_counts()
    .rename_axis("Category")
    .reset_index(name="Count")
)

pie_options = dict(
    names="Category",
    values="Count",
    color_discrete_sequence=px.colors.sequential.Cividis,
)
fig_cat = px.pie(cat_counts, **pie_options)
st.plotly_chart(fig_cat, use_container_width=True)

category_explanation = """
**Explanation:**
This pie chart shows the distribution of all activity categories across Chicago parks.
Some categories—like sports and youth programs—appear more often, while others are limited.
Using the same Cividis-inspired palette keeps the visuals consistent for the public.
"""
st.write(category_explanation)
185
 
186
# ---------------------------------------------------
# CONTEXTUAL VIS 2 — PROGRAMS BY SEASON
# ---------------------------------------------------
st.subheader("Contextual Visualization 2 — Programs by Season")

# Count programs per season across the whole (unfiltered) dataset.
season_counts = (
    df["season"]
    .value_counts()
    .rename_axis("Season")
    .reset_index(name="Program Count")
)

bar_options = dict(
    x="Season",
    y="Program Count",
    text="Program Count",
    color="Program Count",
    color_continuous_scale="Cividis",
    height=450,
)
fig_season = px.bar(season_counts, **bar_options)
# Put each bar's count label above the bar.
fig_season.update_traces(textposition="outside")
st.plotly_chart(fig_season, use_container_width=True)

season_explanation = """
**Explanation:**
This chart shows how program offerings change by season. Summer and fall tend to have more activities,
while winter shows fewer options. The color scale helps highlight the variation clearly and keeps the look consistent.
"""
st.write(season_explanation)
211
 
212
# ---------------------------------------------------
# FULL STORY SECTION (YOUR EXACT PARAGRAPHS)
# ---------------------------------------------------
# Three-paragraph narrative shown beneath the charts; the wording is kept
# exactly as supplied by the authors.
story_text = """
Chicago’s parks offer many kinds of activities for people of all ages. These include sports, arts, fitness classes, youth programs, and seasonal events.
Each row in this dataset represents one program offered at a park. Our main interactive map helps readers quickly see which parks offer the most activities.
Bigger or darker circles show parks with more programs, making it easy to spot busy parks versus quieter ones.

Where a park is located also matters. Neighborhoods that are larger or more central usually have more programs because they have more space, more facilities,
and more visitors. With the filters on the left, anyone can explore the data by season, activity type, price, or park name.
This makes the information easy to use even for someone with no data experience. For example, you can look for free programs,
summer-only programs, or activities at a specific park in your neighborhood.

This project also highlights questions about access and opportunities. Some parks offer a wide range of programs, while others have fewer options or mostly offer only one type of activity.
By looking at categories, seasons, and fees, readers can start to see patterns in which communities have more choices and which ones may need more support.
Our goal is to turn public data into something simple and useful, so Chicago residents and decision-makers can better understand how parks are serving their communities.
"""

st.markdown("---")  # horizontal rule separating the charts from the story
st.header("What This Data Story Is Showing")
st.markdown(story_text)
232
 
233
# ---------------------------------------------------
# CITATIONS
# ---------------------------------------------------
# Markdown bullet list naming the data source and the tools used.
citations_text = """
- Chicago Park District Activities — City of Chicago Data Portal
https://data.cityofchicago.org/Parks-Recreation/Chicago-Park-District-Activities/tn7v-6rnw
- All visualizations created by the authors using Streamlit & Plotly.
"""

st.markdown("---")  # horizontal rule above the citations
st.subheader("Citations & Data Sources")
st.markdown(citations_text)