alisharawat committed on
Commit
9a0cd94
·
verified ·
1 Parent(s): a77a172

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +23 -65
src/streamlit_app.py CHANGED
@@ -12,6 +12,7 @@ import numpy as np
12
  import plotly.express as px
13
  from urllib.parse import urlencode
14
 
 
15
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
16
 
17
  # -------------------------
@@ -29,7 +30,7 @@ def load_data():
29
  raise e
30
 
31
  # Normalize columns to lower for ease
32
- df.columns = [c.strip().lower() for c in df.columns]
33
  # Common useful columns in dataset may include:
34
  # 'program_name', 'activity_type', 'start_date', 'end_date', 'park_name', 'location', 'fee', 'age_group'
35
  # Convert numeric types where obvious
@@ -93,15 +94,15 @@ def load_data():
93
  lat_col, lon_col = "latitude", "longitude"
94
 
95
  # Basic date parsing
96
- for c in ["start date", "end date"]:
97
  if c in df.columns:
98
  df[c] = pd.to_datetime(df[c], errors="coerce")
99
 
100
  # Clean category / activity type naming
101
- if "activity type" in df.columns:
102
- df["activity_type_clean"] = df["activity type"].str.title().fillna("Unknown")
103
- elif "program type" in df.columns:
104
- df["activity_type_clean"] = df["program type"].str.title().fillna("Unknown")
105
  else:
106
  # fallback - try 'category' or create Unknown
107
  if "category" in df.columns:
@@ -109,12 +110,8 @@ def load_data():
109
  else:
110
  df["activity_type_clean"] = "Unknown"
111
 
112
- # ----------------------------------------------------
113
- # CLEAN PARK NAME — USE "Location Facility" ONLY
114
- # ----------------------------------------------------
115
- # Robust park name extraction
116
  possible_park_cols = [
117
- "location facility",
118
  "park_name",
119
  "park",
120
  "facility_name",
@@ -136,7 +133,7 @@ def load_data():
136
  # Worst-case fallback if no recognizable name exists
137
  df["park_name"] = "Unknown Park"
138
 
139
- return df
140
 
141
  df = load_data()
142
 
@@ -145,7 +142,7 @@ df = load_data()
145
  # Page header + sidebar
146
  # -------------------------
147
  st.title("Chicago Parks in Motion: How Our City Plays")
148
- st.markdown("**Author:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
149
 
150
  st.sidebar.header("Filters & Settings")
151
  # Activity categories
@@ -166,8 +163,8 @@ def season_from_date(dt):
166
  return "Summer"
167
  return "Fall"
168
 
169
- if "start date" in df.columns:
170
- df["season"] = df["start date"].map(season_from_date)
171
  else:
172
  df["season"] = "Unknown"
173
  seasons = sorted(df["season"].dropna().unique())
@@ -204,51 +201,8 @@ if park_query:
204
  # Keep a small sample message if huge
205
  st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
206
 
207
- # -------------------------
208
- # Central visualization: Map + bar toggle
209
- # -------------------------
210
- with main_col:
211
- st.subheader("Central Interactive Visualization — Programs by Park")
212
- view_type = st.radio("View type", ["Map (recommended)", "Bar chart (count by park)"], horizontal=True)
213
-
214
- if view_type.startswith("Map"):
215
- # Map: scatter with park coordinates if available
216
- if "latitude" in filtered.columns and "longitude" in filtered.columns and filtered[["latitude","longitude"]].dropna().shape[0] > 0:
217
- # Aggregate counts per park
218
- agg = filtered.groupby(["park_name", "latitude", "longitude"], dropna=True).size().reset_index(name="count")
219
- fig_map = px.scatter_mapbox(
220
- agg,
221
- lat="latitude",
222
- lon="longitude",
223
- size="count",
224
- size_max=30,
225
- hover_name="park_name",
226
- hover_data={"count": True},
227
- zoom=10,
228
- height=600,
229
- )
230
- fig_map.update_traces(marker=dict(color="#ff4b4b"))
231
- fig_map.update_layout(mapbox_style="open-street-map", margin={"r":0,"t":0,"l":0,"b":0})
232
- st.plotly_chart(fig_map, use_container_width=True)
233
- st.caption("Map shows parks with at least one program in the current filter; bubble size ~ number of programs.")
234
- else:
235
- st.warning("No geographic coordinates found in the loaded dataset. Try using the bar chart view instead.")
236
- else:
237
- # Bar chart: top parks by program count
238
- agg = filtered.groupby("park_name").size().reset_index(name="count") #HERE 2!!
239
- # Keep top N for readability
240
- top_n = 25
241
- agg_top = agg.head(top_n)
242
- fig_bar = px.bar(agg_top, x="count", y="park_name", orientation="h",
243
- labels={"count":"Number of programs","park_name":"Park"},
244
- height=700)
245
- fig_bar.update_layout(yaxis={'categoryorder':'total ascending'}, margin={"r":20,"t":10,"l":200,"b":10})
246
- st.plotly_chart(fig_bar, use_container_width=True)
247
- st.caption(f"Top {min(top_n, len(agg))} parks by number of programs (filtered).")
248
-
249
- # Small interactive table toggle
250
- if st.checkbox("Show program sample table (first 50 rows)"):
251
- st.dataframe(filtered.head(50))
252
 
253
  # -------------------------
254
  # Contextual Visualizations & Write-up
@@ -257,22 +211,25 @@ with side_col:
257
  st.subheader("Contextual Visual 1 — Activity category breakdown")
258
  # Overall counts by activity_type_clean
259
  cat_counts = df["activity_type_clean"].value_counts().reset_index()
260
- cat_counts.columns = ["activity type", "count"]
261
- fig_cat = px.pie(cat_counts, names="activity type", values="count", hole=0.35, height=300)
262
  st.plotly_chart(fig_cat, use_container_width=True)
263
  st.caption("Distribution of programs across activity categories (entire dataset).")
264
 
265
- st.markdown("---")
266
  # ---------------------------------------------
267
  # Contextual Visualization 2 — Programs by Season
268
  # ---------------------------------------------
269
- st.subheader("Contextual Visual 2 — Programs by Season")
 
 
 
270
  season_col = None
271
  for c in df.columns:
272
  if "season" in c.lower():
273
  season_col = c
274
  break
275
-
276
  if season_col is None:
277
  st.error("No season-related column found in dataset.")
278
  else:
@@ -309,6 +266,7 @@ with side_col:
309
  This provides temporal context about when most activities are offered.
310
  """)
311
 
 
312
  st.markdown("---")
313
  st.subheader("Data & Notebook")
314
  st.markdown("""
 
12
  import plotly.express as px
13
  from urllib.parse import urlencode
14
 
15
+
16
  st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
17
 
18
  # -------------------------
 
30
  raise e
31
 
32
  # Normalize column names by stripping surrounding whitespace (case is preserved)
33
+ df.columns = [c.strip() for c in df.columns]
34
  # Common useful columns in dataset may include:
35
  # 'program_name', 'activity_type', 'start_date', 'end_date', 'park_name', 'location', 'fee', 'age_group'
36
  # Convert numeric types where obvious
 
94
  lat_col, lon_col = "latitude", "longitude"
95
 
96
  # Basic date parsing
97
+ for c in ["start_date", "end_date"]:
98
  if c in df.columns:
99
  df[c] = pd.to_datetime(df[c], errors="coerce")
100
 
101
  # Clean category / activity type naming
102
+ if "activity_type" in df.columns:
103
+ df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
104
+ elif "program_type" in df.columns:
105
+ df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
106
  else:
107
  # fallback - try 'category' or create Unknown
108
  if "category" in df.columns:
 
110
  else:
111
  df["activity_type_clean"] = "Unknown"
112
 
113
+ # Robust park name extraction
 
 
 
114
  possible_park_cols = [
 
115
  "park_name",
116
  "park",
117
  "facility_name",
 
133
  # Worst-case fallback if no recognizable name exists
134
  df["park_name"] = "Unknown Park"
135
 
136
+ return df
137
 
138
  df = load_data()
139
 
 
142
  # Page header + sidebar
143
  # -------------------------
144
  st.title("Chicago Parks in Motion: How Our City Plays")
145
+ st.markdown("**Author:** Babz (Master’s Student, UIUC Information Management)")
146
 
147
  st.sidebar.header("Filters & Settings")
148
  # Activity categories
 
163
  return "Summer"
164
  return "Fall"
165
 
166
+ if "start_date" in df.columns:
167
+ df["season"] = df["start_date"].map(season_from_date)
168
  else:
169
  df["season"] = "Unknown"
170
  seasons = sorted(df["season"].dropna().unique())
 
201
  # Keep a small sample message if huge
202
  st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
203
 
204
+
205
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  # -------------------------
208
  # Contextual Visualizations & Write-up
 
211
  st.subheader("Contextual Visual 1 — Activity category breakdown")
212
  # Overall counts by activity_type_clean
213
  cat_counts = df["activity_type_clean"].value_counts().reset_index()
214
+ cat_counts.columns = ["activity_type", "count"]
215
+ fig_cat = px.pie(cat_counts, names="activity_type", values="count", hole=0.35, height=300)
216
  st.plotly_chart(fig_cat, use_container_width=True)
217
  st.caption("Distribution of programs across activity categories (entire dataset).")
218
 
219
+
220
  # ---------------------------------------------
221
  # Contextual Visualization 2 — Programs by Season
222
  # ---------------------------------------------
223
+ st.header("Contextual Visual 2 — Programs by Season")
224
+
225
+ # Locate the season column by name; a 'season' column is always created earlier in this script
226
+ # (derived from start_date when that column exists, otherwise filled with "Unknown")
227
  season_col = None
228
  for c in df.columns:
229
  if "season" in c.lower():
230
  season_col = c
231
  break
232
+
233
  if season_col is None:
234
  st.error("No season-related column found in dataset.")
235
  else:
 
266
  This provides temporal context about when most activities are offered.
267
  """)
268
 
269
+
270
  st.markdown("---")
271
  st.subheader("Data & Notebook")
272
  st.markdown("""