Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +23 -65
src/streamlit_app.py
CHANGED
|
@@ -12,6 +12,7 @@ import numpy as np
|
|
| 12 |
import plotly.express as px
|
| 13 |
from urllib.parse import urlencode
|
| 14 |
|
|
|
|
| 15 |
st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
|
| 16 |
|
| 17 |
# -------------------------
|
|
@@ -29,7 +30,7 @@ def load_data():
|
|
| 29 |
raise e
|
| 30 |
|
| 31 |
# Normalize columns to lower for ease
|
| 32 |
-
df.columns = [c.strip()
|
| 33 |
# Common useful columns in dataset may include:
|
| 34 |
# 'program_name', 'activity_type', 'start_date', 'end_date', 'park_name', 'location', 'fee', 'age_group'
|
| 35 |
# Convert numeric types where obvious
|
|
@@ -93,15 +94,15 @@ def load_data():
|
|
| 93 |
lat_col, lon_col = "latitude", "longitude"
|
| 94 |
|
| 95 |
# Basic date parsing
|
| 96 |
-
for c in ["
|
| 97 |
if c in df.columns:
|
| 98 |
df[c] = pd.to_datetime(df[c], errors="coerce")
|
| 99 |
|
| 100 |
# Clean category / activity type naming
|
| 101 |
-
if "
|
| 102 |
-
df["activity_type_clean"] = df["
|
| 103 |
-
elif "
|
| 104 |
-
df["activity_type_clean"] = df["
|
| 105 |
else:
|
| 106 |
# fallback - try 'category' or create Unknown
|
| 107 |
if "category" in df.columns:
|
|
@@ -109,12 +110,8 @@ def load_data():
|
|
| 109 |
else:
|
| 110 |
df["activity_type_clean"] = "Unknown"
|
| 111 |
|
| 112 |
-
|
| 113 |
-
# CLEAN PARK NAME — USE "Location Facility" ONLY
|
| 114 |
-
# ----------------------------------------------------
|
| 115 |
-
# Robust park name extraction
|
| 116 |
possible_park_cols = [
|
| 117 |
-
"location facility",
|
| 118 |
"park_name",
|
| 119 |
"park",
|
| 120 |
"facility_name",
|
|
@@ -136,7 +133,7 @@ def load_data():
|
|
| 136 |
# Worst-case fallback if no recognizable name exists
|
| 137 |
df["park_name"] = "Unknown Park"
|
| 138 |
|
| 139 |
-
return df
|
| 140 |
|
| 141 |
df = load_data()
|
| 142 |
|
|
@@ -145,7 +142,7 @@ df = load_data()
|
|
| 145 |
# Page header + sidebar
|
| 146 |
# -------------------------
|
| 147 |
st.title("Chicago Parks in Motion: How Our City Plays")
|
| 148 |
-
st.markdown("**Author:**
|
| 149 |
|
| 150 |
st.sidebar.header("Filters & Settings")
|
| 151 |
# Activity categories
|
|
@@ -166,8 +163,8 @@ def season_from_date(dt):
|
|
| 166 |
return "Summer"
|
| 167 |
return "Fall"
|
| 168 |
|
| 169 |
-
if "
|
| 170 |
-
df["season"] = df["
|
| 171 |
else:
|
| 172 |
df["season"] = "Unknown"
|
| 173 |
seasons = sorted(df["season"].dropna().unique())
|
|
@@ -204,51 +201,8 @@ if park_query:
|
|
| 204 |
# Keep a small sample message if huge
|
| 205 |
st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
# -------------------------
|
| 210 |
-
with main_col:
|
| 211 |
-
st.subheader("Central Interactive Visualization — Programs by Park")
|
| 212 |
-
view_type = st.radio("View type", ["Map (recommended)", "Bar chart (count by park)"], horizontal=True)
|
| 213 |
-
|
| 214 |
-
if view_type.startswith("Map"):
|
| 215 |
-
# Map: scatter with park coordinates if available
|
| 216 |
-
if "latitude" in filtered.columns and "longitude" in filtered.columns and filtered[["latitude","longitude"]].dropna().shape[0] > 0:
|
| 217 |
-
# Aggregate counts per park
|
| 218 |
-
agg = filtered.groupby(["park_name", "latitude", "longitude"], dropna=True).size().reset_index(name="count")
|
| 219 |
-
fig_map = px.scatter_mapbox(
|
| 220 |
-
agg,
|
| 221 |
-
lat="latitude",
|
| 222 |
-
lon="longitude",
|
| 223 |
-
size="count",
|
| 224 |
-
size_max=30,
|
| 225 |
-
hover_name="park_name",
|
| 226 |
-
hover_data={"count": True},
|
| 227 |
-
zoom=10,
|
| 228 |
-
height=600,
|
| 229 |
-
)
|
| 230 |
-
fig_map.update_traces(marker=dict(color="#ff4b4b"))
|
| 231 |
-
fig_map.update_layout(mapbox_style="open-street-map", margin={"r":0,"t":0,"l":0,"b":0})
|
| 232 |
-
st.plotly_chart(fig_map, use_container_width=True)
|
| 233 |
-
st.caption("Map shows parks with at least one program in the current filter; bubble size ~ number of programs.")
|
| 234 |
-
else:
|
| 235 |
-
st.warning("No geographic coordinates found in the loaded dataset. Try using the bar chart view instead.")
|
| 236 |
-
else:
|
| 237 |
-
# Bar chart: top parks by program count
|
| 238 |
-
agg = filtered.groupby("park_name").size().reset_index(name="count") #HERE 2!!
|
| 239 |
-
# Keep top N for readability
|
| 240 |
-
top_n = 25
|
| 241 |
-
agg_top = agg.head(top_n)
|
| 242 |
-
fig_bar = px.bar(agg_top, x="count", y="park_name", orientation="h",
|
| 243 |
-
labels={"count":"Number of programs","park_name":"Park"},
|
| 244 |
-
height=700)
|
| 245 |
-
fig_bar.update_layout(yaxis={'categoryorder':'total ascending'}, margin={"r":20,"t":10,"l":200,"b":10})
|
| 246 |
-
st.plotly_chart(fig_bar, use_container_width=True)
|
| 247 |
-
st.caption(f"Top {min(top_n, len(agg))} parks by number of programs (filtered).")
|
| 248 |
-
|
| 249 |
-
# Small interactive table toggle
|
| 250 |
-
if st.checkbox("Show program sample table (first 50 rows)"):
|
| 251 |
-
st.dataframe(filtered.head(50))
|
| 252 |
|
| 253 |
# -------------------------
|
| 254 |
# Contextual Visualizations & Write-up
|
|
@@ -257,22 +211,25 @@ with side_col:
|
|
| 257 |
st.subheader("Contextual Visual 1 — Activity category breakdown")
|
| 258 |
# Overall counts by activity_type_clean
|
| 259 |
cat_counts = df["activity_type_clean"].value_counts().reset_index()
|
| 260 |
-
cat_counts.columns = ["
|
| 261 |
-
fig_cat = px.pie(cat_counts, names="
|
| 262 |
st.plotly_chart(fig_cat, use_container_width=True)
|
| 263 |
st.caption("Distribution of programs across activity categories (entire dataset).")
|
| 264 |
|
| 265 |
-
|
| 266 |
# ---------------------------------------------
|
| 267 |
# Contextual Visualization 2 — Programs by Season
|
| 268 |
# ---------------------------------------------
|
| 269 |
-
st.
|
|
|
|
|
|
|
|
|
|
| 270 |
season_col = None
|
| 271 |
for c in df.columns:
|
| 272 |
if "season" in c.lower():
|
| 273 |
season_col = c
|
| 274 |
break
|
| 275 |
-
|
| 276 |
if season_col is None:
|
| 277 |
st.error("No season-related column found in dataset.")
|
| 278 |
else:
|
|
@@ -309,6 +266,7 @@ with side_col:
|
|
| 309 |
This provides temporal context about when most activities are offered.
|
| 310 |
""")
|
| 311 |
|
|
|
|
| 312 |
st.markdown("---")
|
| 313 |
st.subheader("Data & Notebook")
|
| 314 |
st.markdown("""
|
|
|
|
| 12 |
import plotly.express as px
|
| 13 |
from urllib.parse import urlencode
|
| 14 |
|
| 15 |
+
|
| 16 |
st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
|
| 17 |
|
| 18 |
# -------------------------
|
|
|
|
| 30 |
raise e
|
| 31 |
|
| 32 |
# Normalize columns to lower for ease
|
| 33 |
+
df.columns = [c.strip() for c in df.columns]
|
| 34 |
# Common useful columns in dataset may include:
|
| 35 |
# 'program_name', 'activity_type', 'start_date', 'end_date', 'park_name', 'location', 'fee', 'age_group'
|
| 36 |
# Convert numeric types where obvious
|
|
|
|
| 94 |
lat_col, lon_col = "latitude", "longitude"
|
| 95 |
|
| 96 |
# Basic date parsing
|
| 97 |
+
for c in ["start_date", "end_date"]:
|
| 98 |
if c in df.columns:
|
| 99 |
df[c] = pd.to_datetime(df[c], errors="coerce")
|
| 100 |
|
| 101 |
# Clean category / activity type naming
|
| 102 |
+
if "activity_type" in df.columns:
|
| 103 |
+
df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
|
| 104 |
+
elif "program_type" in df.columns:
|
| 105 |
+
df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
|
| 106 |
else:
|
| 107 |
# fallback - try 'category' or create Unknown
|
| 108 |
if "category" in df.columns:
|
|
|
|
| 110 |
else:
|
| 111 |
df["activity_type_clean"] = "Unknown"
|
| 112 |
|
| 113 |
+
# Robust park name extraction
|
|
|
|
|
|
|
|
|
|
| 114 |
possible_park_cols = [
|
|
|
|
| 115 |
"park_name",
|
| 116 |
"park",
|
| 117 |
"facility_name",
|
|
|
|
| 133 |
# Worst-case fallback if no recognizable name exists
|
| 134 |
df["park_name"] = "Unknown Park"
|
| 135 |
|
| 136 |
+
return df
|
| 137 |
|
| 138 |
df = load_data()
|
| 139 |
|
|
|
|
| 142 |
# Page header + sidebar
|
| 143 |
# -------------------------
|
| 144 |
st.title("Chicago Parks in Motion: How Our City Plays")
|
| 145 |
+
st.markdown("**Author:** Babz (Master’s Student, UIUC Information Management)")
|
| 146 |
|
| 147 |
st.sidebar.header("Filters & Settings")
|
| 148 |
# Activity categories
|
|
|
|
| 163 |
return "Summer"
|
| 164 |
return "Fall"
|
| 165 |
|
| 166 |
+
if "start_date" in df.columns:
|
| 167 |
+
df["season"] = df["start_date"].map(season_from_date)
|
| 168 |
else:
|
| 169 |
df["season"] = "Unknown"
|
| 170 |
seasons = sorted(df["season"].dropna().unique())
|
|
|
|
| 201 |
# Keep a small sample message if huge
|
| 202 |
st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
|
| 203 |
|
| 204 |
+
|
| 205 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
# -------------------------
|
| 208 |
# Contextual Visualizations & Write-up
|
|
|
|
| 211 |
st.subheader("Contextual Visual 1 — Activity category breakdown")
|
| 212 |
# Overall counts by activity_type_clean
|
| 213 |
cat_counts = df["activity_type_clean"].value_counts().reset_index()
|
| 214 |
+
cat_counts.columns = ["activity_type", "count"]
|
| 215 |
+
fig_cat = px.pie(cat_counts, names="activity_type", values="count", hole=0.35, height=300)
|
| 216 |
st.plotly_chart(fig_cat, use_container_width=True)
|
| 217 |
st.caption("Distribution of programs across activity categories (entire dataset).")
|
| 218 |
|
| 219 |
+
|
| 220 |
# ---------------------------------------------
|
| 221 |
# Contextual Visualization 2 — Programs by Season
|
| 222 |
# ---------------------------------------------
|
| 223 |
+
st.header("Contextual Visual 2 — Programs by Season")
|
| 224 |
+
|
| 225 |
+
# Check that the dataset has a 'season' column or similar
|
| 226 |
+
# Your dataset typically has 'season' or 'season_name' — adjust if needed
|
| 227 |
season_col = None
|
| 228 |
for c in df.columns:
|
| 229 |
if "season" in c.lower():
|
| 230 |
season_col = c
|
| 231 |
break
|
| 232 |
+
|
| 233 |
if season_col is None:
|
| 234 |
st.error("No season-related column found in dataset.")
|
| 235 |
else:
|
|
|
|
| 266 |
This provides temporal context about when most activities are offered.
|
| 267 |
""")
|
| 268 |
|
| 269 |
+
|
| 270 |
st.markdown("---")
|
| 271 |
st.subheader("Data & Notebook")
|
| 272 |
st.markdown("""
|