Spaces:

is445fall2025
/

project3.1

Sleeping

App Files Files Community

alisharawat committed on Dec 8, 2025

Commit

9a0cd94

verified ·

1 Parent(s): a77a172

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +23 -65

src/streamlit_app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import numpy as np
 import plotly.express as px
 from urllib.parse import urlencode
 st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
 # -------------------------
@@ -29,7 +30,7 @@ def load_data():
         raise e
     # Normalize columns to lower for ease
-    df.columns = [c.strip().lower() for c in df.columns]
     # Common useful columns in dataset may include:
     # 'program_name', 'activity_type', 'start_date', 'end_date', 'park_name', 'location', 'fee', 'age_group'
     # Convert numeric types where obvious
@@ -93,15 +94,15 @@ def load_data():
         lat_col, lon_col = "latitude", "longitude"
     # Basic date parsing
-    for c in ["start date", "end date"]:
         if c in df.columns:
             df[c] = pd.to_datetime(df[c], errors="coerce")
     # Clean category / activity type naming
-    if "activity type" in df.columns:
-        df["activity_type_clean"] = df["activity type"].str.title().fillna("Unknown")
-    elif "program type" in df.columns:
-        df["activity_type_clean"] = df["program type"].str.title().fillna("Unknown")
     else:
         # fallback - try 'category' or create Unknown
         if "category" in df.columns:
@@ -109,12 +110,8 @@ def load_data():
         else:
             df["activity_type_clean"] = "Unknown"
-    # ----------------------------------------------------
-    # CLEAN PARK NAME — USE "Location Facility" ONLY
-    # ----------------------------------------------------
-    # Robust park name extraction
     possible_park_cols = [
-        "location facility",
         "park_name",
         "park",
         "facility_name",
@@ -136,7 +133,7 @@ def load_data():
         # Worst-case fallback if no recognizable name exists
         df["park_name"] = "Unknown Park"
-    return df
 df = load_data()
@@ -145,7 +142,7 @@ df = load_data()
 # Page header + sidebar
 # -------------------------
 st.title("Chicago Parks in Motion: How Our City Plays")
-st.markdown("**Author:** Juhi Khare (jkhare2), Alisha Rawat (alishar4), Sutthana Koo-Anupong (sk188)")
 st.sidebar.header("Filters & Settings")
 # Activity categories
@@ -166,8 +163,8 @@ def season_from_date(dt):
         return "Summer"
     return "Fall"
-if "start date" in df.columns:
-    df["season"] = df["start date"].map(season_from_date)
 else:
     df["season"] = "Unknown"
 seasons = sorted(df["season"].dropna().unique())
@@ -204,51 +201,8 @@ if park_query:
 # Keep a small sample message if huge
 st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
-# -------------------------
-# Central visualization: Map + bar toggle
-# -------------------------
-with main_col:
-    st.subheader("Central Interactive Visualization — Programs by Park")
-    view_type = st.radio("View type", ["Map (recommended)", "Bar chart (count by park)"], horizontal=True)
-    if view_type.startswith("Map"):
-        # Map: scatter with park coordinates if available
-        if "latitude" in filtered.columns and "longitude" in filtered.columns and filtered[["latitude","longitude"]].dropna().shape[0] > 0:
-            # Aggregate counts per park
-            agg = filtered.groupby(["park_name", "latitude", "longitude"], dropna=True).size().reset_index(name="count")
-            fig_map = px.scatter_mapbox(
-                agg,
-                lat="latitude",
-                lon="longitude",
-                size="count",
-                size_max=30,
-                hover_name="park_name",
-                hover_data={"count": True},
-                zoom=10,
-                height=600,
-            )
-            fig_map.update_traces(marker=dict(color="#ff4b4b"))
-            fig_map.update_layout(mapbox_style="open-street-map", margin={"r":0,"t":0,"l":0,"b":0})
-            st.plotly_chart(fig_map, use_container_width=True)
-            st.caption("Map shows parks with at least one program in the current filter; bubble size ~ number of programs.")
-        else:
-            st.warning("No geographic coordinates found in the loaded dataset. Try using the bar chart view instead.")
-    else:
-        # Bar chart: top parks by program count
-        agg = filtered.groupby("park_name").size().reset_index(name="count") #HERE 2!!
-        # Keep top N for readability
-        top_n = 25
-        agg_top = agg.head(top_n)
-        fig_bar = px.bar(agg_top, x="count", y="park_name", orientation="h",
-                         labels={"count":"Number of programs","park_name":"Park"},
-                         height=700)
-        fig_bar.update_layout(yaxis={'categoryorder':'total ascending'}, margin={"r":20,"t":10,"l":200,"b":10})
-        st.plotly_chart(fig_bar, use_container_width=True)
-        st.caption(f"Top {min(top_n, len(agg))} parks by number of programs (filtered).")
-    # Small interactive table toggle
-    if st.checkbox("Show program sample table (first 50 rows)"):
-        st.dataframe(filtered.head(50))
 # -------------------------
 # Contextual Visualizations & Write-up
@@ -257,22 +211,25 @@ with side_col:
     st.subheader("Contextual Visual 1 — Activity category breakdown")
     # Overall counts by activity_type_clean
     cat_counts = df["activity_type_clean"].value_counts().reset_index()
-    cat_counts.columns = ["activity type", "count"]
-    fig_cat = px.pie(cat_counts, names="activity type", values="count", hole=0.35, height=300)
     st.plotly_chart(fig_cat, use_container_width=True)
     st.caption("Distribution of programs across activity categories (entire dataset).")
-    st.markdown("---")
     # ---------------------------------------------
     # Contextual Visualization 2 — Programs by Season
     # ---------------------------------------------
-    st.subheader("Contextual Visual 2 — Programs by Season")
     season_col = None
     for c in df.columns:
         if "season" in c.lower():
             season_col = c
             break
     if season_col is None:
         st.error("No season-related column found in dataset.")
     else:
@@ -309,6 +266,7 @@ with side_col:
         This provides temporal context about when most activities are offered.
         """)
     st.markdown("---")
     st.subheader("Data & Notebook")
     st.markdown("""

 import plotly.express as px
 from urllib.parse import urlencode
 st.set_page_config(page_title="Chicago Parks in Motion", layout="wide")
 # -------------------------
         raise e
     # Strip surrounding whitespace from column names (original case is preserved)
+    df.columns = [c.strip() for c in df.columns]
     # Common useful columns in dataset may include:
     # 'program_name', 'activity_type', 'start_date', 'end_date', 'park_name', 'location', 'fee', 'age_group'
     # Convert numeric types where obvious
         lat_col, lon_col = "latitude", "longitude"
     # Basic date parsing
+    for c in ["start_date", "end_date"]:
         if c in df.columns:
             df[c] = pd.to_datetime(df[c], errors="coerce")
     # Clean category / activity type naming
+    if "activity_type" in df.columns:
+        df["activity_type_clean"] = df["activity_type"].str.title().fillna("Unknown")
+    elif "program_type" in df.columns:
+        df["activity_type_clean"] = df["program_type"].str.title().fillna("Unknown")
     else:
         # fallback - try 'category' or create Unknown
         if "category" in df.columns:
         else:
             df["activity_type_clean"] = "Unknown"
+    # Robust park name extraction
     possible_park_cols = [
         "park_name",
         "park",
         "facility_name",
         # Worst-case fallback if no recognizable name exists
         df["park_name"] = "Unknown Park"
+    return df
 df = load_data()
 # Page header + sidebar
 # -------------------------
 st.title("Chicago Parks in Motion: How Our City Plays")
+st.markdown("**Author:** Babz (Master’s Student, UIUC Information Management)")
 st.sidebar.header("Filters & Settings")
 # Activity categories
         return "Summer"
     return "Fall"
+if "start_date" in df.columns:
+    df["season"] = df["start_date"].map(season_from_date)
 else:
     df["season"] = "Unknown"
 seasons = sorted(df["season"].dropna().unique())
 # Show how many programs match the current filters
 st.sidebar.markdown(f"**Programs in current filter:** {len(filtered):,}")
 # -------------------------
 # Contextual Visualizations & Write-up
     st.subheader("Contextual Visual 1 — Activity category breakdown")
     # Overall counts by activity_type_clean
     cat_counts = df["activity_type_clean"].value_counts().reset_index()
+    cat_counts.columns = ["activity_type", "count"]
+    fig_cat = px.pie(cat_counts, names="activity_type", values="count", hole=0.35, height=300)
     st.plotly_chart(fig_cat, use_container_width=True)
     st.caption("Distribution of programs across activity categories (entire dataset).")
     # ---------------------------------------------
     # Contextual Visualization 2 — Programs by Season
     # ---------------------------------------------
+    st.header("Contextual Visual 2 — Programs by Season")
+    # Locate the first column whose name contains "season" (case-insensitive),
+    # e.g. 'season' or 'season_name'; report an error below if none exists.
     season_col = None
     for c in df.columns:
         if "season" in c.lower():
             season_col = c
             break
     if season_col is None:
         st.error("No season-related column found in dataset.")
     else:
         This provides temporal context about when most activities are offered.
         """)
     st.markdown("---")
     st.subheader("Data & Notebook")
     st.markdown("""