Spaces:
Sleeping
Sleeping
| # app.py | |
| # --------------------------------------------------------- | |
| # Milk Tea & Dessert Location Recommendation — Streamlit App | |
| # --------------------------------------------------------- | |
import html

import folium
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from folium.plugins import HeatMap
from streamlit_folium import st_folium
# ----------------------
# Input files & page setup
# ----------------------
# CSV inputs, looked up relative to the folder the app is started from.
MASTER_PATH = "final_master_clean.csv"
YELP_PATH = "yelp_with_comm.csv"
# Optional input: the app keeps running when this file is absent.
LP_RECO_PATH = "lincoln_park_recommendations.csv"

# Wide layout with the navigation sidebar opened on first load.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_title="Milk Tea Location Intelligence",
)
| # ---------------------- | |
| # Helper utilities | |
| # ---------------------- | |
def load_csv(path, expected=True):
    """
    Read a CSV file into a DataFrame.

    Returns None when the file does not exist. If `expected` is true, a
    Streamlit error message is shown in that case; otherwise the miss is silent.
    """
    frame = None
    try:
        frame = pd.read_csv(path)
    except FileNotFoundError:
        # Only required inputs are reported to the user.
        if expected:
            st.error(f"Could not find `{path}` in the current folder.")
    return frame
def pick_col(df, candidates):
    """
    Resolve a column name that may be spelled in several ways.

    Walks `candidates` in order and returns the first one that is a column
    of `df`. Raises KeyError when none of them is present.
    """
    present = [name for name in candidates if name in df.columns]
    if not present:
        raise KeyError(f"None of these columns found: {candidates}")
    return present[0]
def ensure_numeric(df, cols):
    """
    Convert the listed columns of `df` to numeric dtype, in place.

    Names in `cols` that are not columns of `df` are skipped. Values that
    cannot be parsed become NaN. The same (mutated) DataFrame is returned.
    """
    targets = [name for name in cols if name in df.columns]
    for name in targets:
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df
# ----------------------
# Load data
# ----------------------
df_master = load_csv(MASTER_PATH)
df_yelp = load_csv(YELP_PATH)
df_lp = load_csv(LP_RECO_PATH, expected=False)

# The master and Yelp tables are mandatory; halt this script run without them.
if df_master is None or df_yelp is None:
    st.stop()

# Tidy the headers of every table that was loaded: trim surrounding
# whitespace and turn inner spaces into underscores.
for _frame in (df_master, df_yelp, df_lp):
    if _frame is None:
        continue
    _frame.columns = [header.strip().replace(" ", "_") for header in _frame.columns]
# Required master-table columns; each is accepted in snake_case or Title_Case.
COMM_NAME_COL = pick_col(df_master, ["community_name", "Community_Name"])
COMM_ID_COL = pick_col(df_master, ["community_id", "Community_ID"])
TOTAL_POP_COL = pick_col(df_master, ["total_pop", "Total_Pop"])
PCT_18_34_Z_COL = pick_col(df_master, ["pct_18_34_z", "Pct_18_34_Z"])
PCT_ASIAN_Z_COL = pick_col(df_master, ["pct_asian_z", "Pct_Asian_Z"])
HI_INC_SHARE_Z_COL = pick_col(df_master, ["high_income_share_z", "High_Income_Share_Z"])
CTA_RIDES_Z_COL = pick_col(df_master, ["total_cta_rides_z", "Total_Cta_Rides_Z"])
DIVVY_Z_COL = pick_col(df_master, ["num_divvy_stations_z", "Num_Divvy_Stations_Z"])
CRIME_Z_COL = pick_col(df_master, ["crime_count_z", "Crime_Count_Z"])
FINAL_SCORE_COL = pick_col(df_master, ["final_score_adj", "Final_Score_Adj"])


def _optional_master_col(candidates):
    """Return the first candidate that is a df_master column, or None."""
    for name in candidates:
        if name in df_master.columns:
            return name
    return None


# Competition columns are optional; they resolve to None when absent.
NUM_COMP_COL = _optional_master_col(["num_competitors", "Num_Competitors"])
WHITE_SPACE_COL = _optional_master_col(["final_score_white_space", "Final_Score_White_Space"])

# Coerce every metric column to numeric. Unresolved (None) entries are not
# column names, so ensure_numeric passes over them.
_numeric_master_cols = [
    TOTAL_POP_COL,
    PCT_18_34_Z_COL,
    PCT_ASIAN_Z_COL,
    HI_INC_SHARE_Z_COL,
    CTA_RIDES_Z_COL,
    DIVVY_Z_COL,
    CRIME_Z_COL,
    FINAL_SCORE_COL,
    NUM_COMP_COL,
    WHITE_SPACE_COL,
]
df_master = ensure_numeric(df_master, _numeric_master_cols)
# Yelp columns (each accepted in lower-case or Title_Case)
YELP_LAT_COL = pick_col(df_yelp, ["lat", "Latitude"])
YELP_LON_COL = pick_col(df_yelp, ["lon", "Longitude"])
YELP_NAME_COL = pick_col(df_yelp, ["name", "Name"])
YELP_RATING_COL = pick_col(df_yelp, ["rating", "Rating"])
YELP_REVIEWS_COL = pick_col(df_yelp, ["review_count", "Review_Count"])
YELP_COMM_NAME_COL = pick_col(df_yelp, ["community_name", "Community_Name"])

# Coordinates, ratings and review counts are averaged, compared against
# thresholds and plotted further down, so coerce them the same way the master
# metrics are coerced. Unparseable cells become NaN, which the pages already
# treat as "missing".
df_yelp = ensure_numeric(
    df_yelp,
    [YELP_LAT_COL, YELP_LON_COL, YELP_RATING_COL, YELP_REVIEWS_COL],
)
# ----------------------
# Sidebar Navigation
# ----------------------
st.sidebar.title("Navigation")

# Page names; the page sections below compare `page` against these strings.
_page_names = [
    "Home",
    "Community Rankings",
    "Accessibility",
    "Competitive Landscape",
    "Lincoln Park Recommendations",
    "Radar Chart Explorer",
]
page = st.sidebar.radio("Go to", _page_names)

# How many of the best-scoring communities to highlight.
top_n = st.sidebar.slider(
    "Highlight top N communities by final score",
    min_value=3,
    max_value=20,
    value=10,
)

# Names of the `top_n` communities with the highest final score.
_by_score = df_master.sort_values(FINAL_SCORE_COL, ascending=False)
top_communities = _by_score.head(top_n)[COMM_NAME_COL].tolist()
# ----------------------
# PAGE 1 — HOME
# ----------------------
if page == "Home":
    st.title("Milk Tea & Dessert Location Intelligence — Chicago")
    # Blank lines around the list keep the closing sentence out of item 5.
    st.markdown(
        """
        This interactive app summarizes your full analysis pipeline:

        1. **Market potential** — demographics, target age (18–34), Asian population, income.
        2. **Accessibility** — CTA ridership and Divvy station density.
        3. **Safety** — lower relative crime vs other communities.
        4. **Competition** — existing bubble-tea & dessert shops from Yelp.
        5. **Final recommendation** — best **community + street segments** for a new store.

        Use the left sidebar to explore each section like a mini web application.
        """
    )

    # Lincoln Park's final score; NaN when the community is not in the table.
    lp_scores = df_master.loc[df_master[COMM_NAME_COL] == "LINCOLN PARK", FINAL_SCORE_COL]
    lincoln_score = float(lp_scores.iloc[0]) if not lp_scores.empty else np.nan
    lincoln_label = "N/A" if np.isnan(lincoln_score) else f"{lincoln_score:0.2f}"

    # Simple summary KPIs
    c1, c2, c3, c4 = st.columns(4)
    with c1:
        st.metric("Communities evaluated", df_master[COMM_ID_COL].nunique())
    with c2:
        st.metric("Competitor shops (Yelp)", len(df_yelp))
    with c3:
        # Report how many communities were actually selected; this can be
        # fewer than the slider value when the table is short.
        st.metric("Top candidate communities", len(top_communities))
    with c4:
        st.metric("Lincoln Park final score", lincoln_label)
| # ---------------------- | |
| # PAGE 2 β COMMUNITY RANKINGS | |
| # ---------------------- | |
| elif page == "Community Rankings": | |
| st.title("Community Evaluation & Ranking") | |
| # Ranking barplot | |
| ranking_df = df_master.sort_values(FINAL_SCORE_COL, ascending=False) | |
| fig = px.bar( | |
| ranking_df, | |
| x=FINAL_SCORE_COL, | |
| y=COMM_NAME_COL, | |
| orientation="h", | |
| color=COMM_NAME_COL, | |
| color_discrete_sequence=px.colors.sequential.Viridis, | |
| title="Final Opportunity Score by Community", | |
| ) | |
| fig.update_layout(showlegend=False, height=800) | |
| st.plotly_chart(fig, use_container_width=True) | |
| st.subheader("Target Population vs Asian Population") | |
| fig2 = px.scatter( | |
| df_master, | |
| x=PCT_18_34_Z_COL, | |
| y=PCT_ASIAN_Z_COL, | |
| size=TOTAL_POP_COL, | |
| color=COMM_NAME_COL, | |
| hover_name=COMM_NAME_COL, | |
| title="Z-scores: 18β34 Population vs Asian Population", | |
| ) | |
| # Highlight top N | |
| fig2.update_traces( | |
| selector=lambda t: t.name in top_communities, | |
| marker=dict(line=dict(width=2, color="black")), | |
| ) | |
| fig2.update_layout(showlegend=False) | |
| st.plotly_chart(fig2, use_container_width=True) | |
| # ---------------------- | |
| # PAGE 3 β ACCESSIBILITY | |
| # ---------------------- | |
| elif page == "Accessibility": | |
| st.title("Accessibility β CTA & Divvy") | |
| c1, c2 = st.columns(2) | |
| with c1: | |
| st.subheader("CTA Rides vs Divvy Stations") | |
| fig = px.scatter( | |
| df_master, | |
| x=CTA_RIDES_Z_COL, | |
| y=DIVVY_Z_COL, | |
| size=TOTAL_POP_COL, | |
| color=COMM_NAME_COL, | |
| hover_name=COMM_NAME_COL, | |
| labels={ | |
| CTA_RIDES_Z_COL: "CTA Rides (Z)", | |
| DIVVY_Z_COL: "Divvy Stations (Z)", | |
| }, | |
| ) | |
| fig.update_layout(showlegend=False) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with c2: | |
| st.subheader("Top Communities by CTA Rides") | |
| top_cta = df_master.sort_values(CTA_RIDES_Z_COL, ascending=False).head(15) | |
| fig_bar = px.bar( | |
| top_cta, | |
| x=CTA_RIDES_Z_COL, | |
| y=COMM_NAME_COL, | |
| orientation="h", | |
| color=COMM_NAME_COL, | |
| labels={CTA_RIDES_Z_COL: "CTA Rides (Z)"}, | |
| ) | |
| fig_bar.update_layout(showlegend=False, height=600) | |
| st.plotly_chart(fig_bar, use_container_width=True) | |
| st.markdown( | |
| """ | |
| **Interpretation**: Communities in the upper-right quadrant combine **high transit access** | |
| and **dense micromobility (Divvy)** β excellent for a foot-traffic-driven drink shop. | |
| """ | |
| ) | |
| # ---------------------- | |
| # PAGE 4 β COMPETITIVE LANDSCAPE | |
| # ---------------------- | |
| elif page == "Competitive Landscape": | |
| st.title("Competitive Landscape β Bubble Tea & Dessert Shops") | |
| # Aggregate competition by community | |
| comp_by_comm = ( | |
| df_yelp.groupby(YELP_COMM_NAME_COL) | |
| .agg( | |
| num_shops=(YELP_NAME_COL, "count"), | |
| avg_rating=(YELP_RATING_COL, "mean"), | |
| avg_reviews=(YELP_REVIEWS_COL, "mean"), | |
| ) | |
| .reset_index() | |
| ) | |
| st.subheader("Competition Heatmap by Community") | |
| fig = px.treemap( | |
| comp_by_comm, | |
| path=[YELP_COMM_NAME_COL], | |
| values="num_shops", | |
| color="avg_rating", | |
| color_continuous_scale="RdYlGn", | |
| color_continuous_midpoint=3.8, | |
| hover_data={"avg_reviews": True}, | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| st.subheader("Competitors Map (All Communities)") | |
| m = folium.Map(location=[41.88, -87.63], zoom_start=11, tiles="cartodbpositron") | |
| # Color scale based on rating | |
| def rating_color(r): | |
| if pd.isna(r): | |
| return "gray" | |
| if r >= 4.5: | |
| return "green" | |
| if r >= 4.0: | |
| return "orange" | |
| return "red" | |
| for _, row in df_yelp.iterrows(): | |
| lat = row[YELP_LAT_COL] | |
| lon = row[YELP_LON_COL] | |
| if pd.isna(lat) or pd.isna(lon): | |
| continue | |
| popup = f"{row[YELP_NAME_COL]}<br>Rating: {row[YELP_RATING_COL]} ({row[YELP_REVIEWS_COL]} reviews)" | |
| folium.CircleMarker( | |
| location=[lat, lon], | |
| radius=4, | |
| color=rating_color(row[YELP_RATING_COL]), | |
| fill=True, | |
| fill_opacity=0.8, | |
| popup=popup, | |
| ).add_to(m) | |
| st_folium(m, width=1000, height=600) | |
| # Heatmap of competitor density | |
| st.subheader("Competitor Density Heatmap") | |
| hm_map = folium.Map(location=[41.88, -87.63], zoom_start=11, tiles="cartodbpositron") | |
| heat_data = df_yelp[[YELP_LAT_COL, YELP_LON_COL]].dropna().values.tolist() | |
| if heat_data: | |
| HeatMap(heat_data, radius=15, blur=10).add_to(hm_map) | |
| st_folium(hm_map, width=1000, height=600) | |
| # ---------------------- | |
| # PAGE 5 β LINCOLN PARK RECOMMENDATIONS | |
| # ---------------------- | |
| elif page == "Lincoln Park Recommendations": | |
| st.title("Recommended Location β Lincoln Park Focus") | |
| if df_lp is None: | |
| st.warning("`lincoln_park_recommendations.csv` not found. " | |
| "Add it to the folder to see street-level recommendations.") | |
| lp_mask = df_yelp[YELP_COMM_NAME_COL] == "LINCOLN PARK" | |
| df_lp_yelp = df_yelp[lp_mask].copy() | |
| st.subheader("Existing Competitors in Lincoln Park") | |
| st.dataframe( | |
| df_lp_yelp[[YELP_NAME_COL, YELP_RATING_COL, YELP_REVIEWS_COL]].sort_values( | |
| YELP_RATING_COL, ascending=False | |
| ), | |
| use_container_width=True, | |
| ) | |
| st.subheader("Lincoln Park β Competitors & Recommended Spots") | |
| m_lp = folium.Map(location=[41.92, -87.65], zoom_start=14, tiles="cartodbpositron") | |
| # Existing shops | |
| for _, row in df_lp_yelp.iterrows(): | |
| folium.CircleMarker( | |
| location=[row[YELP_LAT_COL], row[YELP_LON_COL]], | |
| radius=5, | |
| color="red", | |
| fill=True, | |
| fill_opacity=0.8, | |
| popup=f"{row[YELP_NAME_COL]} (Rating {row[YELP_RATING_COL]})", | |
| ).add_to(m_lp) | |
| # Recommended points (if file exists) | |
| if df_lp is not None: | |
| lat_col = pick_col(df_lp, ["lat", "Latitude"]) | |
| lon_col = pick_col(df_lp, ["lon", "Longitude"]) | |
| rank_col = next((c for c in ["rank", "priority_rank", "Priority_Rank"] if c in df_lp.columns), None) | |
| label_col = next((c for c in ["label", "location_name", "Location_Name"] if c in df_lp.columns), None) | |
| for _, row in df_lp.iterrows(): | |
| popup = "" | |
| if label_col and not pd.isna(row[label_col]): | |
| popup += f"{row[label_col]}<br>" | |
| if rank_col and not pd.isna(row[rank_col]): | |
| popup += f"Priority Rank: {int(row[rank_col])}" | |
| folium.Marker( | |
| location=[row[lat_col], row[lon_col]], | |
| popup=popup or "Recommended Spot", | |
| icon=folium.Icon(color="orange", icon="star"), | |
| ).add_to(m_lp) | |
| st_folium(m_lp, width=1000, height=600) | |
| # ---------------------- | |
| # PAGE 6 β RADAR CHART EXPLORER | |
| # ---------------------- | |
| elif page == "Radar Chart Explorer": | |
| st.title("Final Opportunity Radar β Community Comparison") | |
| metrics = { | |
| "Young Population (18β34, Z)": PCT_18_34_Z_COL, | |
| "Asian Population (Z)": PCT_ASIAN_Z_COL, | |
| "High Income Share (Z)": HI_INC_SHARE_Z_COL, | |
| "CTA Rides (Z)": CTA_RIDES_Z_COL, | |
| "Divvy Stations (Z)": DIVVY_Z_COL, | |
| "Crime Count (Z β lower is better)": CRIME_Z_COL, | |
| } | |
| # Select community | |
| communities = df_master[COMM_NAME_COL].sort_values().unique().tolist() | |
| default_comm = "LINCOLN PARK" if "LINCOLN PARK" in communities else communities[0] | |
| comm_choice = st.selectbox("Select Community", communities, index=communities.index(default_comm)) | |
| # Build radar data | |
| row = df_master[df_master[COMM_NAME_COL] == comm_choice].iloc[0] | |
| values = [row[mcol] for mcol in metrics.values()] | |
| labels = list(metrics.keys()) | |
| # Shift crime so that higher is better (multiply by -1) | |
| crime_index = labels.index("Crime Count (Z β lower is better)") | |
| values[crime_index] = -values[crime_index] | |
| # Close the loop | |
| values += values[:1] | |
| labels_closed = labels + [labels[0]] | |
| fig = go.Figure() | |
| fig.add_trace( | |
| go.Scatterpolar( | |
| r=values, | |
| theta=labels_closed, | |
| fill="toself", | |
| name=comm_choice, | |
| ) | |
| ) | |
| # Optionally overlay Lincoln Park for comparison | |
| if comm_choice != default_comm and default_comm in communities: | |
| lp_row = df_master[df_master[COMM_NAME_COL] == default_comm].iloc[0] | |
| lp_vals = [lp_row[mcol] for mcol in metrics.values()] | |
| lp_vals[crime_index] = -lp_vals[crime_index] | |
| lp_vals += lp_vals[:1] | |
| fig.add_trace( | |
| go.Scatterpolar( | |
| r=lp_vals, | |
| theta=labels_closed, | |
| fill="toself", | |
| name=default_comm, | |
| opacity=0.5, | |
| ) | |
| ) | |
| fig.update_layout( | |
| polar=dict(radialaxis=dict(visible=True)), | |
| showlegend=True, | |
| height=600, | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| st.markdown( | |
| """ | |
| **How to read this radar chart:** | |
| - Points farther from the center indicate **better opportunity** | |
| (more target customers, higher income, more transit & bikes, *lower* crime). | |
| - The **red area** that covers more surface implies a stronger overall case | |
| for opening a store in that community. | |
| """ | |
| ) | |