Spaces:

SarahXia0405
/

Location_Reco

Sleeping

App Files Files Community

Location_Reco / app.py

SarahXia0405

Upload 7 files

edb8491 verified 4 months ago

raw

history blame contribute delete

15.7 kB

	# app.py
	# ---------------------------------------------------------
	# Milk Tea & Dessert Location Recommendation – Streamlit App
	# ---------------------------------------------------------
	import html

	import folium
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import streamlit as st
	from folium.plugins import HeatMap
	from streamlit_folium import st_folium

	# ----------------------
	# Config & File Paths
	# ----------------------
	# Input CSV files, given as bare file names.
	MASTER_PATH = "final_master_clean.csv"
	YELP_PATH = "yelp_with_comm.csv"
	# The street-level recommendations file is optional; the app runs without it.
	LP_RECO_PATH = "lincoln_park_recommendations.csv"

	# Page-wide Streamlit settings.
	st.set_page_config(
	    initial_sidebar_state="expanded",
	    layout="wide",
	    page_title="Milk Tea Location Intelligence",
	)

	# ----------------------
	# Helper utilities
	# ----------------------
	def load_csv(path, expected=True):
	    """Read ``path`` with pandas, tolerating a missing file.

	    Args:
	        path: Location of the CSV file.
	        expected: When true, a missing file is reported to the user through
	            ``st.error``; when false, the absence is silent.

	    Returns:
	        The parsed DataFrame, or ``None`` if the file does not exist.
	    """
	    try:
	        frame = pd.read_csv(path)
	    except FileNotFoundError:
	        frame = None
	        if expected:
	            st.error(f"Could not find `{path}` in the current folder.")
	    return frame


	def pick_col(df, candidates):
	    """Resolve a column name that may be spelled several ways.

	    Walks ``candidates`` in order and returns the first one that is
	    present in ``df.columns``.

	    Raises:
	        KeyError: when none of the candidates is a column of ``df``.
	    """
	    # A private sentinel keeps "no match" distinct from any real column name.
	    missing = object()
	    match = next((name for name in candidates if name in df.columns), missing)
	    if match is missing:
	        raise KeyError(f"None of these columns found: {candidates}")
	    return match


	def ensure_numeric(df, cols):
	    """Coerce the listed columns of ``df`` to numeric dtype, in place.

	    Names in ``cols`` that are not columns of ``df`` are skipped. Values that
	    cannot be parsed become NaN (``errors="coerce"``).

	    Returns:
	        The same ``df`` object that was passed in.
	    """
	    known = [name for name in cols if name in df.columns]
	    for name in known:
	        df[name] = pd.to_numeric(df[name], errors="coerce")
	    return df


	# ----------------------
	# Load data
	# ----------------------
	# Read the input tables. Only the Lincoln Park file is allowed to be absent,
	# so load_csv stays quiet about it (expected=False).
	df_master = load_csv(MASTER_PATH)
	df_yelp = load_csv(YELP_PATH)
	df_lp = load_csv(LP_RECO_PATH, expected=False)

	# Both required tables must be present; otherwise end this script run.
	if any(_table is None for _table in (df_master, df_yelp)):
	    st.stop()

	# Tidy the headers of every table that was loaded: trim surrounding
	# whitespace and turn inner spaces into underscores.
	for _table in (df_master, df_yelp, df_lp):
	    if _table is not None:
	        _table.columns = [_name.strip().replace(" ", "_") for _name in _table.columns]

	# Master-table column names. Each lookup accepts a lower-case or a
	# title-case header; pick_col raises KeyError if neither is present.
	COMM_NAME_COL = pick_col(df_master, ["community_name", "Community_Name"])
	COMM_ID_COL = pick_col(df_master, ["community_id", "Community_ID"])

	TOTAL_POP_COL = pick_col(df_master, ["total_pop", "Total_Pop"])
	PCT_18_34_Z_COL = pick_col(df_master, ["pct_18_34_z", "Pct_18_34_Z"])
	PCT_ASIAN_Z_COL = pick_col(df_master, ["pct_asian_z", "Pct_Asian_Z"])
	HI_INC_SHARE_Z_COL = pick_col(df_master, ["high_income_share_z", "High_Income_Share_Z"])
	CTA_RIDES_Z_COL = pick_col(df_master, ["total_cta_rides_z", "Total_Cta_Rides_Z"])
	DIVVY_Z_COL = pick_col(df_master, ["num_divvy_stations_z", "Num_Divvy_Stations_Z"])
	CRIME_Z_COL = pick_col(df_master, ["crime_count_z", "Crime_Count_Z"])

	FINAL_SCORE_COL = pick_col(df_master, ["final_score_adj", "Final_Score_Adj"])

	# Competition columns are optional: the name is None when the header is absent.
	_master_headers = list(df_master.columns)
	NUM_COMP_COL = next(
	    (_h for _h in ("num_competitors", "Num_Competitors") if _h in _master_headers),
	    None,
	)
	WHITE_SPACE_COL = next(
	    (
	        _h
	        for _h in ("final_score_white_space", "Final_Score_White_Space")
	        if _h in _master_headers
	    ),
	    None,
	)

	# Force every metric column to a numeric dtype. ensure_numeric skips names
	# that are not columns, which covers optional names that resolved to None.
	_numeric_columns = [
	    TOTAL_POP_COL,
	    PCT_18_34_Z_COL,
	    PCT_ASIAN_Z_COL,
	    HI_INC_SHARE_Z_COL,
	    CTA_RIDES_Z_COL,
	    DIVVY_Z_COL,
	    CRIME_Z_COL,
	    FINAL_SCORE_COL,
	    NUM_COMP_COL,
	    WHITE_SPACE_COL,
	]
	df_master = ensure_numeric(df_master, _numeric_columns)

	# Yelp-table column names, resolved in one pass. Each entry lists the
	# accepted spellings for one column; pick_col raises KeyError on a miss.
	(
	    YELP_LAT_COL,
	    YELP_LON_COL,
	    YELP_NAME_COL,
	    YELP_RATING_COL,
	    YELP_REVIEWS_COL,
	    YELP_COMM_NAME_COL,
	) = [
	    pick_col(df_yelp, _spellings)
	    for _spellings in (
	        ["lat", "Latitude"],
	        ["lon", "Longitude"],
	        ["name", "Name"],
	        ["rating", "Rating"],
	        ["review_count", "Review_Count"],
	        ["community_name", "Community_Name"],
	    )
	]


	# ----------------------
	# Sidebar Navigation
	# ----------------------
	st.sidebar.title("Navigation")

	# Pages offered in the sidebar, in display order.
	_page_names = [
	    "Home",
	    "Community Rankings",
	    "Accessibility",
	    "Competitive Landscape",
	    "Lincoln Park Recommendations",
	    "Radar Chart Explorer",
	]
	page = st.sidebar.radio("Go to", _page_names)

	# How many of the best-scoring communities to highlight.
	top_n = st.sidebar.slider(
	    "Highlight top N communities by final score",
	    min_value=3,
	    max_value=20,
	    value=10,
	)

	# Names of the top_n communities, ordered by descending final score.
	_by_score = df_master.sort_values(FINAL_SCORE_COL, ascending=False)
	top_communities = _by_score[COMM_NAME_COL].head(top_n).tolist()

	# ----------------------
	# PAGE 1 – HOME
	# ----------------------
	if page == "Home":
	    st.title("Milk Tea & Dessert Location Intelligence – Chicago")

	    st.markdown(
	        """
	This interactive app summarizes your full analysis pipeline:

	1. Market potential – demographics, target age (18–34), Asian population, income.
	2. Accessibility – CTA ridership and Divvy station density.
	3. Safety – lower relative crime vs other communities.
	4. Competition – existing bubble-tea & dessert shops from Yelp.
	5. Final recommendation – best community + street segments for a new store.

	Use the left sidebar to explore each section like a mini web application.
	"""
	    )

	    # Headline KPIs, one per column.
	    kpi_communities, kpi_shops, kpi_top, kpi_lincoln = st.columns(4)
	    kpi_communities.metric("Communities evaluated", df_master[COMM_ID_COL].nunique())
	    kpi_shops.metric("Competitor shops (Yelp)", len(df_yelp))
	    kpi_top.metric("Top candidate communities", top_n)

	    # Final score of the first row named "LINCOLN PARK", or NaN when the
	    # master table has no such row.
	    lincoln_rows = df_master.loc[
	        df_master[COMM_NAME_COL] == "LINCOLN PARK", FINAL_SCORE_COL
	    ]
	    if lincoln_rows.empty:
	        lincoln_score = np.nan
	    else:
	        lincoln_score = float(lincoln_rows.iloc[0])
	    lincoln_label = "N/A" if np.isnan(lincoln_score) else f"{lincoln_score:0.2f}"
	    kpi_lincoln.metric("Lincoln Park final score", lincoln_label)

	# ----------------------
	# PAGE 2 – COMMUNITY RANKINGS
	# ----------------------
	elif page == "Community Rankings":
	st.title("Community Evaluation & Ranking")

	# Ranking barplot
	ranking_df = df_master.sort_values(FINAL_SCORE_COL, ascending=False)

	fig = px.bar(
	ranking_df,
	x=FINAL_SCORE_COL,
	y=COMM_NAME_COL,
	orientation="h",
	color=COMM_NAME_COL,
	color_discrete_sequence=px.colors.sequential.Viridis,
	title="Final Opportunity Score by Community",
	)
	fig.update_layout(showlegend=False, height=800)
	st.plotly_chart(fig, use_container_width=True)

	st.subheader("Target Population vs Asian Population")
	fig2 = px.scatter(
	df_master,
	x=PCT_18_34_Z_COL,
	y=PCT_ASIAN_Z_COL,
	size=TOTAL_POP_COL,
	color=COMM_NAME_COL,
	hover_name=COMM_NAME_COL,
	title="Z-scores: 18–34 Population vs Asian Population",
	)
	# Highlight top N
	fig2.update_traces(
	selector=lambda t: t.name in top_communities,
	marker=dict(line=dict(width=2, color="black")),
	)
	fig2.update_layout(showlegend=False)
	st.plotly_chart(fig2, use_container_width=True)

	# ----------------------
	# PAGE 3 – ACCESSIBILITY
	# ----------------------
	elif page == "Accessibility":
	st.title("Accessibility – CTA & Divvy")

	c1, c2 = st.columns(2)

	with c1:
	st.subheader("CTA Rides vs Divvy Stations")
	fig = px.scatter(
	df_master,
	x=CTA_RIDES_Z_COL,
	y=DIVVY_Z_COL,
	size=TOTAL_POP_COL,
	color=COMM_NAME_COL,
	hover_name=COMM_NAME_COL,
	labels={
	CTA_RIDES_Z_COL: "CTA Rides (Z)",
	DIVVY_Z_COL: "Divvy Stations (Z)",
	},
	)
	fig.update_layout(showlegend=False)
	st.plotly_chart(fig, use_container_width=True)

	with c2:
	st.subheader("Top Communities by CTA Rides")
	top_cta = df_master.sort_values(CTA_RIDES_Z_COL, ascending=False).head(15)
	fig_bar = px.bar(
	top_cta,
	x=CTA_RIDES_Z_COL,
	y=COMM_NAME_COL,
	orientation="h",
	color=COMM_NAME_COL,
	labels={CTA_RIDES_Z_COL: "CTA Rides (Z)"},
	)
	fig_bar.update_layout(showlegend=False, height=600)
	st.plotly_chart(fig_bar, use_container_width=True)

	st.markdown(
	"""
	Interpretation: Communities in the upper-right quadrant combine high transit access
	and dense micromobility (Divvy) – excellent for a foot-traffic-driven drink shop.
	"""
	)

	# ----------------------
	# PAGE 4 – COMPETITIVE LANDSCAPE
	# ----------------------
	elif page == "Competitive Landscape":
	st.title("Competitive Landscape – Bubble Tea & Dessert Shops")

	# Aggregate competition by community
	comp_by_comm = (
	df_yelp.groupby(YELP_COMM_NAME_COL)
	.agg(
	num_shops=(YELP_NAME_COL, "count"),
	avg_rating=(YELP_RATING_COL, "mean"),
	avg_reviews=(YELP_REVIEWS_COL, "mean"),
	)
	.reset_index()
	)

	st.subheader("Competition Heatmap by Community")
	fig = px.treemap(
	comp_by_comm,
	path=[YELP_COMM_NAME_COL],
	values="num_shops",
	color="avg_rating",
	color_continuous_scale="RdYlGn",
	color_continuous_midpoint=3.8,
	hover_data={"avg_reviews": True},
	)
	st.plotly_chart(fig, use_container_width=True)

	st.subheader("Competitors Map (All Communities)")
	m = folium.Map(location=[41.88, -87.63], zoom_start=11, tiles="cartodbpositron")

	# Color scale based on rating
	def rating_color(r):
	if pd.isna(r):
	return "gray"
	if r >= 4.5:
	return "green"
	if r >= 4.0:
	return "orange"
	return "red"

	for _, row in df_yelp.iterrows():
	lat = row[YELP_LAT_COL]
	lon = row[YELP_LON_COL]
	if pd.isna(lat) or pd.isna(lon):
	continue

	popup = f"{row[YELP_NAME_COL]}<br>Rating: {row[YELP_RATING_COL]} ({row[YELP_REVIEWS_COL]} reviews)"
	folium.CircleMarker(
	location=[lat, lon],
	radius=4,
	color=rating_color(row[YELP_RATING_COL]),
	fill=True,
	fill_opacity=0.8,
	popup=popup,
	).add_to(m)

	st_folium(m, width=1000, height=600)

	# Heatmap of competitor density
	st.subheader("Competitor Density Heatmap")
	hm_map = folium.Map(location=[41.88, -87.63], zoom_start=11, tiles="cartodbpositron")
	heat_data = df_yelp[[YELP_LAT_COL, YELP_LON_COL]].dropna().values.tolist()
	if heat_data:
	HeatMap(heat_data, radius=15, blur=10).add_to(hm_map)
	st_folium(hm_map, width=1000, height=600)

	# ----------------------
	# PAGE 5 – LINCOLN PARK RECOMMENDATIONS
	# ----------------------
	elif page == "Lincoln Park Recommendations":
	st.title("Recommended Location – Lincoln Park Focus")

	if df_lp is None:
	st.warning("`lincoln_park_recommendations.csv` not found. "
	"Add it to the folder to see street-level recommendations.")
	lp_mask = df_yelp[YELP_COMM_NAME_COL] == "LINCOLN PARK"
	df_lp_yelp = df_yelp[lp_mask].copy()

	st.subheader("Existing Competitors in Lincoln Park")
	st.dataframe(
	df_lp_yelp[[YELP_NAME_COL, YELP_RATING_COL, YELP_REVIEWS_COL]].sort_values(
	YELP_RATING_COL, ascending=False
	),
	use_container_width=True,
	)

	st.subheader("Lincoln Park – Competitors & Recommended Spots")

	m_lp = folium.Map(location=[41.92, -87.65], zoom_start=14, tiles="cartodbpositron")

	# Existing shops
	for _, row in df_lp_yelp.iterrows():
	folium.CircleMarker(
	location=[row[YELP_LAT_COL], row[YELP_LON_COL]],
	radius=5,
	color="red",
	fill=True,
	fill_opacity=0.8,
	popup=f"{row[YELP_NAME_COL]} (Rating {row[YELP_RATING_COL]})",
	).add_to(m_lp)

	# Recommended points (if file exists)
	if df_lp is not None:
	lat_col = pick_col(df_lp, ["lat", "Latitude"])
	lon_col = pick_col(df_lp, ["lon", "Longitude"])
	rank_col = next((c for c in ["rank", "priority_rank", "Priority_Rank"] if c in df_lp.columns), None)
	label_col = next((c for c in ["label", "location_name", "Location_Name"] if c in df_lp.columns), None)

	for _, row in df_lp.iterrows():
	popup = ""
	if label_col and not pd.isna(row[label_col]):
	popup += f"{row[label_col]}<br>"
	if rank_col and not pd.isna(row[rank_col]):
	popup += f"Priority Rank: {int(row[rank_col])}"
	folium.Marker(
	location=[row[lat_col], row[lon_col]],
	popup=popup or "Recommended Spot",
	icon=folium.Icon(color="orange", icon="star"),
	).add_to(m_lp)

	st_folium(m_lp, width=1000, height=600)

	# ----------------------
	# PAGE 6 – RADAR CHART EXPLORER
	# ----------------------
	elif page == "Radar Chart Explorer":
	st.title("Final Opportunity Radar – Community Comparison")

	metrics = {
	"Young Population (18–34, Z)": PCT_18_34_Z_COL,
	"Asian Population (Z)": PCT_ASIAN_Z_COL,
	"High Income Share (Z)": HI_INC_SHARE_Z_COL,
	"CTA Rides (Z)": CTA_RIDES_Z_COL,
	"Divvy Stations (Z)": DIVVY_Z_COL,
	"Crime Count (Z – lower is better)": CRIME_Z_COL,
	}

	# Select community
	communities = df_master[COMM_NAME_COL].sort_values().unique().tolist()
	default_comm = "LINCOLN PARK" if "LINCOLN PARK" in communities else communities[0]
	comm_choice = st.selectbox("Select Community", communities, index=communities.index(default_comm))

	# Build radar data
	row = df_master[df_master[COMM_NAME_COL] == comm_choice].iloc[0]
	values = [row[mcol] for mcol in metrics.values()]
	labels = list(metrics.keys())

	# Shift crime so that higher is better (multiply by -1)
	crime_index = labels.index("Crime Count (Z – lower is better)")
	values[crime_index] = -values[crime_index]

	# Close the loop
	values += values[:1]
	labels_closed = labels + [labels[0]]

	fig = go.Figure()
	fig.add_trace(
	go.Scatterpolar(
	r=values,
	theta=labels_closed,
	fill="toself",
	name=comm_choice,
	)
	)

	# Optionally overlay Lincoln Park for comparison
	if comm_choice != default_comm and default_comm in communities:
	lp_row = df_master[df_master[COMM_NAME_COL] == default_comm].iloc[0]
	lp_vals = [lp_row[mcol] for mcol in metrics.values()]
	lp_vals[crime_index] = -lp_vals[crime_index]
	lp_vals += lp_vals[:1]
	fig.add_trace(
	go.Scatterpolar(
	r=lp_vals,
	theta=labels_closed,
	fill="toself",
	name=default_comm,
	opacity=0.5,
	)
	)

	fig.update_layout(
	polar=dict(radialaxis=dict(visible=True)),
	showlegend=True,
	height=600,
	)

	st.plotly_chart(fig, use_container_width=True)

	st.markdown(
	"""
	How to read this radar chart:

	- Points farther from the center indicate better opportunity
	(more target customers, higher income, more transit & bikes, lower crime).
	- The red area that covers more surface implies a stronger overall case
	for opening a store in that community.
	"""
	)