HDB_ML / strealit_app.py
aunghlaing's picture
rename
4c36ac9 verified
import streamlit as st
import pandas as pd
import plotly.express as px
import geopandas as gpd
st.set_page_config(layout="wide", page_title="Singapore Housing Data Dashboard", page_icon=":house:")
@st.cache_data
def load_data():
# Use relative paths for deployment
df = pd.read_csv("Adjusted_Resale_Prices_2025_with_coords.csv")
gdf = gpd.read_file("planning_area_boundaries.geojson")
return df, gdf
try:
df, gdf = load_data()
except FileNotFoundError as e:
st.error(f"Data file not found: {e}")
st.stop()
st.title("🏠 Singapore HDB Resale Price Dashboard")
st.markdown("---")
# Sidebar Filters
st.sidebar.header("πŸ” Filters")
# Check if required columns exist
required_columns = ["planning_area", "flat_type", "storey_range", "lease_commence_year", "resale_price"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
st.error(f"Missing required columns: {missing_columns}")
st.write("Available columns:", list(df.columns))
st.stop()
all_planning_areas = sorted(df["planning_area"].unique())
selected_planning_area = st.sidebar.selectbox(
"Planning Area",
["All"] + all_planning_areas
)
all_flat_types = sorted(df["flat_type"].unique())
selected_flat_types = st.sidebar.multiselect(
"Flat Type",
all_flat_types,
default=all_flat_types
)
all_storey_ranges = sorted(df["storey_range"].unique())
selected_storey_ranges = st.sidebar.multiselect(
"Storey Range",
all_storey_ranges,
default=all_storey_ranges
)
min_year = int(df["lease_commence_year"].min())
max_year = int(df["lease_commence_year"].max())
selected_year_range = st.sidebar.slider(
"Lease Commencement Year",
min_year, max_year, (min_year, max_year)
)
# Apply filters
filtered_df = df[
(df["flat_type"].isin(selected_flat_types)) &
(df["storey_range"].isin(selected_storey_ranges)) &
(df["lease_commence_year"] >= selected_year_range[0]) &
(df["lease_commence_year"] <= selected_year_range[1])
]
if selected_planning_area != "All":
filtered_df = filtered_df[filtered_df["planning_area"] == selected_planning_area]
# Display filter summary
st.sidebar.markdown("---")
st.sidebar.write(f"**Records shown:** {len(filtered_df):,}")
st.sidebar.write(f"**Total records:** {len(df):,}")
# Main content
col1, col2 = st.columns([2, 1])
with col2:
if not filtered_df.empty:
avg_price = filtered_df["resale_price"].mean()
median_price = filtered_df["resale_price"].median()
max_price = filtered_df["resale_price"].max()
min_price = filtered_df["resale_price"].min()
st.metric("Average Price", f"${avg_price:,.0f}")
st.metric("Median Price", f"${median_price:,.0f}")
st.metric("Price Range", f"${min_price:,.0f} - ${max_price:,.0f}")
with col1:
st.header("πŸ“Š Key Statistics")
# Choropleth Map
st.header("πŸ—ΊοΈ Average Resale Price by Planning Area")
if not filtered_df.empty:
avg_price_by_planning_area = filtered_df.groupby("planning_area")["resale_price"].mean().reset_index()
# Try to merge with GeoDataFrame
try:
# Check if the GeoDataFrame has the expected column
if "PLN_AREA_N" in gdf.columns:
gdf_merged = gdf.merge(avg_price_by_planning_area, left_on="PLN_AREA_N", right_on="planning_area", how="left")
else:
# Try other common column names
possible_columns = [col for col in gdf.columns if "area" in col.lower() or "name" in col.lower()]
if possible_columns:
gdf_merged = gdf.merge(avg_price_by_planning_area, left_on=possible_columns[0], right_on="planning_area", how="left")
else:
st.error("Could not find matching column in GeoJSON for planning areas")
st.write("GeoJSON columns:", list(gdf.columns))
gdf_merged = None
if gdf_merged is not None:
fig_map = px.choropleth_mapbox(
gdf_merged,
geojson=gdf_merged.geometry,
locations=gdf_merged.index,
color="resale_price",
color_continuous_scale="Viridis",
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 1.3521, "lon": 103.8198},
opacity=0.7,
labels={
"resale_price": "Avg Resale Price (SGD)",
},
hover_name=gdf_merged.columns[0] if "PLN_AREA_N" not in gdf_merged.columns else "PLN_AREA_N",
hover_data={
"resale_price": ":$,.0f",
}
)
fig_map.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, height=500)
st.plotly_chart(fig_map, use_container_width=True)
else:
st.warning("Could not create choropleth map due to data structure mismatch.")
except Exception as e:
st.error(f"Error creating map: {e}")
st.write("Showing data table instead:")
st.dataframe(avg_price_by_planning_area)
else:
st.warning("No data to display for the selected filters on the map.")
# Line Chart
st.header("πŸ“ˆ Resale Price Trends Over Lease Commencement Year")
if not filtered_df.empty:
avg_price_by_year = filtered_df.groupby("lease_commence_year")["resale_price"].mean().reset_index()
fig_line = px.line(
avg_price_by_year,
x="lease_commence_year",
y="resale_price",
title="Average Resale Price by Lease Commencement Year",
labels={
"lease_commence_year": "Lease Commencement Year",
"resale_price": "Average Resale Price (SGD)"
},
markers=True
)
fig_line.update_traces(mode="lines+markers", line=dict(width=3))
fig_line.update_layout(
hovermode="x unified",
height=400,
xaxis_title="Lease Commencement Year",
yaxis_title="Average Resale Price (SGD)"
)
st.plotly_chart(fig_line, use_container_width=True)
else:
st.warning("No data to display for the selected filters on the line chart.")
# Additional Charts
if not filtered_df.empty:
col1, col2 = st.columns(2)
with col1:
st.subheader("πŸ“Š Price Distribution by Flat Type")
fig_box = px.box(
filtered_df,
x="flat_type",
y="resale_price",
title="Price Distribution by Flat Type"
)
fig_box.update_layout(height=400)
st.plotly_chart(fig_box, use_container_width=True)
with col2:
st.subheader("🏒 Average Price by Storey Range")
avg_by_storey = filtered_df.groupby("storey_range")["resale_price"].mean().reset_index()
fig_bar = px.bar(
avg_by_storey,
x="storey_range",
y="resale_price",
title="Average Price by Storey Range"
)
fig_bar.update_layout(height=400)
st.plotly_chart(fig_bar, use_container_width=True)
# Toggle for Data Table
st.header("πŸ“‹ Filtered Data Table")
show_data_table = st.checkbox("Show filtered data table")
if show_data_table:
if not filtered_df.empty:
st.dataframe(filtered_df, use_container_width=True)
else:
st.info("No data to display in the table for the selected filters.")
# Footer
st.markdown("---")
st.markdown("*Data source: Singapore HDB Resale Prices*")