Spaces:
Build error
Build error
File size: 7,401 Bytes
cfd7b0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
import streamlit as st
import pandas as pd
import plotly.express as px
import geopandas as gpd
st.set_page_config(layout="wide", page_title="Singapore Housing Data Dashboard", page_icon=":house:")
@st.cache_data
def load_data():
# Use relative paths for deployment
df = pd.read_csv("Adjusted_Resale_Prices_2025_with_coords.csv")
gdf = gpd.read_file("planning_area_boundaries.geojson")
return df, gdf
try:
df, gdf = load_data()
except FileNotFoundError as e:
st.error(f"Data file not found: {e}")
st.stop()
st.title("π Singapore HDB Resale Price Dashboard")
st.markdown("---")
# Sidebar Filters
st.sidebar.header("π Filters")
# Check if required columns exist
required_columns = ["planning_area", "flat_type", "storey_range", "lease_commence_year", "resale_price"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
st.error(f"Missing required columns: {missing_columns}")
st.write("Available columns:", list(df.columns))
st.stop()
all_planning_areas = sorted(df["planning_area"].unique())
selected_planning_area = st.sidebar.selectbox(
"Planning Area",
["All"] + all_planning_areas
)
all_flat_types = sorted(df["flat_type"].unique())
selected_flat_types = st.sidebar.multiselect(
"Flat Type",
all_flat_types,
default=all_flat_types
)
all_storey_ranges = sorted(df["storey_range"].unique())
selected_storey_ranges = st.sidebar.multiselect(
"Storey Range",
all_storey_ranges,
default=all_storey_ranges
)
min_year = int(df["lease_commence_year"].min())
max_year = int(df["lease_commence_year"].max())
selected_year_range = st.sidebar.slider(
"Lease Commencement Year",
min_year, max_year, (min_year, max_year)
)
# Apply filters
filtered_df = df[
(df["flat_type"].isin(selected_flat_types)) &
(df["storey_range"].isin(selected_storey_ranges)) &
(df["lease_commence_year"] >= selected_year_range[0]) &
(df["lease_commence_year"] <= selected_year_range[1])
]
if selected_planning_area != "All":
filtered_df = filtered_df[filtered_df["planning_area"] == selected_planning_area]
# Display filter summary
st.sidebar.markdown("---")
st.sidebar.write(f"**Records shown:** {len(filtered_df):,}")
st.sidebar.write(f"**Total records:** {len(df):,}")
# Main content
col1, col2 = st.columns([2, 1])
with col2:
if not filtered_df.empty:
avg_price = filtered_df["resale_price"].mean()
median_price = filtered_df["resale_price"].median()
max_price = filtered_df["resale_price"].max()
min_price = filtered_df["resale_price"].min()
st.metric("Average Price", f"${avg_price:,.0f}")
st.metric("Median Price", f"${median_price:,.0f}")
st.metric("Price Range", f"${min_price:,.0f} - ${max_price:,.0f}")
with col1:
st.header("π Key Statistics")
# Choropleth Map
st.header("πΊοΈ Average Resale Price by Planning Area")
if not filtered_df.empty:
avg_price_by_planning_area = filtered_df.groupby("planning_area")["resale_price"].mean().reset_index()
# Try to merge with GeoDataFrame
try:
# Check if the GeoDataFrame has the expected column
if "PLN_AREA_N" in gdf.columns:
gdf_merged = gdf.merge(avg_price_by_planning_area, left_on="PLN_AREA_N", right_on="planning_area", how="left")
else:
# Try other common column names
possible_columns = [col for col in gdf.columns if "area" in col.lower() or "name" in col.lower()]
if possible_columns:
gdf_merged = gdf.merge(avg_price_by_planning_area, left_on=possible_columns[0], right_on="planning_area", how="left")
else:
st.error("Could not find matching column in GeoJSON for planning areas")
st.write("GeoJSON columns:", list(gdf.columns))
gdf_merged = None
if gdf_merged is not None:
fig_map = px.choropleth_mapbox(
gdf_merged,
geojson=gdf_merged.geometry,
locations=gdf_merged.index,
color="resale_price",
color_continuous_scale="Viridis",
mapbox_style="carto-positron",
zoom=9.5,
center={"lat": 1.3521, "lon": 103.8198},
opacity=0.7,
labels={
"resale_price": "Avg Resale Price (SGD)",
},
hover_name=gdf_merged.columns[0] if "PLN_AREA_N" not in gdf_merged.columns else "PLN_AREA_N",
hover_data={
"resale_price": ":$,.0f",
}
)
fig_map.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, height=500)
st.plotly_chart(fig_map, use_container_width=True)
else:
st.warning("Could not create choropleth map due to data structure mismatch.")
except Exception as e:
st.error(f"Error creating map: {e}")
st.write("Showing data table instead:")
st.dataframe(avg_price_by_planning_area)
else:
st.warning("No data to display for the selected filters on the map.")
# Line Chart
st.header("π Resale Price Trends Over Lease Commencement Year")
if not filtered_df.empty:
avg_price_by_year = filtered_df.groupby("lease_commence_year")["resale_price"].mean().reset_index()
fig_line = px.line(
avg_price_by_year,
x="lease_commence_year",
y="resale_price",
title="Average Resale Price by Lease Commencement Year",
labels={
"lease_commence_year": "Lease Commencement Year",
"resale_price": "Average Resale Price (SGD)"
},
markers=True
)
fig_line.update_traces(mode="lines+markers", line=dict(width=3))
fig_line.update_layout(
hovermode="x unified",
height=400,
xaxis_title="Lease Commencement Year",
yaxis_title="Average Resale Price (SGD)"
)
st.plotly_chart(fig_line, use_container_width=True)
else:
st.warning("No data to display for the selected filters on the line chart.")
# Additional Charts
if not filtered_df.empty:
col1, col2 = st.columns(2)
with col1:
st.subheader("π Price Distribution by Flat Type")
fig_box = px.box(
filtered_df,
x="flat_type",
y="resale_price",
title="Price Distribution by Flat Type"
)
fig_box.update_layout(height=400)
st.plotly_chart(fig_box, use_container_width=True)
with col2:
st.subheader("π’ Average Price by Storey Range")
avg_by_storey = filtered_df.groupby("storey_range")["resale_price"].mean().reset_index()
fig_bar = px.bar(
avg_by_storey,
x="storey_range",
y="resale_price",
title="Average Price by Storey Range"
)
fig_bar.update_layout(height=400)
st.plotly_chart(fig_bar, use_container_width=True)
# Toggle for Data Table
st.header("π Filtered Data Table")
show_data_table = st.checkbox("Show filtered data table")
if show_data_table:
if not filtered_df.empty:
st.dataframe(filtered_df, use_container_width=True)
else:
st.info("No data to display in the table for the selected filters.")
# Footer
st.markdown("---")
st.markdown("*Data source: Singapore HDB Resale Prices*")
|