# Spaces: Sleeping
# File size: 11,206 Bytes
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import gradio as gr
import joblib
from sklearn.preprocessing import LabelEncoder
# Load the trained XGBoost model. `model` stays None whenever loading fails,
# which is the signal the prediction code uses to switch to the fallback.
model = None
try:
    model = joblib.load('best_model_xgboost.joblib')
    print("XGBoost model loaded successfully!")
except FileNotFoundError:
    # Missing artifact: keep running with the fallback estimator.
    print("Warning: best_model_xgboost.joblib not found. Using fallback model.")
except Exception as e:
    # Any other failure is reported and treated as "no model".
    print(f"Error loading model: {e}")
    model = None
# Sample data generation (simulated transactions used for the market insights and category lists)
def generate_sample_data(n_samples=1000, seed=42):
    """Build a synthetic table of HDB resale transactions.

    Every value is drawn at random; nothing here is real market data.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the private random generator, so repeated calls with
            the same arguments return the same table.

    Returns:
        A pandas DataFrame with the columns ``town``, ``flat_type``,
        ``flat_model``, ``floor_area_sqm`` (uniform in [60, 150)),
        ``storey_level`` (integers 1-24), ``flat_age`` (integers 0-49) and
        ``resale_price`` (uniform in [200000, 800000)).
    """
    # A private RandomState keeps the draws reproducible without reseeding
    # NumPy's global generator as a side effect of calling this function.
    rng = np.random.RandomState(seed)

    towns = [
        'ANG MO KIO', 'BEDOK', 'BISHAN', 'BUKIT BATOK', 'BUKIT MERAH',
        'BUKIT PANJANG', 'BUKIT TIMAH', 'CENTRAL AREA', 'CHOA CHU KANG',
        'CLEMENTI', 'GEYLANG', 'HOUGANG', 'JURONG EAST', 'JURONG WEST',
        'KALLANG/WHAMPOA', 'MARINE PARADE', 'PASIR RIS', 'PUNGGOL',
        'QUEENSTOWN', 'SEMBAWANG', 'SENGKANG', 'SERANGOON', 'TAMPINES',
        'TOA PAYOH', 'WOODLANDS', 'YISHUN',
    ]
    flat_types = ['2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', 'MULTI-GENERATION']
    flat_models = [
        '2 ROOM', '3Gen', 'Adjoined flat', 'Apartment', 'DBSS', 'Improved',
        'Improved-Maisonette', 'Maisonette', 'Model A', 'Model A-Maisonette',
        'Model-A2', 'MULTI-GENERATION', 'New Generation', 'Premium Apartment',
        'Premium Apartment Loft', 'Simplified', 'Standard', 'Type S1', 'Type S2',
    ]

    # The draw order below is fixed so a given seed always yields the same table.
    return pd.DataFrame({
        'town': rng.choice(towns, n_samples),
        'flat_type': rng.choice(flat_types, n_samples),
        'flat_model': rng.choice(flat_models, n_samples),
        'floor_area_sqm': rng.uniform(60, 150, n_samples),
        'storey_level': rng.randint(1, 25, n_samples),
        'flat_age': rng.randint(0, 50, n_samples),
        'resale_price': rng.uniform(200000, 800000, n_samples),
    })
# Simulated transactions backing the market insights and the category lists
data = generate_sample_data()

# Sorted category vocabularies taken from the sample data
# (these should match the categories present in the training data)
towns_list = sorted(set(data['town']))
flat_types = sorted(set(data['flat_type']))
flat_models = sorted(set(data['flat_model']))

# One label encoder per categorical column, fitted on the vocabularies above
# (these should match what was used during training)
town_encoder = LabelEncoder().fit(towns_list)
flat_type_encoder = LabelEncoder().fit(flat_types)
flat_model_encoder = LabelEncoder().fit(flat_models)
def simple_xgboost_emulation(input_data):
    """Estimate a price with a fixed linear formula when no model is usable.

    Args:
        input_data: Mapping with the keys ``town``, ``flat_type``,
            ``flat_model``, ``floor_area_sqm``, ``storey_level``,
            ``flat_age`` and ``remaining_lease``.

    Returns:
        The computed price, never lower than 100000.

    Raises:
        ValueError: If the town, flat type or flat model is not in the
            module-level category lists.
    """
    # Each categorical value contributes its position in the sorted
    # module-level list times a fixed weight.
    town_term = towns_list.index(input_data['town']) * 9500
    type_term = flat_types.index(input_data['flat_type']) * 14500
    flat_model_term = flat_models.index(input_data['flat_model']) * 8500

    area = input_data['floor_area_sqm']
    storey = input_data['storey_level']

    # Area x storey cross term ("Simulate tree interactions")
    cross_term = (area * storey) / 100 * 500

    # Accumulate in a fixed order so floating-point results stay stable.
    price = 220000
    price = price + area * 5200
    price = price + storey * 1800
    price = price + input_data['flat_age'] * -2800
    price = price + input_data['remaining_lease'] * 1200
    price = price + town_term
    price = price + type_term
    price = price + flat_model_term
    price = price + cross_term

    return max(price, 100000)
def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Normalise raw user input into the dictionary used for prediction.

    Args:
        town: Town name, passed through unchanged.
        flat_type: Flat type, passed through unchanged.
        flat_model: Flat model, passed through unchanged.
        floor_area_sqm: Floor area; converted with ``float``.
        storey_level: Storey level; converted with ``int``.
        flat_age: Flat age in years; converted with ``int``.

    Returns:
        A dict with the six inputs plus ``remaining_lease``, computed as
        ``99 - flat_age``.
    """
    area = float(floor_area_sqm)
    storey = int(storey_level)
    age_years = int(flat_age)
    return dict(
        town=town,
        flat_type=flat_type,
        flat_model=flat_model,
        floor_area_sqm=area,
        storey_level=storey,
        flat_age=age_years,
        remaining_lease=99 - age_years,
    )
def prepare_features_for_model(input_data):
    """Build the single-row feature frame passed to ``model.predict``.

    Args:
        input_data: Mapping with the keys ``town``, ``flat_type``,
            ``flat_model``, ``floor_area_sqm``, ``storey_level``,
            ``flat_age`` and ``remaining_lease``.

    Returns:
        A one-row DataFrame with the four numeric inputs followed by the
        label-encoded town, flat type and flat model.
    """
    # NOTE(review): the encoders are fitted at import time on the sample
    # data's categories; confirm they agree with the encoding used in training.
    numeric_columns = ('floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease')
    row = {column: input_data[column] for column in numeric_columns}

    # transform() takes a sequence, so wrap the single value and unwrap the code.
    row['town_encoded'] = town_encoder.transform([input_data['town']])[0]
    row['flat_type_encoded'] = flat_type_encoder.transform([input_data['flat_type']])[0]
    row['flat_model_encoded'] = flat_model_encoder.transform([input_data['flat_model']])[0]

    # Column order is fixed explicitly: numeric inputs first, encoded last.
    return pd.DataFrame([row], columns=[
        'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
        'town_encoded', 'flat_type_encoded', 'flat_model_encoded',
    ])
def predict_with_xgboost(input_data):
    """Predict a price with the loaded model, or with the fallback formula.

    Args:
        input_data: Preprocessed input mapping, as consumed by
            ``prepare_features_for_model`` and ``simple_xgboost_emulation``.

    Returns:
        The model's first prediction, never lower than 100000. When no model
        is loaded or prediction raises, the result of
        ``simple_xgboost_emulation`` is returned instead.
    """
    # No model loaded: go straight to the fallback formula.
    if model is None:
        return simple_xgboost_emulation(input_data)

    try:
        estimate = model.predict(prepare_features_for_model(input_data))[0]
        return max(estimate, 100000)  # Ensure minimum price
    except Exception as e:
        # Best effort: report the failure and still return an estimate.
        print(f"Prediction error: {e}")
        return simple_xgboost_emulation(input_data)
def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Return a Markdown summary comparing a prediction with sample prices.

    Despite the name, the result is text rather than a chart.

    Args:
        data: DataFrame with ``town``, ``flat_type`` and ``resale_price``
            columns.
        town: Town to filter on.
        flat_type: Flat type to filter on.
        predicted_price: Price to compare against the filtered average.

    Returns:
        A Markdown string with the transaction count, average, price range
        and percentage difference, or a short message when no rows match.
    """
    matches = (data['town'] == town) & (data['flat_type'] == flat_type)
    subset = data[matches]
    if subset.empty:
        return "No historical data available for this town and flat type combination."

    prices = subset['resale_price']
    avg_price = prices.mean()
    min_price = prices.min()
    max_price = prices.max()
    count = len(subset)

    # Relative gap between the prediction and the average; reported as 0 when
    # the average is not positive, which also avoids dividing by zero.
    if avg_price > 0:
        percentage_diff = ((predicted_price - avg_price) / avg_price) * 100
    else:
        percentage_diff = 0

    model_label = 'XGBoost model' if model else 'fallback model'

    return (
        "\n"
        f"## Market Insights for {town} - {flat_type}\n"
        f"- Historical transactions: {count}\n"
        f"- Average price: ${avg_price:,.2f}\n"
        f"- Price range: ${min_price:,.2f} - ${max_price:,.2f}\n"
        "### Prediction Analysis:\n"
        f"- Predicted Price: ${predicted_price:,.2f}\n"
        f"- Difference from average: {percentage_diff:+.1f}%\n"
        f"*Note: Market insights are based on simulated data. Prediction uses {model_label}.*\n"
    )
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Validate the inputs, predict the HDB resale price and build the outputs.

    Args:
        town: Selected town.
        flat_type: Selected flat type.
        flat_model: Selected flat model.
        floor_area_sqm: Floor area; must convert to a finite float > 0.
        storey_level: Storey level; must convert to an int > 0.
        flat_age: Flat age in years; must convert to an int >= 0.

    Returns:
        A 3-tuple ``(price_text, insights_markdown, summary_markdown)``.
        For invalid input the first element is an error message and the
        other two are ``"Invalid input"``; no exception is raised.
    """
    # Validate inputs
    try:
        floor_area_sqm = float(floor_area_sqm)
        storey_level = int(storey_level)
        flat_age = int(flat_age)
        # NaN/inf convert to float without error, so reject them explicitly.
        if not np.isfinite(floor_area_sqm):
            raise ValueError("floor area must be a finite number")
    except (TypeError, ValueError, OverflowError):
        # TypeError: a value was None (e.g. an emptied number field).
        # OverflowError: int() was given an infinite value.
        return "Please enter valid numbers for floor area, storey level, and flat age.", "Invalid input", "Invalid input"

    if floor_area_sqm <= 0 or storey_level <= 0 or flat_age < 0:
        return "Invalid input: Please enter positive values.", "Invalid input", "Invalid input"

    # Preprocess the user input
    input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)

    # Make prediction using XGBoost model (or the fallback formula)
    predicted_price = predict_with_xgboost(input_data)

    # Generate insights
    insights = create_market_insights_chart(
        data=data,
        town=town,
        flat_type=flat_type,
        predicted_price=predicted_price,
    )

    # Reflects whether a model is loaded, not whether this particular
    # prediction had to fall back after an error.
    model_source = "XGBoost model" if model else "fallback model"

    summary = (
        "\n"
        "### Property Details 🏡\n"
        f"- **Town:** {town}\n"
        f"- **Flat Type:** {flat_type}\n"
        f"- **Flat Model:** {flat_model}\n"
        f"- **Floor Area:** {floor_area_sqm} sqm\n"
        f"- **Storey Level:** {storey_level}\n"
        f"- **Flat Age:** {flat_age} years\n"
        "---\n"
        "### Prediction Summary\n"
        f"The predicted price is **${predicted_price:,.2f}**.\n"
        f"*Prediction made using {model_source}. Market insights based on simulated data.*\n"
    )
    return f"${predicted_price:,.2f}", insights, summary
# Create the Gradio interface
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏘️ HDB Resale Price Predictor")
    gr.Markdown("Estimate the resale price of HDB flats in Singapore based on property features.")

    # Display model status
    gr.Markdown(
        "✅ **XGBoost model loaded successfully!**"
        if model
        else "⚠️ **Using fallback model - XGBoost model not found**"
    )

    with gr.Row():
        # First column: property inputs and the button that triggers prediction
        with gr.Column():
            town = gr.Dropdown(choices=towns_list, label="Town", value="ANG MO KIO")
            flat_type = gr.Dropdown(choices=flat_types, label="Flat Type", value="4 ROOM")
            flat_model = gr.Dropdown(choices=flat_models, label="Flat Model", value="Improved")
            floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100, minimum=1, maximum=500)
            storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
            flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
            predict_btn = gr.Button("Predict Price", variant="primary")
        # Second column: predicted price, market insights and summary
        with gr.Column():
            price_output = gr.Label(label="Predicted Resale Price")
            insights_output = gr.Markdown()
            summary_output = gr.Markdown()

    # Same components, in predict_hdb_price's parameter order, feed both the
    # button handler and the example rows.
    prediction_inputs = [town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age]

    predict_btn.click(
        fn=predict_hdb_price,
        inputs=prediction_inputs,
        outputs=[price_output, insights_output, summary_output],
    )

    #with gr.Row():
    #    chart_output = gr.Plot(label="📈 Market Insights")

    example_rows = [
        ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
        ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
        ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15],
    ]
    gr.Examples(examples=example_rows, inputs=prediction_inputs)
# Launch the application (only when run as a script, not when imported)
if __name__ == "__main__":
    # Listen on all network interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)