Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| import pandas as pd | |
| import numpy as np | |
| import gradio as gr | |
| import joblib | |
| from sklearn.preprocessing import LabelEncoder | |
# Load the trained XGBoost model. On any failure `model` is set to None, which
# predict_with_xgboost() treats as "use the hand-written fallback estimator".
# NOTE(review): joblib.load unpickles the file, so the artifact must come from
# a trusted source.
try:
    model = joblib.load('best_model_xgboost.joblib')
except FileNotFoundError:
    # Missing artifact is an expected situation: warn and continue.
    print("Warning: best_model_xgboost.joblib not found. Using fallback model.")
    model = None
except Exception as e:
    # Any other load failure is reported but must not stop the app from starting.
    print(f"Error loading model: {e}")
    model = None
else:
    print("XGBoost model loaded successfully!")
# Synthetic transaction table used for the "market insights" text
def generate_sample_data():
    """Return a reproducible, randomly generated table of resale transactions.

    NumPy's global random generator is re-seeded with 42 on every call, so
    each call returns the same 1000 rows (and resets the global random state
    as a side effect).

    Returns:
        pandas.DataFrame with the columns ``town``, ``flat_type``,
        ``flat_model``, ``floor_area_sqm`` (uniform in [60, 150)),
        ``storey_level`` (integers 1-24), ``flat_age`` (integers 0-49) and
        ``resale_price`` (uniform in [200000, 800000)).
    """
    np.random.seed(42)
    row_count = 1000

    town_choices = [
        'ANG MO KIO', 'BEDOK', 'BISHAN', 'BUKIT BATOK', 'BUKIT MERAH',
        'BUKIT PANJANG', 'BUKIT TIMAH', 'CENTRAL AREA', 'CHOA CHU KANG',
        'CLEMENTI', 'GEYLANG', 'HOUGANG', 'JURONG EAST', 'JURONG WEST',
        'KALLANG/WHAMPOA', 'MARINE PARADE', 'PASIR RIS', 'PUNGGOL',
        'QUEENSTOWN', 'SEMBAWANG', 'SENGKANG', 'SERANGOON', 'TAMPINES',
        'TOA PAYOH', 'WOODLANDS', 'YISHUN',
    ]
    flat_type_choices = [
        '2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', 'MULTI-GENERATION',
    ]
    flat_model_choices = [
        '2 ROOM', '3Gen', 'Adjoined flat', 'Apartment', 'DBSS', 'Improved',
        'Improved-Maisonette', 'Maisonette', 'Model A', 'Model A-Maisonette',
        'Model-A2', 'MULTI-GENERATION', 'New Generation', 'Premium Apartment',
        'Premium Apartment Loft', 'Simplified', 'Standard', 'Type S1', 'Type S2',
    ]

    # The draws below must stay in this order: each one advances the shared
    # random state, so reordering them would change every generated value.
    columns = {}
    columns['town'] = np.random.choice(town_choices, size=row_count)
    columns['flat_type'] = np.random.choice(flat_type_choices, size=row_count)
    columns['flat_model'] = np.random.choice(flat_model_choices, size=row_count)
    columns['floor_area_sqm'] = np.random.uniform(low=60, high=150, size=row_count)
    columns['storey_level'] = np.random.randint(low=1, high=25, size=row_count)
    columns['flat_age'] = np.random.randint(low=0, high=50, size=row_count)
    columns['resale_price'] = np.random.uniform(low=200000, high=800000, size=row_count)
    return pd.DataFrame(columns)
# Simulated transactions: source of the market-insight statistics and of the
# category vocabularies below.
data = generate_sample_data()

# Sorted category vocabularies; they feed the dropdown choices and the
# index-based factors of the fallback estimator.
towns_list = sorted(set(data['town']))
flat_types = sorted(set(data['flat_type']))
flat_models = sorted(set(data['flat_model']))

# Label encoders fitted on those vocabularies.
# NOTE(review): these are fitted on the simulated categories, not loaded from
# the training run; the integer codes only match the trained model if training
# used the same label sets - confirm.
town_encoder = LabelEncoder().fit(towns_list)
flat_type_encoder = LabelEncoder().fit(flat_types)
flat_model_encoder = LabelEncoder().fit(flat_models)
def simple_xgboost_emulation(input_data):
    """Estimate a price with a fixed linear formula when no model is usable.

    Args:
        input_data: mapping with the keys ``town``, ``flat_type``,
            ``flat_model``, ``floor_area_sqm``, ``storey_level``,
            ``flat_age`` and ``remaining_lease``.

    Returns:
        The estimated price, never lower than 100000.

    Raises:
        ValueError: if a category is not present in ``towns_list``,
            ``flat_types`` or ``flat_models``.
    """
    # Each category contributes its position in the sorted vocabulary times
    # a fixed weight.
    town_term = towns_list.index(input_data['town']) * 9500
    type_term = flat_types.index(input_data['flat_type']) * 14500
    model_term = flat_models.index(input_data['flat_model']) * 8500

    area = input_data['floor_area_sqm']
    storey = input_data['storey_level']

    # Area x storey cross term, standing in for tree interactions.
    cross_term = (area * storey) / 100 * 500

    # Terms are accumulated one by one, in a fixed order, so floating-point
    # rounding is deterministic.
    estimate = 220000  # base price
    estimate = estimate + area * 5200
    estimate = estimate + storey * 1800
    estimate = estimate + input_data['flat_age'] * -2800
    estimate = estimate + input_data['remaining_lease'] * 1200
    estimate = estimate + town_term
    estimate = estimate + type_term
    estimate = estimate + model_term
    estimate = estimate + cross_term

    return max(estimate, 100000)
def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Bundle the raw form values into the dict used by the predictors.

    The three categorical values are passed through unchanged; the floor area
    is converted with ``float`` and the storey level and flat age with
    ``int``. ``remaining_lease`` is derived as ``99 - flat_age``.

    Returns:
        dict with the keys ``town``, ``flat_type``, ``flat_model``,
        ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
        ``remaining_lease``.

    Raises:
        ValueError / TypeError: if a numeric argument cannot be converted.
    """
    area = float(floor_area_sqm)
    storey = int(storey_level)
    age = int(flat_age)

    prepared = dict(town=town, flat_type=flat_type, flat_model=flat_model)
    prepared['floor_area_sqm'] = area
    prepared['storey_level'] = storey
    prepared['flat_age'] = age
    prepared['remaining_lease'] = 99 - age
    return prepared
def prepare_features_for_model(input_data):
    """Turn a preprocessed input dict into the single-row model feature frame.

    The categorical values are replaced by the integer codes of the
    module-level label encoders; the raw strings are not part of the result.

    Args:
        input_data: mapping as produced by ``preprocess_input``.

    Returns:
        One-row pandas.DataFrame with the columns, in order:
        ``floor_area_sqm``, ``storey_level``, ``flat_age``,
        ``remaining_lease``, ``town_encoded``, ``flat_type_encoded``,
        ``flat_model_encoded``.

    Raises:
        ValueError: if a category was not seen when the encoders were fitted.
    """
    raw_town = input_data['town']
    raw_flat_type = input_data['flat_type']
    raw_flat_model = input_data['flat_model']

    feature_row = {
        'floor_area_sqm': input_data['floor_area_sqm'],
        'storey_level': input_data['storey_level'],
        'flat_age': input_data['flat_age'],
        'remaining_lease': input_data['remaining_lease'],
        # transform() works on sequences, so wrap the value and unwrap the code
        'town_encoded': town_encoder.transform([raw_town])[0],
        'flat_type_encoded': flat_type_encoder.transform([raw_flat_type])[0],
        'flat_model_encoded': flat_model_encoder.transform([raw_flat_model])[0],
    }

    # NOTE(review): the column order presumably has to match the order used
    # when the model was trained - confirm against the training code.
    column_order = [
        'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
        'town_encoded', 'flat_type_encoded', 'flat_model_encoded',
    ]
    return pd.DataFrame([feature_row], columns=column_order)
def predict_with_xgboost(input_data):
    """Predict a price with the loaded model, or with the fallback formula.

    The fallback (``simple_xgboost_emulation``) is used when no model was
    loaded and also when feature preparation or the model call raises; in the
    latter case the error is printed first.

    Args:
        input_data: mapping as produced by ``preprocess_input``.

    Returns:
        The predicted price, never lower than 100000.
    """
    if model is not None:
        try:
            model_features = prepare_features_for_model(input_data)
            raw_prediction = model.predict(model_features)[0]
            # Clamp to the same minimum price the fallback enforces.
            return max(raw_prediction, 100000)
        except Exception as e:
            # Best effort: report the problem and fall through to the fallback.
            print(f"Prediction error: {e}")

    return simple_xgboost_emulation(input_data)
def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Build a Markdown summary comparing a prediction with past transactions.

    Despite the name, no chart is produced; the result is text.

    Args:
        data: DataFrame with ``town``, ``flat_type`` and ``resale_price``
            columns.
        town: town to filter on.
        flat_type: flat type to filter on.
        predicted_price: price to compare against the filtered average.

    Returns:
        Markdown text with the transaction count, average, price range and the
        percentage difference of the prediction from the average, or a short
        message when no rows match the town / flat type pair.
    """
    town_matches = data['town'] == town
    type_matches = data['flat_type'] == flat_type
    matching_rows = data[town_matches & type_matches]

    if matching_rows.empty:
        return "No historical data available for this town and flat type combination."

    prices = matching_rows['resale_price']
    avg_price = prices.mean()
    min_price = prices.min()
    max_price = prices.max()
    count = len(matching_rows)

    # Relative gap between prediction and average; guarded against a
    # non-positive average to avoid a meaningless or undefined ratio.
    if avg_price > 0:
        percentage_diff = ((predicted_price - avg_price) / avg_price) * 100
    else:
        percentage_diff = 0

    return f"""
## Market Insights for {town} - {flat_type}
- Historical transactions: {count}
- Average price: ${avg_price:,.2f}
- Price range: ${min_price:,.2f} - ${max_price:,.2f}
### Prediction Analysis:
- Predicted Price: ${predicted_price:,.2f}
- Difference from average: {percentage_diff:+.1f}%
*Note: Market insights are based on simulated data. Prediction uses {'XGBoost model' if model else 'fallback model'}.*
"""
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Validate the form values, predict a resale price and describe it.

    This is the Gradio click handler. Invalid input never raises; it is
    reported through the returned strings instead.

    Args:
        town: selected town.
        flat_type: selected flat type.
        flat_model: selected flat model.
        floor_area_sqm: floor area; must convert to a number greater than 0.
        storey_level: storey; must convert to an integer greater than 0.
        flat_age: age in years; must convert to an integer of at least 0.

    Returns:
        A 3-tuple ``(price_text, insights_markdown, summary_markdown)``. On
        invalid input the first element is an error message and the other two
        are ``"Invalid input"``.
    """
    # Validate inputs. A cleared Gradio field arrives as None (TypeError) and
    # an infinite value cannot become an int (OverflowError); both are user
    # input problems and get the same message as unparsable text.
    try:
        floor_area_sqm = float(floor_area_sqm)
        storey_level = int(storey_level)
        flat_age = int(flat_age)
    except (TypeError, ValueError, OverflowError):
        return "Please enter valid numbers for floor area, storey level, and flat age.", "Invalid input", "Invalid input"

    # `not x > 0` (rather than `x <= 0`) also rejects NaN.
    if not floor_area_sqm > 0 or storey_level <= 0 or flat_age < 0:
        return "Invalid input: Please enter positive values.", "Invalid input", "Invalid input"

    # A cleared dropdown yields None; stop here instead of failing later in
    # the category lookups.
    if not town or not flat_type or not flat_model:
        return "Please select a town, flat type, and flat model.", "Invalid input", "Invalid input"

    # Preprocess the user input
    input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)

    # Make prediction using the XGBoost model (or its fallback)
    predicted_price = predict_with_xgboost(input_data)

    # Generate insights from the module-level simulated transactions
    insights = create_market_insights_chart(
        data=data,
        town=town,
        flat_type=flat_type,
        predicted_price=predicted_price
    )

    # Same "is a model loaded" test that predict_with_xgboost uses.
    model_source = "XGBoost model" if model is not None else "fallback model"
    summary = f"""
### Property Details 🏡
- **Town:** {town}
- **Flat Type:** {flat_type}
- **Flat Model:** {flat_model}
- **Floor Area:** {floor_area_sqm} sqm
- **Storey Level:** {storey_level}
- **Flat Age:** {flat_age} years
---
### Prediction Summary
The predicted price is **${predicted_price:,.2f}**.
*Prediction made using {model_source}. Market insights based on simulated data.*
"""
    return f"${predicted_price:,.2f}", insights, summary
# Build the Gradio UI: inputs on the left, results on the right.
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏘️ HDB Resale Price Predictor")
    gr.Markdown("Estimate the resale price of HDB flats in Singapore based on property features.")

    # Status banner: tells the user which predictor is behind the button.
    if model:
        gr.Markdown("✅ **XGBoost model loaded successfully!**")
    else:
        gr.Markdown("⚠️ **Using fallback model - XGBoost model not found**")

    with gr.Row():
        # Left column: property features entered by the user.
        with gr.Column():
            town = gr.Dropdown(label="Town", choices=towns_list, value="ANG MO KIO")
            flat_type = gr.Dropdown(label="Flat Type", choices=flat_types, value="4 ROOM")
            flat_model = gr.Dropdown(label="Flat Model", choices=flat_models, value="Improved")
            floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100, minimum=1, maximum=500)
            storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, step=1, value=5)
            flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, step=1, value=10)
            predict_btn = gr.Button("Predict Price", variant="primary")

        # Right column: the three values returned by predict_hdb_price.
        with gr.Column():
            price_output = gr.Label(label="Predicted Resale Price")
            insights_output = gr.Markdown()
            summary_output = gr.Markdown()

    # The same component list, in predict_hdb_price's parameter order, feeds
    # both the button and the clickable examples.
    prediction_inputs = [town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age]

    predict_btn.click(
        fn=predict_hdb_price,
        inputs=prediction_inputs,
        outputs=[price_output, insights_output, summary_output],
    )

    #with gr.Row():
    #    chart_output = gr.Plot(label="📈 Market Insights")

    gr.Examples(
        examples=[
            ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
            ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
            ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15],
        ],
        inputs=prediction_inputs,
    )
# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    # "0.0.0.0" binds to all network interfaces so the app is reachable from
    # outside the host/container.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )