File size: 11,206 Bytes
76993b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58407db
6242ca0
 
76993b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4e9419
76993b7
 
 
 
 
 
d4e9419
58407db
 
b4c1216
d4e9419
76993b7
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import gradio as gr
import joblib
from sklearn.preprocessing import LabelEncoder

# Attempt to deserialize the trained model from the working directory.
# `model` stays None on any failure; the prediction code checks for None and
# switches to the hand-written fallback estimator in that case.
model = None
try:
    model = joblib.load('best_model_xgboost.joblib')
except FileNotFoundError:
    print("Warning: best_model_xgboost.joblib not found. Using fallback model.")
except Exception as load_error:
    # Anything else (corrupt file, missing dependency, ...) is reported but
    # must not prevent the app from starting.
    print(f"Error loading model: {load_error}")
else:
    print("XGBoost model loaded successfully!")

# Simulated transaction data (used for the market-insights panel and to derive
# the category lists shown in the UI).
def generate_sample_data(n_samples=1000, seed=42):
    """Build a reproducible DataFrame of simulated HDB resale transactions.

    The values are drawn from a private ``numpy.random.RandomState`` so that
    calling this function does not reseed or advance NumPy's process-wide
    random generator. With the default seed the draws follow the same legacy
    stream that ``np.random.seed(42)`` followed by the module-level functions
    would produce.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        A DataFrame with columns ``town``, ``flat_type``, ``flat_model``
        (random picks from fixed category lists), ``floor_area_sqm``
        (uniform in [60, 150)), ``storey_level`` (integers 1..24),
        ``flat_age`` (integers 0..49) and ``resale_price``
        (uniform in [200000, 800000)).
    """
    rng = np.random.RandomState(seed)

    towns = [
        'ANG MO KIO', 'BEDOK', 'BISHAN', 'BUKIT BATOK', 'BUKIT MERAH',
        'BUKIT PANJANG', 'BUKIT TIMAH', 'CENTRAL AREA', 'CHOA CHU KANG',
        'CLEMENTI', 'GEYLANG', 'HOUGANG', 'JURONG EAST', 'JURONG WEST',
        'KALLANG/WHAMPOA', 'MARINE PARADE', 'PASIR RIS', 'PUNGGOL',
        'QUEENSTOWN', 'SEMBAWANG', 'SENGKANG', 'SERANGOON', 'TAMPINES',
        'TOA PAYOH', 'WOODLANDS', 'YISHUN',
    ]
    flat_types = ['2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', 'MULTI-GENERATION']
    flat_models = [
        '2 ROOM', '3Gen', 'Adjoined flat', 'Apartment', 'DBSS', 'Improved',
        'Improved-Maisonette', 'Maisonette', 'Model A', 'Model A-Maisonette',
        'Model-A2', 'MULTI-GENERATION', 'New Generation', 'Premium Apartment',
        'Premium Apartment Loft', 'Simplified', 'Standard', 'Type S1', 'Type S2',
    ]

    # The draw order below determines which random numbers land in which
    # column, so it must stay fixed for the output to remain reproducible.
    return pd.DataFrame({
        'town': rng.choice(towns, n_samples),
        'flat_type': rng.choice(flat_types, n_samples),
        'flat_model': rng.choice(flat_models, n_samples),
        'floor_area_sqm': rng.uniform(60, 150, n_samples),
        'storey_level': rng.randint(1, 25, n_samples),
        'flat_age': rng.randint(0, 50, n_samples),
        'resale_price': rng.uniform(200000, 800000, n_samples),
    })

# Simulated transactions backing the insights panel and the dropdown choices.
data = generate_sample_data()

# Sorted distinct values of each categorical column found in the sample data.
towns_list, flat_types, flat_models = (
    sorted(data[column].unique().tolist())
    for column in ('town', 'flat_type', 'flat_model')
)

# One label encoder per categorical column, fitted on the lists above.
# NOTE(review): these are fitted on the simulated sample, not loaded from the
# training run - confirm the resulting integer codes match what the model saw.
town_encoder = LabelEncoder().fit(towns_list)
flat_type_encoder = LabelEncoder().fit(flat_types)
flat_model_encoder = LabelEncoder().fit(flat_models)

def simple_xgboost_emulation(input_data):
    """Estimate a price with fixed linear weights when no model is usable.

    Args:
        input_data: Mapping with keys ``town``, ``flat_type``, ``flat_model``,
            ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
            ``remaining_lease``.

    Returns:
        The weighted estimate, floored at 100000.

    Raises:
        ValueError: If a categorical value is not present in the module-level
            ``towns_list`` / ``flat_types`` / ``flat_models`` lists.
    """
    base_price = 220000
    per_sqm, per_storey, per_age_year, per_lease_year = 5200, 1800, -2800, 1200
    town_step, flat_type_step, flat_model_step = 9500, 14500, 8500
    interaction_scale = 500

    # Categorical contribution: position in the sorted category list times a
    # per-category step.
    town_term = towns_list.index(input_data['town']) * town_step
    flat_type_term = flat_types.index(input_data['flat_type']) * flat_type_step
    flat_model_term = flat_models.index(input_data['flat_model']) * flat_model_step

    area = input_data['floor_area_sqm']
    storey = input_data['storey_level']

    # Area x storey cross term standing in for tree-style feature interactions.
    cross_term = (area * storey) / 100 * interaction_scale

    # Accumulate in a fixed order so floating-point results are reproducible.
    estimate = base_price
    estimate = estimate + area * per_sqm
    estimate = estimate + storey * per_storey
    estimate = estimate + input_data['flat_age'] * per_age_year
    estimate = estimate + input_data['remaining_lease'] * per_lease_year
    estimate = estimate + town_term
    estimate = estimate + flat_type_term
    estimate = estimate + flat_model_term
    estimate = estimate + cross_term

    return max(estimate, 100000)

def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Bundle the raw form values into the feature dictionary used downstream.

    The floor area is coerced to ``float`` and the storey level and flat age
    to ``int``; ``remaining_lease`` is derived as ``99 - flat_age``. The
    categorical values are passed through unchanged.

    Returns:
        A dict with keys ``town``, ``flat_type``, ``flat_model``,
        ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
        ``remaining_lease``.
    """
    area = float(floor_area_sqm)
    storey = int(storey_level)
    age_years = int(flat_age)

    return dict(
        town=town,
        flat_type=flat_type,
        flat_model=flat_model,
        floor_area_sqm=area,
        storey_level=storey,
        flat_age=age_years,
        remaining_lease=99 - age_years,
    )

def prepare_features_for_model(input_data):
    """Turn a preprocessed input dict into a one-row numeric feature frame.

    The three categorical values are converted to integer codes with the
    module-level label encoders; the raw strings are not part of the result.

    Args:
        input_data: Mapping as produced by ``preprocess_input``.

    Returns:
        A single-row DataFrame with the columns ``floor_area_sqm``,
        ``storey_level``, ``flat_age``, ``remaining_lease``, ``town_encoded``,
        ``flat_type_encoded`` and ``flat_model_encoded``, in that order.
        NOTE(review): this column set/order is assumed to match what the
        model was trained on - confirm against the training code.
    """
    town, flat_type, flat_model = (
        input_data[key] for key in ('town', 'flat_type', 'flat_model')
    )

    encoded_row = {
        'floor_area_sqm': input_data['floor_area_sqm'],
        'storey_level': input_data['storey_level'],
        'flat_age': input_data['flat_age'],
        'remaining_lease': input_data['remaining_lease'],
        # transform() works on sequences, so wrap the value and unwrap the code.
        'town_encoded': town_encoder.transform([town])[0],
        'flat_type_encoded': flat_type_encoder.transform([flat_type])[0],
        'flat_model_encoded': flat_model_encoder.transform([flat_model])[0],
    }

    return pd.DataFrame([encoded_row])

def predict_with_xgboost(input_data):
    """Predict a price with the loaded model, falling back to the emulation.

    The fallback estimator is used both when no model was loaded and when
    feature preparation or the model's ``predict`` call raises.

    Args:
        input_data: Mapping as produced by ``preprocess_input``.

    Returns:
        The predicted price, never below 100000 when it comes from the model.
    """
    if model is not None:
        try:
            feature_frame = prepare_features_for_model(input_data)
            raw_prediction = model.predict(feature_frame)[0]
            # Clamp to the minimum price.
            return max(raw_prediction, 100000)
        except Exception as e:
            # Best effort: report the problem and continue with the fallback.
            print(f"Prediction error: {e}")

    return simple_xgboost_emulation(input_data)

def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Return a Markdown text block comparing a prediction with sample data.

    Args:
        data: DataFrame with ``town``, ``flat_type`` and ``resale_price``
            columns.
        town: Town to filter on.
        flat_type: Flat type to filter on.
        predicted_price: Price to compare against the filtered average.

    Returns:
        Markdown text with the transaction count, average and min/max price
        of the matching rows plus the prediction's percentage deviation from
        the average, or a short notice when no row matches.
    """
    same_town = data['town'] == town
    same_type = data['flat_type'] == flat_type
    matches = data[same_town & same_type]

    if matches.empty:
        return "No historical data available for this town and flat type combination."

    prices = matches['resale_price']
    avg_price = prices.mean()
    min_price = prices.min()
    max_price = prices.max()
    n_sales = len(matches)

    # Relative deviation of the prediction from the average; guarded against
    # a non-positive average.
    if avg_price > 0:
        percentage_diff = ((predicted_price - avg_price) / avg_price) * 100
    else:
        percentage_diff = 0

    return f"""
    ## Market Insights for {town} - {flat_type}
    
    - Historical transactions: {n_sales}
    - Average price: ${avg_price:,.2f}
    - Price range: ${min_price:,.2f} - ${max_price:,.2f}
    
    ### Prediction Analysis:
    - Predicted Price: ${predicted_price:,.2f}
    - Difference from average: {percentage_diff:+.1f}%
    
    *Note: Market insights are based on simulated data. Prediction uses {'XGBoost model' if model else 'fallback model'}.*
    """

def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Validate the form inputs, predict a resale price and build the UI outputs.

    Args:
        town: Selected town.
        flat_type: Selected flat type.
        flat_model: Selected flat model.
        floor_area_sqm: Floor area; anything ``float()`` accepts.
        storey_level: Storey level; anything ``int()`` accepts.
        flat_age: Flat age in years; anything ``int()`` accepts.

    Returns:
        A 3-tuple ``(price_text, insights_markdown, summary_markdown)``.
        When validation fails, the first element is an error message and the
        other two are ``"Invalid input"``.
    """
    import math  # local import: only needed for the finiteness check below

    not_numeric = (
        "Please enter valid numbers for floor area, storey level, and flat age.",
        "Invalid input",
        "Invalid input",
    )

    # Coerce the numeric fields. TypeError covers missing values (e.g. None
    # from an empty field), ValueError covers non-numeric text and NaN passed
    # to int(), OverflowError covers int() of an infinite float.
    try:
        floor_area_sqm = float(floor_area_sqm)
        storey_level = int(storey_level)
        flat_age = int(flat_age)
    except (TypeError, ValueError, OverflowError):
        return not_numeric

    # NaN/inf survive float() and would slip past the range check below
    # (NaN compares False against everything), so reject them explicitly.
    if not math.isfinite(floor_area_sqm):
        return not_numeric

    if floor_area_sqm <= 0 or storey_level <= 0 or flat_age < 0:
        return "Invalid input: Please enter positive values.", "Invalid input", "Invalid input"

    # Preprocess the user input
    input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)

    # Make prediction using XGBoost model (or the fallback estimator)
    predicted_price = predict_with_xgboost(input_data)

    # Generate insights
    insights = create_market_insights_chart(
        data=data,
        town=town,
        flat_type=flat_type,
        predicted_price=predicted_price,
    )

    # Same None check that predict_with_xgboost uses to pick the estimator.
    model_source = "XGBoost model" if model is not None else "fallback model"

    summary = f"""
    ### Property Details 🏡
    - **Town:** {town}
    - **Flat Type:** {flat_type}
    - **Flat Model:** {flat_model}
    - **Floor Area:** {floor_area_sqm} sqm
    - **Storey Level:** {storey_level}
    - **Flat Age:** {flat_age} years

    ---

    ### Prediction Summary
    The predicted price is **${predicted_price:,.2f}**.
    
    *Prediction made using {model_source}. Market insights based on simulated data.*
    """

    return f"${predicted_price:,.2f}", insights, summary

# Assemble the Gradio UI: input widgets on the left, results on the right.
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏘️ HDB Resale Price Predictor")
    gr.Markdown("Estimate the resale price of HDB flats in Singapore based on property features.")

    # Banner telling the user which estimator is active.
    model_status = (
        "✅ **XGBoost model loaded successfully!**"
        if model
        else "⚠️ **Using fallback model - XGBoost model not found**"
    )
    gr.Markdown(model_status)

    with gr.Row():
        # Left column: property features.
        with gr.Column():
            town = gr.Dropdown(label="Town", choices=towns_list, value="ANG MO KIO")
            flat_type = gr.Dropdown(label="Flat Type", choices=flat_types, value="4 ROOM")
            flat_model = gr.Dropdown(label="Flat Model", choices=flat_models, value="Improved")
            floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100, minimum=1, maximum=500)
            storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, step=1, value=5)
            flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, step=1, value=10)
            predict_btn = gr.Button("Predict Price", variant="primary")

        # Right column: prediction, market insights and summary.
        with gr.Column():
            price_output = gr.Label(label="Predicted Resale Price")
            insights_output = gr.Markdown()
            summary_output = gr.Markdown()

    # The input order must match the parameter order of predict_hdb_price and
    # the output order must match its returned 3-tuple.
    predict_btn.click(
        fn=predict_hdb_price,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
        outputs=[price_output, insights_output, summary_output],
    )

    # Clickable presets that fill the input widgets.
    example_rows = [
        ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
        ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
        ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15],
    ]
    gr.Examples(
        examples=example_rows,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
    )

# Start the web server only when run as a script; 0.0.0.0 listens on all
# network interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)