Chia Woon Yap committed on
Commit
3cd69dc
·
verified ·
1 Parent(s): 654bb1b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -0
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1-jiUnfRGcb_iRcTXISQT__JTrBD7QqFM
8
+ """
9
+
10
+ import gradio as gr
11
+ import pandas as pd
12
+ import numpy as np
13
+ import joblib
14
+ import plotly.graph_objects as go
15
+ import plotly.express as px
16
+ from huggingface_hub import hf_hub_download
17
+ import os
18
+ from pathlib import Path
19
+ import warnings
20
+ warnings.filterwarnings('ignore')
21
+
22
+ # Load models using Hugging Face Hub (handles Xet pointers)
23
def load_models():
    """Download and deserialize the two price-prediction models.

    Each model file is fetched with ``hf_hub_download`` from the
    ``Lesterchia174/HDB_Price_Predictor`` Space and loaded with
    ``joblib.load``. A failure for one model is printed and recorded as
    ``None`` so that the other model can still be loaded.

    Returns:
        dict: ``{'xgboost': model_or_None, 'linear_regression': model_or_None}``.
    """
    # (key in the returned dict, file name in the Space, label for messages)
    model_specs = [
        ('xgboost', "best_model_xgboost.joblib", "XGBoost"),
        ('linear_regression', "linear_regression.joblib", "Linear Regression"),
    ]

    models = {}
    for key, filename, label in model_specs:
        try:
            # Going through the Hub client (rather than reading a file from
            # the checkout) handles Xet pointers automatically.
            model_path = hf_hub_download(
                repo_id="Lesterchia174/HDB_Price_Predictor",
                filename=filename,
                repo_type="space",
            )
            # NOTE(security): joblib files are pickle-based; only load model
            # files from a repository you trust.
            models[key] = joblib.load(model_path)
            print(f"✅ {label} model loaded successfully via Hugging Face Hub")
            print(f"   File size: {os.path.getsize(model_path)} bytes")
        except Exception as e:
            # Deliberately broad: any download or deserialization problem
            # degrades to "model unavailable" instead of stopping start-up.
            print(f"❌ Error loading {label} model: {e}")
            models[key] = None

    return models
58
+
59
def load_data():
    """Load the processed HDB resale-price dataset as a DataFrame.

    The CSV is downloaded from the ``Lesterchia174/HDB_Price_Predictor``
    Space with ``hf_hub_download`` and parsed with ``pandas.read_csv``. If
    the download or the parse fails, the error is printed and the result of
    ``create_sample_data()`` is returned instead.

    Returns:
        pandas.DataFrame: the downloaded dataset, or generated sample data.
    """
    try:
        # Download data file
        data_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="base_hdb_resale_prices_2015Jan-2025Jun_processed.csv",
            repo_type="space",
        )
        df = pd.read_csv(data_path)
    except Exception as e:
        # Deliberately broad: the app should still start when the dataset
        # cannot be fetched or parsed.
        print(f"❌ Error loading data: {e}")
        # Fallback to creating sample data
        print("⚠️ Creating sample data for demonstration")
        return create_sample_data()
    else:
        print("✅ Data loaded successfully via Hugging Face Hub")
        return df
77
+
78
def create_sample_data():
    """Build a synthetic resale dataset for when the real CSV is unavailable.

    Generates 100 rows. Each row picks a town, flat type and flat model at
    random, draws a floor area, storey and age, and derives a price from a
    simple linear formula plus random noise, floored at 300000.

    The rows are reproducible: a private generator seeded with 42 is used, so
    the process-wide ``np.random`` state is left untouched.

    Returns:
        pandas.DataFrame: columns ``town``, ``flat_type``, ``flat_model``,
        ``floor_area_sqm``, ``storey_level``, ``flat_age``, ``resale_price``.
    """
    # Private legacy generator: same seed as before, but without reseeding
    # the global NumPy RNG that other code may rely on.
    rng = np.random.RandomState(42)

    towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
    flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
    flat_models = ['Improved', 'Model A', 'New Generation']

    rows = []
    for _ in range(100):
        # The order of the draws is part of what makes the data reproducible.
        town = rng.choice(towns)
        flat_type = rng.choice(flat_types)
        flat_model = rng.choice(flat_models)
        floor_area = rng.randint(85, 150)
        storey = rng.randint(1, 25)
        age = rng.randint(0, 40)

        base_price = floor_area * 5000
        # A later position in the list means a larger bonus.
        town_bonus = towns.index(town) * 20000
        storey_bonus = storey * 2000
        age_discount = age * 1500
        flat_type_bonus = flat_types.index(flat_type) * 30000

        resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus
        # Add noise, then clamp so no sample price drops below 300000.
        resale_price = max(300000, resale_price + rng.randint(-20000, 20000))

        rows.append({
            'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
            'floor_area_sqm': floor_area, 'storey_level': storey,
            'flat_age': age, 'resale_price': resale_price,
        })

    return pd.DataFrame(rows)
110
+
111
def create_dummy_model(model_type):
    """Create a stand-in model used when a real model file cannot be loaded.

    Defined ahead of the preload code below because that code calls it at
    import time; with the definition placed after the call, a failed model
    load raised NameError instead of falling back.

    Args:
        model_type: ``"xgboost"`` selects the XGBoost-style pricing rule; any
            other value selects the alternative rule.

    Returns:
        An object exposing ``predict``, ``n_features_in_`` and
        ``feature_names_in_``.
    """
    class RealisticDummyModel:
        """Rule-based price estimator standing in for a trained model."""

        def __init__(self, model_type):
            self.model_type = model_type
            self.n_features_in_ = 9
            self.feature_names_in_ = [
                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
                'transaction_year', 'flat_type_encoded', 'town_encoded',
                'flat_model_encoded', 'dummy_feature'
            ]

        def predict(self, X):
            """Estimate a price from the first row of ``X``.

            Args:
                X: indexable collection of rows; only ``X[0]`` is read, at
                    positions 0 (floor area), 1 (storey level), 2 (flat age)
                    and 6 (encoded town).

            Returns:
                A single number, never below 300000.
            """
            # NOTE(review): this returns one number for the first row only;
            # the models loaded with joblib presumably return one value per
            # row - confirm that callers handle both shapes.
            first_row = X[0]
            floor_area = first_row[0]
            storey_level = first_row[1]
            flat_age = first_row[2]
            town_encoded = first_row[6]

            base_price = floor_area * (4800 + town_encoded * 200)
            storey_bonus = storey_level * 2500
            age_discount = flat_age * 1800

            if self.model_type == "xgboost":
                price = base_price + storey_bonus - age_discount + 35000
                # Extra premiums for high floors and for young flats.
                if storey_level > 20:
                    price += 15000
                if flat_age < 10:
                    price += 20000
            else:
                price = base_price + storey_bonus - age_discount - 25000

            return max(300000, price)

    return RealisticDummyModel(model_type)


# Preload models and data
print("Loading models and data using Hugging Face Hub...")
models = load_models()
data = load_data()

# If models failed to load, create dummy ones
if models['xgboost'] is None:
    print("⚠️ Creating dummy XGBoost model for demonstration")
    models['xgboost'] = create_dummy_model("xgboost")

if models['linear_regression'] is None:
    print("⚠️ Creating dummy Linear Regression model for demonstration")
    models['linear_regression'] = create_dummy_model("linear_regression")
159
+
160
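+ # TODO: paste preprocess_input, create_market_insights_chart and predict_hdb_price here, unchanged from the previous version. predict_hdb_price is referenced by the Gradio click handler below, so it must be defined before the interface is built or the script fails with NameError.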
161
+
162
+ # Define Gradio interface
163
# Choices offered by the "Town" dropdown.
towns_list = [
    "SENGKANG",
    "WOODLANDS",
    "TAMPINES",
    "PUNGGOL",
    "JURONG WEST",
    "YISHUN",
    "BEDOK",
    "HOUGANG",
    "CHOA CHU KANG",
    "ANG MO KIO",
]

# Choices offered by the "Flat Type" dropdown.
flat_types = [
    "3 ROOM",
    "4 ROOM",
    "5 ROOM",
    "EXECUTIVE",
    "2 ROOM",
    "1 ROOM",
]

# Choices offered by the "Flat Model" dropdown.
flat_models = [
    "Model A",
    "Improved",
    "New Generation",
    "Standard",
    "Premium",
]
170
+
171
+ # Create Gradio interface
172
# Build the UI: input controls in the first column, prediction outputs in the
# second column, and the chart in its own row underneath.
with gr.Blocks(title="🏠 HDB Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏠 HDB Price Predictor")
    gr.Markdown("Predict HDB resale prices using different machine learning models")

    with gr.Row():
        with gr.Column():
            # Dropdown choices are shown alphabetically; each default value
            # is one of the listed choices.
            town = gr.Dropdown(label="Town", choices=sorted(towns_list), value="ANG MO KIO")
            flat_type = gr.Dropdown(label="Flat Type", choices=sorted(flat_types), value="4 ROOM")
            flat_model = gr.Dropdown(label="Flat Model", choices=sorted(flat_models), value="Improved")
            floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
            storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
            flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)
            model_choice = gr.Radio(label="Select Model",
                                    choices=["XGBoost", "Linear Regression"],
                                    value="XGBoost")

            predict_btn = gr.Button("🔮 Predict Price", variant="primary")

        with gr.Column():
            predicted_price = gr.Label(label="💰 Predicted Price")
            insights = gr.Markdown(label="📋 Property Summary")

    with gr.Row():
        chart_output = gr.Plot(label="📈 Market Insights (Both Models)")

    # Connect button to function. The handler receives the seven inputs in
    # the order listed and must return three values matching `outputs`.
    predict_btn.click(
        fn=predict_hdb_price,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice],
        outputs=[predicted_price, chart_output, insights]
    )
203
+
204
# For debugging: report how many input features each available model expects.
for _label, _key in (("XGBoost", 'xgboost'), ("Linear Regression", 'linear_regression')):
    _model = models[_key]
    if _model is None:
        continue
    print(f"{_label} model expects {_model.n_features_in_} features")

# To run in Colab
if __name__ == "__main__":
    demo.launch(share=True)