Chia Woon Yap committed on
Commit
a2140b7
·
verified ·
1 Parent(s): 290548d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +762 -0
app.py CHANGED
@@ -7,10 +7,772 @@ import plotly.express as px
7
  from huggingface_hub import hf_hub_download
8
  import os
9
  from pathlib import Path
 
 
 
 
 
 
 
 
 
10
  import warnings
11
  warnings.filterwarnings('ignore')
12
  import re
13
  from groq import Groq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Initialize Groq client
16
  groq_api_key = os.getenv("GROQ_API_KEY")
 
7
import html
import io
import json
import os
import re
import warnings
from pathlib import Path

import folium
import gradio as gr
import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from fastapi import FastAPI, HTTPException
from folium.plugins import MarkerCluster
from groq import Groq
from huggingface_hub import hf_hub_download

warnings.filterwarnings('ignore')
27
+
28
+
29
+ app = FastAPI()
30
+
31
# Initialize Groq client.
# The API key comes from the environment. When it is absent the app still
# starts: `client` is None and the chat features check for that before use.
groq_api_key = os.getenv("GROQ_API_KEY")

if not groq_api_key:
    print("โš ๏ธ GROQ_API_KEY not found. Chat functionality will be limited.")
    client = None
else:
    client = Groq(api_key=groq_api_key)
40
+
41
+
42
@app.post("/chat")
async def chat(prompt: str):
    """Forward a single user prompt to the Groq chat model and return its reply.

    Args:
        prompt: The user's message, sent as the only message of the conversation.

    Returns:
        A dict of the form ``{"reply": <model text>}``.

    Raises:
        HTTPException: 503 when no Groq client is configured (missing
            GROQ_API_KEY); 500 when the completion request fails.
    """
    if client is None:
        raise HTTPException(
            status_code=503,
            detail="โš ๏ธ Chat service is unavailable because GROQ_API_KEY is missing."
        )

    try:
        # NOTE(review): this is a blocking call inside an async handler, so it
        # holds the event loop for the duration of the request.
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": prompt}]
        )
        # The message is read through its `.content` attribute, as every other
        # call site in this file does; it is not subscriptable like a dict.
        return {"reply": response.choices[0].message.content}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
58
+
59
+
60
+
61
+
62
# Try to import xgboost, but fallback to scikit-learn.
# XGB_AVAILABLE records which of the two branches was taken.
try:
    import xgboost as xgb
except ImportError:
    XGB_AVAILABLE = False
    print("โš ๏ธ XGBoost not available, using scikit-learn models")
    from sklearn.ensemble import RandomForestRegressor
else:
    XGB_AVAILABLE = True
    print("โœ… XGBoost is available")
71
+
72
# Load map data
hf_raw_url = 'https://huggingface.co/spaces/Lesterchia174/FPOC_HDB_Price_Predictor_AI_chat_Assistant/resolve/main/Based_Resale_Prices_2025_with_coords.csv'


def load_map_dataset(source):
    """Read the resale CSV and keep only rows with a numeric remaining lease.

    Args:
        source: Anything ``pd.read_csv`` accepts (URL, path or file-like object).

    Returns:
        A DataFrame whose 'remaining_lease' column is numeric; rows where the
        value could not be converted are dropped.
    """
    frame = pd.read_csv(source)
    # Non-numeric lease values become NaN and are then removed.
    frame['remaining_lease'] = pd.to_numeric(frame['remaining_lease'], errors='coerce')
    return frame.dropna(subset=['remaining_lease'])


# Fallback values so that the slider/dropdown definitions further down still
# have something to bind to when the dataset cannot be loaded. Previously only
# `map_df` was assigned on failure and the UI construction raised NameError.
# NOTE(review): the numeric fallback ranges are placeholders - adjust if needed.
map_df = None
min_lease_val, max_lease_val = 0, 99
min_price_val, max_price_val = 0, 2000000
town_options = ['ALL']
flat_type_options = ['ALL']
flat_model_options = ['ALL']

try:
    _loaded_map_df = load_map_dataset(hf_raw_url)

    # Pre-calculate min/max for Gradio sliders
    _lease_bounds = (int(_loaded_map_df['remaining_lease'].min()),
                     int(_loaded_map_df['remaining_lease'].max()))
    _price_bounds = (int(_loaded_map_df['resale_price'].min()),
                     int(_loaded_map_df['resale_price'].max()))

    # Get unique values for dropdowns
    _dropdowns = {
        column: ['ALL'] + sorted(list(_loaded_map_df[column].unique()))
        for column in ('town', 'flat_type', 'flat_model')
    }
except Exception as e:
    print(f"Error loading the map dataset: {e}")
else:
    # Publish everything at once, so a failure half-way through never leaves a
    # mix of real and fallback values behind.
    map_df = _loaded_map_df
    min_lease_val, max_lease_val = _lease_bounds
    min_price_val, max_price_val = _price_bounds
    town_options = _dropdowns['town']
    flat_type_options = _dropdowns['flat_type']
    flat_model_options = _dropdowns['flat_model']
96
+
97
def create_dummy_model(model_type):
    """Create a stand-in model exposing the estimator methods this file uses.

    Used when the real model cannot be downloaded or loaded. The returned
    object offers ``predict``, ``get_params`` and ``set_params`` plus the
    ``n_features_in_`` / ``feature_names_in_`` attributes.

    Args:
        model_type: Label stored on the instance as ``model_type``.

    Returns:
        A ``RealisticDummyModel`` instance (previously the instance was
        *called*, which raised TypeError on every fallback).
    """
    class RealisticDummyModel:
        def __init__(self, model_type):
            self.model_type = model_type
            self.n_features_in_ = 9
            self.feature_names_in_ = [
                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
                'transaction_year', 'flat_type_encoded', 'town_encoded',
                'flat_model_encoded', 'dummy_feature'
            ]

        # Real methods rather than lambdas stored on the instance.
        def get_params(self, deep=True):
            return {}

        def set_params(self, **params):
            return self

        def predict(self, X):
            """Return one heuristic price per input row (a single 1-D row is accepted)."""
            rows = np.atleast_2d(np.asarray(X, dtype=float))

            floor_area = rows[:, 0]
            storey_level = rows[:, 1]
            flat_age = rows[:, 2]
            town_encoded = rows[:, 6]

            base_price = floor_area * (4800 + town_encoded * 200)
            storey_bonus = storey_level * 2500
            age_discount = flat_age * 1800

            price = base_price + storey_bonus - age_discount + 35000
            price = price + np.where(storey_level > 20, 15000, 0)  # high-floor bonus
            price = price + np.where(flat_age < 10, 20000, 0)      # young-flat bonus

            # Never go below the 300k floor used by the original heuristic.
            return np.maximum(300000, price)

    return RealisticDummyModel(model_type)
134
+
135
def safe_joblib_load(filepath):
    """Load a joblib file, returning None instead of raising on any failure.

    The loaded object must expose ``predict``; otherwise None is returned.
    Missing ``get_params`` / ``set_params`` attributes are filled with no-op
    shims. Progress and errors are reported with ``print``.
    """
    try:
        loaded = joblib.load(filepath)
        print(f"โœ… Successfully loaded model from {filepath}")

        # An object without predict() is useless to the caller.
        if not hasattr(loaded, 'predict'):
            print("โŒ Loaded object doesn't have predict method")
            return None

        # Patch in whichever estimator helpers are absent.
        shims = (
            ('get_params', lambda deep=True: {}),
            ('set_params', lambda **params: loaded),
        )
        for attr_name, shim in shims:
            if not hasattr(loaded, attr_name):
                setattr(loaded, attr_name, shim)

        return loaded

    except Exception as err:
        print(f"โŒ Error loading model from {filepath}: {err}")
        return None
157
+
158
def load_models():
    """Return a dict of models keyed by name, falling back to a dummy model.

    Downloads the XGBoost joblib file from the Hugging Face Hub and loads it
    via ``safe_joblib_load``. If the download raises or the load yields None,
    ``create_dummy_model("xgboost")`` is stored under 'xgboost' instead.
    """
    registry = {}

    # Try to load XGBoost model
    try:
        local_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="best_model_xgboost.joblib",
            repo_type="space"
        )
        candidate = safe_joblib_load(local_path)
        registry['xgboost'] = candidate
        if candidate is not None:
            print("โœ… XGBoost model loaded and validated")
        else:
            print("โš ๏ธ Creating dummy model for XGBoost")
            registry['xgboost'] = create_dummy_model("xgboost")

    except Exception as err:
        print(f"โŒ Error downloading XGBoost model: {err}")
        print("โš ๏ธ Creating dummy model for XGBoost")
        registry['xgboost'] = create_dummy_model("xgboost")

    return registry
182
+
183
def load_data():
    """Fetch the processed resale CSV from the Hugging Face Hub as a DataFrame.

    Any failure is printed and the result of ``create_sample_data()`` is
    returned instead.
    """
    try:
        csv_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="base_hdb_resale_prices_2015Jan-2025Jun_processed.csv",
            repo_type="space"
        )
        frame = pd.read_csv(csv_path)
    except Exception as err:
        print(f"โŒ Error loading data: {err}")
        return create_sample_data()
    else:
        print("โœ… Data loaded successfully via Hugging Face Hub")
        return frame
197
+
198
def create_sample_data():
    """Build a 100-row synthetic resale dataset for when real data is unavailable.

    Seeds NumPy's global RNG with 42, so repeated calls return identical frames.
    Columns: town, flat_type, flat_model, floor_area_sqm, storey_level,
    flat_age, resale_price (floored at 300000).
    """
    np.random.seed(42)
    towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
    flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
    flat_models = ['Improved', 'Model A', 'New Generation']

    def draw_record():
        # The order of the random draws below determines the generated values.
        town = np.random.choice(towns)
        flat_type = np.random.choice(flat_types)
        flat_model = np.random.choice(flat_models)
        floor_area = np.random.randint(85, 150)
        storey = np.random.randint(1, 25)
        age = np.random.randint(0, 40)

        # Additive price model: size, town rank, floor and flat type add value;
        # age subtracts it.
        resale_price = (
            floor_area * 5000
            + towns.index(town) * 20000
            + storey * 2000
            - age * 1500
            + flat_types.index(flat_type) * 30000
        )
        resale_price = max(300000, resale_price + np.random.randint(-20000, 20000))

        return {
            'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
            'floor_area_sqm': floor_area, 'storey_level': storey,
            'flat_age': age, 'resale_price': resale_price
        }

    return pd.DataFrame([draw_record() for _ in range(100)])
230
+
231
def preprocess_input(user_input, model_type='xgboost'):
    """Turn a user-input dict into the 1 x 9 feature array passed to the model.

    Categorical values are matched case-insensitively and with surrounding
    whitespace ignored, so "Ang Mo Kio" or "improved" (as an LLM may return
    them) resolve to the same code as the canonical spelling. Unknown or
    missing categories fall back to the defaults: flat type 4, town 0,
    flat model 0.

    Args:
        user_input: Dict with keys 'floor_area_sqm', 'storey_level',
            'flat_age', 'flat_type', 'town' and 'flat_model'.
        model_type: Accepted for interface compatibility; not used here.

    Returns:
        ``np.ndarray`` of shape (1, 9).
    """
    # Flat type mapping
    flat_type_mapping = {'1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4,
                         '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7}

    # Town mapping
    town_mapping = {
        'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3,
        'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7,
        'CHOA CHU KANG': 8, 'ANG MO KIO': 9
    }

    # Flat model mapping (keys upper-cased to match the normalised lookup)
    flat_model_mapping = {
        'MODEL A': 0, 'IMPROVED': 1, 'NEW GENERATION': 2,
        'STANDARD': 3, 'PREMIUM': 4
    }

    def normalise(value):
        # None (e.g. a flat model the chat flow could not extract) maps to ''
        # and therefore to the default code.
        return '' if value is None else str(value).strip().upper()

    # Create input array with features
    input_features = [
        user_input['floor_area_sqm'],                                   # Feature 1
        user_input['storey_level'],                                     # Feature 2
        user_input['flat_age'],                                         # Feature 3
        99 - user_input['flat_age'],                                    # Feature 4: remaining_lease
        2025,                                                           # Feature 5: transaction_year
        flat_type_mapping.get(normalise(user_input['flat_type']), 4),   # Feature 6: flat_type_ordinal
        town_mapping.get(normalise(user_input['town']), 0),             # Feature 7: town_encoded
        flat_model_mapping.get(normalise(user_input['flat_model']), 0), # Feature 8: flat_model_encoded
        1                                                               # Feature 9: (placeholder)
    ]

    return np.array([input_features])
264
+
265
def create_market_insights_chart(data, user_input, predicted_price):
    """Plot comparable listings with the predicted price marked as a red star.

    Comparables share the user's flat type and town. If fewer than five match,
    the town restriction is dropped. Returns a Plotly figure, or None when
    there is no data or no comparable listing at all.
    """
    if data is None or len(data) == 0:
        return None

    same_type = data['flat_type'] == user_input['flat_type']
    same_town = data['town'] == user_input['town']

    comparables = data[same_type & same_town]
    if len(comparables) < 5:
        # Too few local matches: widen to the flat type across all towns.
        comparables = data[data['flat_type'] == user_input['flat_type']]

    if not len(comparables) > 0:
        return None

    fig = px.scatter(
        comparables,
        x='floor_area_sqm',
        y='resale_price',
        color='flat_model',
        title=f"Market Position: {user_input['flat_type']} in {user_input['town']}",
        labels={'floor_area_sqm': 'Floor Area (sqm)', 'resale_price': 'Resale Price (SGD)'},
    )

    # Add model prediction
    star_marker = dict(symbol='star', size=20, color='red',
                       line=dict(width=2, color='darkred'))
    prediction_trace = go.Scatter(
        x=[user_input['floor_area_sqm']],
        y=[predicted_price],
        mode='markers',
        marker=star_marker,
        name='XGBoost Prediction',
    )
    fig.add_trace(prediction_trace)

    fig.update_layout(template="plotly_white", height=400, showlegend=True)
    return fig
294
+
295
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Predict a resale price and build the summary shown next to it.

    Returns a 3-tuple ``(price_label, chart, insights_markdown)``. If the model
    call fails, 400000 is used as the price. Any other failure yields
    ``("Error: Prediction failed", None, <message>)``.
    """
    field_names = ('town', 'flat_type', 'flat_model',
                   'floor_area_sqm', 'storey_level', 'flat_age')
    user_input = dict(zip(field_names, (town, flat_type, flat_model,
                                        floor_area_sqm, storey_level, flat_age)))

    try:
        features = preprocess_input(user_input)

        # Get prediction with error handling
        try:
            raw_prediction = models['xgboost'].predict(features)[0]
            predicted_price = max(0, float(raw_prediction))
        except Exception as model_err:
            print(f"โŒ XGBoost prediction error: {model_err}")
            predicted_price = 400000  # Fallback value

        # Create insights
        remaining_lease = 99 - flat_age
        price_per_sqm = predicted_price / floor_area_sqm

        # Financing note depends only on the remaining lease.
        if remaining_lease >= 60:
            financing_note = "โœ… Bank loan eligible"
        elif remaining_lease >= 20:
            financing_note = "โš ๏ธ HDB loan eligible only"
        else:
            financing_note = "โŒ Limited financing options"

        insights = f"""
**Property Summary:**
- Location: {town}
- Type: {flat_type}
- Model: {flat_model}
- Area: {floor_area_sqm} sqm
- Floor: Level {storey_level}
- Age: {flat_age} years
- Remaining Lease: {remaining_lease} years
- Price per sqm: ${price_per_sqm:,.0f}

**Predicted Price: ${predicted_price:,.0f}**

**Financing Eligibility:**
"""
        insights += financing_note

        # Create chart
        chart = create_market_insights_chart(data, user_input, predicted_price)

        return f"${predicted_price:,.0f}", chart, insights

    except Exception as err:
        error_msg = f"Prediction failed. Error: {str(err)}"
        print(error_msg)
        return "Error: Prediction failed", None, error_msg
352
+
353
def extract_parameters_from_query(query):
    """Extract HDB parameters from a natural language query using the LLM.

    Args:
        query: Free-text user question.

    Returns:
        The dict decoded from the model's JSON reply (keys as requested in the
        system prompt, unmentioned ones null/None), or ``{"error": <message>}``
        when the client is not configured, no JSON object can be decoded, or
        the request fails.
    """
    if not groq_api_key or client is None:
        return {"error": "Please set GROQ_API_KEY environment variable to use chat functionality."}

    def first_json_object(text):
        """Return the first decodable JSON object embedded in *text*, or None."""
        # A greedy brace-to-brace regex breaks as soon as the reply contains
        # any brace after the JSON. Decoding from each '{' in turn does not.
        decoder = json.JSONDecoder()
        start = text.find('{')
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = text.find('{', start + 1)
        return None

    try:
        # System prompt to guide the LLM
        system_prompt = """You are an expert at extracting parameters for HDB price prediction from natural language queries.
Extract the following parameters if mentioned in the query:
- town (e.g., Ang Mo Kio, Bedok, Tampines)
- flat_type (e.g., 3 ROOM, 4 ROOM, 5 ROOM, EXECUTIVE)
- flat_model (e.g., Improved, Model A, New Generation, Standard, Premium)
- floor_area_sqm (floor area in square meters)
- storey_level (floor level)
- flat_age (age of flat in years)

Return only a JSON object with the extracted parameters. If a parameter is not mentioned, set it to null.
Example: {"town": "ANG MO KIO", "flat_type": "4 ROOM", "flat_model": "Improved", "floor_area_sqm": 95, "storey_level": 8, "flat_age": 15}"""

        # Query the LLM
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query}
            ],
            temperature=0.1,
            max_tokens=200
        )

        # Extract and parse the JSON response
        response = completion.choices[0].message.content
        params = first_json_object(response)
        if params is None:
            return {"error": "Could not extract parameters from query"}
        return params

    except Exception as e:
        return {"error": f"Error processing query: {str(e)}"}
396
+
397
def is_small_talk(query):
    """Return True when the query contains a small-talk keyword as whole words.

    Keywords are matched on word boundaries, case-insensitively. Plain
    substring matching misclassified real questions: 'hi' matched inside
    "Bishan" or "which", and 'hey' inside "they".
    """
    small_talk_keywords = [
        'hello', 'hi', 'hey', 'good morning', 'good afternoon', 'good evening',
        'how are you', 'how are things', "what's up", 'how do you do',
        'thank you', 'thanks', 'bye', 'goodbye', 'see you', 'nice to meet you',
        'who are you', 'what can you do', 'help', 'tell me about yourself'
    ]

    query_lower = query.lower()
    return any(
        re.search(rf"\b{re.escape(keyword)}\b", query_lower)
        for keyword in small_talk_keywords
    )
408
+
409
def handle_small_talk(query):
    """Return a canned reply for a small-talk query.

    Categories are checked in a fixed order (greeting, how-are-you, thanks,
    goodbye, identity, capabilities, about) and the first match wins.
    Phrases are matched on word boundaries, so "how are things" is no longer
    answered as a greeting because 'hi' occurs inside "things". Anything
    unmatched gets a generic offer of help.
    """
    query_lower = query.lower()

    def mentions(phrases):
        return any(re.search(rf"\b{re.escape(phrase)}\b", query_lower) for phrase in phrases)

    # Ordered (trigger phrases, reply) pairs; order matters.
    replies = [
        (['hello', 'hi', 'hey', 'good morning', 'good afternoon', 'good evening'],
         "Hello! ๐Ÿ‘‹ I'm your HDB price assistant. How can I help you today?"),
        (['how are you', 'how are things', "what's up", 'how do you do'],
         "I'm doing great, thanks for asking! I'm here to help you with HDB price predictions and information. What can I assist you with today?"),
        (['thank you', 'thanks'],
         "You're welcome! ๐Ÿ˜Š Is there anything else you'd like to know about HDB prices?"),
        (['bye', 'goodbye', 'see you'],
         "Goodbye! ๐Ÿ‘‹ Feel free to come back if you have more questions about HDB prices!"),
        (['who are you'],
         "I'm an AI assistant specialized in helping with HDB resale price predictions and information. I can estimate property values based on various factors like location, flat type, size, and age."),
        (['what can you do', 'help'],
         "I can help you with:\n- Predicting HDB resale prices\n- Answering questions about HDB properties\n- Providing market insights\n\nJust tell me about the property you're interested in (location, type, size, etc.) and I'll give you an estimate!"),
        (['tell me about yourself'],
         "I'm an AI assistant powered by machine learning models trained on HDB resale data. I can provide price estimates and insights about public housing in Singapore. My goal is to help you make informed decisions about HDB properties!"),
    ]

    for phrases, reply in replies:
        if mentions(phrases):
            return reply

    return "I'm here to help with HDB price predictions and information. How can I assist you today?"
436
+
437
def answer_general_hdb_question(query, chat_history):
    """Answer a general HDB question with the LLM and record the exchange.

    Args:
        query: The user's question.
        chat_history: List of ``(user, assistant)`` tuples; appended to in
            place. None is treated as an empty history.

    Returns:
        ``(response_text, chat_history)``. When the client is not configured,
        the notice is returned without touching the history. When the request
        fails, a generic apology is returned and recorded, and the underlying
        error is printed.
    """
    if chat_history is None:
        chat_history = []

    if not groq_api_key or client is None:
        return "Please set GROQ_API_KEY environment variable to use chat functionality.", chat_history

    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant specialized in HDB (Housing & Development Board) properties in Singapore. Provide accurate, helpful information about HDB prices, policies, and market trends."
                },
                {
                    "role": "user",
                    "content": f"Answer this question about HDB: {query}"
                }
            ],
            temperature=0.3,
            max_tokens=500
        )
        response = completion.choices[0].message.content
        chat_history.append((query, response))
        return response, chat_history
    except Exception as e:
        # The user sees a generic message; print the real cause so failures
        # are not lost entirely.
        print(f"Error answering general HDB question: {e}")
        error_msg = "I encountered an error. Please try again later."
        chat_history.append((query, error_msg))
        return error_msg, chat_history
465
+
466
def chat_with_llm(query, chat_history):
    """Route a chat message to small talk, general Q&A or a price prediction.

    Flow:
      1. small talk gets a canned reply;
      2. queries containing a "general" keyword are answered directly by the LLM;
      3. otherwise parameters are extracted and, depending on how many were
         found, the user gets a clarifying question, a list of missing
         details, or a prediction.

    Args:
        query: The user's message.
        chat_history: List of ``(user, assistant)`` tuples; appended to in
            place. None is treated as an empty history.

    Returns:
        ``(response_text, chat_history)``.
    """
    if chat_history is None:
        chat_history = []

    if not groq_api_key or client is None:
        return "Please set GROQ_API_KEY...", chat_history

    def reply(text):
        """Record the exchange and hand back the standard return pair."""
        chat_history.append((query, text))
        return text, chat_history

    # 1. First, check for small talk
    if is_small_talk(query):
        return reply(handle_small_talk(query))

    # 2. Check if the query is a clear request for a general explanation/trend (not a specific price)
    is_general_query = any(keyword in query.lower() for keyword in [
        'trend', 'overview', 'how are', 'what are', 'like in', 'average',
        'over the years', 'market', 'compare'
    ])

    # 3. If it's a general query, use the LLM to answer it directly
    if is_general_query:
        try:
            completion = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant specialized in HDB (Housing & Development Board) properties in Singapore. Provide accurate, helpful information about HDB prices, policies, and market trends. Use the provided context if available."
                    },
                    {
                        "role": "user",
                        "content": f"Based on general HDB market knowledge, answer this question: {query}"
                    }
                ],
                temperature=0.3,
                max_tokens=500
            )
            return reply(completion.choices[0].message.content)
        except Exception as e:
            # Print the cause; the user only sees a generic message.
            print(f"Error answering general query: {e}")
            return reply("I encountered an error. Please try again later.")

    # 4. If it's not clearly general, try to extract parameters for a specific prediction
    params = extract_parameters_from_query(query)

    if "error" in params:
        # If extraction failed, fall back to general Q&A
        return answer_general_hdb_question(query, chat_history)

    # 5. Check what we got back from parameter extraction
    extracted_params = {k: v for k, v in params.items() if v is not None}
    required_for_prediction = ['town', 'flat_type', 'floor_area_sqm', 'storey_level', 'flat_age']

    # 6. With fewer than three extracted parameters, it's likely a general question.
    if len(extracted_params) < 3:
        # Ask a clarifying question or provide a general overview
        if 'town' in extracted_params:
            town = extracted_params['town']
            response = f"You asked about {town}. HDB prices can vary widely based on flat type, size, age, and specific location within the town. "
            response += "For example, are you interested in 4-Room or 5-Room flats? What's your budget or preferred size? "
            response += "Alternatively, I can give you a prediction if you provide more details like flat type, size, and age."
        else:
            response = "I specialize in HDB price predictions and information. Could you provide more details about the property you're interested in (e.g., town, flat type, size) so I can give you a accurate estimate or information?"
        return reply(response)

    # 7. If we have most parameters, ask for the missing ones specifically
    missing_params = [param for param in required_for_prediction if params.get(param) is None]
    if missing_params:
        missing_list = ", ".join(missing_params)
        return reply(f"I'd be happy to predict a price for you. I just need a few more details: {missing_list}.")

    # 8. If we have all parameters, make a prediction!
    try:
        # Convert string numbers to appropriate types. Going through float()
        # lets values such as "8.0" become 8 instead of raising.
        if isinstance(params['floor_area_sqm'], str):
            params['floor_area_sqm'] = float(params['floor_area_sqm'])
        if isinstance(params['storey_level'], str):
            params['storey_level'] = int(float(params['storey_level']))
        if isinstance(params['flat_age'], str):
            params['flat_age'] = int(float(params['flat_age']))

        # Make prediction. flat_model is optional, so the key may be absent
        # from the model's JSON; .get() avoids a KeyError in that case.
        price, chart, insights = predict_hdb_price(
            params['town'], params['flat_type'], params.get('flat_model'),
            params['floor_area_sqm'], params['storey_level'], params['flat_age']
        )

        # Format response
        response = "Based on your query:\n\n"
        response += f"๐Ÿ“ Town: {params['town']}\n"
        response += f"๐Ÿ  Flat Type: {params['flat_type']}\n"
        response += f"๐Ÿ“ Floor Area: {params['floor_area_sqm']} sqm\n"
        response += f"๐Ÿข Storey Level: {params['storey_level']}\n"
        response += f"๐Ÿ“… Flat Age: {params['flat_age']} years\n\n"
        response += f"๐Ÿ’ฐ Predicted Price: {price}\n\n"
        response += insights

        return reply(response)

    except Exception as e:
        return reply(f"Error making prediction: {str(e)}")
575
+
576
def generate_map_and_stats(filter_town, filter_flat_type, filter_flat_model,
                           min_lease, max_lease, min_price, max_price):
    """Create the Singapore map and generate summary stats.

    Args:
        filter_town, filter_flat_type, filter_flat_model: Exact value to keep,
            or 'ALL' / empty for no restriction.
        min_lease, max_lease: Inclusive bounds on 'remaining_lease'.
        min_price, max_price: Inclusive bounds on 'resale_price'.

    Returns:
        ``(map_html, stats_markdown)``. When the dataset is unavailable or no
        row matches, a short HTML notice is returned instead of a map.
    """
    if map_df is None:
        return "<p align='center'>Dataset not found. Please ensure the URL is correct and the file exists.</p>", ""

    # Apply filters
    filtered_df = map_df
    categorical_filters = (
        ('town', filter_town),
        ('flat_type', filter_flat_type),
        ('flat_model', filter_flat_model),
    )
    for column, wanted in categorical_filters:
        if wanted and wanted != 'ALL':
            filtered_df = filtered_df[filtered_df[column] == wanted]

    # Filter based on lease and price sliders (both ends inclusive)
    filtered_df = filtered_df[
        filtered_df['remaining_lease'].between(min_lease, max_lease)
        & filtered_df['resale_price'].between(min_price, max_price)
    ]

    # Handle case with no matching records
    if len(filtered_df) == 0:
        return "<p align='center'>No data available with the selected filters.</p>", "No data available with the selected filters."

    # Create base map centered on Singapore
    singapore_coords = [1.3521, 103.8198]  # Approximate center of Singapore
    m = folium.Map(location=singapore_coords, zoom_start=11, tiles='OpenStreetMap')

    # Create marker cluster
    marker_cluster = MarkerCluster().add_to(m)

    # Create a Folium linear colormap using 'resale_price'
    folium_colormap = folium.LinearColormap(['green', 'yellow', 'red'],
                                            vmin=filtered_df['resale_price'].min(),
                                            vmax=filtered_df['resale_price'].max())
    folium_colormap.caption = 'Resale Price (SGD)'
    m.add_child(folium_colormap)

    def esc(value):
        # Popup text comes straight from the CSV; escape it so characters
        # such as '&' or '<' cannot break the popup markup.
        return html.escape(str(value))

    # Rows without coordinates cannot be plotted and would make folium raise.
    # They are skipped on the map only and still count in the statistics.
    plottable = filtered_df.dropna(subset=['latitude', 'longitude'])

    # Add markers for each property
    for row in plottable.to_dict('records'):
        # Get color based on 'resale_price'
        color = folium_colormap(row['resale_price'])

        popup_content = "\n".join([
            f"<b>Town:</b> {esc(row['town'])}<br>",
            f"<b>Flat Type:</b> {esc(row['flat_type'])}<br>",
            f"<b>Flat Model:</b> {esc(row['flat_model'])}<br>",
            f"<b>Address:</b> {esc(row['full_address'])}<br>",
            f"<b>Floor Area:</b> {esc(row['floor_area_sqm'])} sqm<br>",
            f"<b>Remaining Lease:</b> {esc(row['remaining_lease'])} years<br>",
            f"<b>Storey:</b> {esc(row['storey_range'])}<br>",
            f"<b>Resale Price:</b> ${row['resale_price']:,.0f}<br>",
            f"<b>Transaction Date:</b> {esc(row['month'])}",
        ])

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            popup=folium.Popup(popup_content, max_width=300),
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.7,
            weight=1
        ).add_to(marker_cluster)

    # Convert map to HTML string
    map_html = m._repr_html_()

    # Generate summary statistics as a markdown string. Lines are joined
    # explicitly so no source indentation leaks into the markdown.
    prices = filtered_df['resale_price']
    leases = filtered_df['remaining_lease']
    stats_string = "\n".join([
        "",
        "### Summary Statistics",
        f"- **Total Records:** {len(filtered_df):,}",
        f"- **Average Price [inc Outlier]:** ${prices.mean():,.0f}",
        f"- **Median Price [exc Outlier]:** ${prices.median():,.0f}",
        f"- **Minimum Price:** ${prices.min():,.0f}",
        f"- **Maximum Price:** ${prices.max():,.0f}",
        f"- **Average Remaining Lease:** {leases.mean():.1f} years",
        f"- **Median Remaining Lease:** {leases.median():.1f} years",
        "",
    ])

    return map_html, stats_string
662
+
663
# Preload models and data once at start-up so the UI callbacks can reuse them.
print("Loading models and data...")
models = load_models()
data = load_data()

# Choices offered by the prediction form.
towns_list = [
    'SENGKANG', 'WOODLANDS', 'TAMPINES', 'PUNGGOL', 'JURONG WEST',
    'YISHUN', 'BEDOK', 'HOUGANG', 'CHOA CHU KANG', 'ANG MO KIO'
]

flat_types = ['3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '2 ROOM', '1 ROOM']
flat_models = ['Model A', 'Improved', 'New Generation', 'Standard', 'Premium']

# Create Gradio interface with three tabs: prediction form, chat assistant, map.
with gr.Blocks(title="๐Ÿ  HDB Price Predictor + Chat + Map", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐Ÿ  HDB Price Predictor + AI Chat + Interactive Map")
    gr.Markdown("Predict HDB resale prices using XGBoost model, chat with our AI assistant, or explore properties on an interactive map")

    with gr.Tab("Traditional Interface"):
        with gr.Row():
            with gr.Column():
                town = gr.Dropdown(label="Town", choices=sorted(towns_list), value="ANG MO KIO")
                flat_type = gr.Dropdown(label="Flat Type", choices=sorted(flat_types), value="4 ROOM")
                flat_model = gr.Dropdown(label="Flat Model", choices=sorted(flat_models), value="Improved")
                floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
                storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
                flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)

                predict_btn = gr.Button("๐Ÿ”ฎ Predict Price", variant="primary")

            with gr.Column():
                predicted_price = gr.Label(label="๐Ÿ’ฐ Predicted Price")
                insights = gr.Markdown(label="๐Ÿ“‹ Property Summary")

        with gr.Row():
            chart_output = gr.Plot(label="๐Ÿ“ˆ Market Insights")

        # Connect button to function
        predict_btn.click(
            fn=predict_hdb_price,
            inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
            outputs=[predicted_price, chart_output, insights]
        )

    with gr.Tab("AI Chat Assistant"):
        gr.Markdown("๐Ÿ’ฌ Chat with our AI assistant to get HDB price predictions using natural language!")
        gr.Markdown("Example: 'What would be the price of a 4-room model A flat in Ang Mo Kio with 95 sqm, on the 8th floor, that's 15 years old?'")
        gr.Markdown("You can also say hello, ask how I am, or ask general questions about HDB!")

        chatbot = gr.Chatbot(label="HDB Price Chatbot", height=500)
        msg = gr.Textbox(label="Your question", placeholder="Type your message here...")
        clear = gr.Button("Clear Chat")

        def respond(message, chat_history):
            """Run one chat turn; returns (cleared textbox value, updated history)."""
            # The history can be None before the first message or after
            # "Clear Chat" resets the component.
            history = chat_history if chat_history is not None else []
            _, updated_history = chat_with_llm(message, history)
            # The empty string clears the input box for the next question.
            return "", updated_history

        msg.submit(respond, [msg, chatbot], [msg, chatbot])
        clear.click(lambda: None, None, [chatbot], queue=False)

    with gr.Tab("Interactive Map"):
        gr.Markdown("# ๐Ÿ—บ๏ธ Singapore HDB Resale Prices Map")
        gr.Markdown("An interactive map to visualize and filter HDB flat prices across Singapore.")

        with gr.Row():
            with gr.Column(scale=1):
                town_input = gr.Dropdown(choices=town_options, label="Select Town", value="ALL")
                flat_type_input = gr.Dropdown(choices=flat_type_options, label="Select Flat Type", value="ALL")
                flat_model_input = gr.Dropdown(choices=flat_model_options, label="Select Flat Model", value="ALL")

                gr.Markdown("### Filter by Lease and Price")
                min_lease_input = gr.Slider(minimum=min_lease_val, maximum=max_lease_val,
                                            value=min_lease_val, step=1, label="Min Remaining Lease (years)")
                max_lease_input = gr.Slider(minimum=min_lease_val, maximum=max_lease_val,
                                            value=max_lease_val, step=1, label="Max Remaining Lease (years)")
                min_price_input = gr.Slider(minimum=min_price_val, maximum=max_price_val,
                                            value=min_price_val, step=1000, label="Min Price (SGD)")
                max_price_input = gr.Slider(minimum=min_price_val, maximum=max_price_val,
                                            value=max_price_val, step=1000, label="Max Price (SGD)")

                # Add a button to generate the result
                generate_button = gr.Button("Generate Results", variant="primary")

            with gr.Column(scale=2):
                map_output = gr.HTML(label="Interactive Map")
                stats_output = gr.Markdown(label="Summary Statistics")
                gr.Markdown("""
---
### Map Color Legend
The colors of the markers on the map represent the resale price of the HDB flats:

- **<span style='color:green;'>Green</span>:** Indicates a lower resale price.
- **<span style='color:yellow;'>Yellow</span>:** Indicates a mid-range resale price.
- **<span style='color:red;'>Red</span>:** Indicates a higher resale price.
""")

        # Link the button click to the function
        inputs = [town_input, flat_type_input, flat_model_input,
                  min_lease_input, max_lease_input, min_price_input, max_price_input]

        generate_button.click(
            fn=generate_map_and_stats,
            inputs=inputs,
            outputs=[map_output, stats_output]
        )

# To run in Colab
if __name__ == "__main__":
    demo.launch()
773
+ warnings.filterwarnings('ignore')
774
+ import re
775
+ from groq import Groq
776
 
777
  # Initialize Groq client
778
  groq_api_key = os.getenv("GROQ_API_KEY")