Spaces:

Lesterchia1
/

FPOC_HDB_Price_Predictor_AI_chat_Assistant

Build error

App Files Files Community

Chia Woon Yap committed on Sep 3, 2025

Commit

a2140b7

verified ·

1 Parent(s): 290548d

Update app.py

Browse files

Files changed (1) hide show

app.py +762 -0

app.py CHANGED Viewed

@@ -7,10 +7,772 @@ import plotly.express as px
 from huggingface_hub import hf_hub_download
 import os
 from pathlib import Path
 import warnings
 warnings.filterwarnings('ignore')
 import re
 from groq import Groq
 # Initialize Groq client
 groq_api_key = os.getenv("GROQ_API_KEY")

 from huggingface_hub import hf_hub_download
 import os
 from pathlib import Path
+import warnings
+import gradio as gr
+import pandas as pd
+import numpy as np
+import joblib
+import plotly.graph_objects as go
+import plotly.express as px
+from huggingface_hub import hf_hub_download
+import os
+from pathlib import Path
 import warnings
 warnings.filterwarnings('ignore')
 import re
 from groq import Groq
+import folium
+from folium.plugins import MarkerCluster
+import io
+from fastapi import FastAPI, HTTPException
+app = FastAPI()
+# Build the Groq client only when an API key is configured; otherwise leave
+# `client` as None so the chat code paths can report the missing key.
+groq_api_key = os.getenv("GROQ_API_KEY")
+client = Groq(api_key=groq_api_key) if groq_api_key else None
+if client is None:
+    print("⚠️  GROQ_API_KEY not found. Chat functionality will be limited.")
+@app.post("/chat")
+async def chat(prompt: str):
+    """Forward a single user prompt to the Groq chat model and return its reply.
+
+    Returns a dict of the form {"reply": <model text>}.
+    Raises HTTPException 503 when no Groq client is configured, and
+    HTTPException 500 (carrying the underlying error text) when the call fails.
+    """
+    if client is None:
+        raise HTTPException(
+            status_code=503,
+            detail="⚠️ Chat service is unavailable because GROQ_API_KEY is missing."
+        )
+    try:
+        response = client.chat.completions.create(
+            model="llama-3.1-8b-instant",
+            messages=[{"role": "user", "content": prompt}]
+        )
+        # Read the reply via attribute access, matching every other call site
+        # in this file (`completion.choices[0].message.content`).
+        return {"reply": response.choices[0].message.content}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# Prefer XGBoost; when it is not installed, record that and pull in the
+# scikit-learn regressor instead.
+try:
+    import xgboost as xgb
+except ImportError:
+    XGB_AVAILABLE = False
+    print("⚠️  XGBoost not available, using scikit-learn models")
+    from sklearn.ensemble import RandomForestRegressor
+else:
+    XGB_AVAILABLE = True
+    print("✅ XGBoost is available")
+# Load map data
+# Defaults are assigned first so the map tab (sliders and dropdowns) can still
+# be constructed when the dataset cannot be loaded; previously these names
+# were left undefined on failure.
+map_df = None
+min_lease_val, max_lease_val = 0, 99
+min_price_val, max_price_val = 0, 2_000_000
+town_options = ['ALL']
+flat_type_options = ['ALL']
+flat_model_options = ['ALL']
+try:
+    hf_raw_url = 'https://huggingface.co/spaces/Lesterchia174/FPOC_HDB_Price_Predictor_AI_chat_Assistant/resolve/main/Based_Resale_Prices_2025_with_coords.csv'
+    _loaded_map_df = pd.read_csv(hf_raw_url)
+    # Convert 'remaining_lease' to numeric; non-numeric values become NaN and
+    # those rows are dropped.
+    _loaded_map_df['remaining_lease'] = pd.to_numeric(_loaded_map_df['remaining_lease'], errors='coerce')
+    _loaded_map_df = _loaded_map_df.dropna(subset=['remaining_lease'])
+    # Slider bounds; int() raises on an empty frame (NaN), which routes to the
+    # except branch and keeps the defaults above.
+    _slider_bounds = (
+        int(_loaded_map_df['remaining_lease'].min()),
+        int(_loaded_map_df['remaining_lease'].max()),
+        int(_loaded_map_df['resale_price'].min()),
+        int(_loaded_map_df['resale_price'].max()),
+    )
+    # Dropdown choices; NaN labels are removed so sorting cannot fail on
+    # mixed str/float values.
+    _dropdown_options = [
+        ['ALL'] + sorted(_loaded_map_df[column].dropna().unique())
+        for column in ('town', 'flat_type', 'flat_model')
+    ]
+except Exception as e:
+    print(f"Error loading the map dataset: {e}")
+else:
+    # Publish everything together so a partial failure never leaves a mix of
+    # real and default values.
+    map_df = _loaded_map_df
+    min_lease_val, max_lease_val, min_price_val, max_price_val = _slider_bounds
+    town_options, flat_type_options, flat_model_options = _dropdown_options
+def create_dummy_model(model_type):
+    """Return a heuristic stand-in model used when the real model cannot be loaded.
+
+    The returned object exposes `predict`, `get_params`, `set_params`,
+    `n_features_in_` and `feature_names_in_`.
+    `model_type` is only stored on the instance as a label.
+    """
+    class RealisticDummyModel:
+        """Rule-of-thumb price estimator over the 9-feature input layout."""
+        def __init__(self, model_type):
+            self.model_type = model_type
+            self.n_features_in_ = 9
+            self.feature_names_in_ = [
+                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
+                'transaction_year', 'flat_type_encoded', 'town_encoded',
+                'flat_model_encoded', 'dummy_feature'
+            ]
+        def get_params(self, deep=True):
+            """Return an empty parameter dict (nothing is tunable)."""
+            return {}
+        def set_params(self, **params):
+            """Accept and ignore parameters; return self."""
+            return self
+        def predict(self, X):
+            """Return one estimated price per input row as a 1-D array.
+
+            Accepts a single feature row or a 2-D batch; each row is read
+            positionally using the order in `feature_names_in_`.
+            """
+            rows = np.atleast_2d(np.asarray(X, dtype=float))
+            floor_area = rows[:, 0]
+            storey_level = rows[:, 1]
+            flat_age = rows[:, 2]
+            town_encoded = rows[:, 6]
+            base_price = floor_area * (4800 + town_encoded * 200)
+            storey_bonus = storey_level * 2500
+            age_discount = flat_age * 1800
+            price = base_price + storey_bonus - age_discount + 35000
+            # Extra premiums for high floors and young flats.
+            price = price + np.where(storey_level > 20, 15000, 0)
+            price = price + np.where(flat_age < 10, 20000, 0)
+            # Never report less than the 300,000 floor.
+            return np.maximum(300000, price)
+    # Return the instance itself; the class defines no __call__, so calling the
+    # instance (as the previous code did) raised TypeError.
+    return RealisticDummyModel(model_type)
+def safe_joblib_load(filepath):
+    """Load a joblib artifact, returning None instead of raising on any failure.
+
+    The loaded object must expose `predict`; otherwise None is returned.
+    Missing `get_params` / `set_params` attributes are filled with no-op stubs.
+    """
+    try:
+        loaded = joblib.load(filepath)
+        print(f"✅ Successfully loaded model from {filepath}")
+        if not hasattr(loaded, 'predict'):
+            print("❌ Loaded object doesn't have predict method")
+            return None
+        # Stubs for estimator-style accessors the object may lack.
+        stubs = {
+            'get_params': lambda deep=True: {},
+            'set_params': lambda **params: loaded,
+        }
+        for attr_name, stub in stubs.items():
+            if not hasattr(loaded, attr_name):
+                setattr(loaded, attr_name, stub)
+        return loaded
+    except Exception as load_error:
+        print(f"❌ Error loading model from {filepath}: {load_error}")
+        return None
+def load_models():
+    """Return a dict with key 'xgboost' holding the downloaded model or a dummy.
+
+    The dummy from `create_dummy_model` is used when the download raises or
+    when `safe_joblib_load` returns None.
+    """
+    registry = {}
+    try:
+        registry['xgboost'] = safe_joblib_load(
+            hf_hub_download(
+                repo_id="Lesterchia174/HDB_Price_Predictor",
+                filename="best_model_xgboost.joblib",
+                repo_type="space"
+            )
+        )
+        if registry['xgboost'] is not None:
+            print("✅ XGBoost model loaded and validated")
+        else:
+            print("⚠️  Creating dummy model for XGBoost")
+            registry['xgboost'] = create_dummy_model("xgboost")
+    except Exception as download_error:
+        print(f"❌ Error downloading XGBoost model: {download_error}")
+        print("⚠️  Creating dummy model for XGBoost")
+        registry['xgboost'] = create_dummy_model("xgboost")
+    return registry
+def load_data():
+    """Download the processed resale CSV from the Hub and return it as a DataFrame.
+
+    On any failure the error is printed and `create_sample_data()` is returned.
+    """
+    try:
+        frame = pd.read_csv(
+            hf_hub_download(
+                repo_id="Lesterchia174/HDB_Price_Predictor",
+                filename="base_hdb_resale_prices_2015Jan-2025Jun_processed.csv",
+                repo_type="space"
+            )
+        )
+        print("✅ Data loaded successfully via Hugging Face Hub")
+    except Exception as load_error:
+        print(f"❌ Error loading data: {load_error}")
+        return create_sample_data()
+    return frame
+def create_sample_data():
+    """Build a reproducible 100-row synthetic resale table (seeded with 42)."""
+    np.random.seed(42)
+    towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
+    flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
+    flat_models = ['Improved', 'Model A', 'New Generation']
+    rows = []
+    for _ in range(100):
+        # The order of the random draws is fixed so the seeded output is stable.
+        town, flat_type, flat_model = (
+            np.random.choice(options) for options in (towns, flat_types, flat_models)
+        )
+        floor_area = np.random.randint(85, 150)
+        storey = np.random.randint(1, 25)
+        age = np.random.randint(0, 40)
+        price = (
+            floor_area * 5000
+            + towns.index(town) * 20000
+            + storey * 2000
+            - age * 1500
+            + flat_types.index(flat_type) * 30000
+        )
+        # Add noise, then clamp to the 300,000 floor.
+        price = max(300000, price + np.random.randint(-20000, 20000))
+        rows.append(dict(
+            town=town, flat_type=flat_type, flat_model=flat_model,
+            floor_area_sqm=floor_area, storey_level=storey,
+            flat_age=age, resale_price=price,
+        ))
+    return pd.DataFrame(rows)
+def preprocess_input(user_input, model_type='xgboost'):
+    """Turn a user-input dict into the 1x9 feature array passed to the model.
+
+    `user_input` must provide 'floor_area_sqm', 'storey_level', 'flat_age',
+    'flat_type', 'town' and 'flat_model'. Categorical labels are matched
+    ignoring case and surrounding whitespace, so chat-extracted values such as
+    "Ang Mo Kio" resolve to the same code as "ANG MO KIO". Unknown or
+    non-string labels fall back to the defaults (flat type 4, town 0, model 0).
+    `model_type` is accepted for interface compatibility and is not used.
+    """
+    # Flat type mapping
+    flat_type_mapping = {'1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4,
+                         '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7}
+    # Town mapping
+    town_mapping = {
+        'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3,
+        'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7,
+        'CHOA CHU KANG': 8, 'ANG MO KIO': 9
+    }
+    # Flat model mapping
+    flat_model_mapping = {
+        'Model A': 0, 'Improved': 1, 'New Generation': 2,
+        'Standard': 3, 'Premium': 4
+    }
+    def _encode(mapping, raw_label, default):
+        """Look up `raw_label` in `mapping`, ignoring case and outer whitespace."""
+        if not isinstance(raw_label, str):
+            return default
+        wanted = raw_label.strip().casefold()
+        for label, code in mapping.items():
+            if label.casefold() == wanted:
+                return code
+        return default
+    # Create input array with features
+    input_features = [
+        user_input['floor_area_sqm'],                           # Feature 1
+        user_input['storey_level'],                             # Feature 2
+        user_input['flat_age'],                                 # Feature 3
+        99 - user_input['flat_age'],                            # Feature 4: remaining_lease
+        2025,                                                   # Feature 5: transaction_year
+        _encode(flat_type_mapping, user_input['flat_type'], 4),   # Feature 6: flat_type_ordinal
+        _encode(town_mapping, user_input['town'], 0),             # Feature 7: town_encoded
+        _encode(flat_model_mapping, user_input['flat_model'], 0), # Feature 8: flat_model_encoded
+        1                                                       # Feature 9: (placeholder)
+    ]
+    return np.array([input_features])
+def create_market_insights_chart(data, user_input, predicted_price):
+    """Scatter comparable flats (area vs price) and mark the predicted price.
+
+    Comparables share the user's flat type and town; when fewer than five
+    match, the town condition is dropped. Returns None when `data` is missing
+    or empty, or when no comparables exist.
+    """
+    if data is None or len(data) == 0:
+        return None
+    same_type = data['flat_type'] == user_input['flat_type']
+    comparables = data[same_type & (data['town'] == user_input['town'])]
+    if len(comparables) < 5:
+        comparables = data[same_type]
+    if len(comparables) == 0:
+        return None
+    fig = px.scatter(
+        comparables,
+        x='floor_area_sqm',
+        y='resale_price',
+        color='flat_model',
+        title=f"Market Position: {user_input['flat_type']} in {user_input['town']}",
+        labels={'floor_area_sqm': 'Floor Area (sqm)', 'resale_price': 'Resale Price (SGD)'},
+    )
+    # Overlay the model's estimate as a red star.
+    star_marker = dict(symbol='star', size=20, color='red',
+                       line=dict(width=2, color='darkred'))
+    fig.add_trace(go.Scatter(
+        x=[user_input['floor_area_sqm']],
+        y=[predicted_price],
+        mode='markers',
+        marker=star_marker,
+        name='XGBoost Prediction',
+    ))
+    fig.update_layout(template="plotly_white", height=400, showlegend=True)
+    return fig
+def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
+    """Predict a resale price and build the accompanying summary and chart.
+
+    Returns a 3-tuple `(price_text, chart, insights_markdown)`. On an
+    unexpected failure it returns `("Error: Prediction failed", None, message)`.
+    If the model call itself fails, a fixed fallback of 400000 is used and the
+    summary says so explicitly.
+    """
+    user_input = {
+        'town': town,
+        'flat_type': flat_type,
+        'flat_model': flat_model,
+        'floor_area_sqm': floor_area_sqm,
+        'storey_level': storey_level,
+        'flat_age': flat_age
+    }
+    try:
+        processed_input = preprocess_input(user_input)
+        # Get prediction with error handling
+        used_fallback = False
+        try:
+            predicted_price = max(0, float(models['xgboost'].predict(processed_input)[0]))
+        except Exception as e:
+            print(f"❌ XGBoost prediction error: {e}")
+            predicted_price = 400000  # Fallback value
+            used_fallback = True
+        remaining_lease = 99 - flat_age
+        # A zero or missing floor area must not abort the whole prediction.
+        if floor_area_sqm and floor_area_sqm > 0:
+            price_per_sqm_text = f"${predicted_price / floor_area_sqm:,.0f}"
+        else:
+            price_per_sqm_text = "N/A"
+        if remaining_lease >= 60:
+            financing = "✅ Bank loan eligible"
+        elif remaining_lease >= 20:
+            financing = "⚠️ HDB loan eligible only"
+        else:
+            financing = "❌ Limited financing options"
+        # Built line by line without source indentation, because the chat path
+        # appends this text to an unindented message.
+        summary_lines = [
+            "**Property Summary:**",
+            f"- Location: {town}",
+            f"- Type: {flat_type}",
+            f"- Model: {flat_model}",
+            f"- Area: {floor_area_sqm} sqm",
+            f"- Floor: Level {storey_level}",
+            f"- Age: {flat_age} years",
+            f"- Remaining Lease: {remaining_lease} years",
+            f"- Price per sqm: {price_per_sqm_text}",
+            "",
+            f"**Predicted Price: ${predicted_price:,.0f}**",
+            "",
+            f"**Financing Eligibility:** {financing}",
+        ]
+        if used_fallback:
+            summary_lines += ["", "⚠️ The model could not produce a prediction; the price shown is a fixed fallback value."]
+        insights = "\n".join(summary_lines)
+        # Create chart
+        chart = create_market_insights_chart(data, user_input, predicted_price)
+        return f"${predicted_price:,.0f}", chart, insights
+    except Exception as e:
+        error_msg = f"Prediction failed. Error: {str(e)}"
+        print(error_msg)
+        return "Error: Prediction failed", None, error_msg
+def extract_parameters_from_query(query):
+    """Ask the LLM to pull HDB prediction parameters out of free text.
+
+    Returns the parsed JSON object from the model's reply, or a dict with a
+    single "error" key when the client is unavailable, no JSON object is
+    found, or any exception occurs.
+    """
+    import json
+    if client is None or not groq_api_key:
+        return {"error": "Please set GROQ_API_KEY environment variable to use chat functionality."}
+    try:
+        # Instructions that constrain the model to a JSON-only reply.
+        system_prompt = """You are an expert at extracting parameters for HDB price prediction from natural language queries.
+        Extract the following parameters if mentioned in the query:
+        - town (e.g., Ang Mo Kio, Bedok, Tampines)
+        - flat_type (e.g., 3 ROOM, 4 ROOM, 5 ROOM, EXECUTIVE)
+        - flat_model (e.g., Improved, Model A, New Generation, Standard, Premium)
+        - floor_area_sqm (floor area in square meters)
+        - storey_level (floor level)
+        - flat_age (age of flat in years)
+        Return only a JSON object with the extracted parameters. If a parameter is not mentioned, set it to null.
+        Example: {"town": "ANG MO KIO", "flat_type": "4 ROOM", "flat_model": "Improved", "floor_area_sqm": 95, "storey_level": 8, "flat_age": 15}"""
+        conversation = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": query},
+        ]
+        completion = client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=conversation,
+            temperature=0.1,
+            max_tokens=200
+        )
+        raw_reply = completion.choices[0].message.content
+        # Keep only the outermost {...} span in case the model adds prose.
+        json_match = re.search(r'\{.*\}', raw_reply, re.DOTALL)
+        if json_match is None:
+            return {"error": "Could not extract parameters from query"}
+        return json.loads(json_match.group())
+    except Exception as e:
+        return {"error": f"Error processing query: {str(e)}"}
+def is_small_talk(query):
+    """Return True when the query contains a small-talk phrase as whole words.
+
+    Matching is case-insensitive and anchored on word boundaries, so short
+    keywords such as 'hi' or 'hey' no longer fire inside unrelated words
+    ("this", "they", "Bishan").
+    """
+    small_talk_keywords = [
+        'hello', 'hi', 'hey', 'good morning', 'good afternoon', 'good evening',
+        'how are you', 'how are things', "what's up", 'how do you do',
+        'thank you', 'thanks', 'bye', 'goodbye', 'see you', 'nice to meet you',
+        'who are you', 'what can you do', 'help', 'tell me about yourself'
+    ]
+    query_lower = query.lower()
+    return any(
+        re.search(rf"\b{re.escape(keyword)}\b", query_lower)
+        for keyword in small_talk_keywords
+    )
+def handle_small_talk(query):
+    """Return a canned reply for a small-talk query.
+
+    Phrases are matched case-insensitively as whole words and checked in a
+    fixed priority order (greeting, how-are-you, thanks, goodbye, identity,
+    capabilities, about), so a short keyword like 'hi' inside "this" no longer
+    hijacks the reply. Falls back to a generic offer of help.
+    """
+    query_lower = query.lower()
+    def _mentions(*phrases):
+        """True when any phrase occurs in the query on word boundaries."""
+        return any(re.search(rf"\b{re.escape(phrase)}\b", query_lower) for phrase in phrases)
+    if _mentions('hello', 'hi', 'hey', 'good morning', 'good afternoon', 'good evening'):
+        return "Hello! 👋 I'm your HDB price assistant. How can I help you today?"
+    elif _mentions('how are you', 'how are things', "what's up", 'how do you do'):
+        return "I'm doing great, thanks for asking! I'm here to help you with HDB price predictions and information. What can I assist you with today?"
+    elif _mentions('thank you', 'thanks'):
+        return "You're welcome! 😊 Is there anything else you'd like to know about HDB prices?"
+    elif _mentions('bye', 'goodbye', 'see you'):
+        return "Goodbye! 👋 Feel free to come back if you have more questions about HDB prices!"
+    elif _mentions('who are you'):
+        return "I'm an AI assistant specialized in helping with HDB resale price predictions and information. I can estimate property values based on various factors like location, flat type, size, and age."
+    elif _mentions('what can you do', 'help'):
+        return "I can help you with:\n- Predicting HDB resale prices\n- Answering questions about HDB properties\n- Providing market insights\n\nJust tell me about the property you're interested in (location, type, size, etc.) and I'll give you an estimate!"
+    elif _mentions('tell me about yourself'):
+        return "I'm an AI assistant powered by machine learning models trained on HDB resale data. I can provide price estimates and insights about public housing in Singapore. My goal is to help you make informed decisions about HDB properties!"
+    else:
+        return "I'm here to help with HDB price predictions and information. How can I assist you today?"
+def answer_general_hdb_question(query, chat_history):
+    """Answer a free-form HDB question with the LLM and record it in the history.
+
+    Returns `(reply_text, chat_history)`. The (query, reply) pair is appended
+    to `chat_history` in place, except when the Groq client is unavailable.
+    """
+    if client is None or not groq_api_key:
+        return "Please set GROQ_API_KEY environment variable to use chat functionality.", chat_history
+    system_message = {
+        "role": "system",
+        "content": "You are a helpful assistant specialized in HDB (Housing & Development Board) properties in Singapore. Provide accurate, helpful information about HDB prices, policies, and market trends."
+    }
+    try:
+        user_message = {
+            "role": "user",
+            "content": f"Answer this question about HDB: {query}"
+        }
+        completion = client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=[system_message, user_message],
+            temperature=0.3,
+            max_tokens=500
+        )
+        reply = completion.choices[0].message.content
+    except Exception:
+        # Any failure is reported to the user with a generic message.
+        reply = "I encountered an error. Please try again later."
+    chat_history.append((query, reply))
+    return reply, chat_history
+def chat_with_llm(query, chat_history):
+    """Route a chat message to small talk, general Q&A, or a price prediction.
+
+    Returns `(reply_text, chat_history)`; each handled turn is appended to the
+    history as a `(query, reply)` tuple. Routing order:
+    small talk -> general-market keywords -> parameter extraction, then either
+    a clarifying question, a request for missing fields, or a prediction.
+    """
+    # A cleared chat may hand over None instead of a list.
+    if chat_history is None:
+        chat_history = []
+    if not groq_api_key or client is None:
+        return "Please set GROQ_API_KEY...", chat_history
+    # 1. First, check for small talk
+    if is_small_talk(query):
+        response = handle_small_talk(query)
+        chat_history.append((query, response))
+        return response, chat_history
+    # 2. Check if the query is a clear request for a general explanation/trend (not a specific price)
+    is_general_query = any(keyword in query.lower() for keyword in [
+        'trend', 'overview', 'how are', 'what are', 'like in', 'average',
+        'over the years', 'market', 'compare'
+    ])
+    # 3. If it's a general query, use the LLM to answer it directly
+    if is_general_query:
+        try:
+            completion = client.chat.completions.create(
+                model="llama-3.3-70b-versatile",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant specialized in HDB (Housing & Development Board) properties in Singapore. Provide accurate, helpful information about HDB prices, policies, and market trends. Use the provided context if available."
+                    },
+                    {
+                        "role": "user",
+                        "content": f"Based on general HDB market knowledge, answer this question: {query}"
+                    }
+                ],
+                temperature=0.3,
+                max_tokens=500
+            )
+            response = completion.choices[0].message.content
+            chat_history.append((query, response))
+            return response, chat_history
+        except Exception as e:
+            print(f"❌ General query failed: {e}")
+            error_msg = "I encountered an error. Please try again later."
+            chat_history.append((query, error_msg))
+            return error_msg, chat_history
+    # 4. If it's not clearly general, try to extract parameters for a specific prediction
+    params = extract_parameters_from_query(query)
+    if "error" in params:
+        # If extraction failed, fall back to general Q&A
+        return answer_general_hdb_question(query, chat_history)
+    # 5. Check what we got back from parameter extraction
+    extracted_params = {k: v for k, v in params.items() if v is not None}
+    required_for_prediction = ['town', 'flat_type', 'floor_area_sqm', 'storey_level', 'flat_age']
+    # 6. If the user only provided a town or one other parameter, it's likely a general question.
+    if len(extracted_params) < 3: # e.g., if only 'town' and 'flat_type' are provided
+        # Ask a clarifying question or provide a general overview
+        if 'town' in extracted_params:
+            town = extracted_params['town']
+            response = f"You asked about {town}. HDB prices can vary widely based on flat type, size, age, and specific location within the town. "
+            response += f"For example, are you interested in 4-Room or 5-Room flats? What's your budget or preferred size? "
+            response += "Alternatively, I can give you a prediction if you provide more details like flat type, size, and age."
+        else:
+            response = "I specialize in HDB price predictions and information. Could you provide more details about the property you're interested in (e.g., town, flat type, size) so I can give you a accurate estimate or information?"
+        chat_history.append((query, response))
+        return response, chat_history
+    # 7. If we have most parameters, ask for the missing ones specifically
+    missing_params = [param for param in required_for_prediction if params.get(param) is None]
+    if missing_params:
+        missing_list = ", ".join(missing_params)
+        response = f"I'd be happy to predict a price for you. I just need a few more details: {missing_list}."
+        chat_history.append((query, response))
+        return response, chat_history
+    # 8. If we have all parameters, make a prediction!
+    try:
+        # Coerce numerics whatever form the LLM used ("95", 95, "8.0", 8.0).
+        params['floor_area_sqm'] = float(params['floor_area_sqm'])
+        params['storey_level'] = int(float(params['storey_level']))
+        params['flat_age'] = int(float(params['flat_age']))
+        # flat_model is optional: the LLM may return null or omit the key
+        # entirely, so use .get() rather than indexing.
+        price, chart, insights = predict_hdb_price(
+            params['town'], params['flat_type'], params.get('flat_model'),
+            params['floor_area_sqm'], params['storey_level'], params['flat_age']
+        )
+        # Format response
+        response = f"Based on your query:\n\n"
+        response += f"📍 Town: {params['town']}\n"
+        response += f"🏠 Flat Type: {params['flat_type']}\n"
+        response += f"📐 Floor Area: {params['floor_area_sqm']} sqm\n"
+        response += f"🏢 Storey Level: {params['storey_level']}\n"
+        response += f"📅 Flat Age: {params['flat_age']} years\n\n"
+        response += f"💰 Predicted Price: {price}\n\n"
+        response += insights
+        chat_history.append((query, response))
+        return response, chat_history
+    except Exception as e:
+        error_msg = f"Error making prediction: {str(e)}"
+        chat_history.append((query, error_msg))
+        return error_msg, chat_history
+def generate_map_and_stats(filter_town, filter_flat_type, filter_flat_model,
+                          min_lease, max_lease, min_price, max_price):
+    """Render the filtered resale records on a map and summarise them.
+
+    Returns `(map_html, stats_markdown)`. Dropdown filters equal to 'ALL' (or
+    empty) are ignored; lease and price bounds are inclusive. When the dataset
+    is unavailable or nothing matches, a short HTML message is returned in
+    place of the map.
+    """
+    from html import escape
+    if map_df is None:
+        return "<p align='center'>Dataset not found. Please ensure the URL is correct and the file exists.</p>", ""
+    # Apply dropdown filters
+    filtered_df = map_df.copy()
+    dropdown_filters = (
+        ('town', filter_town),
+        ('flat_type', filter_flat_type),
+        ('flat_model', filter_flat_model),
+    )
+    for column, wanted in dropdown_filters:
+        if wanted and wanted != 'ALL':
+            filtered_df = filtered_df[filtered_df[column] == wanted]
+    # Filter based on lease and price sliders using 'resale_price'
+    filtered_df = filtered_df[(filtered_df['remaining_lease'] >= min_lease) &
+                             (filtered_df['remaining_lease'] <= max_lease)]
+    filtered_df = filtered_df[(filtered_df['resale_price'] >= min_price) &
+                             (filtered_df['resale_price'] <= max_price)]
+    # Handle case with no matching records
+    if len(filtered_df) == 0:
+        return "<p align='center'>No data available with the selected filters.</p>", "No data available with the selected filters."
+    # Create base map centered on Singapore
+    singapore_coords = [1.3521, 103.8198]  # Approximate center of Singapore
+    m = folium.Map(location=singapore_coords, zoom_start=11, tiles='OpenStreetMap')
+    marker_cluster = MarkerCluster().add_to(m)
+    # Colour scale spanning the filtered price range
+    folium_colormap = folium.LinearColormap(['green', 'yellow', 'red'],
+                                             vmin=filtered_df['resale_price'].min(),
+                                             vmax=filtered_df['resale_price'].max())
+    folium_colormap.caption = 'Resale Price (SGD)'
+    m.add_child(folium_colormap)
+    # Rows without coordinates cannot be placed on the map; they are skipped
+    # for plotting but still counted in the statistics below.
+    mappable_df = filtered_df.dropna(subset=['latitude', 'longitude'])
+    for _, row in mappable_df.iterrows():
+        color = folium_colormap(row['resale_price'])
+        # Text fields come from the CSV, so escape them before embedding in HTML.
+        popup_content = f"""
+        <b>Town:</b> {escape(str(row['town']))}<br>
+        <b>Flat Type:</b> {escape(str(row['flat_type']))}<br>
+        <b>Flat Model:</b> {escape(str(row['flat_model']))}<br>
+        <b>Address:</b> {escape(str(row['full_address']))}<br>
+        <b>Floor Area:</b> {row['floor_area_sqm']} sqm<br>
+        <b>Remaining Lease:</b> {row['remaining_lease']} years<br>
+        <b>Storey:</b> {escape(str(row['storey_range']))}<br>
+        <b>Resale Price:</b> ${row['resale_price']:,.0f}<br>
+        <b>Transaction Date:</b> {escape(str(row['month']))}
+        """
+        folium.CircleMarker(
+            location=[row['latitude'], row['longitude']],
+            radius=5,
+            popup=folium.Popup(popup_content, max_width=300),
+            color=color,
+            fill=True,
+            fillColor=color,
+            fillOpacity=0.7,
+            weight=1
+        ).add_to(marker_cluster)
+    # Convert map to HTML string
+    map_html = m._repr_html_()
+    # Generate summary statistics as a markdown string using 'resale_price'
+    stats_string = f"""
+    ### Summary Statistics
+    - **Total Records:** {len(filtered_df):,}
+    - **Average Price [inc Outlier]:** ${filtered_df['resale_price'].mean():,.0f}
+    - **Median Price [exc Outlier]:** ${filtered_df['resale_price'].median():,.0f}
+    - **Minimum Price:** ${filtered_df['resale_price'].min():,.0f}
+    - **Maximum Price:** ${filtered_df['resale_price'].max():,.0f}
+    - **Average Remaining Lease:** {filtered_df['remaining_lease'].mean():.1f} years
+    - **Median Remaining Lease:** {filtered_df['remaining_lease'].median():.1f} years
+    """
+    return map_html, stats_string
+# Load the model registry and the reference dataset once at import time.
+print("Loading models and data...")
+models, data = load_models(), load_data()
+# Choice lists for the prediction form's dropdowns.
+towns_list = [
+    'SENGKANG',
+    'WOODLANDS',
+    'TAMPINES',
+    'PUNGGOL',
+    'JURONG WEST',
+    'YISHUN',
+    'BEDOK',
+    'HOUGANG',
+    'CHOA CHU KANG',
+    'ANG MO KIO',
+]
+flat_types = [
+    '3 ROOM',
+    '4 ROOM',
+    '5 ROOM',
+    'EXECUTIVE',
+    '2 ROOM',
+    '1 ROOM',
+]
+flat_models = [
+    'Model A',
+    'Improved',
+    'New Generation',
+    'Standard',
+    'Premium',
+]
+# Gradio UI with three tabs: form-based prediction, chat assistant, and map.
+with gr.Blocks(title="🏠 HDB Price Predictor + Chat + Map", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🏠 HDB Price Predictor + AI Chat + Interactive Map")
+    gr.Markdown("Predict HDB resale prices using XGBoost model, chat with our AI assistant, or explore properties on an interactive map")
+    with gr.Tab("Traditional Interface"):
+        with gr.Row():
+            with gr.Column():
+                town = gr.Dropdown(label="Town", choices=sorted(towns_list), value="ANG MO KIO")
+                flat_type = gr.Dropdown(label="Flat Type", choices=sorted(flat_types), value="4 ROOM")
+                flat_model = gr.Dropdown(label="Flat Model", choices=sorted(flat_models), value="Improved")
+                floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
+                storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
+                flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)
+                predict_btn = gr.Button("🔮 Predict Price", variant="primary")
+            with gr.Column():
+                predicted_price = gr.Label(label="💰 Predicted Price")
+                insights = gr.Markdown(label="📋 Property Summary")
+        with gr.Row():
+            chart_output = gr.Plot(label="📈 Market Insights")
+        # Connect button to function
+        predict_btn.click(
+            fn=predict_hdb_price,
+            inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
+            outputs=[predicted_price, chart_output, insights]
+        )
+    with gr.Tab("AI Chat Assistant"):
+        gr.Markdown("💬 Chat with our AI assistant to get HDB price predictions using natural language!")
+        gr.Markdown("Example: 'What would be the price of a 4-room model A flat in Ang Mo Kio with 95 sqm, on the 8th floor, that's 15 years old?'")
+        gr.Markdown("You can also say hello, ask how I am, or ask general questions about HDB!")
+        chatbot = gr.Chatbot(label="HDB Price Chatbot", height=500)
+        msg = gr.Textbox(label="Your question", placeholder="Type your message here...")
+        clear = gr.Button("Clear Chat")
+        def respond(message, chat_history):
+            """Run one chat turn; return ("", history) to clear the textbox and refresh the chat."""
+            # The Clear button sets the chatbot value to None (see clear.click
+            # below), so guard in case that None is handed back here.
+            history = chat_history if chat_history is not None else []
+            _, updated_history = chat_with_llm(message, history)
+            return "", updated_history
+        msg.submit(respond, [msg, chatbot], [msg, chatbot])
+        clear.click(lambda: None, None, [chatbot], queue=False)
+    with gr.Tab("Interactive Map"):
+        gr.Markdown("# 🗺️ Singapore HDB Resale Prices Map")
+        gr.Markdown("An interactive map to visualize and filter HDB flat prices across Singapore.")
+        with gr.Row():
+            with gr.Column(scale=1):
+                town_input = gr.Dropdown(choices=town_options, label="Select Town", value="ALL")
+                flat_type_input = gr.Dropdown(choices=flat_type_options, label="Select Flat Type", value="ALL")
+                flat_model_input = gr.Dropdown(choices=flat_model_options, label="Select Flat Model", value="ALL")
+                gr.Markdown("### Filter by Lease and Price")
+                min_lease_input = gr.Slider(minimum=min_lease_val, maximum=max_lease_val,
+                                            value=min_lease_val, step=1, label="Min Remaining Lease (years)")
+                max_lease_input = gr.Slider(minimum=min_lease_val, maximum=max_lease_val,
+                                            value=max_lease_val, step=1, label="Max Remaining Lease (years)")
+                min_price_input = gr.Slider(minimum=min_price_val, maximum=max_price_val,
+                                            value=min_price_val, step=1000, label="Min Price (SGD)")
+                max_price_input = gr.Slider(minimum=min_price_val, maximum=max_price_val,
+                                            value=max_price_val, step=1000, label="Max Price (SGD)")
+                # Add a button to generate the result
+                generate_button = gr.Button("Generate Results", variant="primary")
+            with gr.Column(scale=2):
+                map_output = gr.HTML(label="Interactive Map")
+                stats_output = gr.Markdown(label="Summary Statistics")
+                gr.Markdown("""
+                ---
+                ### Map Color Legend
+                The colors of the markers on the map represent the resale price of the HDB flats:
+                - **<span style='color:green;'>Green</span>:** Indicates a lower resale price.
+                - **<span style='color:yellow;'>Yellow</span>:** Indicates a mid-range resale price.
+                - **<span style='color:red;'>Red</span>:** Indicates a higher resale price.
+                """)
+        # Link the button click to the function
+        inputs = [town_input, flat_type_input, flat_model_input,
+                  min_lease_input, max_lease_input, min_price_input, max_price_input]
+        generate_button.click(
+            fn=generate_map_and_stats,
+            inputs=inputs,
+            outputs=[map_output, stats_output]
+        )
+# To run in Colab
+if __name__ == "__main__":
+    demo.launch()
+warnings.filterwarnings('ignore')
+import re
+from groq import Groq
 # Initialize Groq client
 groq_api_key = os.getenv("GROQ_API_KEY")