# app.py
# The core FastAPI application for our IGUDAR model

import joblib
import pandas as pd
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import warnings

warnings.filterwarnings('ignore')

# --- 1. DEFINE APP AND LOAD MODELS ---

# Initialize the FastAPI app
app = FastAPI(
    title="IGUDAR AI Valuation API",
    description="An API to serve the trained property valuation model for Moroccan real estate.",
    version="1.0"
)

# Load the trained model and preprocessing objects at startup.
# This ensures they are loaded only once, making the API fast.
try:
    model = joblib.load("./models/valuation_model.joblib")
    preprocessing = joblib.load("./models/preprocessing_objects.joblib")

    # Extract the individual objects from the preprocessing file
    scaler = preprocessing['scaler']
    label_encoders = preprocessing['label_encoders']
    feature_names = preprocessing['feature_names']
    print("✅ Models and preprocessing objects loaded successfully.")
except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the /models directory.")
    model = None  # Set to None to handle errors gracefully


# --- 2. DEFINE THE INPUT DATA MODEL ---

# Pydantic model for input data validation.
# This tells FastAPI what the incoming JSON should look like.
class PropertyFeatures(BaseModel):
    size_m2: float
    bedrooms: int
    bathrooms: int
    age_years: int
    property_type: str
    city: str
    infrastructure_score: float
    economic_score: float
    lifestyle_score: float
    investment_score: float
    neighborhood_tier: int
    total_amenities: int
    data_quality: float = 0.9     # Default value
    has_coordinates: bool = True  # Default value


# --- 3. CREATE THE PREDICTION ENDPOINT ---

@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
    """
    Predicts the value of a property based on its features.
    Accepts a JSON object with property details and returns a prediction.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model is not loaded. Check server logs.")

    # Convert the incoming Pydantic model to a dictionary
    data_dict = property_data.dict()

    # Start with a dictionary of all zeros for our feature vector
    features = {name: 0 for name in feature_names}

    # --- Feature Engineering (must EXACTLY match the training script) ---

    # 1. Direct mapping
    features.update({
        'size_m2': data_dict.get('size_m2', 100),
        'bedrooms': data_dict.get('bedrooms', 2),
        'bathrooms': data_dict.get('bathrooms', 1),
        'age_years': min(data_dict.get('age_years', 5), 50),
        'infrastructure_score': data_dict.get('infrastructure_score', 50),
        'economic_score': data_dict.get('economic_score', 50),
        'lifestyle_score': data_dict.get('lifestyle_score', 50),
        'investment_score': data_dict.get('investment_score', 50),
        'neighborhood_tier': data_dict.get('neighborhood_tier', 3),
        'total_amenities': data_dict.get('total_amenities', 20),
        'data_quality': data_dict.get('data_quality', 0.8)
    })
    # 2. Calculated features
    features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
    features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
    features['location_quality'] = (features['infrastructure_score'] * 0.4 +
                                    features['economic_score'] * 0.3 +
                                    features['lifestyle_score'] * 0.3)
    features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 +
                                             features['location_quality'] * 0.5 +
                                             (10 if data_dict.get('has_coordinates', True) else 0) +
                                             (features['data_quality'] * 20))

    # NOTE: 'city_median_size' and 'city_infra_avg' cannot be computed from a single prediction
    # request, so they stay at 0 here. This is a common challenge in deployment. A better approach
    # is to look them up from a pre-calculated dictionary of city statistics saved during training
    # (an illustrative sketch appears near the end of this file). For this demo, 0 is acceptable.

    # 3. Categorical encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            value = data_dict.get(col)
            try:
                # Use the loaded encoder to transform the input string
                encoded_value = le.transform([value])[0]
                features[encoded_col_name] = encoded_value
            except Exception as e:
                # If the category is new/unseen, default to 0 (or another strategy)
                print(f"Warning: Could not encode '{value}' for feature '{col}'. Defaulting to 0. Error: {e}")
                features[encoded_col_name] = 0

    # Create a DataFrame in the exact order of feature_names
    df = pd.DataFrame([features])[feature_names]

    # Scale the features using the loaded scaler
    df_scaled = scaler.transform(df)

    # Make the prediction
    prediction = model.predict(df_scaled)[0]

    # Post-process for a clean response
    predicted_price = round(max(200000, prediction), 0)  # Apply a minimum realistic price

    return {
        "predicted_price_mad": predicted_price,
        "predicted_price_per_m2": round(predicted_price / data_dict['size_m2'], 0),
        "model_used": "igudar_valuation_v1_xgboost"
    }


@app.get("/")
def read_root():
    return {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}
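
# --- APPENDIX: city-stats lookup sketch (referenced in the NOTE above) ---
# This is only an illustrative sketch, not part of the trained pipeline: the city names and
# numbers below are hypothetical placeholders. In practice these statistics would be computed
# from the training data and saved alongside the other preprocessing objects, then used to fill
# 'city_median_size' and 'city_infra_avg' before scaling.
CITY_STATS = {
    "Casablanca": {"city_median_size": 95.0, "city_infra_avg": 72.0},  # hypothetical values
    "Marrakech": {"city_median_size": 110.0, "city_infra_avg": 65.0},  # hypothetical values
}


def fill_city_features(features: dict, city: str) -> dict:
    """Fill city-level features from a pre-calculated lookup, if the city is known."""
    stats = CITY_STATS.get(city)
    if stats:
        features.update(stats)
    return features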
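
# --- Running the API ---
# A minimal way to launch the app directly, assuming uvicorn is installed
# (equivalently, run `uvicorn app:app --reload` from the command line).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request against /valuation (field values are illustrative; the 'property_type' and
# 'city' strings must match categories seen by the label encoders during training):
#
#   curl -X POST http://localhost:8000/valuation \
#     -H "Content-Type: application/json" \
#     -d '{"size_m2": 120, "bedrooms": 3, "bathrooms": 2, "age_years": 5,
#          "property_type": "apartment", "city": "Casablanca",
#          "infrastructure_score": 70, "economic_score": 65, "lifestyle_score": 60,
#          "investment_score": 68, "neighborhood_tier": 2, "total_amenities": 25}'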