demo model
Browse files
app.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
# The core FastAPI application for our IGUDAR model

import joblib
import pandas as pd
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import warnings

# Silence library deprecation chatter in the demo logs.
warnings.filterwarnings('ignore')

# --- 1. DEFINE APP AND LOAD MODELS ---

# Initialize the FastAPI app
app = FastAPI(
    title="IGUDAR AI Valuation API",
    description="An API to serve the trained property valuation model for Moroccan real estate.",
    version="1.0"
)

# Load the trained model and preprocessing objects at startup.
# This ensures they are loaded only once, keeping request handling fast.
try:
    # FIX: the repository ships the artifacts under these names; the previous
    # paths (valuation_model.joblib / preprocessing_objects.joblib) do not
    # exist in /models, so loading always failed.
    model = joblib.load("./models/igudar_fixed_valuation_20250620_233500.joblib")
    preprocessing = joblib.load("./models/igudar_fixed_preprocessing_20250620_233500.joblib")

    # Extract the individual objects from the preprocessing bundle.
    scaler = preprocessing['scaler']
    label_encoders = preprocessing['label_encoders']
    feature_names = preprocessing['feature_names']

    print("✅ Models and preprocessing objects loaded successfully.")

except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the /models directory.")
    # FIX: null out every loaded object — previously only `model` was reset,
    # so a partial failure left scaler/label_encoders/feature_names undefined
    # and the endpoint died with NameError instead of a clean 500.
    model = None
    scaler = None
    label_encoders = {}
    feature_names = []
| 38 |
+
# --- 2. DEFINE THE INPUT DATA MODEL ---
|
| 39 |
+
|
| 40 |
+
# Pydantic model for input data validation.
|
| 41 |
+
# This tells FastAPI what the incoming JSON should look like.
|
| 42 |
+
class PropertyFeatures(BaseModel):
    """Request schema for the /valuation endpoint.

    FastAPI validates the incoming JSON against these fields; every field
    without a default is required. Field names are part of the public API
    contract and must not change.
    """
    size_m2: float               # living area in square meters (used as a divisor downstream)
    bedrooms: int
    bathrooms: int
    age_years: int               # property age; capped at 50 during feature engineering
    property_type: str           # categorical; label-encoded with the training-time encoder
    city: str                    # categorical; label-encoded with the training-time encoder
    infrastructure_score: float  # neighborhood scores — presumably on a 0–100 scale; TODO confirm against training data
    economic_score: float
    lifestyle_score: float
    investment_score: float
    neighborhood_tier: int       # lower is better (prediction uses 5 - tier)
    total_amenities: int
    data_quality: float = 0.9    # Default value
    has_coordinates: bool = True # Default value
|
| 57 |
+
|
| 58 |
+
# --- 3. CREATE THE PREDICTION ENDPOINT ---
|
| 59 |
+
|
| 60 |
+
@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
    """
    Predicts the value of a property based on its features.

    Accepts a JSON object matching PropertyFeatures and returns the predicted
    price in MAD, the implied price per square meter, and the model tag.

    Raises:
        HTTPException(500): the model failed to load at startup.
        HTTPException(422): size_m2 is not strictly positive — it is used as
            a divisor for the density features and for price-per-m2.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model is not loaded. Check server logs.")

    # Convert the incoming Pydantic model to a plain dictionary.
    data_dict = property_data.dict()

    # FIX: size_m2 is a divisor below; zero or negative values previously
    # crashed with ZeroDivisionError (surfacing as an unhandled 500).
    if data_dict['size_m2'] <= 0:
        raise HTTPException(status_code=422, detail="size_m2 must be a positive number.")

    # Start with a zero vector covering every feature the model expects.
    features = {name: 0 for name in feature_names}

    # --- Feature Engineering (must EXACTLY match the training script) ---

    # 1. Direct mapping (the .get defaults mirror the training-time fallbacks).
    features.update({
        'size_m2': data_dict.get('size_m2', 100),
        'bedrooms': data_dict.get('bedrooms', 2),
        'bathrooms': data_dict.get('bathrooms', 1),
        'age_years': min(data_dict.get('age_years', 5), 50),  # cap age at 50
        'infrastructure_score': data_dict.get('infrastructure_score', 50),
        'economic_score': data_dict.get('economic_score', 50),
        'lifestyle_score': data_dict.get('lifestyle_score', 50),
        'investment_score': data_dict.get('investment_score', 50),
        'neighborhood_tier': data_dict.get('neighborhood_tier', 3),
        'total_amenities': data_dict.get('total_amenities', 20),
        'data_quality': data_dict.get('data_quality', 0.8)
    })

    # 2. Calculated features (clamped the same way as during training).
    features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
    features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
    features['location_quality'] = (features['infrastructure_score'] * 0.4 +
                                    features['economic_score'] * 0.3 +
                                    features['lifestyle_score'] * 0.3)
    features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 +
                                             features['location_quality'] * 0.5 +
                                             (10 if data_dict.get('has_coordinates', True) else 0) +
                                             (features['data_quality'] * 20))

    # NOTE: city-level aggregates ('city_median_size', 'city_infra_avg') cannot
    # be computed from a single record, so they stay 0 here. A pre-computed
    # lookup table of city statistics would be the production-grade fix.

    # 3. Categorical encoding with the encoders fitted during training.
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            # Bind the raw value outside the try so the warning below can
            # never reference an unbound local.
            value = data_dict.get(col)
            try:
                features[encoded_col_name] = le.transform([value])[0]
            except Exception as e:
                # Unseen category: fall back to 0 rather than failing the request.
                print(f"Warning: Could not encode '{value}' for feature '{col}'. Defaulting to 0. Error: {e}")
                features[encoded_col_name] = 0

    # Assemble a single-row DataFrame in the exact training column order.
    df = pd.DataFrame([features])[feature_names]

    # Scale the features using the loaded scaler, then predict.
    df_scaled = scaler.transform(df)
    prediction = model.predict(df_scaled)[0]

    # Post-process: enforce a minimum realistic price and round to whole MAD.
    predicted_price = round(max(200000, prediction), 0)

    return {
        "predicted_price_mad": predicted_price,
        "predicted_price_per_m2": round(predicted_price / data_dict['size_m2'], 0),
        "model_used": "igudar_valuation_v1_xgboost"
    }
|
| 139 |
+
|
| 140 |
+
@app.get("/")
def read_root():
    """Landing endpoint that points clients at the interactive /docs UI."""
    greeting = "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."
    return {"message": greeting}
|
models/igudar_fixed_preprocessing_20250620_233500.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7c6b5d64702e089ec7c7b884fee399b0e91be3d6e56690647fac161beb1995d
|
| 3 |
+
size 5675
|
models/igudar_fixed_valuation_20250620_233500.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd12af4accd24b1de3ab873656fcaeeb8d2535826948cf3868c70f6b4107c1e7
|
| 3 |
+
size 725863
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
pandas
|
| 4 |
+
numpy
|
| 5 |
+
scikit-learn
|
| 6 |
+
xgboost
|
| 7 |
+
joblib
|
| 8 |
+
pydantic
|