bneay committed on
Commit
de3b0aa
·
1 Parent(s): 89e8399

demo model

Browse files
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # The core FastAPI application for our IGUDAR model
3
+
4
+ import joblib
5
+ import pandas as pd
6
+ import numpy as np
7
+ from fastapi import FastAPI, HTTPException
8
+ from pydantic import BaseModel
9
+ import warnings
10
+ warnings.filterwarnings('ignore')
11
+
12
# --- 1. DEFINE APP AND LOAD MODELS ---

# Initialize the FastAPI app
app = FastAPI(
    title="IGUDAR AI Valuation API",
    description="An API to serve the trained property valuation model for Moroccan real estate.",
    version="1.0"
)

# Load the trained model and preprocessing objects once at startup so every
# request reuses them without paying the deserialization cost again.
#
# NOTE(review): the repository ships timestamped artifacts
# (models/igudar_fixed_valuation_20250620_233500.joblib, etc.) — confirm the
# paths below match the deployed file names.
#
# Pre-initialize everything to None so a failed load degrades to a clean
# HTTP 500 in the endpoints instead of a NameError at request time.
model = None
scaler = None
label_encoders = None
feature_names = None

try:
    model = joblib.load("./models/valuation_model.joblib")
    preprocessing = joblib.load("./models/preprocessing_objects.joblib")

    # Extract the individual objects from the preprocessing bundle.
    # A missing key here is as fatal as a missing file, so KeyError is
    # caught alongside FileNotFoundError below.
    scaler = preprocessing['scaler']
    label_encoders = preprocessing['label_encoders']
    feature_names = preprocessing['feature_names']

    print("✅ Models and preprocessing objects loaded successfully.")

except (FileNotFoundError, KeyError):
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the /models directory.")
    model = None  # Ensure the "not loaded" sentinel holds even if a KeyError hit mid-way
38
# --- 2. DEFINE THE INPUT DATA MODEL ---

class PropertyFeatures(BaseModel):
    """Request schema for the /valuation endpoint.

    FastAPI uses this Pydantic model to validate and document the incoming
    JSON payload; every field without a default value is required.
    """
    size_m2: float
    bedrooms: int
    bathrooms: int
    age_years: int
    property_type: str
    city: str
    infrastructure_score: float
    economic_score: float
    lifestyle_score: float
    investment_score: float
    neighborhood_tier: int
    total_amenities: int
    data_quality: float = 0.9      # optional: assume a high-quality record
    has_coordinates: bool = True   # optional: assume the listing is geolocated
58
# --- 3. CREATE THE PREDICTION ENDPOINT ---

@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
    """
    Predicts the value of a property based on its features.

    Accepts a JSON object matching ``PropertyFeatures`` and returns the
    predicted price (MAD), the implied price per m², and the model id.

    Raises:
        HTTPException(400): if ``size_m2`` is not strictly positive — several
            derived features (and the price-per-m² output) divide by it.
        HTTPException(500): if the model failed to load at startup.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model is not loaded. Check server logs.")

    # Convert the incoming Pydantic model to a plain dictionary
    data_dict = property_data.dict()

    # Guard against ZeroDivisionError in the density features and the
    # price-per-m2 computation below.
    if data_dict['size_m2'] <= 0:
        raise HTTPException(status_code=400, detail="size_m2 must be a positive number.")

    # Start with a dictionary of all zeros for our feature vector so every
    # trained feature is present even if we cannot compute it here.
    features = {name: 0 for name in feature_names}

    # --- Feature Engineering (must EXACTLY match the training script) ---

    # 1. Direct mapping (the .get defaults mirror the training-time
    #    fallbacks; age is capped at 50 years as during training).
    features.update({
        'size_m2': data_dict.get('size_m2', 100),
        'bedrooms': data_dict.get('bedrooms', 2),
        'bathrooms': data_dict.get('bathrooms', 1),
        'age_years': min(data_dict.get('age_years', 5), 50),
        'infrastructure_score': data_dict.get('infrastructure_score', 50),
        'economic_score': data_dict.get('economic_score', 50),
        'lifestyle_score': data_dict.get('lifestyle_score', 50),
        'investment_score': data_dict.get('investment_score', 50),
        'neighborhood_tier': data_dict.get('neighborhood_tier', 3),
        'total_amenities': data_dict.get('total_amenities', 20),
        'data_quality': data_dict.get('data_quality', 0.8)
    })

    # 2. Calculated features (the min(...) caps presumably mirror
    #    training-time clipping — TODO confirm against the training script)
    features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
    features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
    features['location_quality'] = (features['infrastructure_score'] * 0.4 +
                                    features['economic_score'] * 0.3 +
                                    features['lifestyle_score'] * 0.3)
    features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 +
                                             features['location_quality'] * 0.5 +
                                             (10 if data_dict.get('has_coordinates', True) else 0) +
                                             (features['data_quality'] * 20))

    # NOTE: We can't calculate 'city_median_size' or 'city_infra_avg' for a single prediction.
    # We will use average values or handle them during training. For now, we leave them as 0.
    # This is a common challenge in deployment. A better approach would be to have a pre-calculated
    # dictionary of city stats to look up from. For this demo, this is acceptable.

    # 3. Categorical Encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            # Fetch the raw value outside the try so the except handler can
            # always reference it safely.
            value = data_dict.get(col)
            try:
                # Use the loaded encoder to transform the input string
                features[encoded_col_name] = le.transform([value])[0]
            except Exception as e:
                # If the category is new/unseen, default to 0 (or another strategy)
                print(f"Warning: Could not encode '{value}' for feature '{col}'. Defaulting to 0. Error: {e}")
                features[encoded_col_name] = 0

    # Create a DataFrame in the exact order of feature_names
    df = pd.DataFrame([features])[feature_names]

    # Scale the features using the loaded scaler
    df_scaled = scaler.transform(df)

    # Make the prediction
    prediction = model.predict(df_scaled)[0]

    # Post-process for a clean response: enforce a realistic price floor
    # and round to whole MAD.
    predicted_price = round(max(200000, prediction), 0)

    return {
        "predicted_price_mad": predicted_price,
        "predicted_price_per_m2": round(predicted_price / data_dict['size_m2'], 0),
        "model_used": "igudar_valuation_v1_xgboost"
    }
139
+
140
@app.get("/")
def read_root():
    """Landing endpoint: returns a static welcome message pointing to /docs."""
    welcome = {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}
    return welcome
models/igudar_fixed_preprocessing_20250620_233500.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c6b5d64702e089ec7c7b884fee399b0e91be3d6e56690647fac161beb1995d
3
+ size 5675
models/igudar_fixed_valuation_20250620_233500.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd12af4accd24b1de3ab873656fcaeeb8d2535826948cf3868c70f6b4107c1e7
3
+ size 725863
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pandas
4
+ numpy
5
+ scikit-learn
6
+ xgboost
7
+ joblib
8
+ pydantic