# house-price-api / main.py
# FabIndy's picture
# Clean restart for Hugging Face Space (no model binaries)
# 0101f7e
# ==============================================
# API de prédiction du prix des maisons (FastAPI + modèle Hugging Face)
# ==============================================
import logging
from math import radians, sin, cos, sqrt, atan2
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from huggingface_hub import hf_hub_download
from pydantic import BaseModel
# ------------------------------------------------
# Initialisation
# ------------------------------------------------
# ASGI application object; the route decorators below register endpoints on it.
app = FastAPI(
    title="House Price Prediction API (via Hugging Face Hub)",
)

# Directory containing this file; local resources are resolved relative to it.
BASE_DIR = Path(__file__).resolve().parent
# ------------------------------------------------
# Chargement des fichiers depuis le modèle hébergé
# ------------------------------------------------
# Hugging Face Hub repository that hosts the trained model and the fitted scalers.
REPO_ID = "FabIndy/indy-house-model"

# Each call returns the local path of the requested artifact.
model_path = hf_hub_download(repo_id=REPO_ID, filename="xgb_model_trainval.pkl")
scaler_normal_path = hf_hub_download(repo_id=REPO_ID, filename="scaler_normal-v2.pkl")
scaler_standard_path = hf_hub_download(repo_id=REPO_ID, filename="scaler_standard-v2.pkl")

# NOTE(security): joblib.load unpickles these files, and unpickling can execute
# arbitrary code. Only point REPO_ID at a repository you control or fully trust.
model = joblib.load(model_path)
scaler_normal = joblib.load(scaler_normal_path)
scaler_standard = joblib.load(scaler_standard_path)

# Ordered feature names, one per line, read from a text file next to this module.
# Blank lines are skipped: an empty name would otherwise end up in FEATURES and
# make the final column selection in preprocess_input fail.
with open(BASE_DIR / "features_trainval.txt", "r", encoding="utf-8") as f:
    FEATURES = [name for name in (line.strip() for line in f) if name]

# Columns produced by scaler_normal, in the order preprocess_input feeds them.
NORMAL_COLS = ["bedrooms", "bathrooms", "lat", "lon", "distance_to_downtown_km"]
# Columns produced by scaler_standard, in the order preprocess_input feeds them.
STANDARD_COLS = ["log_living_area", "log_house_age"]

# Reference point (degrees) for the distance feature; presumably downtown
# Indianapolis -- confirm against the training pipeline.
DOWNTOWN_LAT, DOWNTOWN_LON = 39.7684, -86.1581
# ------------------------------------------------
# Calcul de distance géographique
# ------------------------------------------------
def haversine_distance_to_downtown(lat, lon, downtown=None):
    """Return the great-circle distance, in kilometres, to the downtown point.

    Args:
        lat: Latitude of the property, in degrees.
        lon: Longitude of the property, in degrees.
        downtown: Optional ``(lat, lon)`` pair in degrees to measure against.
            When omitted, the module-level ``DOWNTOWN_LAT`` / ``DOWNTOWN_LON``
            are used, which is the original behaviour.

    Returns:
        The haversine distance as a float, using an Earth radius of 6371 km.
    """
    earth_radius_km = 6371
    if downtown is None:
        downtown = (DOWNTOWN_LAT, DOWNTOWN_LON)
    ref_lat, ref_lon = downtown

    lat1, lon1, lat2, lon2 = map(radians, [lat, lon, ref_lat, ref_lon])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Mathematically `a` lies in [0, 1], but rounding can push it marginally
    # outside (e.g. for near-antipodal points), which would make sqrt() raise
    # "math domain error". Argument order keeps a NaN input propagating as NaN.
    a = min(max(a, 0.0), 1.0)

    return 2 * earth_radius_km * atan2(sqrt(a), sqrt(1 - a))
# ------------------------------------------------
# Entrée utilisateur
# ------------------------------------------------
class HouseInput(BaseModel):
    """Request body describing one house, shared by the POST endpoints."""

    # Room counts; scaled together with the location features.
    bedrooms: float
    bathrooms: float

    # Geographic position in degrees; also used to derive the distance to downtown.
    lat: float
    lon: float

    # Both values are log1p-transformed before scaling.
    living_area: float
    house_age: float

    # Matched against the one-hot "zipcode_<value>.0" feature columns.
    zipcode: str
# ------------------------------------------------
# Préprocessing complet
# ------------------------------------------------
def preprocess_input(payload: HouseInput):
    """Turn one request payload into the single-row feature frame for the model.

    Steps: derive the distance to downtown, log1p-transform living area and
    house age, apply the two fitted scalers, one-hot encode the zipcode, and
    return the columns ordered as in ``FEATURES``.

    Args:
        payload: Validated request body.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are exactly ``FEATURES``.

    Raises:
        ValueError: If ``living_area`` or ``house_age`` is non-finite or not
            greater than -1 (log1p would be NaN or -inf), or if the zipcode
            has no matching one-hot column in ``FEATURES``.
    """
    # Reject values outside the domain of log1p up front, instead of silently
    # feeding NaN / -inf features to the scaler and the model.
    for field_name in ("living_area", "house_age"):
        value = getattr(payload, field_name)
        if not np.isfinite(value) or value <= -1:
            raise ValueError(f"Valeur invalide pour {field_name} : {value}")

    distance_km = haversine_distance_to_downtown(payload.lat, payload.lon)
    log_living_area = np.log1p(payload.living_area)
    log_house_age = np.log1p(payload.house_age)

    # Value order must match STANDARD_COLS / NORMAL_COLS respectively.
    X_standard = pd.DataFrame(
        scaler_standard.transform([[log_living_area, log_house_age]]),
        columns=STANDARD_COLS,
    )
    X_normal = pd.DataFrame(
        scaler_normal.transform(
            [[payload.bedrooms, payload.bathrooms, payload.lat, payload.lon, distance_km]]
        ),
        columns=NORMAL_COLS,
    )

    # One-hot zipcode block: every known zipcode column starts at 0.
    zip_vector = {col: 0 for col in FEATURES if col.startswith("zipcode_")}

    # Feature columns carry a ".0" suffix (presumably the training zipcodes
    # were floats -- confirm), so "46220" is normalised to "46220.0".
    z = payload.zipcode.strip()
    if not z.endswith(".0"):
        z += ".0"
    zip_col = f"zipcode_{z}"
    if zip_col not in zip_vector:
        raise ValueError(f"Zipcode inconnu : {payload.zipcode}")
    zip_vector[zip_col] = 1
    X_zip = pd.DataFrame([zip_vector])

    # Re-select with FEATURES so the column order matches the feature list.
    X_all = pd.concat([X_normal, X_standard, X_zip], axis=1)[FEATURES]
    return X_all
# ------------------------------------------------
# Endpoint principal
# ------------------------------------------------
@app.post("/predict")
def predict_price(payload: HouseInput):
    """Predict the price of one house.

    The model output is passed through ``expm1`` to obtain the price in USD.

    Returns:
        On success, a dict with ``predicted_price_usd`` (rounded to 2 decimals)
        and ``log_prediction`` (rounded to 5 decimals).
        On failure, a JSON body ``{"error": <message>}`` -- the same shape as
        before -- but with a non-200 status: 422 when a ``ValueError`` is
        raised (e.g. unknown zipcode), 500 for any other exception.
    """
    try:
        X = preprocess_input(payload)
        y_log = float(model.predict(X)[0])
        y_usd = float(np.expm1(y_log))
    except ValueError as exc:
        # Invalid input: report it as a client error rather than a 200.
        return JSONResponse(status_code=422, content={"error": str(exc)})
    except Exception as exc:
        # Unexpected failure: keep the traceback in the logs instead of
        # swallowing it, and signal a server error to the client.
        logging.getLogger(__name__).exception("Prediction failed")
        return JSONResponse(status_code=500, content={"error": str(exc)})

    return {"predicted_price_usd": round(y_usd, 2), "log_prediction": round(y_log, 5)}
# ------------------------------------------------
# Endpoint debug
# ------------------------------------------------
@app.post("/debug/transform")
def debug_transform(payload: HouseInput):
    """Expose the preprocessed feature row for inspection.

    Returns:
        A dict with the frame ``shape``, the first ten column names and the
        first ten values of the single row. If preprocessing rejects the input
        with ``ValueError`` (e.g. unknown zipcode), a 422 response with body
        ``{"error": <message>}`` is returned instead of an unhandled 500.
    """
    try:
        X = preprocess_input(payload)
    except ValueError as exc:
        return JSONResponse(status_code=422, content={"error": str(exc)})

    return {
        "shape": X.shape,
        "columns_first_10": X.columns[:10].tolist(),
        "sample_values_first_10": X.iloc[0, :10].to_dict(),
    }
# ------------------------------------------------
# Endpoint info
# ------------------------------------------------
@app.get("/model_info")
def model_info():
    """Report where the model came from and the environment it runs in."""
    # Imported here because only this diagnostic endpoint needs them.
    import sys
    import sklearn

    info = {}
    info["model_source"] = REPO_ID
    info["sklearn_version"] = sklearn.__version__
    # None when the loaded model object does not expose n_features_in_.
    info["n_features_model"] = getattr(model, "n_features_in_", None)
    info["first_features"] = FEATURES[:10]
    info["model_type"] = str(type(model))
    info["python_env"] = sys.executable
    return info
# ------------------------------------------------
# Exécution locale / Docker (Hugging Face)
# ------------------------------------------------
if __name__ == "__main__":
    # uvicorn is only needed when this file is executed directly as a script.
    import uvicorn

    # Listen on every interface on port 7860 (presumably the port the
    # Hugging Face Space container expects -- confirm).
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
# To launch the API locally with uvicorn:
# cd "C:\Users\fabri\Documents\Datascientest\ml_indy_housing\house-price-api"
# conda activate indy_env
# C:\Users\fabri\anaconda3\envs\indy_env\Scripts\uvicorn.exe main:app --reload