Spaces:
Sleeping
Sleeping
| # ============================================== | |
| # API de prédiction du prix des maisons (FastAPI + modèle Hugging Face) | |
| # ============================================== | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from pathlib import Path | |
| from math import radians, sin, cos, sqrt, atan2 | |
| from huggingface_hub import hf_hub_download | |
| # ------------------------------------------------ | |
| # Initialisation | |
| # ------------------------------------------------ | |
app = FastAPI(title="House Price Prediction API (via Hugging Face Hub)")
BASE_DIR = Path(__file__).resolve().parent

# ------------------------------------------------
# Load the artifacts from the hosted model repository
# ------------------------------------------------
REPO_ID = "FabIndy/indy-house-model"
model_path = hf_hub_download(repo_id=REPO_ID, filename="xgb_model_trainval.pkl")
scaler_normal_path = hf_hub_download(repo_id=REPO_ID, filename="scaler_normal-v2.pkl")
scaler_standard_path = hf_hub_download(repo_id=REPO_ID, filename="scaler_standard-v2.pkl")

# NOTE(review): joblib.load unpickles the downloaded files, which can execute
# arbitrary code. Only point REPO_ID at a repository you trust.
model = joblib.load(model_path)
scaler_normal = joblib.load(scaler_normal_path)
scaler_standard = joblib.load(scaler_standard_path)

# Ordered list of feature names, one per line in the text file shipped next
# to this module. Blank lines are skipped so that a stray empty line cannot
# turn into an empty-string feature name and break column selection later.
with open(BASE_DIR / "features_trainval.txt", "r", encoding="utf-8") as f:
    FEATURES = [line.strip() for line in f if line.strip()]

# Columns passed through scaler_normal / scaler_standard respectively, in the
# order the values are handed to each scaler.
NORMAL_COLS = ["bedrooms", "bathrooms", "lat", "lon", "distance_to_downtown_km"]
STANDARD_COLS = ["log_living_area", "log_house_age"]
# Reference point (latitude, longitude in degrees) used for the distance feature.
DOWNTOWN_LAT, DOWNTOWN_LON = 39.7684, -86.1581


# ------------------------------------------------
# Geographic distance computation
# ------------------------------------------------
def haversine_distance_to_downtown(lat: float, lon: float) -> float:
    """Return the great-circle distance in kilometres to the downtown point.

    Uses the haversine formula with an Earth radius of 6371 km between
    ``(lat, lon)`` and ``(DOWNTOWN_LAT, DOWNTOWN_LON)``.

    Args:
        lat: Latitude of the query point, in degrees.
        lon: Longitude of the query point, in degrees.

    Returns:
        The distance in kilometres (0.0 for the downtown point itself).
    """
    earth_radius_km = 6371
    lat1, lon1, lat2, lon2 = map(radians, [lat, lon, DOWNTOWN_LAT, DOWNTOWN_LON])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Mathematically a lies in [0, 1], but floating-point rounding near the
    # antipode can push it marginally outside, which would make sqrt() raise
    # "math domain error". Clamp before taking the square roots.
    a = min(1.0, max(0.0, a))
    return 2 * earth_radius_km * atan2(sqrt(a), sqrt(1 - a))
| # ------------------------------------------------ | |
| # Entrée utilisateur | |
| # ------------------------------------------------ | |
# Request body describing one house; every field is required.
class HouseInput(BaseModel):
    # Room counts, passed to scaler_normal as-is.
    bedrooms: float
    bathrooms: float
    # Coordinates in degrees: they are converted with radians() for the
    # distance-to-downtown feature and also passed to scaler_normal.
    lat: float
    lon: float
    # Transformed with log1p before scaling (units not visible here;
    # presumably a floor area and an age in years - confirm against training).
    living_area: float
    house_age: float
    # Matched against the "zipcode_<value>.0" one-hot feature names.
    zipcode: str
| # ------------------------------------------------ | |
| # Préprocessing complet | |
| # ------------------------------------------------ | |
def preprocess_input(payload: HouseInput) -> pd.DataFrame:
    """Build the single-row feature frame the model is called with.

    The payload is turned into features in four steps: the distance to
    downtown is computed from the coordinates, living area and house age are
    log1p-transformed, both groups are passed through their loaded scalers,
    and the zipcode is one-hot encoded against the ``zipcode_*`` names found
    in ``FEATURES``. The result is ordered exactly like ``FEATURES``.

    Args:
        payload: The validated request body.

    Returns:
        A one-row DataFrame whose columns are ``FEATURES``.

    Raises:
        ValueError: If ``living_area`` or ``house_age`` is not strictly
            greater than -1 (log1p would yield NaN or -inf), or if the
            zipcode has no matching ``zipcode_*`` feature.
    """
    # log1p is only finite for inputs > -1. Without this guard a NaN/-inf
    # would flow into the scaler and the model and could produce a silent,
    # meaningless prediction. "not value > -1" also rejects NaN.
    for field_name in ("living_area", "house_age"):
        value = getattr(payload, field_name)
        if not value > -1:
            raise ValueError(
                f"Valeur invalide pour {field_name} : {value} (doit être > -1)"
            )

    distance_km = haversine_distance_to_downtown(payload.lat, payload.lon)
    log_living_area = np.log1p(payload.living_area)
    log_house_age = np.log1p(payload.house_age)

    X_standard = pd.DataFrame(
        scaler_standard.transform([[log_living_area, log_house_age]]),
        columns=STANDARD_COLS,
    )
    X_normal = pd.DataFrame(
        scaler_normal.transform(
            [[payload.bedrooms, payload.bathrooms, payload.lat, payload.lon, distance_km]]
        ),
        columns=NORMAL_COLS,
    )

    # One-hot encode the zipcode: start from all zeros, then flag the match.
    zip_vector = dict.fromkeys(
        (col for col in FEATURES if col.startswith("zipcode_")), 0
    )
    # Feature names carry a ".0" suffix (e.g. "zipcode_46201.0"), so a plain
    # "46201" from the client is normalised to that form before the lookup.
    z = payload.zipcode.strip()
    if not z.endswith(".0"):
        z += ".0"
    zip_col = f"zipcode_{z}"
    if zip_col not in zip_vector:
        raise ValueError(f"Zipcode inconnu : {payload.zipcode}")
    zip_vector[zip_col] = 1
    X_zip = pd.DataFrame([zip_vector])

    # Reorder the columns to match the feature list.
    return pd.concat([X_normal, X_standard, X_zip], axis=1)[FEATURES]
| # ------------------------------------------------ | |
| # Endpoint principal | |
| # ------------------------------------------------ | |
@app.post("/predict")
def predict_price(payload: HouseInput):
    """Predict the price of one house.

    Registered as ``POST /predict``. The model output is treated as a
    log1p-scale value and converted back with ``expm1``.

    Args:
        payload: The validated request body.

    Returns:
        ``{"predicted_price_usd": ..., "log_prediction": ...}`` on success,
        or ``{"error": "<message>"}`` if preprocessing or prediction raised.
    """
    import logging

    try:
        X = preprocess_input(payload)
        y_log = float(model.predict(X)[0])
        y_usd = float(np.expm1(y_log))
    except Exception as e:
        # Deliberate best-effort boundary: the client still gets an error
        # payload, but the traceback is logged instead of being lost.
        logging.getLogger(__name__).exception("Prediction failed")
        return {"error": str(e)}
    return {"predicted_price_usd": round(y_usd, 2), "log_prediction": round(y_log, 5)}
| # ------------------------------------------------ | |
| # Endpoint debug | |
| # ------------------------------------------------ | |
@app.post("/debug_transform")
def debug_transform(payload: HouseInput):
    """Expose a summary of the preprocessed feature row for debugging.

    Registered as ``POST /debug_transform``. Exceptions raised by
    ``preprocess_input`` are not caught here and propagate to the caller.

    Args:
        payload: The validated request body.

    Returns:
        A dict with the frame's shape, its first ten column names, and the
        first ten values of its single row keyed by column name.
    """
    X = preprocess_input(payload)
    return {
        "shape": X.shape,
        "columns_first_10": X.columns[:10].tolist(),
        "sample_values_first_10": X.iloc[0, :10].to_dict(),
    }
| # ------------------------------------------------ | |
| # Endpoint info | |
| # ------------------------------------------------ | |
@app.get("/model_info")
def model_info():
    """Report basic facts about the loaded model and the runtime.

    Registered as ``GET /model_info``.

    Returns:
        A dict with the model repository id, the installed scikit-learn
        version, the model's ``n_features_in_`` (``None`` if the attribute is
        absent), the first ten feature names, the model's type, and the path
        of the running Python interpreter.
    """
    # Imported locally: these modules are only needed by this endpoint.
    import sys
    import sklearn

    return {
        "model_source": REPO_ID,
        "sklearn_version": sklearn.__version__,
        "n_features_model": getattr(model, "n_features_in_", None),
        "first_features": FEATURES[:10],
        "model_type": str(type(model)),
        "python_env": sys.executable,
    }
| # ------------------------------------------------ | |
| # Exécution locale / Docker (Hugging Face) | |
| # ------------------------------------------------ | |
if __name__ == "__main__":
    import uvicorn

    # Listen on every interface; port 7860 is presumably what the Hugging Face
    # Docker runtime expects - confirm against the Space configuration.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)

# To launch the API locally with uvicorn:
# cd "C:\Users\fabri\Documents\Datascientest\ml_indy_housing\house-price-api"
# conda activate indy_env
# C:\Users\fabri\anaconda3\envs\indy_env\Scripts\uvicorn.exe main:app --reload