Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitignore +4 -0
- Dockerfile +13 -0
- README.md +8 -0
- inference/model_registry.py +53 -0
- inference/predict.py +94 -0
- main.py +10 -0
- package.json +7 -0
- requirements.txt +8 -0
- server.py +29 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
artifacts/
|
| 3 |
+
training/data/
|
| 4 |
+
__pycache__/
|
Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

WORKDIR /app

# Install dependencies before copying the source tree so the pip layer
# is cached across code-only rebuilds.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# HF Spaces requires port 7860
EXPOSE 7860

CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: M2Predict
|
| 3 |
+
emoji: 🏠
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
inference/model_registry.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
import joblib
|
| 5 |
+
|
| 6 |
+
ROOT = Path(__file__).resolve().parents[1] # apps/ml/
|
| 7 |
+
MODELS_DIR = ROOT / "artifacts" / "models"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class ModelBundle:
    """Everything needed to score one model version at inference time."""

    def __init__(self, model, te_mapping, metadata, pi_p5=None, pi_p95=None, bootstrap_models=None):
        # Fitted pipeline loaded from model.joblib.
        self.model = model
        # Target-encoding data ({"global_mean": ..., "mapping": {...}}).
        self.te_mapping = te_mapping
        # Training metadata, including the optional "confidence" section.
        self.metadata = metadata
        # Calibration percentiles of the prediction-interval width, if any.
        self.pi_p5 = pi_p5
        self.pi_p95 = pi_p95
        # Bootstrap replicas for uncertainty estimation (may be empty).
        self.bootstrap_models = bootstrap_models if bootstrap_models is not None else []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def load_model(version: str) -> ModelBundle:
    """Load a persisted model bundle from ``artifacts/models/<version>/``.

    Reads the serialized pipeline (``model.joblib``), training metadata
    (``metadata.json``) and the target-encoding table
    (``target_encoding.json``), plus optional bootstrap replicas used for
    uncertainty estimation.

    Raises:
        ValueError: if no directory exists for *version*.
    """
    model_dir = MODELS_DIR / version
    if not model_dir.exists():
        raise ValueError(f"Model version '{version}' not found")

    model = joblib.load(model_dir / "model.joblib")
    metadata = json.loads(
        (model_dir / "metadata.json").read_text(encoding="utf-8"))
    te_mapping = json.loads(
        (model_dir / "target_encoding.json").read_text(encoding="utf-8"))

    # Calibration percentiles of the prediction-interval width recorded at
    # training time; missing keys leave them as None (predict falls back).
    conf = metadata.get("confidence", {})
    pi_p5 = conf.get("pi_p5", None)
    pi_p95 = conf.get("pi_p95", None)

    # (optional) load HGB bootstrap replicas if present
    bootstrap_models = []
    bs_dir = conf.get("bootstrap_dir")
    if bs_dir:
        bs_path = Path(bs_dir)
        if not bs_path.is_absolute():
            # NOTE(review): a *relative* bootstrap_dir value is discarded and
            # the conventional <model_dir>/bootstrap location is used instead
            # — confirm this is intentional and not meant to be
            # model_dir / bs_dir.
            bs_path = model_dir / "bootstrap"
        # Missing directory (e.g. absolute path from the training machine)
        # silently yields no bootstrap models.
        if bs_path.exists():
            for p in sorted(bs_path.glob("model_boot_*.joblib")):
                bootstrap_models.append(joblib.load(p))

    return ModelBundle(
        model=model,
        te_mapping=te_mapping,
        metadata=metadata,
        pi_p5=pi_p5,
        pi_p95=pi_p95,
        bootstrap_models=bootstrap_models,
    )
|
inference/predict.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
from .model_registry import load_model
|
| 5 |
+
|
| 6 |
+
DEFAULT_MODEL = "v1_rf_te"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def make_features(payload: dict, te_data: dict):
    """Build the one-row feature DataFrame the pipeline expects.

    The postal code is left-padded to five digits; its first two digits
    become the ``departement`` feature, and the full code is target-encoded
    through ``te_data["mapping"]``, falling back to ``te_data["global_mean"]``
    for codes never seen during training.
    """
    postal_code = str(payload["code_postal"]).zfill(5)
    encoded_cp = te_data["mapping"].get(postal_code, te_data["global_mean"])

    row = {
        "departement": postal_code[:2],
        "surface_reelle_bati": payload["surface_reelle_bati"],
        "nombre_pieces_principales": payload["nombre_pieces_principales"],
        "type_local": payload["type_local"],
        "cp_te": encoded_cp,
    }
    return pd.DataFrame([row])
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _score_from_width(pi_width: float, p5: float | None, p95: float | None) -> float:
|
| 28 |
+
if p5 is None or p95 is None:
|
| 29 |
+
return 0.5 # fallback if metadata missing
|
| 30 |
+
denom = (p95 - p5) if (p95 - p5) != 0 else 1e-9
|
| 31 |
+
score = 1 - (pi_width - p5) / denom
|
| 32 |
+
return float(np.clip(score, 0, 1))
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def predict(payload: dict, model_version: str = DEFAULT_MODEL):
    """Estimate price per m² for *payload* and attach a confidence score.

    Args:
        payload: dict with keys ``code_postal``, ``surface_reelle_bati``,
            ``nombre_pieces_principales``, ``type_local``.
        model_version: artifact directory name under ``artifacts/models/``.

    Returns:
        dict with the point estimate (``prix_m2``, ``prix_total_estime``),
        a 0-1 ``score_confiance``, and debug fields (interval width, q10/q90,
        confidence method) which are None when uncertainty is unavailable.
    """
    bundle = load_model(model_version)
    X = make_features(payload, bundle.te_mapping)

    pipe = bundle.model  # Pipeline(preprocess + model)
    prix_m2 = float(pipe.predict(X)[0])

    # assumes the pipeline has exactly these two named steps — TODO confirm
    # against the training code.
    pre = pipe.named_steps["preprocess"]
    core_model = pipe.named_steps["model"]

    # ----------------------------
    # Confidence: RF vs HGB
    # ----------------------------
    pi_width = None
    q10 = None
    q90 = None

    # Case 1: RandomForestRegressor (has estimators_)
    # Spread of the per-tree predictions serves as a prediction interval.
    if hasattr(core_model, "estimators_"):
        Xt = pre.transform(X)
        all_tree_preds = np.array([tree.predict(Xt)[0]
                                   for tree in core_model.estimators_])
        q10 = float(np.quantile(all_tree_preds, 0.10))
        q90 = float(np.quantile(all_tree_preds, 0.90))
        pi_width = q90 - q10

    # Case 2: HistGradientBoostingRegressor -> use bootstrap models
    else:
        # bundle.bootstrap_models is loaded in model_registry.py (from metadata.confidence.bootstrap_dir)
        if bundle.bootstrap_models:
            # NOTE(review): bootstrap models receive the raw feature frame X,
            # so they are presumably full pipelines (not bare estimators) —
            # confirm against how they are saved at training time.
            boot_preds = np.array([float(m.predict(X)[0])
                                   for m in bundle.bootstrap_models])
            q10 = float(np.quantile(boot_preds, 0.10))
            q90 = float(np.quantile(boot_preds, 0.90))
            pi_width = q90 - q10
        else:
            # no bootstrap models available => cannot compute uncertainty properly
            pi_width = None

    # Neutral 0.5 when no interval could be computed; otherwise map the
    # interval width onto [0, 1] via the calibration percentiles.
    score = _score_from_width(
        pi_width=float(pi_width) if pi_width is not None else 0.0,
        p5=bundle.pi_p5,
        p95=bundle.pi_p95,
    ) if pi_width is not None else 0.5

    surface = float(payload["surface_reelle_bati"])
    prix_total = prix_m2 * surface

    return {
        "model_version": model_version,
        "prix_m2": round(prix_m2, 2),
        "prix_total_estime": round(prix_total, 2),
        "score_confiance": round(score, 3),

        # optional debug
        "intervalle_largeur": None if pi_width is None else round(float(pi_width), 2),
        "q10": None if q10 is None else round(q10, 2),
        "q90": None if q90 is None else round(q90, 2),
        "confidence_method": bundle.metadata.get("confidence", {}).get("method"),
    }
|
main.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from inference.predict import predict
|
| 2 |
+
|
| 3 |
+
def main() -> None:
    """Run a single smoke-test prediction and print the result.

    Previously this ran at module import time; wrapping it in main() with
    the __main__ guard keeps `import main` side-effect free while the
    script behaves identically when executed directly.
    """
    result = predict({
        "code_postal": "75011",
        "surface_reelle_bati": 42,
        "nombre_pieces_principales": 2,
        "type_local": "Appartement",
    })

    print(result)


if __name__ == "__main__":
    main()
|
package.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "ml",
|
| 3 |
+
"private": true,
|
| 4 |
+
"scripts": {
|
| 5 |
+
"dev": ".venv\\Scripts\\python -m uvicorn server:app --reload --port 8000"
|
| 6 |
+
}
|
| 7 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
joblib
|
| 3 |
+
numpy
|
| 4 |
+
pandas
|
| 5 |
+
pyarrow
|
| 6 |
+
scikit-learn
|
| 7 |
+
tqdm
|
| 8 |
+
uvicorn[standard]
|
server.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from fastapi import FastAPI
|
| 4 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
|
| 7 |
+
from inference.predict import predict
|
| 8 |
+
|
| 9 |
+
# FastAPI application served by uvicorn (see Dockerfile CMD / package.json).
app = FastAPI(title="M2Predict API")

# CORS is wide open (any origin, method, header) — acceptable for a public
# demo Space; tighten allow_origins before exposing anything sensitive.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class PredictRequest(BaseModel):
    """Request body for POST /predict."""

    # French postal code, e.g. "75011" (zero-padded to 5 digits downstream).
    code_postal: str
    # Living area in square metres.
    surface_reelle_bati: float
    # Number of main rooms.
    nombre_pieces_principales: int
    # Property type, e.g. "Appartement" or "Maison".
    type_local: str
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@app.post("/predict")
def predict_endpoint(req: PredictRequest, model_version: str = "v1_rf_te"):
    """Estimate price per m² (and total price) for one property.

    *model_version* is an optional query parameter selecting which
    artifact bundle to score with.
    """
    return predict(req.model_dump(), model_version=model_version)
|