yasser5711 committed
Commit 5e6aba8 · verified
1 Parent(s): bdef4ef

Upload folder using huggingface_hub

Files changed (9)
  1. .gitignore +4 -0
  2. Dockerfile +13 -0
  3. README.md +8 -0
  4. inference/model_registry.py +53 -0
  5. inference/predict.py +94 -0
  6. main.py +10 -0
  7. package.json +7 -0
  8. requirements.txt +8 -0
  9. server.py +29 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
+ .venv/
+ artifacts/
+ training/data/
+ __pycache__/
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.11-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ # HF Spaces requires port 7860
+ EXPOSE 7860
+
+ CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,8 @@
+ ---
+ title: M2Predict
+ emoji: 🏠
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ pinned: false
+ ---
inference/model_registry.py ADDED
@@ -0,0 +1,53 @@
+ import json
+ from pathlib import Path
+
+ import joblib
+
+ ROOT = Path(__file__).resolve().parents[1]  # apps/ml/
+ MODELS_DIR = ROOT / "artifacts" / "models"
+
+
+ class ModelBundle:
+     def __init__(self, model, te_mapping, metadata, pi_p5=None, pi_p95=None, bootstrap_models=None):
+         self.model = model
+         self.te_mapping = te_mapping
+         self.metadata = metadata
+         self.pi_p5 = pi_p5
+         self.pi_p95 = pi_p95
+         self.bootstrap_models = bootstrap_models or []
+
+
+ def load_model(version: str) -> ModelBundle:
+     model_dir = MODELS_DIR / version
+     if not model_dir.exists():
+         raise ValueError(f"Model version '{version}' not found")
+
+     model = joblib.load(model_dir / "model.joblib")
+     metadata = json.loads(
+         (model_dir / "metadata.json").read_text(encoding="utf-8"))
+     te_mapping = json.loads(
+         (model_dir / "target_encoding.json").read_text(encoding="utf-8"))
+
+     conf = metadata.get("confidence", {})
+     pi_p5 = conf.get("pi_p5", None)
+     pi_p95 = conf.get("pi_p95", None)
+
+     # (optional) load HGB bootstrap models if present
+     bootstrap_models = []
+     bs_dir = conf.get("bootstrap_dir")
+     if bs_dir:
+         bs_path = Path(bs_dir)
+         if not bs_path.is_absolute():
+             bs_path = model_dir / "bootstrap"
+         if bs_path.exists():
+             for p in sorted(bs_path.glob("model_boot_*.joblib")):
+                 bootstrap_models.append(joblib.load(p))
+
+     return ModelBundle(
+         model=model,
+         te_mapping=te_mapping,
+         metadata=metadata,
+         pi_p5=pi_p5,
+         pi_p95=pi_p95,
+         bootstrap_models=bootstrap_models,
+     )
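
Usage note (not part of the commit): a minimal sketch of loading a bundle and inspecting what the registry read from disk, assuming an artifacts/models/v1_rf_te/ directory containing model.joblib, metadata.json and target_encoding.json as expected by load_model above.

# Hedged sketch, not in the commit: assumes artifacts/models/v1_rf_te/ exists locally
from inference.model_registry import load_model

bundle = load_model("v1_rf_te")
print(type(bundle.model))                     # fitted pipeline deserialized from model.joblib
print(bundle.metadata.get("confidence", {}))  # may expose pi_p5, pi_p95, bootstrap_dir, method
print(len(bundle.bootstrap_models))           # 0 unless bootstrap_dir points at model_boot_*.joblib files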
inference/predict.py ADDED
@@ -0,0 +1,94 @@
+ import numpy as np
+ import pandas as pd
+
+ from .model_registry import load_model
+
+ DEFAULT_MODEL = "v1_rf_te"
+
+
+ def make_features(payload: dict, te_data: dict):
+     cp = str(payload["code_postal"]).zfill(5)
+
+     departement = cp[:2]
+     global_mean = te_data["global_mean"]
+     mapping = te_data["mapping"]
+
+     cp_te = mapping.get(cp, global_mean)
+
+     return pd.DataFrame([{
+         "departement": departement,
+         "surface_reelle_bati": payload["surface_reelle_bati"],
+         "nombre_pieces_principales": payload["nombre_pieces_principales"],
+         "type_local": payload["type_local"],
+         "cp_te": cp_te,
+     }])
+
+
+ def _score_from_width(pi_width: float, p5: float | None, p95: float | None) -> float:
+     if p5 is None or p95 is None:
+         return 0.5  # fallback if metadata missing
+     denom = (p95 - p5) if (p95 - p5) != 0 else 1e-9
+     score = 1 - (pi_width - p5) / denom
+     return float(np.clip(score, 0, 1))
+
+
+ def predict(payload: dict, model_version: str = DEFAULT_MODEL):
+     bundle = load_model(model_version)
+     X = make_features(payload, bundle.te_mapping)
+
+     pipe = bundle.model  # Pipeline(preprocess + model)
+     prix_m2 = float(pipe.predict(X)[0])
+
+     pre = pipe.named_steps["preprocess"]
+     core_model = pipe.named_steps["model"]
+
+     # ----------------------------
+     # Confidence: RF vs HGB
+     # ----------------------------
+     pi_width = None
+     q10 = None
+     q90 = None
+
+     # Case 1: RandomForestRegressor (has estimators_)
+     if hasattr(core_model, "estimators_"):
+         Xt = pre.transform(X)
+         all_tree_preds = np.array([tree.predict(Xt)[0]
+                                    for tree in core_model.estimators_])
+         q10 = float(np.quantile(all_tree_preds, 0.10))
+         q90 = float(np.quantile(all_tree_preds, 0.90))
+         pi_width = q90 - q10
+
+     # Case 2: HistGradientBoostingRegressor -> use bootstrap models
+     else:
+         # bundle.bootstrap_models is loaded in model_registry.py (from metadata.confidence.bootstrap_dir)
+         if bundle.bootstrap_models:
+             boot_preds = np.array([float(m.predict(X)[0])
+                                    for m in bundle.bootstrap_models])
+             q10 = float(np.quantile(boot_preds, 0.10))
+             q90 = float(np.quantile(boot_preds, 0.90))
+             pi_width = q90 - q10
+         else:
+             # no bootstrap models available => cannot compute uncertainty properly
+             pi_width = None
+
+     score = _score_from_width(
+         pi_width=float(pi_width) if pi_width is not None else 0.0,
+         p5=bundle.pi_p5,
+         p95=bundle.pi_p95,
+     ) if pi_width is not None else 0.5
+
+     surface = float(payload["surface_reelle_bati"])
+     prix_total = prix_m2 * surface
+
+     return {
+         "model_version": model_version,
+         "prix_m2": round(prix_m2, 2),
+         "prix_total_estime": round(prix_total, 2),
+         "score_confiance": round(score, 3),
+
+         # optional debug
+         "intervalle_largeur": None if pi_width is None else round(float(pi_width), 2),
+         "q10": None if q10 is None else round(q10, 2),
+         "q90": None if q90 is None else round(q90, 2),
+         "confidence_method": bundle.metadata.get("confidence", {}).get("method"),
+     }
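
Side note (not part of the commit): _score_from_width rescales the prediction-interval width (q90 - q10 from the trees or bootstrap models) against the p5/p95 calibration widths stored in metadata and clips the result to [0, 1]. A standalone sketch with made-up numbers:

import numpy as np

# Illustration only: p5/p95 are hypothetical calibration widths, pi_width a hypothetical q90 - q10
p5, p95 = 200.0, 1500.0
pi_width = 480.0

denom = (p95 - p5) if (p95 - p5) != 0 else 1e-9
score = float(np.clip(1 - (pi_width - p5) / denom, 0, 1))
print(round(score, 3))  # narrower intervals map to scores near 1, wider ones near 0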
main.py ADDED
@@ -0,0 +1,10 @@
+ from inference.predict import predict
+
+ result = predict({
+     "code_postal": "75011",
+     "surface_reelle_bati": 42,
+     "nombre_pieces_principales": 2,
+     "type_local": "Appartement",
+ })
+
+ print(result)
package.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "name": "ml",
+   "private": true,
+   "scripts": {
+     "dev": ".venv\\Scripts\\python -m uvicorn server:app --reload --port 8000"
+   }
+ }
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ joblib
+ numpy
+ pandas
+ pyarrow
+ scikit-learn
+ tqdm
+ uvicorn[standard]
server.py ADDED
@@ -0,0 +1,29 @@
+ from __future__ import annotations
+
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+
+ from inference.predict import predict
+
+ app = FastAPI(title="M2Predict API")
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ class PredictRequest(BaseModel):
+     code_postal: str
+     surface_reelle_bati: float
+     nombre_pieces_principales: int
+     type_local: str
+
+
+ @app.post("/predict")
+ def predict_endpoint(req: PredictRequest, model_version: str = "v1_rf_te"):
+     result = predict(req.model_dump(), model_version=model_version)
+     return result
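
Usage note (not part of the commit): a minimal client sketch for the /predict endpoint using only the standard library, assuming the server is running locally on port 8000 as in the package.json dev script (the deployed Space listens on 7860 instead).

import json
import urllib.request

payload = {
    "code_postal": "75011",
    "surface_reelle_bati": 42,
    "nombre_pieces_principales": 2,
    "type_local": "Appartement",
}
req = urllib.request.Request(
    "http://localhost:8000/predict?model_version=v1_rf_te",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read()))  # prix_m2, prix_total_estime, score_confiance, ...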