# app.py
"""FastAPI service that predicts a satisfaction grade for a (professor, course) pair.

The scikit-learn pipeline was pickled from a notebook, so helper functions that
originally lived in the notebook's ``__main__`` module must be re-exposed there
before unpickling (see the monkey-patch section below).
"""
import os
import re
import sys
import types
from typing import Any, Dict, List

import joblib
import numpy as np
import pandas as pd
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

import preprocess_utils as pu

APP_VERSION = "1.0.0"
PIPELINE_PATH = "artifacts/model.joblib"

# Set by load_model() at startup; declared here so the module-level name exists
# (and predict() fails cleanly, not with NameError) before startup runs.
pipeline = None

# ---------- Monkey-patch for pickles trained inside a notebook ----------
def _ensure_main_module() -> types.ModuleType:
    """Return the ``__main__`` module, creating/replacing it if missing or invalid."""
    main_mod = sys.modules.get("__main__")
    if main_mod is None or not isinstance(main_mod, types.ModuleType):
        main_mod = types.ModuleType("__main__")
        sys.modules["__main__"] = main_mod
    return main_mod


def _expose_helpers_in_main() -> None:
    """Copy the preprocessing helpers into ``__main__``.

    joblib/pickle resolves functions by their defining module; a pipeline
    pickled in a notebook references them as ``__main__.<name>``.
    """
    main_mod = _ensure_main_module()
    helpers = {
        "to_float": pu.to_float,
        "ravel_1d": pu.ravel_1d,
        "to_1d": pu.to_1d,
        "combine_text_cols": pu.combine_text_cols,
        "clean_target_series": pu.clean_target_series,
    }
    for k, v in helpers.items():
        setattr(main_mod, k, v)


def _try_load_with_monkeypatch(path: str, max_retries: int = 5):
    """Load a joblib pickle, stubbing missing ``__main__`` attributes on the fly.

    Each ``AttributeError`` naming a missing ``__main__`` attribute gets an
    identity stub, then loading is retried (up to ``max_retries`` times).

    Raises:
        AttributeError: the last unpickling error when retries are exhausted.
        RuntimeError: if no attempt produced an error yet loading never succeeded.
    """
    _expose_helpers_in_main()
    last_err = None
    for _ in range(max_retries):
        try:
            return joblib.load(path)
        except AttributeError as e:
            last_err = e
            m = re.search(r"Can't get attribute '([^']+)'", str(e))
            if m:
                missing = m.group(1)
                main_mod = _ensure_main_module()
                if not hasattr(main_mod, missing):
                    # Identity stub so loading can continue when a small
                    # custom helper function is missing.
                    setattr(main_mod, missing, lambda *a, **k: a[0] if a else None)
            else:
                # Error does not name a missing attribute: retrying cannot help.
                break
    raise last_err if last_err else RuntimeError("Unable to load model.")
# --------------------------------------------------------------------------

# ---------- Introspection of the columns expected by the preprocessor ----------
REQUIRED: Dict[str, List[str]] = {"num": [], "cat": [], "text": []}


def _introspect_required_columns(pipeline_obj) -> Dict[str, List[str]]:
    """Inspect the pipeline's 'prep' ColumnTransformer and group its input columns.

    Returns a dict with keys ``num``, ``cat`` and ``text`` (text columns come
    from every transformer that is neither 'num' nor 'cat', e.g. ``txt_...``).
    Any introspection failure yields empty lists (best effort).
    """
    req: Dict[str, List[str]] = {"num": [], "cat": [], "text": []}
    try:
        prep = pipeline_obj.named_steps.get("prep")
        # transformers_ exists after fit; fall back to the unfitted attribute.
        transformers = getattr(prep, "transformers_", None) or getattr(prep, "transformers", [])
        for name, trans, cols in transformers:
            if name == "num" and isinstance(cols, list):
                req["num"] = list(cols)
            elif name == "cat" and isinstance(cols, list):
                req["cat"] = list(cols)
            elif isinstance(cols, list):
                # everything else (txt_...) -> individual text columns
                req["text"].extend(list(cols))
    except Exception:
        # Unexpected pipeline layout: leave the lists empty.
        pass
    # de-duplicate while preserving order
    req["text"] = list(dict.fromkeys(req["text"]))
    return req


def _make_inference_df(prof: str, course: str) -> pd.DataFrame | None:
    """Build a one-row DataFrame matching the columns listed in REQUIRED.

    Text columns are filled heuristically from their names; numeric columns
    get 0 and categorical columns the sentinel "missing".

    Returns:
        A single-row DataFrame, or None when REQUIRED lists no columns.
    """
    data: Dict[str, Any] = {}
    # text columns: fill based on what the column name suggests
    for col in REQUIRED.get("text", []):
        c = col.lower()
        if "global" in c:
            data[col] = f"{prof} | {course}"
        elif "title" in c:
            data[col] = course
        elif "desc" in c or "description" in c:
            data[col] = course
        else:
            data[col] = (f"{prof} {course}").strip()
    # numeric columns: neutral default
    for col in REQUIRED.get("num", []):
        data[col] = 0
    # categorical columns: explicit sentinel
    for col in REQUIRED.get("cat", []):
        data[col] = "missing"
    return pd.DataFrame([data]) if data else None
# --------------------------------------------------------------------------

app = FastAPI(title="Satisfaction Grades API", version=APP_VERSION)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class Payload(BaseModel):
    # Both fields are required by the schema; predict() additionally rejects
    # the case where both are blank after stripping.
    professor: str
    course: str


class Prediction(BaseModel):
    grade: float


def _clamp_grade(value: float) -> float:
    """Clamp a raw model output to the valid grade range [1.0, 5.0]."""
    return max(1.0, min(5.0, value))


@app.on_event("startup")
def load_model():
    """Load the pipeline at startup and introspect its expected columns.

    Raises:
        RuntimeError: if the model artifact is absent.
    """
    global pipeline, REQUIRED
    if not os.path.exists(PIPELINE_PATH):
        raise RuntimeError(f"Model file not found at {PIPELINE_PATH}")
    pipeline = _try_load_with_monkeypatch(PIPELINE_PATH)
    REQUIRED = _introspect_required_columns(pipeline)
    print(f"✅ Model loaded from {PIPELINE_PATH}")
    print("Expected columns:", REQUIRED)


@app.get("/health")
def health():
    """Liveness probe."""
    return {"status": "ok", "version": APP_VERSION}


@app.post("/api/predict", response_model=Prediction)
def predict(payload: Payload):
    """Predict a satisfaction grade for a (professor, course) pair.

    First tries a DataFrame shaped like the preprocessor expects; on failure
    falls back to passing a single combined string (single-TF-IDF pipelines).

    Raises:
        HTTPException 422: both fields blank.
        HTTPException 500: both inference strategies failed.
    """
    prof = (payload.professor or "").strip()
    course = (payload.course or "").strip()
    if not prof and not course:
        raise HTTPException(status_code=422, detail="Provide at least 'professor' or 'course'.")
    # 1) try a DataFrame conforming to the preprocessor's expected columns
    try:
        df = _make_inference_df(prof, course)
        if df is not None:
            y_pred = pipeline.predict(df)
            val = float(np.ravel(y_pred)[0])
            return {"grade": _clamp_grade(val)}
    except Exception as e_df:
        print(f"[WARN] DF inference failed → fallback string. Reason: {e_df}")
    # 2) fallback: pipeline that expects a plain string (single TF-IDF case)
    try:
        text = f"{prof} | {course}"
        y_pred = pipeline.predict([text])
        val = float(np.ravel(y_pred)[0])
        return {"grade": _clamp_grade(val)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Inference error: {e}") from e