Spaces:
Sleeping
Sleeping
"""Numeric block — inference.

Loads the trained regressor and classifier and returns:

- a predicted obesity classification (7-class, with per-class
  probabilities),
- a predicted BMI from the regression head,
- a personalized daily calorie target derived from Mifflin-St Jeor.

The CV-derived ``high_caloric_meal`` flag overrides the user's
self-reported ``FAVC`` feature before inference. This is the load-bearing
integration point between the computer-vision block and the numeric
classifier.
"""
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from .obesity import OBESITY_LEVELS, apply_favc_override | |
| from .profile import ( | |
| TRAINING_AGE_RANGE, | |
| TRAINING_BMI_RANGE, | |
| TRAINING_HEIGHT_RANGE_CM, | |
| TRAINING_WEIGHT_RANGE_KG, | |
| bmi_to_band, | |
| daily_target_kcal, | |
| ) | |
# Severity gap (in OBESITY_LEVELS index positions) at which the classifier
# probabilities are blended with the BMI-band rule. Adjacent-class
# disagreements (gap=1) are tolerated — those are genuinely borderline cases.
# A gap of 2+ means the model is materially wrong about severity, typically
# because the conditional (gender, BMI) combination is rare in training.
_OOD_SEVERITY_GAP = 2
# Mixing weight given to the one-hot prior on the BMI-band class when
# blending it with the classifier's probabilities for OOD inputs; the
# classifier output keeps the remaining (1 - weight).
_OOD_BAND_PRIOR_WEIGHT = 0.7
# Directory expected to hold the serialized model artifacts: a "models"
# folder located two levels above the directory containing this file.
MODELS_DIR = Path(__file__).resolve().parents[2] / "models"
# Module-level cache of the loaded artifacts. Empty until _load() succeeds;
# afterwards it is returned as-is on every subsequent call.
_state: dict[str, Any] = {}
def _load() -> dict[str, Any]:
    """Load the model artifacts once and return the shared cache.

    Returns
    -------
    dict[str, Any]
        The module-level ``_state`` cache with the keys ``"regressor"``,
        ``"classifier"``, ``"label_encoder"`` (``None`` when the encoder file
        is absent) and ``"metadata"``. An empty dict is returned when the
        regressor, the classifier or the metadata file is missing, so callers
        can fall back to a model-free path.

    Notes
    -----
    Artifacts are first loaded into a local dict and only then published to
    ``_state``. If any load raises, the cache stays empty and the next call
    retries, instead of serving a half-filled cache that would surface later
    as a ``KeyError``.
    """
    if _state:
        return _state

    reg_path = MODELS_DIR / "numeric_regressor.pkl"
    clf_path = MODELS_DIR / "numeric_classifier.pkl"
    enc_path = MODELS_DIR / "numeric_label_encoder.pkl"
    meta_path = MODELS_DIR / "numeric_metadata.json"

    # The encoder is optional; the other three artifacts are mandatory.
    if not all(path.exists() for path in (reg_path, clf_path, meta_path)):
        return {}

    # NOTE: joblib.load unpickles arbitrary objects — MODELS_DIR must only
    # ever contain trusted, project-produced artifacts.
    loaded: dict[str, Any] = {
        "regressor": joblib.load(reg_path),
        "classifier": joblib.load(clf_path),
        "label_encoder": joblib.load(enc_path) if enc_path.exists() else None,
        # JSON is UTF-8 by specification; do not depend on the locale default.
        "metadata": json.loads(meta_path.read_text(encoding="utf-8")),
    }

    # Publish in one step so the cache is either empty or complete.
    _state.update(loaded)
    return _state
def _detect_anomalies(
    weight_kg: float,
    height_cm: float,
    age: int,
    bmi_raw: float,
    bmi_band: str,
    model_class: str,
    gender: str,
) -> list[str]:
    """Collect warnings for profiles outside the classifier's training distribution.

    The UCI Obesity Levels dataset is a small (n=2111), partly synthetic
    sample with strong conditional skew (e.g., Obesity_Type_III is 99.7%
    Female). The returned messages let the UI caution the user before the
    classifier output is taken at face value.

    Checks, in order: weight, height, BMI and age against the training
    ranges; the index distance in ``OBESITY_LEVELS`` between the BMI band and
    the classifier's class; and the male / Obesity_Type_III combination.

    Returns
    -------
    list[str]
        One message per triggered check; empty when nothing is unusual.
    """
    messages: list[str] = []

    weight_lo, weight_hi = TRAINING_WEIGHT_RANGE_KG
    if weight_kg < weight_lo:
        messages.append(f"Weight {weight_kg:.0f} kg is below the trained range ({weight_lo:.0f}–{weight_hi:.0f} kg).")
    elif weight_kg > weight_hi:
        messages.append(f"Weight {weight_kg:.0f} kg is above the trained range ({weight_lo:.0f}–{weight_hi:.0f} kg).")

    height_lo, height_hi = TRAINING_HEIGHT_RANGE_CM
    if height_cm < height_lo:
        messages.append(f"Height {height_cm:.0f} cm is below the trained range ({height_lo:.0f}–{height_hi:.0f} cm).")
    elif height_cm > height_hi:
        messages.append(f"Height {height_cm:.0f} cm is above the trained range ({height_lo:.0f}–{height_hi:.0f} cm).")

    bmi_lo, bmi_hi = TRAINING_BMI_RANGE
    if bmi_raw < bmi_lo or bmi_raw > bmi_hi:
        messages.append(f"BMI {bmi_raw:.1f} is outside the trained range ({bmi_lo:.1f}–{bmi_hi:.1f}).")

    age_lo, age_hi = TRAINING_AGE_RANGE
    if age < age_lo or age > age_hi:
        messages.append(f"Age {age} is outside the trained range ({age_lo}–{age_hi} years).")

    # Severity disagreement, measured as a distance in class index. Labels
    # that are not part of OBESITY_LEVELS count as "no disagreement".
    if bmi_band in OBESITY_LEVELS and model_class in OBESITY_LEVELS:
        class_gap = abs(OBESITY_LEVELS.index(bmi_band) - OBESITY_LEVELS.index(model_class))
    else:
        class_gap = 0
    if class_gap >= _OOD_SEVERITY_GAP:
        messages.append(
            f"Classifier says **{model_class.replace('_', ' ')}** but BMI "
            f"{bmi_raw:.1f} falls into **{bmi_band.replace('_', ' ')}** "
            f"— a {class_gap}-class gap."
        )

    # Conditionally-rare combination: Obesity_Type_III is almost entirely
    # female in the training data, so a male profile whose BMI band is
    # Type_III sits in unreliable territory. Gender is only inspected once
    # the band matches.
    if bmi_band == "Obesity_Type_III":
        if gender.lower().startswith("m"):
            messages.append(
                "Obesity Type III training data is 99.7% female — male predictions "
                "at this BMI are extrapolations."
            )

    return messages
def _blend_with_bmi_band(
    proba_by_name: dict[str, float],
    bmi_band: str,
    classes: list[str],
) -> dict[str, float]:
    """Blend classifier probabilities with a one-hot prior on ``bmi_band``.

    Each class in ``classes`` receives ``(1 - w)`` times its classifier
    probability (0.0 when the class is missing from ``proba_by_name``) plus
    ``w`` when it is the BMI-band class, where ``w`` is
    ``_OOD_BAND_PRIOR_WEIGHT``. The result is renormalized to sum to 1; an
    all-zero blend is returned unchanged rather than divided by zero.

    The blend is deterministic and the returned dict follows the order of
    ``classes``.
    """
    prior_weight = _OOD_BAND_PRIOR_WEIGHT
    mixed: dict[str, float] = {
        label: (1.0 - prior_weight) * proba_by_name.get(label, 0.0)
        + prior_weight * (1.0 if label == bmi_band else 0.0)
        for label in classes
    }
    # Guard against a zero total so normalization never divides by zero.
    total = sum(mixed.values()) or 1.0
    return {label: share / total for label, share in mixed.items()}
| def _build_feature_row(profile: dict, feature_columns: list[str]) -> pd.DataFrame: | |
| """Map the user's form input to a single-row DataFrame matching training columns.""" | |
| row: dict[str, Any] = {col: 0 for col in feature_columns} | |
| numeric = { | |
| "Age": float(profile.get("age", 30)), | |
| "Height": float(profile.get("height_cm", 170)) / 100.0, | |
| "Weight": float(profile.get("weight_kg", 70)), | |
| "FCVC": float(profile.get("FCVC", 2.0)), | |
| "NCP": float(profile.get("NCP", 3.0)), | |
| "CH2O": float(profile.get("CH2O", 2.0)), | |
| "FAF": float(profile.get("FAF", 1.0)), | |
| "TUE": float(profile.get("TUE", 1.0)), | |
| } | |
| for k, v in numeric.items(): | |
| if k in row: | |
| row[k] = v | |
| categorical = { | |
| "Gender": profile.get("Gender", "Male"), | |
| "family_history_with_overweight": profile.get("family_history_with_overweight", "no"), | |
| "FAVC": profile.get("FAVC", "no"), | |
| "CAEC": profile.get("CAEC", "Sometimes"), | |
| "SMOKE": profile.get("SMOKE", "no"), | |
| "SCC": profile.get("SCC", "no"), | |
| "CALC": profile.get("CALC", "no"), | |
| "MTRANS": profile.get("MTRANS", "Public_Transportation"), | |
| } | |
| for prefix, value in categorical.items(): | |
| target_col = f"{prefix}_{value}" | |
| if target_col in row: | |
| row[target_col] = 1 | |
| return pd.DataFrame([row], columns=feature_columns) | |
def _class_names_for_proba(classifier: Any, encoder: Any) -> list[str]:
    """Return the label names that correspond to the ``predict_proba`` columns."""
    if encoder is not None:
        return [str(name) for name in encoder.classes_]
    # Without an encoder, trust the classifier's own class list when it holds
    # recognizable label strings: predict_proba columns follow classes_.
    fitted = getattr(classifier, "classes_", None)
    if fitted is not None:
        names = [str(name) for name in fitted]
        if names and all(name in OBESITY_LEVELS for name in names):
            return names
    # Last resort: assume the columns already follow OBESITY_LEVELS.
    return list(OBESITY_LEVELS)


def predict(profile: dict, nutrition: dict | None = None) -> dict:
    """Run the numeric pipeline for one user.

    Parameters
    ----------
    profile : dict
        Form inputs. ``weight_kg``, ``height_cm`` and ``age`` are required;
        ``Gender`` defaults to ``"Male"``, ``activity_level`` to
        ``"moderate"`` and ``goal`` to ``"maintain"``. The caller's dict is
        copied, not mutated.
    nutrition : dict | None
        Output of the CV block. When provided and the meal is
        high-caloric, the FAVC feature is overridden upstream of the
        classifier.

    Returns
    -------
    dict
        Always contains ``obesity_class``, ``obesity_probabilities`` (keyed
        in ``OBESITY_LEVELS`` order), ``predicted_bmi``,
        ``daily_target_kcal``, ``favc_overridden``, ``bmi_raw``,
        ``bmi_band``, ``anomaly_flags``, ``ood_blended`` and ``models``.
        When the trained models are available it additionally contains
        ``model_class`` and ``model_probabilities`` (the classifier output
        before any blending). When they are not, the BMI band is reported
        with probability 1 and ``predicted_bmi`` equals the raw BMI.

    Raises
    ------
    KeyError
        If ``weight_kg``, ``height_cm`` or ``age`` is missing.
    ZeroDivisionError
        If ``height_cm`` is zero.
    """
    state = _load()

    profile = dict(profile)
    profile.setdefault("activity_level", "moderate")
    profile.setdefault("goal", "maintain")

    weight_kg = float(profile["weight_kg"])
    height_cm = float(profile["height_cm"])
    age = int(profile["age"])
    gender = profile.get("Gender", "Male")

    bmi_raw = weight_kg / ((height_cm / 100.0) ** 2)
    bmi_band = bmi_to_band(bmi_raw)
    target_kcal = daily_target_kcal(
        age, weight_kg, height_cm, gender, profile["activity_level"], profile["goal"],
    )

    if not state:
        # No trained artifacts: report the BMI rule as a one-hot result.
        return {
            "obesity_class": bmi_band,
            "obesity_probabilities": {c: (1.0 if c == bmi_band else 0.0) for c in OBESITY_LEVELS},
            "predicted_bmi": round(bmi_raw, 2),
            "daily_target_kcal": round(target_kcal, 1),
            "favc_overridden": False,
            "bmi_raw": round(bmi_raw, 2),
            "bmi_band": bmi_band,
            "anomaly_flags": ["Models unavailable — falling back to BMI rule."],
            "ood_blended": False,
            "models": {"regressor": "untrained_fallback", "classifier": "untrained_fallback"},
        }

    feature_cols = state["metadata"]["feature_columns"]
    x = _build_feature_row(profile, feature_cols)

    # Record the self-reported FAVC before the CV override so we can tell
    # whether the override actually flipped it from 0 to 1.
    original_favc_yes = int(x["FAVC_yes"].iloc[0]) if "FAVC_yes" in x.columns else 0
    x = apply_favc_override(x, nutrition)
    overridden = (
        "FAVC_yes" in x.columns
        and int(x["FAVC_yes"].iloc[0]) == 1
        and original_favc_yes == 0
    )

    bmi_pred = float(state["regressor"].predict(x)[0])
    proba = state["classifier"].predict_proba(x)[0]

    # The label encoder sorts labels alphabetically, so the probability
    # columns are in alphabetical order — not the severity-ranked order we
    # author in OBESITY_LEVELS. Map probabilities through the actual class
    # names before exposing them; the result dicts below re-order them to the
    # canonical sequence so downstream UIs render rows in severity order.
    class_names = _class_names_for_proba(state["classifier"], state.get("label_encoder"))
    proba_by_name = {name: float(p) for name, p in zip(class_names, proba)}
    model_class = class_names[int(np.argmax(proba))]

    anomaly_flags = _detect_anomalies(
        weight_kg=weight_kg, height_cm=height_cm, age=age,
        bmi_raw=bmi_raw, bmi_band=bmi_band, model_class=model_class, gender=gender,
    )

    # Blend the classifier with the BMI rule when the input is materially
    # OOD: weight/BMI outside the trained range, a 2-class severity gap with
    # the BMI band, or a known conditional rarity (Male × Obesity_Type_III:
    # only 1 male example in 324 Type_III rows). Adjacent-class disagreements
    # without an OOD signal stay untouched — those are genuinely borderline
    # cases and the model handles them well.
    weight_ood = weight_kg < TRAINING_WEIGHT_RANGE_KG[0] or weight_kg > TRAINING_WEIGHT_RANGE_KG[1]
    bmi_ood = bmi_raw < TRAINING_BMI_RANGE[0] or bmi_raw > TRAINING_BMI_RANGE[1]
    try:
        severity_gap = abs(OBESITY_LEVELS.index(bmi_band) - OBESITY_LEVELS.index(model_class))
    except ValueError:
        # A label outside OBESITY_LEVELS carries no severity rank; treat it
        # as "no gap", exactly as _detect_anomalies does, instead of crashing.
        severity_gap = 0
    male_type3_rarity = (
        bmi_band == "Obesity_Type_III"
        and gender.lower().startswith("m")
        and model_class != "Obesity_Type_III"
    )
    needs_blend = (
        weight_ood
        or bmi_ood
        or severity_gap >= _OOD_SEVERITY_GAP
        or male_type3_rarity
    )

    final_proba = proba_by_name
    final_class = model_class
    blended = False
    if needs_blend:
        final_proba = _blend_with_bmi_band(proba_by_name, bmi_band, OBESITY_LEVELS)
        final_class = max(final_proba, key=final_proba.get)
        blended = True

    return {
        "obesity_class": final_class,
        "obesity_probabilities": {c: final_proba.get(c, 0.0) for c in OBESITY_LEVELS},
        "predicted_bmi": round(bmi_pred, 2),
        "daily_target_kcal": round(target_kcal, 1),
        "favc_overridden": bool(overridden),
        "bmi_raw": round(bmi_raw, 2),
        "bmi_band": bmi_band,
        "model_class": model_class,
        "model_probabilities": {c: proba_by_name.get(c, 0.0) for c in OBESITY_LEVELS},
        "anomaly_flags": anomaly_flags,
        "ood_blended": blended,
        "models": {
            "regressor": state["metadata"]["regressor"]["name"],
            "classifier": state["metadata"]["classifier"]["name"],
        },
    }