Spaces:

Harshilforworks
/

Redact_ml_model_agni

Sleeping

App Files Files Community

Harshilforworks committed on Nov 23, 2025

Commit

b16913c

verified ·

1 Parent(s): bbc267d

Update app.py

Browse files

Files changed (1) hide show

app.py +363 -2

app.py CHANGED Viewed

@@ -1,3 +1,54 @@
     print(f"✓ {n_classes} disease classes")
     print(f"✓ Meta input shape: ({len(base_models)} models × {n_classes} classes) = {expected_meta_features}")
@@ -34,6 +85,262 @@
     MODELS_LOADED = True
 except Exception as e:
     MODELS_LOADED = False
         "version": "2.0",
         "status": "ready" if MODELS_LOADED else "error",
         "endpoints": {
@@ -47,6 +354,11 @@ except Exception as e:
 def health():
     return {
         "status": "healthy" if MODELS_LOADED else "models_not_loaded",
     }
@@ -54,13 +366,54 @@ def health():
 def predict_api(patient: PatientInput):
     """
     API endpoint for disease prediction
-    return PredictionResult(**result)
     """
 @app.post("/api/debug")
 def debug_prediction(patient: PatientInput):
     DEBUG ENDPOINT - Returns detailed prediction breakdown
     if not MODELS_LOADED:
         raise HTTPException(status_code=503, detail="Models not loaded")
@@ -152,3 +505,11 @@ def debug_prediction(patient: PatientInput):
         "feature_count": len(features_list),
         "meta_input_shape": list(meta_input.shape)
     }

+#!/usr/bin/env python3
+"""
+MEDIGUARD ULTIMATE - PRODUCTION BACKEND
+✓ Matches training EXACTLY (all 60+ features from training script)
+✓ Pydantic V2 compatible
+✓ 6 base models + neural meta-learner
+✓ No warnings, production-ready
+✓ FastAPI only (no Gradio)
+"""
+import numpy as np
+import pandas as pd
+import joblib
+from pathlib import Path
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Dict, List, Any
+import warnings
+# Suppress all warnings
+warnings.filterwarnings("ignore")
+# ============================================================
+# 1) LOAD MODELS
+# ============================================================
MODEL_DIR = Path("models")

# Defaults so every model-related global exists even if loading fails below.
# Request handlers must still gate on MODELS_LOADED before using any of them.
le = scaler = meta = None
features_list = []
base_models = []
n_classes = 0
expected_meta_features = 0

print("🏥 Loading MediGuard Ultimate models...")
try:
    # NOTE(security): joblib.load unpickles arbitrary Python objects; MODEL_DIR
    # must only ever contain trusted artifacts.
    le = joblib.load(MODEL_DIR / "label_encoder.pkl")
    scaler = joblib.load(MODEL_DIR / "scaler.pkl")
    features_list = joblib.load(MODEL_DIR / "features.pkl")
    meta = joblib.load(MODEL_DIR / "meta_neural.pkl")

    # Base models are discovered by filename. sorted() pins their order, and
    # predict_disease() stacks their probabilities in exactly this order.
    for model_path in sorted(MODEL_DIR.glob("base_*.pkl")):
        try:
            model = joblib.load(model_path)
        except Exception as e:
            # Best effort per file: report and keep going. Whether the
            # remaining stack is still usable is validated after the loop.
            print(f"  ⚠️  Failed to load {model_path.stem}: {e}")
            continue
        name = model_path.stem.replace("base_", "")
        base_models.append((name, model))
        print(f"  ✓ Loaded {name}")

    n_classes = len(le.classes_)
    expected_meta_features = len(base_models) * n_classes
    print(f"✓ Loaded {len(base_models)} base models")
    print(f"✓ Loaded {len(features_list)} features")
    print(f"✓ {n_classes} disease classes")
    print(f"✓ Meta input shape: ({len(base_models)} models × {n_classes} classes) = {expected_meta_features}")

    # With zero base models there is nothing to stack, so every prediction
    # would fail. Surface that at startup instead of on each request.
    if not base_models:
        raise RuntimeError(f"No base models matching base_*.pkl could be loaded from {MODEL_DIR}")

    # If the meta-learner records its training input width (scikit-learn
    # estimators expose n_features_in_), make sure the loaded stack matches it.
    # A skipped base model would otherwise only show up as a per-request error.
    meta_width = getattr(meta, "n_features_in_", None)
    if meta_width is not None and meta_width != expected_meta_features:
        raise RuntimeError(
            f"Meta-learner expects {meta_width} input features but the loaded "
            f"base models provide {expected_meta_features}"
        )

    MODELS_LOADED = True
except Exception as e:
    MODELS_LOADED = False
    print(f"❌ Error loading models: {e}")
    import traceback
    traceback.print_exc()
+# ============================================================
+# 2) PYDANTIC V2 MODELS
+# ============================================================
class PatientInput(BaseModel):
    """Request body for the prediction endpoints (pydantic v2).

    Every field is a required float. Fields whose original column name
    contains spaces or a hyphen declare that name as an alias;
    ``populate_by_name=True`` lets clients send either the alias
    ("White Blood Cells") or the Python attribute name ("White_Blood_Cells").
    """

    # allow_inf_nan=False rejects NaN / +-Infinity at validation time (HTTP 422)
    # instead of letting non-finite values flow into the scaler and models.
    model_config = ConfigDict(populate_by_name=True, allow_inf_nan=False)

    Glucose: float
    Cholesterol: float
    Hemoglobin: float
    Platelets: float
    White_Blood_Cells: float = Field(..., alias="White Blood Cells")
    Red_Blood_Cells: float = Field(..., alias="Red Blood Cells")
    Hematocrit: float
    Mean_Corpuscular_Volume: float = Field(..., alias="Mean Corpuscular Volume")
    Mean_Corpuscular_Hemoglobin: float = Field(..., alias="Mean Corpuscular Hemoglobin")
    Mean_Corpuscular_Hemoglobin_Concentration: float = Field(
        ..., alias="Mean Corpuscular Hemoglobin Concentration"
    )
    Insulin: float
    BMI: float
    Systolic_Blood_Pressure: float = Field(..., alias="Systolic Blood Pressure")
    Diastolic_Blood_Pressure: float = Field(..., alias="Diastolic Blood Pressure")
    Triglycerides: float
    HbA1c: float
    LDL_Cholesterol: float = Field(..., alias="LDL Cholesterol")
    HDL_Cholesterol: float = Field(..., alias="HDL Cholesterol")
    ALT: float
    AST: float
    Heart_Rate: float = Field(..., alias="Heart Rate")
    Creatinine: float
    Troponin: float
    C_reactive_Protein: float = Field(..., alias="C-reactive Protein")
class PredictionResult(BaseModel):
    """Response schema returned by the prediction endpoint."""

    # Label of the top-scoring class.
    prediction: str
    # Probability assigned to that class.
    confidence: float
    # Ranked candidates; predict_disease() emits {"disease", "probability"} dicts.
    top_5_predictions: List[Dict[str, Any]]
    # The raw measurements the prediction was computed from, echoed back.
    raw_values: Dict[str, float]
    # Free-form diagnostics about the model stack.
    model_info: Dict[str, Any]
+# ============================================================
+# 3) FEATURE ENGINEERING (EXACT MATCH TO TRAINING)
+# ============================================================
# Raw input columns that engineer_features() reads. "Insulin" and "Heart Rate"
# are part of the request schema but are not used by any derived feature.
_REQUIRED_RAW_COLUMNS = (
    "Glucose",
    "Cholesterol",
    "Hemoglobin",
    "Platelets",
    "White Blood Cells",
    "Red Blood Cells",
    "Hematocrit",
    "Mean Corpuscular Volume",
    "Mean Corpuscular Hemoglobin",
    "Mean Corpuscular Hemoglobin Concentration",
    "BMI",
    "Systolic Blood Pressure",
    "Diastolic Blood Pressure",
    "Triglycerides",
    "HbA1c",
    "LDL Cholesterol",
    "HDL Cholesterol",
    "ALT",
    "AST",
    "Creatinine",
    "Troponin",
    "C-reactive Protein",
)


def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with 42 derived feature columns appended.

    The input frame is not modified. Derived columns are appended in a fixed
    order after the existing ones. Ratios add 1e-6 to the denominator to avoid
    division by zero.

    NOTE(review): these formulas are presumably meant to mirror the
    training-time feature engineering. That cannot be verified from this file,
    so do not change the arithmetic without checking the training script.

    Args:
        df: Frame holding at least the columns in ``_REQUIRED_RAW_COLUMNS``.

    Returns:
        A new DataFrame: the original columns plus the engineered ones.

    Raises:
        KeyError: If any required raw column is missing (all are listed).
    """
    missing = [col for col in _REQUIRED_RAW_COLUMNS if col not in df.columns]
    if missing:
        raise KeyError(f"engineer_features: missing raw input columns: {missing}")

    out = df.copy()
    eps = 1e-6  # denominator guard used by every ratio below

    # Short aliases for the raw columns (read-only views of the inputs).
    glucose = out["Glucose"]
    cholesterol = out["Cholesterol"]
    hemoglobin = out["Hemoglobin"]
    platelets = out["Platelets"]
    wbc = out["White Blood Cells"]
    rbc = out["Red Blood Cells"]
    hematocrit = out["Hematocrit"]
    mcv = out["Mean Corpuscular Volume"]
    mch = out["Mean Corpuscular Hemoglobin"]
    mchc = out["Mean Corpuscular Hemoglobin Concentration"]
    bmi = out["BMI"]
    systolic = out["Systolic Blood Pressure"]
    diastolic = out["Diastolic Blood Pressure"]
    triglycerides = out["Triglycerides"]
    hba1c = out["HbA1c"]
    ldl = out["LDL Cholesterol"]
    hdl = out["HDL Cholesterol"]
    alt = out["ALT"]
    ast = out["AST"]
    creatinine = out["Creatinine"]
    troponin = out["Troponin"]
    crp = out["C-reactive Protein"]

    # === C-reactive protein ===
    out["CRP_WBC"] = crp * wbc
    out["CRP_squared"] = crp ** 2
    out["CRP_cubed"] = crp ** 3

    # === Diabetes ===
    out["Glucose_HbA1c_ratio"] = glucose / (hba1c + eps)
    out["Glucose_HbA1c_product"] = glucose * hba1c
    out["Glucose_squared"] = glucose ** 2
    out["HbA1c_squared"] = hba1c ** 2
    out["Diabetes_composite"] = glucose * 0.5 + hba1c * 0.5
    out["Glucose_HbA1c_Triglycerides"] = glucose * hba1c * triglycerides

    # === Anemia / thalassemia ===
    out["RBC_Hemoglobin"] = rbc * hemoglobin
    out["RBC_Hemoglobin_ratio"] = rbc / (hemoglobin + eps)
    out["Hemoglobin_squared"] = hemoglobin ** 2
    out["RBC_squared"] = rbc ** 2
    out["Anemia_comprehensive"] = (hemoglobin * rbc * hematocrit) / (mcv + eps)
    out["Iron_deficiency"] = hemoglobin / (mcv + eps)
    out["MCV_MCH_interaction"] = mcv * mch
    out["MCH_MCHC_ratio"] = mch / (mchc + eps)
    # Builds on the derived RBC/hemoglobin ratio computed just above.
    out["Thalassemia_marker"] = mcv * out["RBC_Hemoglobin_ratio"]

    # === Platelets ===
    out["Platelet_squared"] = platelets ** 2
    out["Platelet_WBC_ratio"] = platelets / (wbc + eps)
    out["Platelet_RBC_ratio"] = platelets / (rbc + eps)
    out["Platelet_Hemoglobin"] = platelets * hemoglobin
    out["Platelet_RBC_interaction"] = platelets * rbc
    out["Thrombocytopenia_marker"] = platelets * wbc

    # === Lipids ===
    out["Cholesterol_HDL_ratio"] = cholesterol / (hdl + eps)
    out["LDL_HDL_ratio"] = ldl / (hdl + eps)
    out["Atherogenic_index"] = (cholesterol - hdl) / (hdl + eps)
    out["Triglycerides_HDL_ratio"] = triglycerides / (hdl + eps)
    out["Total_lipid"] = cholesterol + triglycerides + ldl

    # === Liver ===
    out["AST_ALT_ratio"] = ast / (alt + eps)
    out["Liver_damage"] = ast * alt
    out["ALT_squared"] = alt ** 2

    # === Kidney ===
    out["eGFR_proxy"] = 1 / (creatinine + eps)
    out["Kidney_stress"] = creatinine * systolic

    # === Metabolic syndrome ===
    out["MetS_comprehensive"] = (
        glucose * 0.3
        + triglycerides * 0.3
        + bmi * 0.2
        + systolic * 0.2
    )
    out["MetS_product"] = glucose * triglycerides * (1 / (hdl + eps))

    # === Cardiac ===
    out["Cardiac_risk"] = troponin * crp
    out["Blood_pressure_product"] = systolic * diastolic

    # === Cross-interactions ===
    out["Glucose_CRP"] = glucose * crp
    out["Hemoglobin_CRP"] = hemoglobin * crp
    out["Platelet_Glucose"] = platelets * glucose
    out["RBC_Platelet"] = rbc * platelets

    return out
+# ============================================================
+# 4) PREDICTION PIPELINE
+# ============================================================
def _error_result(
    raw_values: Dict[str, float],
    message: str,
    model_info: Dict[str, Any],
    **extra: Any,
) -> Dict[str, Any]:
    """Build the error-shaped result dict shared by every failure path."""
    return {
        "error": message,
        **extra,
        "prediction": "Error",
        "confidence": 0.0,
        "top_5_predictions": [],
        "raw_values": raw_values,
        "model_info": model_info,
    }


def predict_disease(raw_values: Dict[str, float]) -> Dict[str, Any]:
    """Run the full stacked-ensemble prediction for one patient.

    Pipeline: raw values -> engineer_features -> align to ``features_list``
    (absent columns filled with 0.0) -> float32 -> ``scaler.transform`` ->
    each base model's ``predict_proba`` -> horizontal stack -> meta-learner
    ``predict_proba`` -> argmax plus top-5.

    Args:
        raw_values: Mapping of raw measurement name to value, keyed by the
            column names that ``engineer_features`` expects.

    Returns:
        On success, a dict with ``prediction``, ``confidence``,
        ``top_5_predictions``, ``raw_values`` and ``model_info``.
        On any failure the same keys carry placeholder values and an
        ``error`` key is added. Unexpected exceptions also add ``traceback``.
        This function never raises; callers must check for ``"error"``.
    """
    if not MODELS_LOADED:
        return _error_result(
            raw_values, "Models not loaded", {"error": "models_not_loaded"}
        )

    try:
        # Raw values go in unscaled; scaling happens after feature engineering.
        df_engineered = engineer_features(pd.DataFrame([raw_values]))

        # Features the saved list expects but the engineering step did not
        # produce. They are zero-filled, as before, and now also reported in
        # model_info so a feature-list mismatch is visible to the caller.
        zero_filled = [
            str(feat) for feat in features_list if feat not in df_engineered.columns
        ]

        # One reindex both adds the missing columns (as 0.0) and puts the
        # columns into features_list order.
        aligned = df_engineered.reindex(columns=features_list, fill_value=0.0)
        X = aligned.to_numpy(dtype=np.float32)
        X_scaled = scaler.transform(X)

        # Stack every base model's class probabilities side by side, in
        # base_models order, to form the meta-learner input.
        base_probs = [model.predict_proba(X_scaled) for _, model in base_models]
        meta_input = np.hstack(base_probs).astype(np.float32)

        expected_shape = (1, len(base_models) * n_classes)
        if meta_input.shape != expected_shape:
            return _error_result(
                raw_values,
                f"Meta input shape mismatch: {meta_input.shape} vs {expected_shape}",
                {
                    "base_models": len(base_models),
                    "n_classes": n_classes,
                    "expected_shape": list(expected_shape),
                    "actual_shape": list(meta_input.shape),
                },
            )

        probs = meta.predict_proba(meta_input)[0]

        # The winner is taken from argmax (first maximum on ties), separately
        # from the top-5 ranking, exactly as before.
        pred_idx = np.argmax(probs)
        prediction = le.inverse_transform([pred_idx])[0]
        confidence = float(probs[pred_idx])

        # Highest-probability classes first; decode all labels in one call.
        top5_indices = np.argsort(probs)[-5:][::-1]
        top5_labels = le.inverse_transform(top5_indices)
        top5 = [
            {"disease": label, "probability": float(probs[idx])}
            for label, idx in zip(top5_labels, top5_indices)
        ]

        return {
            "prediction": prediction,
            "confidence": confidence,
            "top_5_predictions": top5,
            "raw_values": raw_values,
            "model_info": {
                "base_models": len(base_models),
                "features_used": len(features_list),
                "meta_input_shape": list(meta_input.shape),
                "n_classes": n_classes,
                "zero_filled_features": zero_filled,
            },
        }
    except Exception as e:
        import traceback

        # NOTE(security): the traceback is kept in the result for debugging.
        # Callers that forward this dict to API clients should strip it first.
        return _error_result(
            raw_values,
            str(e),
            {"error": "prediction_failed"},
            traceback=traceback.format_exc(),
        )
+# ============================================================
+# 5) FASTAPI APP
+# ============================================================
+app = FastAPI(title="MediGuard Ultimate API", version="2.0")
+@app.get("/")
+def root():
+    return {
+        "message": "MediGuard Ultimate API",
         "version": "2.0",
         "status": "ready" if MODELS_LOADED else "error",
         "endpoints": {
 def health():
     return {
         "status": "healthy" if MODELS_LOADED else "models_not_loaded",
+        "models_loaded": MODELS_LOADED,
+        "base_models": len(base_models) if MODELS_LOADED else 0,
+        "features": len(features_list) if MODELS_LOADED else 0,
+        "classes": n_classes if MODELS_LOADED else 0,
+        "model_names": [name for name, _ in base_models] if MODELS_LOADED else []
     }
 def predict_api(patient: PatientInput):
     """
     API endpoint for disease prediction
+    Returns:
+        PredictionResult with prediction, confidence, top-5, and model info
     """
+    if not MODELS_LOADED:
+        raise HTTPException(status_code=503, detail="Models not loaded")
+    # Convert Pydantic model to dict with correct keys (matching training)
+    raw_values = {
+        "Glucose": patient.Glucose,
+        "Cholesterol": patient.Cholesterol,
+        "Hemoglobin": patient.Hemoglobin,
+        "Platelets": patient.Platelets,
+        "White Blood Cells": patient.White_Blood_Cells,
+        "Red Blood Cells": patient.Red_Blood_Cells,
+        "Hematocrit": patient.Hematocrit,
+        "Mean Corpuscular Volume": patient.Mean_Corpuscular_Volume,
+        "Mean Corpuscular Hemoglobin": patient.Mean_Corpuscular_Hemoglobin,
+        "Mean Corpuscular Hemoglobin Concentration": patient.Mean_Corpuscular_Hemoglobin_Concentration,
+        "Insulin": patient.Insulin,
+        "BMI": patient.BMI,
+        "Systolic Blood Pressure": patient.Systolic_Blood_Pressure,
+        "Diastolic Blood Pressure": patient.Diastolic_Blood_Pressure,
+        "Triglycerides": patient.Triglycerides,
+        "HbA1c": patient.HbA1c,
+        "LDL Cholesterol": patient.LDL_Cholesterol,
+        "HDL Cholesterol": patient.HDL_Cholesterol,
+        "ALT": patient.ALT,
+        "AST": patient.AST,
+        "Heart Rate": patient.Heart_Rate,
+        "Creatinine": patient.Creatinine,
+        "Troponin": patient.Troponin,
+        "C-reactive Protein": patient.C_reactive_Protein
+    }
+    result = predict_disease(raw_values)
+    if "error" in result:
+        raise HTTPException(status_code=500, detail=result)
+    return PredictionResult(**result)
 @app.post("/api/debug")
 def debug_prediction(patient: PatientInput):
+    """
     DEBUG ENDPOINT - Returns detailed prediction breakdown
+    """
     if not MODELS_LOADED:
         raise HTTPException(status_code=503, detail="Models not loaded")
         "feature_count": len(features_list),
         "meta_input_shape": list(meta_input.shape)
     }
+# ============================================================
+# 6) RUN SERVER
+# ============================================================
if __name__ == "__main__":
    # uvicorn is imported here because it is only needed when this file is
    # executed directly as a script.
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860.
    serve(app, host="0.0.0.0", port=7860)