Harshilforworks committed on
Commit
c375045
·
verified ·
1 Parent(s): 59b59fc

Upload 8 files

Browse files
Files changed (8) hide show
  1. Dockerfile +26 -0
  2. app.py +253 -0
  3. feature_cols.pkl +3 -0
  4. label_encoder.pkl +3 -0
  5. meta_model.pkl +3 -0
  6. nn_model.pkl +3 -0
  7. requirements.txt +9 -0
  8. scaler.pkl +3 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.12 full image (not slim)
FROM python:3.12

# Set working directory
WORKDIR /app

# Install system dependencies
# build-essential supplies compilers for any packages without prebuilt wheels.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code and models
# NOTE(review): this places the .pkl artifacts directly in /app, while app.py
# reads them from a "mediguard_models" directory — confirm the intended layout.
COPY . .

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MediGuard Disease Prediction API
3
+ FastAPI application for Hugging Face Spaces deployment
4
+ """
5
+
6
import logging
from pathlib import Path
from typing import Any, Dict, List

import joblib
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
14
+
15
# Setup logging.
# Fix: the original used `logging.getLogger(_name_)` — `_name_` is undefined
# (the dunder underscores were lost); it must be the module dunder __name__.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
18
+
19
# Application object — served by uvicorn (see the __main__ block / Dockerfile CMD).
app = FastAPI(
    title="MediGuard Disease Prediction API",
    version="1.0.0",
    description="AI-powered disease prediction using stacking ensemble",
)
25
+
26
# Permit browser clients from any origin to call the API (public demo Space).
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
34
+
35
# Model directory.
# NOTE(review): the .pkl artifacts in this repo sit at the top level and the
# Dockerfile's `COPY . .` places them in /app, not /app/mediguard_models.
# Fall back to the current directory when the folder is absent so startup can
# still find the pickles — TODO confirm the intended layout.
_PREFERRED_MODEL_DIR = Path("mediguard_models")
MODEL_DIR = _PREFERRED_MODEL_DIR if _PREFERRED_MODEL_DIR.is_dir() else Path(".")

# Global model state, populated by the startup hook; None until loaded.
rf_model = None
nn_model = None
meta_model = None
scaler = None
label_encoder = None
feature_cols = None
45
+
46
# Disease labels the downstream backend accepts; the label encoder's classes
# must be a subset of this set (checked at startup).
BACKEND_ALLOWED_DISEASES = {
    "ACS",
    "Anemia",
    "ArrhythmiaRisk",
    "CoronaryArteryDisease",
    "Diabetes",
    "Dyslipidemia",
    "Healthy",
    "Hepatitis",
    "HyperthyroidismLike",
    "Hypertension",
    "InfectionInflammation",
    "IronDeficiencyAnemia",
    "KidneyImpairment",
    "MetabolicSyndrome",
    "NAFLD",
    "Obesity",
    "Polycythemia",
    "Prediabetes",
    "Severe Inflammation",
    "ThalassemiaMajorLike",
    "ThalassemiaTrait",
    "Thromboc",  # NOTE(review): looks truncated — duplicate of Thrombocytopenia? confirm against training labels
    "Thrombocytopenia",
}
56
+
57
+
58
# Pydantic models
class PatientData(BaseModel):
    """Request body for /predict: raw biomarker values for one patient.

    Values must be supplied in the exact order reported by the /features
    endpoint (the order of ``feature_cols`` fixed at training time).
    """
    # NOTE(review): the example vector has 24 entries — confirm this matches
    # len(feature_cols) of the deployed model.
    features: List[float] = Field(
        ...,
        description="List of biomarker values in the correct order",
        example=[13.2, 165, 245, 280, 7.5, 4.8, 42, 88, 28, 33, 18, 32.5, 145, 92, 210, 7.8, 145, 38, 35, 28, 78, 1.1, 0.01, 2.8]
    )
66
+
67
+
68
class PredictionResponse(BaseModel):
    """Response body for /predict.

    Fix: ``top_3_predictions`` entries mix a string value ("disease") with a
    float value ("confidence"); ``List[Dict[str, float]]`` made pydantic try
    to coerce the disease name to float and fail response validation on
    every prediction, so the value type must be ``Any``.
    """
    predicted_disease: str
    confidence: float
    top_3_predictions: List[Dict[str, Any]]
73
+
74
+
75
class HealthResponse(BaseModel):
    """Response body for /health."""
    status: str         # "healthy" once models are loaded, else "not_ready"
    model_loaded: bool  # True when the startup hook finished successfully
    feature_count: int  # number of expected biomarker features (0 until loaded)
80
+
81
+
82
@app.on_event("startup")
async def load_models():
    """Load the trained ensemble artifacts into module-level globals.

    Populates ``rf_model``, ``nn_model``, ``meta_model``, ``scaler``,
    ``label_encoder`` and ``feature_cols`` from ``MODEL_DIR``, then checks
    that every label-encoder class is known to the backend.

    Raises:
        ValueError: if the label encoder contains unexpected disease classes.
        Exception: re-raised unchanged on any artifact-loading failure so the
            app fails fast instead of serving with missing models.
    """
    global rf_model, nn_model, meta_model, scaler, label_encoder, feature_cols

    try:
        logger.info("Loading models...")

        # Load every pickled artifact produced at training time.
        # NOTE(review): rf_model.pkl is expected alongside the other pickles —
        # verify it is actually present in the deployed model directory.
        rf_model = joblib.load(MODEL_DIR / "rf_model.pkl")
        nn_model = joblib.load(MODEL_DIR / "nn_model.pkl")
        meta_model = joblib.load(MODEL_DIR / "meta_model.pkl")
        scaler = joblib.load(MODEL_DIR / "scaler.pkl")
        label_encoder = joblib.load(MODEL_DIR / "label_encoder.pkl")
        feature_cols = joblib.load(MODEL_DIR / "feature_cols.pkl")

        logger.info("✓ Models loaded successfully!")  # plain string: no placeholders
        logger.info(f"✓ Feature count: {len(feature_cols)}")
        logger.info(f"✓ Classes: {list(label_encoder.classes_)}")

        # Fail fast if the model was trained on classes the backend rejects.
        invalid_classes = set(label_encoder.classes_) - BACKEND_ALLOWED_DISEASES
        if invalid_classes:
            logger.error(f"Invalid classes found: {invalid_classes}")
            raise ValueError("Model contains invalid disease classes")

    except Exception as e:
        logger.error(f"❌ Error loading models: {e}")
        raise
111
+
112
+
113
def predict_disease(patient_features: np.ndarray):
    """
    Predict disease using the stacking ensemble.

    Args:
        patient_features: 1-D array of biomarker values, ordered exactly as
            in ``feature_cols``.

    Returns:
        Tuple of (disease, confidence, top_3_predictions), where
        top_3_predictions is a list of {"disease": str, "confidence": float}.

    Raises:
        ValueError: if the number of features does not match ``feature_cols``.
    """
    # Validate feature-vector length against the training-time columns.
    if len(patient_features) != len(feature_cols):
        raise ValueError(
            f"Expected {len(feature_cols)} features, got {len(patient_features)}"
        )

    # Scale features (sklearn transformers expect a 2-D batch).
    X_scaled = scaler.transform([patient_features]).astype(np.float32)

    # Base-learner probability outputs become the meta-learner's features.
    rf_probs = rf_model.predict_proba(X_scaled)
    nn_probs = nn_model.predict_proba(X_scaled)
    X_meta = np.hstack([rf_probs, nn_probs])

    # Final prediction from the meta-learner.
    y_pred = meta_model.predict(X_meta)[0]
    y_proba = meta_model.predict_proba(X_meta)[0]

    # NOTE(review): indexing y_proba by the encoded label assumes the
    # meta-model's classes_ are exactly 0..n-1 — confirm with the training code.
    disease = label_encoder.inverse_transform([y_pred])[0]
    confidence = float(y_proba[y_pred])

    # Top-3: one vectorized inverse_transform call instead of one per class.
    top_3_idx = np.argsort(y_proba)[-3:][::-1]
    top_3_labels = label_encoder.inverse_transform(top_3_idx)
    top_3 = [
        {"disease": label, "confidence": float(y_proba[idx])}
        for label, idx in zip(top_3_labels, top_3_idx)
    ]

    return disease, confidence, top_3
158
+
159
+
160
@app.get("/", response_model=Dict[str, Any])
async def root():
    """Root endpoint: API banner plus a map of the available endpoints.

    Returns:
        Mapping with the API name, version, and an ``endpoints`` sub-mapping.
    """
    # Fix: "endpoints" is itself a dict, so the declared response model must
    # allow arbitrary values — Dict[str, str] failed response validation.
    return {
        "message": "MediGuard Disease Prediction API",
        "version": "1.0.0",
        "endpoints": {
            "health": "/health",
            "predict": "/predict (POST)",
            "features": "/features",
            "docs": "/docs"
        }
    }
173
+
174
+
175
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service readiness (suitable for liveness/readiness probes)."""
    models_ready = rf_model is not None
    n_features = len(feature_cols) if feature_cols else 0
    return HealthResponse(
        status="healthy" if models_ready else "not_ready",
        model_loaded=models_ready,
        feature_count=n_features,
    )
183
+
184
+
185
@app.get("/features", response_model=Dict[str, Any])
async def get_features():
    """Return the ordered biomarker feature names the model expects.

    Raises:
        HTTPException: 503 while the models are still loading.
    """
    if feature_cols is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # Fix: "count" (int) and "example" (str) are not List[str]; the declared
    # Dict[str, List[str]] response model failed validation, so allow Any.
    return {
        "features": feature_cols,
        "count": len(feature_cols),
        "example": "Use /predict endpoint with biomarker values in this exact order"
    }
196
+
197
+
198
@app.post("/predict", response_model=PredictionResponse)
async def predict(patient_data: PatientData):
    """
    Predict disease from patient biomarker data.

    Args:
        patient_data: PatientData object with the list of feature values.

    Returns:
        PredictionResponse with the predicted disease, its confidence, and
        the top-3 candidate predictions.

    Raises:
        HTTPException: 503 before startup completes, 400 on a bad feature
            vector, 500 on any other prediction failure.
    """
    # Check that *every* artifact is loaded, not just the random forest —
    # a partially failed startup would otherwise surface as a 500 later.
    if any(
        m is None
        for m in (rf_model, nn_model, meta_model, scaler, label_encoder, feature_cols)
    ):
        raise HTTPException(
            status_code=503,
            detail="Models not loaded. Please wait for startup to complete."
        )

    try:
        # Convert to numpy array; bad values raise ValueError (handled below).
        features = np.array(patient_data.features, dtype=np.float32)

        disease, confidence, top_3 = predict_disease(features)

        logger.info(f"Prediction: {disease} ({confidence*100:.2f}%)")

        return PredictionResponse(
            predicted_disease=disease,
            confidence=confidence,
            top_3_predictions=top_3
        )

    except ValueError as e:
        # Wrong feature count or unparseable values → client error.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
236
+
237
+
238
@app.get("/diseases", response_model=Dict[str, Any])
async def get_diseases():
    """Return every disease label the model can output.

    Raises:
        HTTPException: 503 while the models are still loading.
    """
    if label_encoder is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # Fix: "count" is an int, which the declared Dict[str, List[str]]
    # response model rejected — allow heterogeneous values instead.
    return {
        "diseases": list(label_encoder.classes_),
        "count": len(label_encoder.classes_)
    }
248
+
249
+
250
# Local entry point; in the container the Dockerfile CMD runs this directly.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
feature_cols.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727a7b27128a86b6c495e236910115895c53c1026b3c416e189fecc4caa56379
3
+ size 415
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595bd4bd63b39638c7724c7a1ce8ee923fc62523a303e928ff9a92721caa452d
3
+ size 618
meta_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d1a6f4a54720522ac7c1200bd548a32f481958dacf65fdc3758a92759e2ccf7
3
+ size 2959
nn_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02e60489b8b5cb6098117913e87c87ee6e97224108266b8b2d0dec5f98617b2
3
+ size 230460
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ pydantic
3
+ numpy
4
+ pandas
5
+ scikit-learn
6
+ joblib
7
+ xgboost
8
+ lightgbm
9
+ uvicorn
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f58fb0494f11fb10a7673657fd8b385506c4dedeae5edcf8664c413b9c2ae87
3
+ size 1191