Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- Dockerfile +26 -0
- app.py +253 -0
- feature_cols.pkl +3 -0
- label_encoder.pkl +3 -0
- meta_model.pkl +3 -0
- nn_model.pkl +3 -0
- requirements.txt +9 -0
- scaler.pkl +3 -0
Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use the full Python 3.12 image (not slim): the full image ships the
# toolchain the scientific wheels may need at install time.
FROM python:3.12

# All subsequent paths are relative to /app
WORKDIR /app

# System build tools for packages that compile native extensions;
# clean the apt cache to keep the layer small.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements alone first so dependency layers cache across code edits
COPY requirements.txt .

# Upgrade pip, then install the Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Bring in the application code and model artifacts
COPY . .

# 7860 is the default port Hugging Face Spaces routes to
EXPOSE 7860

# Launch the API (app.py starts uvicorn itself)
CMD ["python", "app.py"]
app.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MediGuard Disease Prediction API
|
| 3 |
+
FastAPI application for Hugging Face Spaces deployment
|
| 4 |
+
"""
|
import logging
import os
from pathlib import Path
from typing import Dict, List

import joblib
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
# Setup module-level logging.
logging.basicConfig(level=logging.INFO)
# BUG FIX: the original called logging.getLogger(_name_); `_name_` is not
# defined (the module dunder is `__name__`), so importing the module raised
# NameError and the app never started.
logger = logging.getLogger(__name__)
# Create the FastAPI application with its OpenAPI metadata.
app = FastAPI(
    title="MediGuard Disease Prediction API",
    description="AI-powered disease prediction using stacking ensemble",
    version="1.0.0",
)

# Allow cross-origin requests from any origin.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# very permissive, and browsers refuse credentialed requests against a
# wildcard origin — consider listing explicit origins; confirm requirements.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Directory holding the trained model artifacts.
# BUG FIX / generalization: this upload places the .pkl files at the repo
# root, not in a "mediguard_models" subfolder, so the original hard-coded
# path could never resolve. The directory is now overridable via the
# MODEL_DIR environment variable and falls back to the repo root when the
# default directory is absent (backward compatible when the folder exists).
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "mediguard_models"))
if not MODEL_DIR.is_dir():
    MODEL_DIR = Path(".")

# Module-level model handles, populated by the startup hook; None until then.
rf_model = None
nn_model = None
meta_model = None
scaler = None
label_encoder = None
feature_cols = None
# Disease labels the backend accepts; the startup hook refuses to serve a
# model whose classes fall outside this set.
# NOTE(review): "Thromboc" looks like a truncated duplicate of
# "Thrombocytopenia" — confirm against the backend's canonical list.
BACKEND_ALLOWED_DISEASES = {
    "ACS",
    "Anemia",
    "ArrhythmiaRisk",
    "CoronaryArteryDisease",
    "Diabetes",
    "Dyslipidemia",
    "Healthy",
    "Hepatitis",
    "Hypertension",
    "HyperthyroidismLike",
    "InfectionInflammation",
    "IronDeficiencyAnemia",
    "KidneyImpairment",
    "MetabolicSyndrome",
    "NAFLD",
    "Obesity",
    "Polycythemia",
    "Prediabetes",
    "Severe Inflammation",
    "ThalassemiaMajorLike",
    "ThalassemiaTrait",
    "Thromboc",
    "Thrombocytopenia",
}
# Request schema for /predict.
class PatientData(BaseModel):
    """Patient biomarker data for prediction.

    `features` must be supplied in exactly the order reported by the
    /features endpoint.
    """
    features: List[float] = Field(
        ...,
        description="List of biomarker values in the correct order",
        example=[13.2, 165, 245, 280, 7.5, 4.8, 42, 88, 28, 33, 18, 32.5, 145, 92, 210, 7.8, 145, 38, 35, 28, 78, 1.1, 0.01, 2.8]
    )
class PredictionResponse(BaseModel):
    """Response schema for /predict.

    Each entry of `top_3_predictions` has the shape
    {"disease": <name: str>, "confidence": <probability: float>}.
    """
    predicted_disease: str
    confidence: float
    # BUG FIX: these dicts mix a str value ("disease") with a float value
    # ("confidence"), so the original List[Dict[str, float]] annotation made
    # pydantic reject every response. Widen the value type to accept both.
    top_3_predictions: List[Dict[str, str | float]]
|
class HealthResponse(BaseModel):
    """Response schema for /health."""
    # "healthy" once models are loaded, "not_ready" before startup completes
    status: str
    # NOTE(review): under pydantic v2 the "model_" prefix emits a harmless
    # protected-namespace warning — confirm whether to silence it.
    model_loaded: bool
    # number of biomarker features the model expects (0 before startup)
    feature_count: int
@app.on_event("startup")
async def load_models():
    """Load every trained artifact from MODEL_DIR when the app starts.

    Populates the module-level model handles used by the endpoints and
    fails fast (re-raises) when an artifact is missing or the label encoder
    contains a disease class the backend does not accept.
    """
    # NOTE(review): rf_model.pkl is loaded here but does not appear in this
    # upload's file list — confirm it exists in the Space repository.
    global rf_model, nn_model, meta_model, scaler, label_encoder, feature_cols

    try:
        logger.info("Loading models...")

        rf_model = joblib.load(MODEL_DIR / "rf_model.pkl")
        nn_model = joblib.load(MODEL_DIR / "nn_model.pkl")
        meta_model = joblib.load(MODEL_DIR / "meta_model.pkl")
        scaler = joblib.load(MODEL_DIR / "scaler.pkl")
        label_encoder = joblib.load(MODEL_DIR / "label_encoder.pkl")
        feature_cols = joblib.load(MODEL_DIR / "feature_cols.pkl")

        logger.info(f"✓ Models loaded successfully!")
        logger.info(f"✓ Feature count: {len(feature_cols)}")
        logger.info(f"✓ Classes: {list(label_encoder.classes_)}")

        # Refuse to serve a model whose classes the backend cannot handle.
        invalid_classes = set(label_encoder.classes_) - BACKEND_ALLOWED_DISEASES
        if invalid_classes:
            logger.error(f"Invalid classes found: {invalid_classes}")
            raise ValueError("Model contains invalid disease classes")

    except Exception as e:
        logger.error(f"❌ Error loading models: {e}")
        raise
def predict_disease(patient_features: np.ndarray):
    """
    Predict a disease for one patient using the stacking ensemble.

    Args:
        patient_features: Array of biomarker values, ordered as in
            `feature_cols`.

    Returns:
        Tuple of (disease, confidence, top_3_predictions), where the last
        item is a list of {"disease": str, "confidence": float} dicts,
        best first.

    Raises:
        ValueError: if the feature count does not match `feature_cols`.
    """
    # Reject vectors of the wrong length before touching the models.
    if len(patient_features) != len(feature_cols):
        raise ValueError(
            f"Expected {len(feature_cols)} features, got {len(patient_features)}"
        )

    # Scale the single-row input; float32 presumably matches training — confirm.
    X_scaled = scaler.transform([patient_features]).astype(np.float32)

    # Each base learner emits a row of class probabilities; their
    # concatenation is the meta-learner's feature vector.
    X_meta = np.hstack([
        rf_model.predict_proba(X_scaled),
        nn_model.predict_proba(X_scaled),
    ])

    # Final decision comes from the meta-learner.
    y_pred = meta_model.predict(X_meta)[0]
    y_proba = meta_model.predict_proba(X_meta)[0]

    # NOTE(review): indexing y_proba by the encoded label assumes the
    # meta-model's classes_ are exactly 0..n-1 (true when it was trained on
    # LabelEncoder output) — confirm.
    disease = label_encoder.inverse_transform([y_pred])[0]
    confidence = float(y_proba[y_pred])

    # Indices of the three most probable classes, highest first.
    top_3_idx = np.argsort(y_proba)[-3:][::-1]
    top_3 = [
        {
            "disease": label_encoder.inverse_transform([idx])[0],
            "confidence": float(y_proba[idx]),
        }
        for idx in top_3_idx
    ]

    return disease, confidence, top_3
# BUG FIX: the original declared response_model=Dict[str, str], but the
# payload nests a dict under "endpoints", so FastAPI's response validation
# rejected every request to "/". Let FastAPI serialize the plain dict.
@app.get("/")
async def root():
    """Root endpoint: service banner plus a map of the available routes."""
    return {
        "message": "MediGuard Disease Prediction API",
        "version": "1.0.0",
        "endpoints": {
            "health": "/health",
            "predict": "/predict (POST)",
            "features": "/features",
            "docs": "/docs"
        }
    }
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check: report whether the models have finished loading."""
    # rf_model stands in for the whole bundle: the startup hook loads all
    # artifacts together or raises.
    loaded = rf_model is not None
    return HealthResponse(
        status="healthy" if loaded else "not_ready",
        model_loaded=loaded,
        feature_count=len(feature_cols) if feature_cols else 0
    )
# BUG FIX: the original declared response_model=Dict[str, List[str]], but the
# payload mixes a list ("features"), an int ("count") and a str ("example"),
# so response validation rejected it. Return the plain dict instead.
@app.get("/features")
async def get_features():
    """Get the required features, in the exact order /predict expects."""
    if feature_cols is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    return {
        "features": feature_cols,
        "count": len(feature_cols),
        "example": "Use /predict endpoint with biomarker values in this exact order"
    }
@app.post("/predict", response_model=PredictionResponse)
async def predict(patient_data: PatientData):
    """
    Predict a disease from patient biomarker data.

    Args:
        patient_data: PatientData object with the list of feature values.

    Returns:
        PredictionResponse with the predicted disease and confidence.

    Raises:
        HTTPException: 503 before models load, 400 on a malformed feature
            vector, 500 on any other prediction failure.
    """
    # Refuse to predict until the startup hook has populated the models.
    if rf_model is None:
        raise HTTPException(
            status_code=503,
            detail="Models not loaded. Please wait for startup to complete."
        )

    try:
        features = np.array(patient_data.features, dtype=np.float32)
        disease, confidence, top_3 = predict_disease(features)
        logger.info(f"Prediction: {disease} ({confidence*100:.2f}%)")
        return PredictionResponse(
            predicted_disease=disease,
            confidence=confidence,
            top_3_predictions=top_3
        )
    except ValueError as e:
        # Wrong feature count -> the caller's fault.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        # Anything else is a server-side failure.
        logger.error(f"Prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
# BUG FIX: the original declared response_model=Dict[str, List[str]], but
# "count" is an int, so response validation rejected the payload. Return the
# plain dict instead.
@app.get("/diseases")
async def get_diseases():
    """Get the list of all disease classes the model can predict."""
    if label_encoder is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    return {
        "diseases": list(label_encoder.classes_),
        "count": len(label_encoder.classes_)
    }
# Local-testing entry point: serve on the Hugging Face Spaces default port.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
feature_cols.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:727a7b27128a86b6c495e236910115895c53c1026b3c416e189fecc4caa56379
|
| 3 |
+
size 415
|
label_encoder.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:595bd4bd63b39638c7724c7a1ce8ee923fc62523a303e928ff9a92721caa452d
|
| 3 |
+
size 618
|
meta_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d1a6f4a54720522ac7c1200bd548a32f481958dacf65fdc3758a92759e2ccf7
|
| 3 |
+
size 2959
|
nn_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d02e60489b8b5cb6098117913e87c87ee6e97224108266b8b2d0dec5f98617b2
|
| 3 |
+
size 230460
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
pydantic
|
| 3 |
+
numpy
|
| 4 |
+
pandas
|
| 5 |
+
scikit-learn
|
| 6 |
+
joblib
|
| 7 |
+
xgboost
|
| 8 |
+
lightgbm
|
| 9 |
+
uvicorn
|
scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f58fb0494f11fb10a7673657fd8b385506c4dedeae5edcf8664c413b9c2ae87
|
| 3 |
+
size 1191
|