Harshilforworks committed on
Commit
c375045
·
verified ·
1 Parent(s): 59b59fc

Upload 8 files

Browse files
Files changed (8) hide show
  1. Dockerfile +26 -0
  2. app.py +253 -0
  3. feature_cols.pkl +3 -0
  4. label_encoder.pkl +3 -0
  5. meta_model.pkl +3 -0
  6. nn_model.pkl +3 -0
  7. requirements.txt +9 -0
  8. scaler.pkl +3 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.12 full image (not slim)
FROM python:3.12

# Set working directory
WORKDIR /app

# Install system dependencies
# build-essential supplies compilers for any packages without prebuilt wheels.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code and models
# NOTE(review): this places the .pkl artifacts directly in /app, while app.py
# reads them from a "mediguard_models" directory — confirm the intended layout.
COPY . .

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MediGuard Disease Prediction API
3
+ FastAPI application for Hugging Face Spaces deployment
4
+ """
5
+
6
import logging
from pathlib import Path
from typing import Any, Dict, List

import joblib
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
14
+
15
# Setup logging.
# Fix: the original used `logging.getLogger(_name_)` — `_name_` is undefined
# (the dunder underscores were lost); it must be the module dunder __name__.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
18
+
19
# Application object — served by uvicorn (see the __main__ block / Dockerfile CMD).
app = FastAPI(
    title="MediGuard Disease Prediction API",
    version="1.0.0",
    description="AI-powered disease prediction using stacking ensemble",
)
25
+
26
# Permit browser clients from any origin to call the API (public demo Space).
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
34
+
35
# Model directory.
# NOTE(review): the .pkl artifacts in this repo sit at the top level and the
# Dockerfile's `COPY . .` places them in /app, not /app/mediguard_models.
# Fall back to the current directory when the folder is absent so startup can
# still find the pickles — TODO confirm the intended layout.
_PREFERRED_MODEL_DIR = Path("mediguard_models")
MODEL_DIR = _PREFERRED_MODEL_DIR if _PREFERRED_MODEL_DIR.is_dir() else Path(".")

# Global model state, populated by the startup hook; None until loaded.
rf_model = None
nn_model = None
meta_model = None
scaler = None
label_encoder = None
feature_cols = None
45
+
46
# Disease labels the downstream backend accepts; the label encoder's classes
# must be a subset of this set (checked at startup).
BACKEND_ALLOWED_DISEASES = {
    "ACS",
    "Anemia",
    "ArrhythmiaRisk",
    "CoronaryArteryDisease",
    "Diabetes",
    "Dyslipidemia",
    "Healthy",
    "Hepatitis",
    "HyperthyroidismLike",
    "Hypertension",
    "InfectionInflammation",
    "IronDeficiencyAnemia",
    "KidneyImpairment",
    "MetabolicSyndrome",
    "NAFLD",
    "Obesity",
    "Polycythemia",
    "Prediabetes",
    "Severe Inflammation",
    "ThalassemiaMajorLike",
    "ThalassemiaTrait",
    "Thromboc",  # NOTE(review): looks truncated — duplicate of Thrombocytopenia? confirm against training labels
    "Thrombocytopenia",
}
56
+
57
+
58
# Pydantic models
class PatientData(BaseModel):
    """Request body for /predict: raw biomarker values for one patient.

    Values must be supplied in the exact order reported by the /features
    endpoint (the order of ``feature_cols`` fixed at training time).
    """
    # NOTE(review): the example vector has 24 entries — confirm this matches
    # len(feature_cols) of the deployed model.
    features: List[float] = Field(
        ...,
        description="List of biomarker values in the correct order",
        example=[13.2, 165, 245, 280, 7.5, 4.8, 42, 88, 28, 33, 18, 32.5, 145, 92, 210, 7.8, 145, 38, 35, 28, 78, 1.1, 0.01, 2.8]
    )
66
+
67
+
68
class PredictionResponse(BaseModel):
    """Response body for /predict.

    Fix: ``top_3_predictions`` entries mix a string value ("disease") with a
    float value ("confidence"); ``List[Dict[str, float]]`` made pydantic try
    to coerce the disease name to float and fail response validation on
    every prediction, so the value type must be ``Any``.
    """
    predicted_disease: str
    confidence: float
    top_3_predictions: List[Dict[str, Any]]
73
+
74
+
75
class HealthResponse(BaseModel):
    """Response body for /health."""
    status: str         # "healthy" once models are loaded, else "not_ready"
    model_loaded: bool  # True when the startup hook finished successfully
    feature_count: int  # number of expected biomarker features (0 until loaded)
80
+
81
+
82
@app.on_event("startup")
async def load_models():
    """Load the trained ensemble artifacts into module-level globals.

    Populates ``rf_model``, ``nn_model``, ``meta_model``, ``scaler``,
    ``label_encoder`` and ``feature_cols`` from ``MODEL_DIR``, then checks
    that every label-encoder class is known to the backend.

    Raises:
        ValueError: if the label encoder contains unexpected disease classes.
        Exception: re-raised unchanged on any artifact-loading failure so the
            app fails fast instead of serving with missing models.
    """
    global rf_model, nn_model, meta_model, scaler, label_encoder, feature_cols

    try:
        logger.info("Loading models...")

        # Load every pickled artifact produced at training time.
        # NOTE(review): rf_model.pkl is expected alongside the other pickles —
        # verify it is actually present in the deployed model directory.
        rf_model = joblib.load(MODEL_DIR / "rf_model.pkl")
        nn_model = joblib.load(MODEL_DIR / "nn_model.pkl")
        meta_model = joblib.load(MODEL_DIR / "meta_model.pkl")
        scaler = joblib.load(MODEL_DIR / "scaler.pkl")
        label_encoder = joblib.load(MODEL_DIR / "label_encoder.pkl")
        feature_cols = joblib.load(MODEL_DIR / "feature_cols.pkl")

        logger.info("✓ Models loaded successfully!")  # plain string: no placeholders
        logger.info(f"✓ Feature count: {len(feature_cols)}")
        logger.info(f"✓ Classes: {list(label_encoder.classes_)}")

        # Fail fast if the model was trained on classes the backend rejects.
        invalid_classes = set(label_encoder.classes_) - BACKEND_ALLOWED_DISEASES
        if invalid_classes:
            logger.error(f"Invalid classes found: {invalid_classes}")
            raise ValueError("Model contains invalid disease classes")

    except Exception as e:
        logger.error(f"❌ Error loading models: {e}")
        raise
111
+
112
+
113
def predict_disease(patient_features: np.ndarray):
    """
    Predict disease using the stacking ensemble.

    Args:
        patient_features: 1-D array of biomarker values, ordered exactly as
            in ``feature_cols``.

    Returns:
        Tuple of (disease, confidence, top_3_predictions), where
        top_3_predictions is a list of {"disease": str, "confidence": float}.

    Raises:
        ValueError: if the number of features does not match ``feature_cols``.
    """
    # Validate feature-vector length against the training-time columns.
    if len(patient_features) != len(feature_cols):
        raise ValueError(
            f"Expected {len(feature_cols)} features, got {len(patient_features)}"
        )

    # Scale features (sklearn transformers expect a 2-D batch).
    X_scaled = scaler.transform([patient_features]).astype(np.float32)

    # Base-learner probability outputs become the meta-learner's features.
    rf_probs = rf_model.predict_proba(X_scaled)
    nn_probs = nn_model.predict_proba(X_scaled)
    X_meta = np.hstack([rf_probs, nn_probs])

    # Final prediction from the meta-learner.
    y_pred = meta_model.predict(X_meta)[0]
    y_proba = meta_model.predict_proba(X_meta)[0]

    # NOTE(review): indexing y_proba by the encoded label assumes the
    # meta-model's classes_ are exactly 0..n-1 — confirm with the training code.
    disease = label_encoder.inverse_transform([y_pred])[0]
    confidence = float(y_proba[y_pred])

    # Top-3: one vectorized inverse_transform call instead of one per class.
    top_3_idx = np.argsort(y_proba)[-3:][::-1]
    top_3_labels = label_encoder.inverse_transform(top_3_idx)
    top_3 = [
        {"disease": label, "confidence": float(y_proba[idx])}
        for label, idx in zip(top_3_labels, top_3_idx)
    ]

    return disease, confidence, top_3
158
+
159
+
160
@app.get("/", response_model=Dict[str, Any])
async def root():
    """Root endpoint: API banner plus a map of the available endpoints.

    Returns:
        Mapping with the API name, version, and an ``endpoints`` sub-mapping.
    """
    # Fix: "endpoints" is itself a dict, so the declared response model must
    # allow arbitrary values — Dict[str, str] failed response validation.
    return {
        "message": "MediGuard Disease Prediction API",
        "version": "1.0.0",
        "endpoints": {
            "health": "/health",
            "predict": "/predict (POST)",
            "features": "/features",
            "docs": "/docs"
        }
    }
173
+
174
+
175
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service readiness (suitable for liveness/readiness probes)."""
    models_ready = rf_model is not None
    n_features = len(feature_cols) if feature_cols else 0
    return HealthResponse(
        status="healthy" if models_ready else "not_ready",
        model_loaded=models_ready,
        feature_count=n_features,
    )
183
+
184
+
185
@app.get("/features", response_model=Dict[str, Any])
async def get_features():
    """Return the ordered biomarker feature names the model expects.

    Raises:
        HTTPException: 503 while the models are still loading.
    """
    if feature_cols is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # Fix: "count" (int) and "example" (str) are not List[str]; the declared
    # Dict[str, List[str]] response model failed validation, so allow Any.
    return {
        "features": feature_cols,
        "count": len(feature_cols),
        "example": "Use /predict endpoint with biomarker values in this exact order"
    }
196
+
197
+
198
@app.post("/predict", response_model=PredictionResponse)
async def predict(patient_data: PatientData):
    """
    Predict disease from patient biomarker data.

    Args:
        patient_data: PatientData object with the list of feature values.

    Returns:
        PredictionResponse with the predicted disease, its confidence, and
        the top-3 candidate predictions.

    Raises:
        HTTPException: 503 before startup completes, 400 on a bad feature
            vector, 500 on any other prediction failure.
    """
    # Check that *every* artifact is loaded, not just the random forest —
    # a partially failed startup would otherwise surface as a 500 later.
    if any(
        m is None
        for m in (rf_model, nn_model, meta_model, scaler, label_encoder, feature_cols)
    ):
        raise HTTPException(
            status_code=503,
            detail="Models not loaded. Please wait for startup to complete."
        )

    try:
        # Convert to numpy array; bad values raise ValueError (handled below).
        features = np.array(patient_data.features, dtype=np.float32)

        disease, confidence, top_3 = predict_disease(features)

        logger.info(f"Prediction: {disease} ({confidence*100:.2f}%)")

        return PredictionResponse(
            predicted_disease=disease,
            confidence=confidence,
            top_3_predictions=top_3
        )

    except ValueError as e:
        # Wrong feature count or unparseable values → client error.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
236
+
237
+
238
@app.get("/diseases", response_model=Dict[str, Any])
async def get_diseases():
    """Return every disease label the model can output.

    Raises:
        HTTPException: 503 while the models are still loading.
    """
    if label_encoder is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # Fix: "count" is an int, which the declared Dict[str, List[str]]
    # response model rejected — allow heterogeneous values instead.
    return {
        "diseases": list(label_encoder.classes_),
        "count": len(label_encoder.classes_)
    }
248
+
249
+
250
# Local entry point; in the container the Dockerfile CMD runs this directly.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
feature_cols.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727a7b27128a86b6c495e236910115895c53c1026b3c416e189fecc4caa56379
3
+ size 415
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595bd4bd63b39638c7724c7a1ce8ee923fc62523a303e928ff9a92721caa452d
3
+ size 618
meta_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d1a6f4a54720522ac7c1200bd548a32f481958dacf65fdc3758a92759e2ccf7
3
+ size 2959
nn_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02e60489b8b5cb6098117913e87c87ee6e97224108266b8b2d0dec5f98617b2
3
+ size 230460
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ pydantic
3
+ numpy
4
+ pandas
5
+ scikit-learn
6
+ joblib
7
+ xgboost
8
+ lightgbm
9
+ uvicorn
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f58fb0494f11fb10a7673657fd8b385506c4dedeae5edcf8664c413b9c2ae87
3
+ size 1191