Harshilforworks committed on
Commit
b16913c
·
verified ·
1 Parent(s): bbc267d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +363 -2
app.py CHANGED
@@ -1,3 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  print(f"✓ {n_classes} disease classes")
2
  print(f"✓ Meta input shape: ({len(base_models)} models × {n_classes} classes) = {expected_meta_features}")
3
 
@@ -34,6 +85,262 @@
34
  MODELS_LOADED = True
35
  except Exception as e:
36
  MODELS_LOADED = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "version": "2.0",
38
  "status": "ready" if MODELS_LOADED else "error",
39
  "endpoints": {
@@ -47,6 +354,11 @@ except Exception as e:
47
  def health():
48
  return {
49
  "status": "healthy" if MODELS_LOADED else "models_not_loaded",
 
 
 
 
 
50
  }
51
 
52
 
@@ -54,13 +366,54 @@ def health():
54
  def predict_api(patient: PatientInput):
55
  """
56
  API endpoint for disease prediction
57
- return PredictionResult(**result)
 
 
58
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  @app.post("/api/debug")
61
  def debug_prediction(patient: PatientInput):
 
62
  DEBUG ENDPOINT - Returns detailed prediction breakdown
63
-
64
  if not MODELS_LOADED:
65
  raise HTTPException(status_code=503, detail="Models not loaded")
66
 
@@ -152,3 +505,11 @@ def debug_prediction(patient: PatientInput):
152
  "feature_count": len(features_list),
153
  "meta_input_shape": list(meta_input.shape)
154
  }
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ MEDIGUARD ULTIMATE - PRODUCTION BACKEND
4
+ ✓ Matches training EXACTLY (all 60+ features from training script)
5
+ ✓ Pydantic V2 compatible
6
+ ✓ 6 base models + neural meta-learner
7
+ ✓ No warnings, production-ready
8
+ ✓ FastAPI only (no Gradio)
9
+ """
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ import joblib
14
+ from pathlib import Path
15
+ from fastapi import FastAPI, HTTPException
16
+ from pydantic import BaseModel, ConfigDict, Field
17
+ from typing import Dict, List, Any
18
+ import warnings
19
+
20
+ # Suppress all warnings
21
+ warnings.filterwarnings("ignore")
22
+
23
+ # ============================================================
24
+ # 1) LOAD MODELS
25
+ # ============================================================
26
+ MODEL_DIR = Path("models")
27
+
28
+ print("🏥 Loading MediGuard Ultimate models...")
29
+
30
+ try:
31
+ le = joblib.load(MODEL_DIR / "label_encoder.pkl")
32
+ scaler = joblib.load(MODEL_DIR / "scaler.pkl")
33
+ features_list = joblib.load(MODEL_DIR / "features.pkl")
34
+ meta = joblib.load(MODEL_DIR / "meta_neural.pkl")
35
+
36
+ # Load ALL 6 base models (critical!)
37
+ base_models = []
38
+ for f in sorted(MODEL_DIR.glob("base_*.pkl")):
39
+ try:
40
+ model = joblib.load(f)
41
+ name = f.stem.replace("base_", "")
42
+ base_models.append((name, model))
43
+ print(f" ✓ Loaded {name}")
44
+ except Exception as e:
45
+ print(f" ⚠️ Failed to load {f.stem}: {e}")
46
+
47
+ n_classes = len(le.classes_)
48
+ expected_meta_features = len(base_models) * n_classes
49
+
50
+ print(f"✓ Loaded {len(base_models)} base models")
51
+ print(f"✓ Loaded {len(features_list)} features")
52
  print(f"✓ {n_classes} disease classes")
53
  print(f"✓ Meta input shape: ({len(base_models)} models × {n_classes} classes) = {expected_meta_features}")
54
 
 
85
  MODELS_LOADED = True
86
  except Exception as e:
87
  MODELS_LOADED = False
88
+ print(f"❌ Error loading models: {e}")
89
+ import traceback
90
+ traceback.print_exc()
91
+
92
+
93
+ # ============================================================
94
+ # 2) PYDANTIC V2 MODELS
95
+ # ============================================================
96
class PatientInput(BaseModel):
    """Request body: the 24 raw measurements for a single patient.

    Fields whose column name contains a space or hyphen are declared under a
    Python-safe attribute name and carry the original column name as their
    alias. ``populate_by_name=True`` allows a payload to use either the
    alias or the attribute name. Every field is required.
    """

    model_config = ConfigDict(populate_by_name=True)

    # NOTE: declaration order is kept as-is; it determines the order of the
    # generated schema and of any dump of this model.
    Glucose: float
    Cholesterol: float
    Hemoglobin: float
    Platelets: float
    White_Blood_Cells: float = Field(..., alias="White Blood Cells")
    Red_Blood_Cells: float = Field(..., alias="Red Blood Cells")
    Hematocrit: float
    Mean_Corpuscular_Volume: float = Field(
        ..., alias="Mean Corpuscular Volume"
    )
    Mean_Corpuscular_Hemoglobin: float = Field(
        ..., alias="Mean Corpuscular Hemoglobin"
    )
    Mean_Corpuscular_Hemoglobin_Concentration: float = Field(
        ..., alias="Mean Corpuscular Hemoglobin Concentration"
    )
    Insulin: float
    BMI: float
    Systolic_Blood_Pressure: float = Field(
        ..., alias="Systolic Blood Pressure"
    )
    Diastolic_Blood_Pressure: float = Field(
        ..., alias="Diastolic Blood Pressure"
    )
    Triglycerides: float
    HbA1c: float
    LDL_Cholesterol: float = Field(..., alias="LDL Cholesterol")
    HDL_Cholesterol: float = Field(..., alias="HDL Cholesterol")
    ALT: float
    AST: float
    Heart_Rate: float = Field(..., alias="Heart Rate")
    Creatinine: float
    Troponin: float
    C_reactive_Protein: float = Field(..., alias="C-reactive Protein")
126
+
127
+
128
class PredictionResult(BaseModel):
    """Response body returned by the prediction endpoint.

    Mirrors the keys of the success dictionary built by ``predict_disease``.
    """

    # Decoded label of the highest-probability class.
    prediction: str
    # Meta-learner probability of that class.
    confidence: float
    # Entries of the form {"disease": ..., "probability": ...}, best first.
    top_5_predictions: List[Dict[str, Any]]
    # Echo of the raw input values the prediction was computed from.
    raw_values: Dict[str, float]
    # Bookkeeping about the ensemble (model count, feature count, shapes).
    model_info: Dict[str, Any]
135
+
136
+
137
+ # ============================================================
138
+ # 3) FEATURE ENGINEERING (EXACT MATCH TO TRAINING)
139
+ # ============================================================
140
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Append the 42 engineered feature columns to a frame of raw measurements.

    CRITICAL: the formulas and column names below must stay in sync with the
    training script; do not "simplify" them (several columns are intentional
    duplicates of the same product under different names).

    Args:
        df: Frame holding the raw measurement columns under their spaced
            names (e.g. "White Blood Cells", "C-reactive Protein").
            "Insulin" and "Heart Rate" are not read here.

    Returns:
        A copy of ``df`` with the engineered columns appended in a fixed
        order. The input frame is not modified.

    Raises:
        KeyError: If any raw column read by this function is absent. The
            message lists every missing column, not just the first one hit.
    """
    required = (
        "C-reactive Protein",
        "White Blood Cells",
        "Glucose",
        "HbA1c",
        "Triglycerides",
        "Red Blood Cells",
        "Hemoglobin",
        "Hematocrit",
        "Mean Corpuscular Volume",
        "Mean Corpuscular Hemoglobin",
        "Mean Corpuscular Hemoglobin Concentration",
        "Platelets",
        "Cholesterol",
        "HDL Cholesterol",
        "LDL Cholesterol",
        "AST",
        "ALT",
        "Creatinine",
        "Systolic Blood Pressure",
        "Diastolic Blood Pressure",
        "BMI",
        "Troponin",
    )
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Fail once with the full list instead of a bare KeyError for
        # whichever column happens to be read first.
        raise KeyError(f"Missing raw input columns: {missing}")

    df = df.copy()

    # Small constant added to every denominator to avoid division by zero.
    eps = 1e-6

    crp = df["C-reactive Protein"]
    wbc = df["White Blood Cells"]
    glucose = df["Glucose"]
    hba1c = df["HbA1c"]
    trig = df["Triglycerides"]
    rbc = df["Red Blood Cells"]
    hgb = df["Hemoglobin"]
    hct = df["Hematocrit"]
    mcv = df["Mean Corpuscular Volume"]
    mch = df["Mean Corpuscular Hemoglobin"]
    mchc = df["Mean Corpuscular Hemoglobin Concentration"]
    platelets = df["Platelets"]
    chol = df["Cholesterol"]
    hdl = df["HDL Cholesterol"]
    ldl = df["LDL Cholesterol"]
    ast_level = df["AST"]
    alt_level = df["ALT"]
    creatinine = df["Creatinine"]
    sbp = df["Systolic Blood Pressure"]
    dbp = df["Diastolic Blood Pressure"]
    bmi = df["BMI"]
    troponin = df["Troponin"]

    # === CORE FEATURES (CRP) ===
    df["CRP_WBC"] = crp * wbc
    df["CRP_squared"] = crp ** 2
    df["CRP_cubed"] = crp ** 3

    # === DIABETES FEATURES ===
    df["Glucose_HbA1c_ratio"] = glucose / (hba1c + eps)
    df["Glucose_HbA1c_product"] = glucose * hba1c
    df["Glucose_squared"] = glucose ** 2
    df["HbA1c_squared"] = hba1c ** 2
    df["Diabetes_composite"] = glucose * 0.5 + hba1c * 0.5
    df["Glucose_HbA1c_Triglycerides"] = glucose * hba1c * trig

    # === ANEMIA FEATURES ===
    df["RBC_Hemoglobin"] = rbc * hgb
    df["RBC_Hemoglobin_ratio"] = rbc / (hgb + eps)
    df["Hemoglobin_squared"] = hgb ** 2
    df["RBC_squared"] = rbc ** 2
    df["Anemia_comprehensive"] = (hgb * rbc * hct) / (mcv + eps)
    df["Iron_deficiency"] = hgb / (mcv + eps)
    df["MCV_MCH_interaction"] = mcv * mch
    df["MCH_MCHC_ratio"] = mch / (mchc + eps)
    # Built on the ratio column computed a few lines above.
    df["Thalassemia_marker"] = mcv * df["RBC_Hemoglobin_ratio"]

    # === PLATELET FEATURES ===
    df["Platelet_squared"] = platelets ** 2
    df["Platelet_WBC_ratio"] = platelets / (wbc + eps)
    df["Platelet_RBC_ratio"] = platelets / (rbc + eps)
    df["Platelet_Hemoglobin"] = platelets * hgb
    df["Platelet_RBC_interaction"] = platelets * rbc
    df["Thrombocytopenia_marker"] = platelets * wbc

    # === LIPID FEATURES ===
    df["Cholesterol_HDL_ratio"] = chol / (hdl + eps)
    df["LDL_HDL_ratio"] = ldl / (hdl + eps)
    df["Atherogenic_index"] = (chol - hdl) / (hdl + eps)
    df["Triglycerides_HDL_ratio"] = trig / (hdl + eps)
    df["Total_lipid"] = chol + trig + ldl

    # === LIVER FEATURES ===
    df["AST_ALT_ratio"] = ast_level / (alt_level + eps)
    df["Liver_damage"] = ast_level * alt_level
    df["ALT_squared"] = alt_level ** 2

    # === KIDNEY FEATURES ===
    df["eGFR_proxy"] = 1 / (creatinine + eps)
    df["Kidney_stress"] = creatinine * sbp

    # === METABOLIC FEATURES ===
    df["MetS_comprehensive"] = (
        glucose * 0.3
        + trig * 0.3
        + bmi * 0.2
        + sbp * 0.2
    )
    df["MetS_product"] = glucose * trig * (1 / (hdl + eps))

    # === CARDIAC FEATURES ===
    df["Cardiac_risk"] = troponin * crp
    df["Blood_pressure_product"] = sbp * dbp

    # === CROSS-INTERACTIONS ===
    df["Glucose_CRP"] = glucose * crp
    df["Hemoglobin_CRP"] = hgb * crp
    df["Platelet_Glucose"] = platelets * glucose
    # Same product as "Platelet_RBC_interaction"; kept as a separate column
    # (presumably both names are in the trained feature list; verify before
    # removing either).
    df["RBC_Platelet"] = rbc * platelets

    return df
223
+
224
+
225
+ # ============================================================
226
+ # 4) PREDICTION PIPELINE
227
+ # ============================================================
228
def predict_disease(raw_values: Dict[str, float]) -> Dict[str, Any]:
    """
    Run one patient's raw values through the stacked ensemble.

    Pipeline: raw → engineer → add_missing → reorder → scale → base_models → stack → meta

    Args:
        raw_values: Mapping of raw column name to value for a single patient.

    Returns:
        On success, a dict with "prediction", "confidence",
        "top_5_predictions", "raw_values" and "model_info".
        On failure this function does not raise: the dict additionally has
        an "error" key (plus "traceback" for unexpected exceptions),
        "prediction" is "Error" and "confidence" is 0.0.
    """

    def failure(message, info, **extra):
        # One builder for all error payloads. Key order is "error", then
        # any extras (e.g. "traceback"), then the standard result keys.
        return {
            "error": message,
            **extra,
            "prediction": "Error",
            "confidence": 0.0,
            "top_5_predictions": [],
            "raw_values": raw_values,
            "model_info": info,
        }

    if not MODELS_LOADED:
        return failure("Models not loaded", {"error": "models_not_loaded"})

    try:
        # Raw values go in unscaled; scaling happens after engineering.
        frame = engineer_features(pd.DataFrame([raw_values]))

        # Any trained feature that was not produced is filled with zeros.
        absent = [col for col in features_list if col not in frame.columns]
        for col in absent:
            frame[col] = 0.0

        # Column order must follow features_list before scaling.
        matrix = frame[features_list].values.astype(np.float32)
        scaled = scaler.transform(matrix)

        # One probability block per base model, concatenated side by side.
        per_model = [model.predict_proba(scaled) for _, model in base_models]
        meta_input = np.hstack(per_model).astype(np.float32)

        # The meta-learner expects (1, n_models * n_classes).
        expected_shape = (1, len(base_models) * n_classes)
        if meta_input.shape != expected_shape:
            return failure(
                f"Meta input shape mismatch: {meta_input.shape} vs {expected_shape}",
                {
                    "base_models": len(base_models),
                    "n_classes": n_classes,
                    "expected_shape": expected_shape,
                    "actual_shape": list(meta_input.shape),
                },
            )

        probs = meta.predict_proba(meta_input)[0]

        best = np.argmax(probs)
        label = le.inverse_transform([best])[0]
        best_prob = float(probs[best])

        # Five highest-probability classes, best first.
        ranked = np.argsort(probs)[::-1][:5]
        top5 = []
        for idx in ranked:
            top5.append(
                {
                    "disease": le.inverse_transform([idx])[0],
                    "probability": float(probs[idx]),
                }
            )

        return {
            "prediction": label,
            "confidence": best_prob,
            "top_5_predictions": top5,
            "raw_values": raw_values,
            "model_info": {
                "base_models": len(base_models),
                "features_used": len(features_list),
                "meta_input_shape": list(meta_input.shape),
                "n_classes": n_classes,
            },
        }

    except Exception as exc:
        import traceback

        return failure(
            str(exc),
            {"error": "prediction_failed"},
            traceback=traceback.format_exc(),
        )
332
+
333
+
334
+ # ============================================================
335
+ # 5) FASTAPI APP
336
+ # ============================================================
337
+ app = FastAPI(title="MediGuard Ultimate API", version="2.0")
338
+
339
+
340
+ @app.get("/")
341
+ def root():
342
+ return {
343
+ "message": "MediGuard Ultimate API",
344
  "version": "2.0",
345
  "status": "ready" if MODELS_LOADED else "error",
346
  "endpoints": {
 
354
def health():
    """Report whether the models loaded and, if so, basic ensemble counts.

    When loading failed, every count is 0 and the name list is empty; the
    model globals are not touched in that case.
    """
    if MODELS_LOADED:
        status = "healthy"
        model_count = len(base_models)
        feature_count = len(features_list)
        class_count = n_classes
        names = [name for name, _ in base_models]
    else:
        status = "models_not_loaded"
        model_count = 0
        feature_count = 0
        class_count = 0
        names = []

    return {
        "status": status,
        "models_loaded": MODELS_LOADED,
        "base_models": model_count,
        "features": feature_count,
        "classes": class_count,
        "model_names": names,
    }
363
 
364
 
 
366
def predict_api(patient: PatientInput):
    """
    API endpoint for disease prediction

    Args:
        patient: Validated request body with the raw measurements.

    Returns:
        PredictionResult with prediction, confidence, top-5, and model info

    Raises:
        HTTPException: 503 if the models were not loaded at startup; 500 if
            the prediction pipeline reports an error. The 500 detail keeps
            the pipeline's error fields but never includes the traceback.
    """
    if not MODELS_LOADED:
        raise HTTPException(status_code=503, detail="Models not loaded")

    # Dump by alias so keys are the spaced column names used in training
    # ("White Blood Cells", "C-reactive Protein", ...); fields without an
    # alias keep their own name.
    raw_values = patient.model_dump(by_alias=True)

    result = predict_disease(raw_values)

    if "error" in result:
        # Keep the traceback on the server side only; returning it to the
        # client would expose internal paths and library details.
        trace = result.get("traceback")
        if trace:
            print(f"❌ Prediction failed: {result['error']}\n{trace}")
        detail = {
            key: value for key, value in result.items() if key != "traceback"
        }
        raise HTTPException(status_code=500, detail=detail)

    return PredictionResult(**result)
410
+
411
 
412
  @app.post("/api/debug")
413
  def debug_prediction(patient: PatientInput):
414
+ """
415
  DEBUG ENDPOINT - Returns detailed prediction breakdown
416
+ """
417
  if not MODELS_LOADED:
418
  raise HTTPException(status_code=503, detail="Models not loaded")
419
 
 
505
  "feature_count": len(features_list),
506
  "meta_input_shape": list(meta_input.shape)
507
  }
508
+
509
+
510
+ # ============================================================
511
+ # 6) RUN SERVER
512
+ # ============================================================
513
if __name__ == "__main__":
    # Direct execution only: serve the app on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)