Signe22 committed on
Commit
908a2ca
·
verified ·
1 Parent(s): 12197ac

Upload 4 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim
WORKDIR /app
# Do not write .pyc files, keep stdout/stderr unbuffered, and set the model
# path that main.py reads from the MODEL_PATH environment variable.
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 MODEL_PATH=/app/diabetes_prediction_model_20251007.pkl
# Copy only the requirements first so the dependency layer can be reused
# when just the application code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 7860
# Shell form is kept so ${PORT:-7860} is expanded at start-up; `exec` makes
# uvicorn replace the shell process so it receives stop signals directly.
CMD exec uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}
diabetes_prediction_model_20251007.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483b1c6e46afa3aedba6565eec3cc80e49fcaa04e4384e14bc34a18c4926f6dd
3
+ size 7290
main.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py — FastAPI model service
2
+ # Run: uvicorn main:app --reload
3
+ # Requires: pip install fastapi uvicorn pydantic joblib pandas numpy scikit-learn xgboost
4
+
5
+ from fastapi import FastAPI
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from pydantic import BaseModel
8
+ from typing import List, Dict, Any
9
+ import pandas as pd
10
+ import joblib
11
+ import os
12
+
13
# Location of the serialized pipeline. The MODEL_PATH environment variable
# overrides the default file name.
MODEL_PATH = os.environ.get("MODEL_PATH", "diabetes_prediction_model_20251007.pkl")
# NOTE(review): joblib.load unpickles the file, so MODEL_PATH must only ever
# point at a trusted artifact.
pipe = joblib.load(MODEL_PATH)

# Column names passed to the pipeline, in this order (must match training).
NUMERIC_FEATURES = [
    "age",
    "alcohol_consumption_per_week",
    "physical_activity_minutes_per_week",
    "diet_score",
    "bmi",
    "cholesterol_total",
    "insulin_level",
    "map",
    "glucose_fasting",
]

CATEGORICAL_FEATURES = [
    "gender",
    "ethnicity",
    "education_level",
    "income_level",
    "employment_status",
    "smoking_status",
    "family_history_diabetes",
    "hypertension_history",
    "cardiovascular_history",
]

# Numeric columns first, then categorical ones.
ALL_FEATURES = [*NUMERIC_FEATURES, *CATEGORICAL_FEATURES]
25
+
26
# Request body for /predict_batch: a list of records, each mapping a column
# name to an arbitrary JSON value.
class Batch(BaseModel):
    data: List[Dict[str, Any]]  # one dict per row to score
28
+
29
app = FastAPI()

# Open CORS policy: any origin, method and header may call the API.
# Credentials stay disabled because a wildcard origin cannot be combined with
# credentialed requests, and no route in this module relies on cookies or
# authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
35
+
36
# Health probe: always answers with a fixed "ok" payload.
@app.get("/health")
def health():
    payload = {"status": "ok"}
    return payload
39
+
40
# Returns the column names (numeric first, then categorical) that
# /predict_batch feeds to the pipeline.
@app.get("/features")
def features():
    expected_columns = ALL_FEATURES
    return {"features": expected_columns}
43
+
44
@app.post("/predict_batch")
def predict_batch(batch: Batch):
    """Score a batch of records and return one positive-class probability per record.

    Each record is a mapping of column name to value. Keys outside
    ALL_FEATURES are ignored. Numeric values that cannot be parsed are
    replaced by 0, and missing categorical values are replaced by "Unknown".

    Returns a dict with key "probabilities" holding a list of floats in the
    same order as the input records (an empty list for an empty batch).
    """
    # Nothing to score: answer directly instead of handing the pipeline a
    # zero-row frame, which it would likely reject.
    if not batch.data:
        return {"probabilities": []}

    X = pd.DataFrame(batch.data)

    # Columns absent from every record are created with a default of 0.
    # NOTE(review): for categorical columns this yields the string "0"
    # rather than "Unknown"; kept as-is because it may be the intended "no"
    # value for the yes/no history columns. Confirm against training data.
    for col in ALL_FEATURES:
        if col not in X.columns:
            X[col] = 0

    # Keep only the expected columns, in the expected order.
    X = X[ALL_FEATURES].copy()

    for col in NUMERIC_FEATURES:
        X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0)

    for col in CATEGORICAL_FEATURES:
        # Record which cells are missing BEFORE casting: astype(str) turns
        # NaN/None into the literal strings "nan"/"None", after which
        # fillna() would no longer find anything to fill.
        present = X[col].notna()
        X[col] = X[col].astype(str).where(present, "Unknown")

    # Column 1 of predict_proba is taken as the positive class.
    probs = pipe.predict_proba(X)[:, 1].tolist()
    return {"probabilities": probs}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ joblib
5
+ pandas
6
+ numpy
7
+ scikit-learn
8
+ xgboost