Abdullah1211 committed on
Commit
b4a8ace
·
verified ·
1 Parent(s): 14ddcff

Upload 7 files

Browse files
Files changed (7) hide show
  1. .dockerignore +4 -0
  2. Dockerfile +28 -0
  3. README.md +69 -0
  4. app.py +177 -0
  5. model-card.md +84 -0
  6. model.joblib +0 -0
  7. requirements.txt +7 -0
.dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .git
2
+ .gitattributes
3
+ README.md
4
+ model-card.md
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies (compiler toolchain for any wheels built from source)
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Copy and install requirements first for better caching
COPY requirements.txt .
RUN pip install --no-cache-dir --user numpy==1.24.3 && \
    pip install --no-cache-dir --user -r requirements.txt

# Copy the rest of the application
COPY . .

# Health check.
# The slim base image does not ship curl and it is not installed above, so a
# curl-based probe would always fail. Probe with the Python standard library
# instead; urlopen raises (non-zero exit) on connection errors and HTTP 4xx/5xx.
HEALTHCHECK CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/', timeout=5)" || exit 1

# Run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Stroke Prediction Model
3
+ emoji: 🧠
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+
11
+ # Stroke Prediction Model
12
+
13
+ This model predicts the risk of stroke based on demographic and health-related features.
14
+
15
+ ## Model Details
16
+
17
+ - **Model Type**: Random Forest Classifier
18
+ - **Training Data**: Healthcare data including age, gender, various diseases, and lifestyle factors
19
+ - **Features**: Age, gender, hypertension, heart disease, marital status, work type, residence type, glucose level, BMI, smoking status
20
+ - **Output**: Probability of stroke risk (0-1) and risk category
21
+
22
+ ## Usage
23
+
24
+ You can query the model by sending a POST request with the patient data as JSON to the Space's HTTP endpoint:
25
+
26
+ ```python
27
+ import requests
28
+
29
+ API_URL = "https://abdullah1211-ml-stroke.hf.space"
30
+ headers = {"Content-Type": "application/json"}
31
+
32
+ def query(payload):
33
+ response = requests.post(API_URL, headers=headers, json=payload)
34
+ return response.json()
35
+
36
+ data = {
37
+ "gender": "Male",
38
+ "age": 67,
39
+ "hypertension": 1,
40
+ "heart_disease": 0,
41
+ "ever_married": "Yes",
42
+ "work_type": "Private",
43
+ "Residence_type": "Urban",
44
+ "avg_glucose_level": 228.69,
45
+ "bmi": 36.6,
46
+ "smoking_status": "formerly smoked"
47
+ }
48
+
49
+ output = query(data)
50
+ print(output)
51
+ ```
52
+
53
+ ## Response Format
54
+
55
+ ```json
56
+ {
57
+ "probability": 0.72,
58
+ "prediction": "High Risk",
59
+ "stroke_prediction": 1
60
+ }
61
+ ```
62
+
63
+ ## Risk Categories
64
+
65
+ - Very Low Risk: probability < 0.2
66
+ - Low Risk: 0.2 ≤ probability < 0.4
67
+ - Moderate Risk: 0.4 ≤ probability < 0.6
68
+ - High Risk: 0.6 ≤ probability < 0.8
69
+ - Very High Risk: probability ≥ 0.8
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, HTTPException
2
+ import joblib
3
+ import numpy as np
4
+
5
app = FastAPI()

# Load the serialized bundle once at import time. The bundle is read with
# dict-style .get() lookups for the estimator, the preprocessor and the two
# column-name lists. Any failure leaves the service running with model=None.
print("Loading model...")
try:
    stroke_model = joblib.load("model.joblib")
    print("Model loaded successfully")

    # Pull the individual components out of the bundle
    model, preprocessor = (
        stroke_model.get('model'),
        stroke_model.get('preprocessor'),
    )
    numeric_cols = stroke_model.get('numeric_cols', [])
    encoded_cols = stroke_model.get('encoded_cols', [])

    print(f"Model components: {numeric_cols}, {encoded_cols}")
except Exception as e:
    # Best-effort startup: report the problem and reset every component
    print(f"Error loading model: {e}")
    model = preprocessor = None
    encoded_cols, numeric_cols = [], []
26
+
27
+ # Helper function to format input data
28
def preprocess_input(data):
    """Turn a request payload into the single-row feature array for the model.

    Args:
        data: Mapping of feature name to value, as decoded from the request
            JSON. Missing numeric features default to 0; missing categorical
            features default to the values listed below.

    Returns:
        When a preprocessor is loaded: a 2-D float array with one row, made of
        the numeric features followed by the encoded categorical features.
        Otherwise: a 1 x N object array holding the raw categorical values
        followed by the raw numeric values.

    Raises:
        ValueError / TypeError: If a numeric field cannot be converted to
            float, or the preprocessor rejects the input.
    """
    # Numeric features, in the order the loaded bundle lists them. Only the
    # three columns this service knows how to read are picked up.
    known_numeric = ('age', 'avg_glucose_level', 'bmi')
    numeric_values = [
        data.get(col, 0) for col in numeric_cols if col in known_numeric
    ]

    # Categorical features, with the same defaults as before
    input_dict = {
        'gender': data.get('gender', 'Male'),
        'hypertension': data.get('hypertension', 0),
        'heart_disease': data.get('heart_disease', 0),
        'ever_married': data.get('ever_married', 'No'),
        'work_type': data.get('work_type', 'Private'),
        'Residence_type': data.get('Residence_type', 'Urban'),
        'smoking_status': data.get('smoking_status', 'never smoked'),
    }
    categorical_values = list(input_dict.values())

    # One-row object array for the preprocessor
    # NOTE(review): assumes the preprocessor was fitted on these seven columns
    # in this order, and that the model expects [numeric, encoded] -- confirm
    # against the training code.
    input_array = np.array([categorical_values], dtype=object)

    # Compare against None explicitly: estimator objects may define __len__,
    # which makes plain truthiness unreliable.
    if preprocessor is not None:
        encoded_features = preprocessor.transform(input_array)
        # Encoders can return a scipy sparse matrix; densify before stacking
        if hasattr(encoded_features, "toarray"):
            encoded_features = encoded_features.toarray()
        encoded_features = np.atleast_2d(np.asarray(encoded_features))

        # hstack needs matching dimensions: a flat list next to a 2-D matrix
        # raises ValueError, so shape the numeric values as a single row first.
        numeric_row = np.asarray(numeric_values, dtype=float).reshape(1, -1)
        return np.hstack([numeric_row, encoded_features])

    # Fallback mode: no preprocessor available, hand back the raw values
    return np.array([categorical_values + numeric_values], dtype=object)
62
+
63
def get_risk_category(probability):
    """Map a stroke probability onto one of five named risk bands.

    Each band is closed on the left and open on the right, so a value sitting
    exactly on a threshold belongs to the higher band (0.8 is "Very High Risk").

    Args:
        probability: Numeric probability, normally in the range 0-1.

    Returns:
        One of "Very Low Risk", "Low Risk", "Moderate Risk", "High Risk",
        "Very High Risk".

    Raises:
        ValueError: If probability is NaN. NaN compares False against every
            threshold and would otherwise be reported as "Very High Risk".
    """
    if np.isnan(probability):
        raise ValueError("probability must be a number, got NaN")

    # (exclusive upper bound, label), in ascending order
    bands = (
        (0.2, "Very Low Risk"),
        (0.4, "Low Risk"),
        (0.6, "Moderate Risk"),
        (0.8, "High Risk"),
    )
    for upper_bound, label in bands:
        if probability < upper_bound:
            return label
    return "Very High Risk"
74
+
75
+ # Fallback prediction when model fails
76
def fallback_prediction(data):
    """Estimate stroke risk from a simple count of risk factors.

    Used when the trained model is unavailable or raises. Six conditions are
    checked against the payload, and the number that hold selects a fixed
    probability.

    Args:
        data: Mapping of feature name to value from the request payload.

    Returns:
        Tuple of (probability, risk category label).
    """
    # (label, condition) pairs, evaluated in this order
    candidates = (
        ('Hypertension', data.get('hypertension') == 1),
        ('Heart Disease', data.get('heart_disease') == 1),
        ('Age > 65', data.get('age', 0) > 65),
        ('Smoking', data.get('smoking_status') == 'smokes'),
        ('High Blood Glucose', data.get('avg_glucose_level', 0) > 140),
        ('Obesity', data.get('bmi', 0) > 30),
    )
    triggered = [label for label, present in candidates if present]

    # Fixed probability per risk-factor count; four or more share the top value
    probability_by_count = {0: 0.05, 1: 0.15, 2: 0.30, 3: 0.60}
    probability = probability_by_count.get(len(triggered), 0.80)

    return probability, get_risk_category(probability)
108
+
109
@app.get("/")
async def root():
    """
    Report service status and show callers how to use the API.

    Returns a JSON object with a status message, whether the model was
    loaded at startup, a usage hint and an example request payload.
    """
    sample_patient = {
        "gender": "Male",
        "age": 67,
        "hypertension": 1,
        "heart_disease": 0,
        "ever_married": "Yes",
        "work_type": "Private",
        "Residence_type": "Urban",
        "avg_glucose_level": 228.69,
        "bmi": 36.6,
        "smoking_status": "formerly smoked",
    }
    model_is_ready = model is not None

    return {
        "message": "Stroke Prediction API is running",
        "model_loaded": model_is_ready,
        "usage": "Send a POST request to / with patient data",
        "example": sample_patient,
    }
131
+
132
def _prediction_response(probability, risk_level, using_fallback):
    """Build the JSON-serializable response body shared by both prediction paths."""
    return {
        "probability": float(probability),
        "prediction": risk_level,
        "stroke_prediction": int(probability > 0.5),
        "using_fallback": using_fallback,
    }


@app.post("/")
async def predict(request: Request):
    """
    Make a stroke risk prediction based on input features.

    The request body must be a JSON object of patient features. The trained
    model is used when it was loaded at startup. If it is missing, or raises
    while preprocessing or predicting, the rule-based fallback is used and
    the response carries "using_fallback": true.

    Returns:
        Dict with "probability", "prediction" (risk category),
        "stroke_prediction" (1 when probability > 0.5) and "using_fallback".

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or contains values the fallback cannot evaluate.
    """
    try:
        data = await request.json()
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid input: {str(e)}") from e

    # Reject arrays/scalars up front rather than surfacing an AttributeError
    # from a later .get() call.
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=400,
            detail="Invalid input: expected a JSON object of patient features",
        )

    try:
        # Explicit None check, matching the root endpoint: estimator objects
        # may define __len__, which makes plain truthiness unreliable.
        if model is not None:
            try:
                features = preprocess_input(data)
                prediction_proba = model.predict_proba(features)[0, 1]
                risk_level = get_risk_category(prediction_proba)
                return _prediction_response(
                    prediction_proba, risk_level, using_fallback=False
                )
            except Exception as e:
                # Deliberate best-effort: log and drop to the rule-based path
                print(f"Error using model: {e}")

        probability, risk_level = fallback_prediction(data)
        return _prediction_response(probability, risk_level, using_fallback=True)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid input: {str(e)}") from e
model-card.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - healthcare
5
+ - stroke-prediction
6
+ - medical
7
+ license: mit
8
+ datasets:
9
+ - stroke-prediction
10
+ model-index:
11
+ - name: Stroke Risk Prediction Model
12
+ results:
13
+ - task:
14
+ type: binary-classification
15
+ name: stroke prediction
16
+ metrics:
17
+ - type: accuracy
18
+ value: 0.95
19
+ - type: f1
20
+ value: 0.82
21
+ ---
22
+
23
+ # Stroke Risk Prediction Model
24
+
25
+ This model predicts the likelihood of a person experiencing a stroke based on various health and demographic features.
26
+
27
+ ## Model Description
28
+
29
+ The model is a Random Forest classifier trained on healthcare data to predict stroke risk and categorize individuals into risk levels.
30
+
31
+ ### Input
32
+
33
+ The model accepts the following features:
34
+ - **gender**: Male, Female, Other
35
+ - **age**: Age in years (numeric)
36
+ - **hypertension**: Whether the patient has hypertension (0: No, 1: Yes)
37
+ - **heart_disease**: Whether the patient has heart disease (0: No, 1: Yes)
38
+ - **ever_married**: Whether the patient has ever been married (Yes/No)
39
+ - **work_type**: Type of work (Private, Self-employed, Govt_job, children, Never_worked)
40
+ - **Residence_type**: Type of residence (Urban/Rural)
41
+ - **avg_glucose_level**: Average glucose level in blood (mg/dL)
42
+ - **bmi**: Body Mass Index
43
+ - **smoking_status**: Smoking status (formerly smoked, never smoked, smokes, Unknown)
44
+
45
+ ### Output
46
+
47
+ The model outputs:
48
+ - **probability**: Numerical probability of stroke (0-1)
49
+ - **prediction**: Risk category (Very Low Risk, Low Risk, Moderate Risk, High Risk, Very High Risk)
50
+ - **stroke_prediction**: Binary prediction (0: No stroke, 1: Stroke)
51
+
52
+ ### Limitations and Biases
53
+
54
+ - The model was trained on a dataset that may have demographic limitations
55
+ - Performance may vary across different population groups
56
+ - This model should be used as a screening tool only and not as a definitive medical diagnosis
57
+
58
+ ## Usage
59
+
60
+ ```python
61
+ import requests
62
+
63
+ API_URL = "https://api-inference.huggingface.co/models/Abdullah1211/ml-stroke"
64
+ headers = {"Authorization": "Bearer YOUR_API_TOKEN"}
65
+
66
+ def query(payload):
67
+ response = requests.post(API_URL, headers=headers, json=payload)
68
+ return response.json()
69
+
70
+ data = {
71
+ "gender": "Male",
72
+ "age": 67,
73
+ "hypertension": 1,
74
+ "heart_disease": 0,
75
+ "ever_married": "Yes",
76
+ "work_type": "Private",
77
+ "Residence_type": "Urban",
78
+ "avg_glucose_level": 228.69,
79
+ "bmi": 36.6,
80
+ "smoking_status": "formerly smoked"
81
+ }
82
+
83
+ output = query(data)
84
+ ```
model.joblib ADDED
Binary file (6.06 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ numpy==1.24.3
2
+ pandas==2.0.3
3
+ scikit-learn==1.3.0
4
+ joblib==1.3.2
5
+ fastapi>=0.95.0
6
+ pydantic>=2.0.0
7
+ uvicorn>=0.23.0