Upload 7 files

Browse files

Files changed (7) hide show

.dockerignore +4 -0
Dockerfile +28 -0
README.md +69 -0
app.py +177 -0
model-card.md +84 -0
model.joblib +0 -0
requirements.txt +7 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.git
+.gitattributes
+README.md
+model-card.md

Dockerfile ADDED Viewed

	@@ -0,0 +1,28 @@

+FROM python:3.10-slim
+WORKDIR /app
+# Install system dependencies.
+# curl is needed by the HEALTHCHECK below; the slim base image does not ship it,
+# so without it the health check would always report the container unhealthy.
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Create a non-root user
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Copy and install requirements first for better caching
+COPY requirements.txt .
+RUN pip install --no-cache-dir --user numpy==1.24.3 && \
+    pip install --no-cache-dir --user -r requirements.txt
+# Copy the rest of the application
+COPY . .
+# Health check: GET / is the API's status endpoint
+HEALTHCHECK CMD curl --fail http://localhost:7860/ || exit 1
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,69 @@

+---
+title: Stroke Prediction Model
+emoji: 🧠
+colorFrom: red
+colorTo: blue
+sdk: docker
+app_file: app.py
+pinned: false
+---
+# Stroke Prediction Model
+This model predicts the risk of stroke based on demographic and health-related features.
+## Model Details
+- **Model Type**: Random Forest Classifier
+- **Training Data**: Healthcare data including age, gender, various diseases, and lifestyle factors
+- **Features**: Age, gender, hypertension, heart disease, marital status, work type, residence type, glucose level, BMI, smoking status
+- **Output**: Probability of stroke risk (0-1) and risk category
+## Usage
+You can query the model by sending a POST request with the patient data as JSON to the Space's endpoint:
+```python
+import requests
+API_URL = "https://abdullah1211-ml-stroke.hf.space"
+headers = {"Content-Type": "application/json"}
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+data = {
+    "gender": "Male",
+    "age": 67,
+    "hypertension": 1,
+    "heart_disease": 0,
+    "ever_married": "Yes",
+    "work_type": "Private",
+    "Residence_type": "Urban",
+    "avg_glucose_level": 228.69,
+    "bmi": 36.6,
+    "smoking_status": "formerly smoked"
+}
+output = query(data)
+print(output)
+```
+## Response Format
+```json
+{
+  "probability": 0.72,
+  "prediction": "High Risk",
+  "stroke_prediction": 1,
+  "using_fallback": false
+}
+```
+## Risk Categories
+- Very Low Risk: probability < 0.2
+- Low Risk: 0.2 ≤ probability < 0.4
+- Moderate Risk: 0.4 ≤ probability < 0.6
+- High Risk: 0.6 ≤ probability < 0.8
+- Very High Risk: probability ≥ 0.8

app.py ADDED Viewed

	@@ -0,0 +1,177 @@

+from fastapi import FastAPI, Request, HTTPException
+import joblib
+import numpy as np
+# FastAPI application instance; the route decorators further down register
+# their handlers on it
+app = FastAPI()
+# Load the serialized model bundle once, at import time
+print("Loading model...")
+try:
+    stroke_model = joblib.load("model.joblib")
+    print("Model loaded successfully")
+    # Extract necessary components. The bundle is accessed with .get(), so it
+    # is expected to be dict-like; a missing key yields None / an empty list
+    # instead of raising.
+    model = stroke_model.get('model')
+    encoded_cols = stroke_model.get('encoded_cols', [])
+    numeric_cols = stroke_model.get('numeric_cols', [])
+    preprocessor = stroke_model.get('preprocessor')
+    print(f"Model components: {numeric_cols}, {encoded_cols}")
+except Exception as e:
+    # Any failure while loading leaves the service running without a model;
+    # the prediction endpoint then answers with the rule-based fallback
+    print(f"Error loading model: {e}")
+    model = None
+    preprocessor = None
+    encoded_cols = []
+    numeric_cols = []
+# Helper function to format input data
+def preprocess_input(data):
+    # For numeric features
+    numeric_values = []
+    for col in numeric_cols:
+        if col == 'age':
+            numeric_values.append(data.get('age', 0))
+        elif col == 'avg_glucose_level':
+            numeric_values.append(data.get('avg_glucose_level', 0))
+        elif col == 'bmi':
+            numeric_values.append(data.get('bmi', 0))
+    # For categorical features
+    input_dict = {
+        'gender': data.get('gender', 'Male'),
+        'hypertension': data.get('hypertension', 0),
+        'heart_disease': data.get('heart_disease', 0),
+        'ever_married': data.get('ever_married', 'No'),
+        'work_type': data.get('work_type', 'Private'),
+        'Residence_type': data.get('Residence_type', 'Urban'),
+        'smoking_status': data.get('smoking_status', 'never smoked')
+    }
+    # Create a structured numpy array for preprocessing
+    input_array = np.array([list(input_dict.values())], dtype=object)
+    # Apply preprocessing if available
+    if preprocessor:
+        encoded_features = preprocessor.transform(input_array)
+        # Combine numeric and encoded features
+        features = np.hstack([numeric_values, encoded_features])
+        return features
+    # Fallback mode
+    return np.array([list(input_dict.values()) + numeric_values], dtype=object)
+def get_risk_category(probability):
+    if probability < 0.2:
+        return "Very Low Risk"
+    elif probability < 0.4:
+        return "Low Risk"
+    elif probability < 0.6:
+        return "Moderate Risk"
+    elif probability < 0.8:
+        return "High Risk"
+    else:
+        return "Very High Risk"
+# Fallback prediction when model fails
+def fallback_prediction(data):
+    # Count risk factors
+    risk_factors = []
+    if data.get('hypertension') == 1:
+        risk_factors.append('Hypertension')
+    if data.get('heart_disease') == 1:
+        risk_factors.append('Heart Disease')
+    if data.get('age', 0) > 65:
+        risk_factors.append('Age > 65')
+    if data.get('smoking_status') == 'smokes':
+        risk_factors.append('Smoking')
+    if data.get('avg_glucose_level', 0) > 140:
+        risk_factors.append('High Blood Glucose')
+    if data.get('bmi', 0) > 30:
+        risk_factors.append('Obesity')
+    risk_count = len(risk_factors)
+    # Simple logic based on risk factor count
+    if risk_count == 0:
+        probability = 0.05
+    elif risk_count == 1:
+        probability = 0.15
+    elif risk_count == 2:
+        probability = 0.30
+    elif risk_count == 3:
+        probability = 0.60
+    else:
+        probability = 0.80
+    return probability, get_risk_category(probability)
+@app.get("/")
+async def root():
+    """
+    Root endpoint for health check and documentation
+    """
+    return {
+        "message": "Stroke Prediction API is running",
+        "model_loaded": model is not None,
+        "usage": "Send a POST request to / with patient data",
+        "example": {
+            "gender": "Male",
+            "age": 67,
+            "hypertension": 1,
+            "heart_disease": 0,
+            "ever_married": "Yes",
+            "work_type": "Private",
+            "Residence_type": "Urban",
+            "avg_glucose_level": 228.69,
+            "bmi": 36.6,
+            "smoking_status": "formerly smoked"
+        }
+    }
+@app.post("/")
+async def predict(request: Request):
+    """
+    Make a stroke risk prediction based on input features
+    """
+    try:
+        data = await request.json()
+        # Use the model if available, otherwise use fallback
+        if model:
+            try:
+                # Preprocess the input
+                features = preprocess_input(data)
+                # Make prediction
+                prediction_proba = model.predict_proba(features)[0, 1]
+                risk_level = get_risk_category(prediction_proba)
+                return {
+                    "probability": float(prediction_proba),
+                    "prediction": risk_level,
+                    "stroke_prediction": int(prediction_proba > 0.5),
+                    "using_fallback": False
+                }
+            except Exception as e:
+                print(f"Error using model: {e}")
+                # Fall back to simple prediction
+                probability, risk_level = fallback_prediction(data)
+                return {
+                    "probability": float(probability),
+                    "prediction": risk_level,
+                    "stroke_prediction": int(probability > 0.5),
+                    "using_fallback": True
+                }
+        else:
+            # Use fallback prediction
+            probability, risk_level = fallback_prediction(data)
+            return {
+                "probability": float(probability),
+                "prediction": risk_level,
+                "stroke_prediction": int(probability > 0.5),
+                "using_fallback": True
+            }
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Invalid input: {str(e)}")

model-card.md ADDED Viewed

	@@ -0,0 +1,84 @@

+---
+language: en
+tags:
+  - healthcare
+  - stroke-prediction
+  - medical
+license: mit
+datasets:
+  - stroke-prediction
+model-index:
+  - name: Stroke Risk Prediction Model
+    results:
+      - task:
+          type: binary-classification
+          name: stroke prediction
+        metrics:
+          - type: accuracy
+            value: 0.95
+          - type: f1
+            value: 0.82
+---
+# Stroke Risk Prediction Model
+This model predicts the likelihood of a person experiencing a stroke based on various health and demographic features.
+## Model Description
+The model is a Random Forest classifier trained on healthcare data to predict stroke risk and categorize individuals into risk levels.
+### Input
+The model accepts the following features:
+- **gender**: Male, Female, Other
+- **age**: Age in years (numeric)
+- **hypertension**: Whether the patient has hypertension (0: No, 1: Yes)
+- **heart_disease**: Whether the patient has heart disease (0: No, 1: Yes)
+- **ever_married**: Whether the patient has ever been married (Yes/No)
+- **work_type**: Type of work (Private, Self-employed, Govt_job, children, Never_worked)
+- **Residence_type**: Type of residence (Urban/Rural)
+- **avg_glucose_level**: Average glucose level in blood (mg/dL)
+- **bmi**: Body Mass Index
+- **smoking_status**: Smoking status (formerly smoked, never smoked, smokes, Unknown)
+### Output
+The model outputs:
+- **probability**: Numerical probability of stroke (0-1)
+- **prediction**: Risk category (Very Low Risk, Low Risk, Moderate Risk, High Risk, Very High Risk)
+- **stroke_prediction**: Binary prediction (0: No stroke, 1: Stroke); 1 is returned when the probability exceeds 0.5
+### Limitations and Biases
+- The model was trained on a dataset that may have demographic limitations
+- Performance may vary across different population groups
+- This model should be used as a screening tool only and not as a definitive medical diagnosis
+## Usage
+```python
+import requests
+API_URL = "https://api-inference.huggingface.co/models/Abdullah1211/ml-stroke"
+headers = {"Authorization": "Bearer YOUR_API_TOKEN"}
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+data = {
+    "gender": "Male",
+    "age": 67,
+    "hypertension": 1,
+    "heart_disease": 0,
+    "ever_married": "Yes",
+    "work_type": "Private",
+    "Residence_type": "Urban",
+    "avg_glucose_level": 228.69,
+    "bmi": 36.6,
+    "smoking_status": "formerly smoked"
+}
+output = query(data)
+```

model.joblib ADDED Viewed

Binary file (6.06 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+numpy==1.24.3
+pandas==2.0.3
+scikit-learn==1.3.0
+joblib==1.3.2
+fastapi>=0.95.0
+pydantic>=2.0.0
+uvicorn>=0.23.0