ml-stroke / app.py
Abdullah1211's picture
Upload 11 files
14a47f3 verified
import joblib
import pandas as pd
import numpy as np
from fastapi import FastAPI, Form, File, UploadFile, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import time
import json
from typing import Optional, List, Union
import uvicorn
# Load the trained model
import os

print("Loading model...")
model_path = "/app/model.joblib"
print(f"Model path: {model_path}")
print(f"Model file exists: {os.path.exists(model_path)}")
# The size is diagnostic output only. A missing or unreadable file must not
# abort startup here, otherwise the heuristic fallback in the prediction
# endpoint could never be reached.
try:
    print(f"Model file size: {os.path.getsize(model_path) / 1024:.2f} KB")
except OSError as e:
    print(f"Model file size unavailable: {e}")

# Pre-bind every name assigned by the loader so each one exists even when
# loading fails part-way through.
model_info = None
pipeline = None
model = None
numeric_cols = None
categorical_cols = None
has_predict_proba = False

try:
    # NOTE(review): joblib.load unpickles arbitrary objects; only point
    # model_path at files from a trusted source.
    model_info = joblib.load(model_path)
    print("Model loaded successfully!")

    # Access model components
    pipeline = model_info['model']
    model = pipeline.named_steps['classifier']
    print(f"Model details: Type: {type(model)}")

    # Get preprocessing info
    numeric_cols = model_info['numeric_cols']
    categorical_cols = model_info['encoded_cols']
    print(f"Features: {len(numeric_cols)} numeric features, {len(categorical_cols)} encoded features")

    # Verify model has predict_proba
    has_predict_proba = hasattr(model, 'predict_proba')
    print(f"Model has predict_proba method: {'Yes' if has_predict_proba else 'No'}")
except Exception as e:
    # Any failure (missing file, unexpected bundle layout, ...) leaves the
    # service running; model_info = None is the signal the endpoint checks.
    print(f"Error loading model: {e}")
    model_info = None
# Create the FastAPI application object that the routes below register on
app = FastAPI(title="Stroke Prediction Model API")

# CORS policy: any origin, any method, any header, credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive policy -- confirm it is intended before exposing this publicly.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
# Check if python-multipart is installed
try:
    import multipart
except ImportError:
    print("python-multipart is NOT installed")
else:
    # This is only a startup diagnostic: if the importable `multipart` module
    # exposes no __version__, report "unknown" instead of raising.
    print("python-multipart is installed: ", getattr(multipart, "__version__", "unknown"))
# Define prediction endpoints

# Exclusive upper bound of each risk band, checked in ascending order.
# Anything at or above the last bound is reported as "High Risk".
_RISK_BANDS = (
    (0.1, "Very Low Risk"),
    (0.3, "Low Risk"),
    (0.6, "Moderate Risk"),
)


def _risk_level_for(probability):
    """Return the risk-band label for a stroke probability."""
    for upper_bound, label in _RISK_BANDS:
        if probability < upper_bound:
            return label
    return "High Risk"


def _identify_risk_factors(data):
    """Return the labels of the risk factors present in the processed input."""
    checks = (
        (data['hypertension'] == 1, "Hypertension"),
        (data['heart_disease'] == 1, "Heart Disease"),
        (data['age'] > 65, "Advanced Age (65+)"),
        (data['avg_glucose_level'] > 140, "High Blood Glucose (>140)"),
        (data['bmi'] > 30, "Obesity (BMI > 30)"),
        (data['smoking_status'] == 'formerly smoked', "Former Smoker"),
        (data['smoking_status'] == 'smokes', "Current Smoker"),
    )
    return [label for present, label in checks if present]


def _fallback_probability(data):
    """Estimate a stroke probability from fixed heuristics, capped at 0.8."""
    probability = 0.05  # Default low risk

    # Increase risk based on known factors
    if data['hypertension'] == 1:
        probability += 0.1
    if data['heart_disease'] == 1:
        probability += 0.1

    if data['age'] > 65:
        probability += 0.15
    elif data['age'] > 55:
        probability += 0.1

    if data['avg_glucose_level'] > 180:
        probability += 0.1
    elif data['avg_glucose_level'] > 140:
        probability += 0.05

    if data['bmi'] > 30:
        probability += 0.05

    if data['smoking_status'] == 'smokes':
        probability += 0.07
    elif data['smoking_status'] == 'formerly smoked':
        probability += 0.03

    # Cap at 80%
    return min(probability, 0.8)


@app.post("/api/predict")
async def predict_stroke(
    gender: Optional[str] = Form(None),
    age: Optional[float] = Form(None),
    hypertension: Optional[int] = Form(None),
    heart_disease: Optional[int] = Form(None),
    ever_married: Optional[str] = Form(None),
    work_type: Optional[str] = Form(None),
    Residence_type: Optional[str] = Form(None),
    avg_glucose_level: Optional[float] = Form(None),
    bmi: Optional[float] = Form(None),
    smoking_status: Optional[str] = Form(None)
):
    """Predict stroke risk from submitted form fields.

    Every field is optional. Missing or empty text fields fall back to
    'Male', 'No', 'Private', 'Urban' and 'never smoked'; missing numeric
    fields fall back to 0.

    The loaded pipeline is used when available. If the model was not loaded,
    or prediction raises, a fixed heuristic estimate is returned instead and
    the response carries ``"using_model": False``.

    Returns:
        A dict with the keys ``probability``, ``prediction`` (risk-band
        label), ``stroke_prediction`` (0 or 1), ``risk_factors``,
        ``using_model``, ``execution_time_ms`` and ``model_version``.
    """
    start_time = time.time()

    # Log the received data
    form_data = {
        'gender': gender,
        'age': age,
        'hypertension': hypertension,
        'heart_disease': heart_disease,
        'ever_married': ever_married,
        'work_type': work_type,
        'Residence_type': Residence_type,
        'avg_glucose_level': avg_glucose_level,
        'bmi': bmi,
        'smoking_status': smoking_status
    }
    print("Received form data:", form_data)

    # Process data and fill default values if needed
    processed_data = {
        'gender': gender if gender else 'Male',
        'age': float(age) if age is not None else 0,
        'hypertension': int(hypertension) if hypertension is not None else 0,
        'heart_disease': int(heart_disease) if heart_disease is not None else 0,
        'ever_married': ever_married if ever_married else 'No',
        'work_type': work_type if work_type else 'Private',
        'Residence_type': Residence_type if Residence_type else 'Urban',
        'avg_glucose_level': float(avg_glucose_level) if avg_glucose_level is not None else 0,
        'bmi': float(bmi) if bmi is not None else 0,
        'smoking_status': smoking_status if smoking_status else 'never smoked'
    }
    print("Processed data for prediction:", processed_data)

    # Create a DataFrame from the processed data
    input_df = pd.DataFrame([processed_data])

    # Risk factors depend only on the processed input, so both the model
    # path and the fallback path report the same list.
    risk_factors = _identify_risk_factors(processed_data)

    # Prediction with fallback
    try:
        if model_info is None:
            raise ValueError("Model not loaded")

        # Get prediction from model
        prediction_proba = pipeline.predict_proba(input_df)[0][1]
        prediction_binary = pipeline.predict(input_df)[0]

        result = {
            "probability": float(prediction_proba),
            "prediction": _risk_level_for(prediction_proba),
            "stroke_prediction": int(prediction_binary),
            "risk_factors": risk_factors,
            "using_model": True,
            "execution_time_ms": (time.time() - start_time) * 1000,
            "model_version": "stroke-prediction-1.0"
        }
    except Exception as e:
        # Deliberate best-effort: any model failure degrades to the heuristic
        # estimate rather than returning an error to the caller.
        print("Error in preprocessing:", e)

        fallback_probability = _fallback_probability(processed_data)

        result = {
            "probability": fallback_probability,
            "prediction": _risk_level_for(fallback_probability),
            # Threshold for binary prediction
            "stroke_prediction": 1 if fallback_probability > 0.5 else 0,
            "risk_factors": risk_factors,
            "using_model": False,
            "execution_time_ms": (time.time() - start_time) * 1000,
            "model_version": "fallback-1.0"
        }

    print("Prediction result:", result)
    return result
@app.get("/")
async def root():
    """Return a short status message that points callers at the prediction route."""
    status_message = "Stroke Prediction API is running! Use /api/predict for predictions."
    return dict(message=status_message)
# Run the server
if __name__ == "__main__":
    # Only when executed as a script: serve the app with uvicorn on all
    # interfaces, port 7860.
    _server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **_server_options)