File size: 4,990 Bytes
0233f7e
 
c9e72f1
c4ca33e
bc355ae
 
 
 
 
 
14ea95a
a9e59c6
 
 
bc355ae
a9e59c6
 
bc355ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35fab31
c4ca33e
c9e72f1
 
 
0233f7e
bc355ae
 
 
 
 
 
 
 
 
 
e6c7a23
c9e72f1
 
 
0233f7e
 
bc355ae
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import gradio as gr
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
import sklearn
import os

# Print the installed scikit-learn version once, at import time.
# NOTE(review): presumably here to help diagnose a mismatch between the
# scikit-learn that pickled the model and the one unpickling it — confirm.
print(f"Prediction environment scikit-learn version: {sklearn.__version__}")

def decode_file(file_path):
    """Deserialize and return the pickled object stored at ``file_path``.

    Args:
        file_path: Path of a file containing a single pickled object.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle (other
            exception types are possible depending on the payload).
    """
    # SECURITY: unpickling can execute arbitrary code; only pass trusted files.
    with open(file_path, 'rb') as stream:
        return pickle.load(stream)

# Load the model once, at import time, deliberately without try/except: if
# './model.pkl' is missing or cannot be unpickled, the exception propagates
# and this module fails to import.
# NOTE(review): the path is relative to the current working directory, not to
# this file's location.
model = decode_file('./model.pkl')

def preprocess_input(data_dict, scaler=None):
    """Turn one raw record into the single-row feature frame the model expects.

    Args:
        data_dict: Mapping with the keys ``gender``, ``age``, ``hypertension``,
            ``heart_disease``, ``ever_married``, ``work_type``,
            ``Residence_type``, ``avg_glucose_level``, ``bmi`` and
            ``smoking_status``. ``hypertension`` and ``heart_disease`` must be
            convertible to float (the caller passes 0/1).
        scaler: Optional, already *fitted* object exposing ``transform`` (for
            example the ``StandardScaler`` fitted on the training data). It is
            applied to the ``age``, ``avg_glucose_level`` and ``bmi`` columns,
            in that order. When omitted, the legacy behaviour is kept (see
            the note in the body).

    Returns:
        A one-row ``pandas.DataFrame`` with the 16 ``num__*`` / ``cat__*``
        columns in a fixed order.
    """
    row = pd.DataFrame([data_dict])

    numeric_features = ['age', 'avg_glucose_level', 'bmi']

    if scaler is None:
        # NOTE(review): legacy fallback, kept for backward compatibility.
        # Fitting a StandardScaler on a single row makes the mean equal to the
        # value itself, so every num__* feature comes out as 0.0 and the
        # numeric inputs cannot influence the prediction. Pass the scaler that
        # was fitted at training time to get meaningful values.
        scaled = StandardScaler().fit_transform(row[numeric_features])
    else:
        scaled = scaler.transform(row[numeric_features])
    scaled = np.asarray(scaled, dtype=float)

    # Insertion order of this dict defines the column order of the result.
    features = {}
    for position, name in enumerate(numeric_features):
        features[f'num__{name}'] = scaled[:, position]

    # One-hot indicators: (source column, level) -> 'cat__<column>_<level>'.
    # 'Other' is derived from the input rather than hard-coded to 0.0; for
    # 'Male'/'Female' inputs the value is still 0.0.
    for column, level in (('gender', 'Male'), ('gender', 'Other')):
        features[f'cat__{column}_{level}'] = (row[column] == level).astype(float)

    # These two arrive already encoded as 0/1 flags.
    features['cat__hypertension_1'] = row['hypertension'].astype(float)
    features['cat__heart_disease_1'] = row['heart_disease'].astype(float)

    remaining_levels = (
        ('ever_married', 'Yes'),
        ('work_type', 'Never_worked'),
        ('work_type', 'Private'),
        ('work_type', 'Self-employed'),
        ('work_type', 'children'),
        ('Residence_type', 'Urban'),
        ('smoking_status', 'formerly smoked'),
        ('smoking_status', 'never smoked'),
        ('smoking_status', 'smokes'),
    )
    for column, level in remaining_levels:
        features[f'cat__{column}_{level}'] = (row[column] == level).astype(float)

    return pd.DataFrame(features, index=row.index)

def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
            avg_glucose_level, bmi, smoking_status, Residence_type):
    """Return a human-readable stroke probability for one set of form inputs.

    Args:
        gender, ever_married, work_type, smoking_status, Residence_type:
            Category labels, forwarded unchanged to ``preprocess_input``.
        hypertension, heart_disease: ``'Yes'`` maps to 1, anything else to 0.
        age, avg_glucose_level, bmi: Values convertible with ``float``.

    Returns:
        A string: either the formatted probability or a message starting with
        ``"Error"``. Input problems and prediction failures are reported via
        the returned text rather than raised.
    """
    # Validate first: an untouched form field arrives as None, which would
    # otherwise crash in float(...) or be silently read as "No"/"not Male".
    labelled_inputs = {
        'Gender': gender,
        'Age': age,
        'Hypertension': hypertension,
        'Ever Married': ever_married,
        'Work Type': work_type,
        'Heart Disease': heart_disease,
        'Average Glucose Level': avg_glucose_level,
        'BMI': bmi,
        'Smoking Status': smoking_status,
        'Residence Type': Residence_type,
    }
    missing = [label for label, value in labelled_inputs.items() if value is None]
    if missing:
        return f"Error: missing value for {', '.join(missing)}"

    try:
        age = float(age)
        avg_glucose_level = float(avg_glucose_level)
        bmi = float(bmi)
    except (TypeError, ValueError):
        return "Error: Age, Average Glucose Level and BMI must be numeric"

    if model is None:
        return "Error: Model not loaded"

    # Keys mirror the raw column names that preprocess_input reads.
    input_data = {
        'gender': gender,
        'age': age,
        'hypertension': 1 if hypertension == 'Yes' else 0,
        'heart_disease': 1 if heart_disease == 'Yes' else 0,
        'ever_married': ever_married,
        'work_type': work_type,
        'Residence_type': Residence_type,
        'avg_glucose_level': avg_glucose_level,
        'bmi': bmi,
        'smoking_status': smoking_status,
    }

    processed_input = preprocess_input(input_data)

    # Top-level UI boundary: report any model failure as text instead of
    # letting the exception escape.
    try:
        # Column 1 of predict_proba is read as the positive ("stroke") class.
        prediction = model.predict_proba(processed_input)[0][1]
    except Exception as e:
        return f"Error making prediction: {str(e)}, model is not valid"
    return f"The probability of stroke is {prediction:.2%}"

# Gradio UI. One input component per `predict` argument, listed in the same
# order as that function's positional parameters.
_input_components = [
    gr.Radio(label="Gender", choices=['Female', 'Male']),
    gr.Slider(label="Age", minimum=0, maximum=100),
    gr.Radio(label="Hypertension", choices=['Yes', 'No']),
    gr.Radio(label="Ever Married", choices=['Yes', 'No']),
    gr.Radio(
        label="Work Type",
        choices=['Private', 'Self-employed', 'Govt_job', 'children', 'Never_worked'],
    ),
    gr.Radio(label="Heart Disease", choices=['Yes', 'No']),
    gr.Number(label="Average Glucose Level"),
    gr.Slider(label="BMI", minimum=10, maximum=50),
    gr.Radio(
        label="Smoking Status",
        choices=['formerly smoked', 'never smoked', 'smokes', 'Unknown'],
    ),
    gr.Radio(label="Residence Type", choices=['Urban', 'Rural']),
]

iface = gr.Interface(
    fn=predict,
    inputs=_input_components,
    outputs='text',
    title='Stroke Probability Predictor',
    description='Predicts the probability of having a stroke based on input features.',
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()