ranimeree committed on
Commit
efff9ea
·
verified ·
1 Parent(s): fe556d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -62
app.py CHANGED
@@ -1,77 +1,143 @@
1
  import gradio as gr
2
- import dill
3
  import pandas as pd
4
- import xgboost as xgb
5
  import numpy as np
6
  import pickle
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- import joblib
9
-
10
# Deserialize the trained estimator once at import time and echo its repr
# to stdout so the loaded object is visible in the startup log.
model = joblib.load('model.joblib')
print(model)
12
-
13
-
14
-
15
def predict(gender, age, hypertension, ever_married, work_type, heart_disease, avg_glucose_level, bmi, smoking_status, Residence_type):
    """Return the stroke probability for one patient as a percentage string.

    Categorical answers are replaced by fixed integer codes before the row
    is handed to the module-level ``model``. A categorical value that is not
    in its code table raises ``KeyError``.
    """
    yes_no_codes = {'Yes': 1, 'No': 0}

    # Integer code tables, listed in the order in which the features are encoded.
    code_tables = {
        'gender': {'Male': 1, 'Female': 0},
        'hypertension': yes_no_codes,
        'ever_married': yes_no_codes,
        'work_type': {'Private': 2, 'Self-employed': 4, 'Govt_job': 3, 'children': 1, 'Never_worked': 0},
        'heart_disease': yes_no_codes,
        'smoking_status': {'formerly smoked': 3, 'smokes': 1, 'never smoked': 2, 'Unknown': 0},
        'Residence_type': {'Urban': 1, 'Rural': 0},
    }

    # Raw feature row; key order is the column order given to the model.
    row = {
        'gender': gender,
        'age': age,
        'hypertension': hypertension,
        'ever_married': ever_married,
        'work_type': work_type,
        'heart_disease': heart_disease,
        'avg_glucose_level': avg_glucose_level,
        'bmi': bmi,
        'smoking_status': smoking_status,
        'Residence_type': Residence_type,
    }

    # Swap every categorical answer for its numeric code (in-place, so the
    # key order above is preserved).
    for feature, table in code_tables.items():
        row[feature] = table[row[feature]]

    # Single-row frame with named columns.
    input_df = pd.DataFrame([list(row.values())], columns=list(row))

    # Column 1 of predict_proba is read as the stroke-class probability.
    stroke_probability = model.predict_proba(input_df)[0][1]

    # Scale to a percentage for display.
    return "The probability of stroke is {:.2f}%".format(stroke_probability * 100)
46
-
47
-
48
-
49
-
50
-
51
-
52
# Feature names in the order of predict()'s parameters.
input_labels = [
    'gender',
    'age',
    'hypertension',
    'ever_married',
    'work_type',
    'heart_disease',
    'avg_glucose_level',
    'bmi',
    'smoking_status',
    'Residence_type',
]

# Create the Gradio interface
# The widgets are listed in the same order as predict()'s parameters.
input_components = [
    gr.components.Radio(choices=['Female', 'Male'], label="Gender"),
    gr.components.Slider(label="Age"),
    gr.components.Radio(choices=['Yes', 'No'], label="Hypertension"),
    gr.components.Radio(choices=['Yes', 'No'], label="Ever Married"),
    gr.components.Radio(choices=['Private', 'Self-employed', 'Govt_job', 'children', 'Never_worked'], label="Work Type"),
    gr.components.Radio(choices=['Yes', 'No'], label="Heart Disease"),
    gr.components.Number(label="Average Glucose Level"),
    gr.components.Slider(label="BMI"),
    gr.components.Radio(choices=['formerly smoked', 'never smoked', 'smokes', 'Unknown'], label="Smoking Status"),
    gr.components.Radio(choices=['Urban', 'Rural'], label="Residence Type"),
]

iface = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs='text',
    title='Stroke Probability Predictor',
    description='Predicts the probability of having a stroke based on input features.',
)

iface.launch()
 
1
  import gradio as gr
 
2
  import pandas as pd
 
3
  import numpy as np
4
  import pickle
5
+ from sklearn.preprocessing import StandardScaler
6
+ import sklearn
7
+ import os
8
+
9
+ print(f"Prediction environment scikit-learn version: {sklearn.__version__}")
10
+
11
def decode_file(file_path):
    """Load and return the object pickled at ``file_path``.

    Failures are reported on stdout and signalled by returning ``None``
    rather than by raising, so the caller decides how to proceed without
    the object.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` when the file is missing or cannot
        be unpickled.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at artifacts that ship with the application, never at
    # user-supplied or downloaded files.
    try:
        with open(file_path, 'rb') as file:
            return pickle.load(file)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except Exception as e:
        # Deliberately broad: unpickling can raise many unrelated exception
        # types (UnpicklingError, EOFError, AttributeError, ImportError, ...).
        print(f"Error loading pickle file: {e}")
    return None
22
+
23
# Load the model a single time at import so every prediction reuses it.
# `model` stays None when loading fails; the outcome is reported on stdout.
try:
    model = decode_file('./model.pkl')  # Updated here
    load_status = (
        "Model loaded successfully"
        if model is not None
        else "Model loading failed, None value"
    )
    print(load_status)
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    model = None
33
+
34
def preprocess_input(data_dict, scaler=None):
    """Build the single-row feature frame that is passed to the model.

    The three numeric inputs become ``num__*`` columns and the categorical
    inputs become 0.0/1.0 ``cat__*`` indicator columns. The column names
    presumably mirror the feature names produced by the training pipeline;
    confirm against the training code.

    Args:
        data_dict: Mapping with the keys ``gender``, ``age``,
            ``hypertension``, ``heart_disease``, ``ever_married``,
            ``work_type``, ``Residence_type``, ``avg_glucose_level``,
            ``bmi`` and ``smoking_status``. ``hypertension`` and
            ``heart_disease`` must already be numeric (0 or 1).
        scaler: Optional object with a ``transform`` method that was fitted
            on the training data for ``age``, ``avg_glucose_level`` and
            ``bmi`` (in that order). When omitted, the previous behaviour is
            kept for backward compatibility (see the note in the body).

    Returns:
        A one-row ``pandas.DataFrame`` holding the 16 feature columns in a
        fixed order.
    """
    df = pd.DataFrame([data_dict])

    numeric_features = ['age', 'avg_glucose_level', 'bmi']

    if scaler is None:
        # NOTE(review): a StandardScaler fitted on this one row uses the row
        # itself as the mean, so every num__ column comes out as 0.0 and the
        # numeric inputs cannot influence the prediction. Pass the scaler
        # that was fitted during training via `scaler` to get real values.
        scaled_features = StandardScaler().fit_transform(df[numeric_features])
    else:
        # Only transform: the statistics must come from the training data.
        scaled_features = np.asarray(scaler.transform(df[numeric_features]))

    for i, feat in enumerate(numeric_features):
        df[f'num__{feat}'] = scaled_features[:, i]

    # Indicator columns: (source column, level that switches the flag on).
    # Female, Govt_job, Rural and Unknown have no column of their own and
    # are therefore encoded as all-zero indicators.
    indicator_levels = {
        'cat__gender_Male': ('gender', 'Male'),
        'cat__gender_Other': ('gender', 'Other'),
        'cat__ever_married_Yes': ('ever_married', 'Yes'),
        'cat__work_type_Never_worked': ('work_type', 'Never_worked'),
        'cat__work_type_Private': ('work_type', 'Private'),
        'cat__work_type_Self-employed': ('work_type', 'Self-employed'),
        'cat__work_type_children': ('work_type', 'children'),
        'cat__Residence_type_Urban': ('Residence_type', 'Urban'),
        'cat__smoking_status_formerly smoked': ('smoking_status', 'formerly smoked'),
        'cat__smoking_status_never smoked': ('smoking_status', 'never smoked'),
        'cat__smoking_status_smokes': ('smoking_status', 'smokes'),
    }
    for column, (source, level) in indicator_levels.items():
        df[column] = (df[source] == level).astype(float)

    # These two arrive as 0/1 already and only need a float cast.
    df['cat__hypertension_1'] = df['hypertension'].astype(float)
    df['cat__heart_disease_1'] = df['heart_disease'].astype(float)

    # Select only the transformed columns, in a fixed order.
    feature_columns = [
        'num__age', 'num__avg_glucose_level', 'num__bmi',
        'cat__gender_Male', 'cat__gender_Other', 'cat__hypertension_1',
        'cat__heart_disease_1', 'cat__ever_married_Yes',
        'cat__work_type_Never_worked', 'cat__work_type_Private',
        'cat__work_type_Self-employed', 'cat__work_type_children',
        'cat__Residence_type_Urban', 'cat__smoking_status_formerly smoked',
        'cat__smoking_status_never smoked', 'cat__smoking_status_smokes',
    ]

    return df[feature_columns]
91
+
92
def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
            avg_glucose_level, bmi, smoking_status, Residence_type):
    """Return the predicted stroke probability as a user-facing message.

    The parameters are the raw widget values: strings for the categorical
    answers and numbers for ``age``, ``avg_glucose_level`` and ``bmi``.
    Every failure is reported as an ``"Error..."`` string instead of being
    raised, so the caller always receives text to display.

    Returns:
        ``"The probability of stroke is <p>%"`` on success, otherwise an
        error message describing what went wrong.
    """
    if model is None:
        return "Error: Model not loaded"

    # An unset widget arrives as None. Refuse to guess: previously an unset
    # radio was silently treated as "No"/"Female" and an empty number field
    # raised an uncaught TypeError.
    raw_inputs = {
        'gender': gender,
        'age': age,
        'hypertension': hypertension,
        'ever_married': ever_married,
        'work_type': work_type,
        'heart_disease': heart_disease,
        'avg_glucose_level': avg_glucose_level,
        'bmi': bmi,
        'smoking_status': smoking_status,
        'Residence_type': Residence_type,
    }
    missing = [name for name, value in raw_inputs.items() if value is None]
    if missing:
        return f"Error: missing input value(s): {', '.join(missing)}"

    try:
        # Create input dictionary
        input_data = {
            'gender': gender,
            'age': float(age),  # Ensure numeric
            'hypertension': 1 if hypertension == 'Yes' else 0,
            'heart_disease': 1 if heart_disease == 'Yes' else 0,
            'ever_married': ever_married,
            'work_type': work_type,
            'Residence_type': Residence_type,
            'avg_glucose_level': float(avg_glucose_level),  # Ensure numeric
            'bmi': float(bmi),  # Ensure numeric
            'smoking_status': smoking_status,
        }

        # Preprocess the input
        processed_input = preprocess_input(input_data)
    except Exception as e:
        # Kept separate from the model error below so that bad input is not
        # reported as an invalid model.
        return f"Error: invalid input: {e}"

    # Use the loaded model
    try:
        # Column 1 of predict_proba is read as the stroke-class probability.
        prediction = model.predict_proba(processed_input)[0][1]
        return f"The probability of stroke is {prediction:.2%}"
    except Exception as e:
        return f"Error making prediction: {e}, model is not valid"  # updated here
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
# Create the Gradio interface
# The widgets are listed in the same order as predict()'s parameters.
input_components = [
    gr.Radio(choices=['Female', 'Male'], label="Gender"),
    gr.Slider(minimum=0, maximum=100, label="Age"),
    gr.Radio(choices=['Yes', 'No'], label="Hypertension"),
    gr.Radio(choices=['Yes', 'No'], label="Ever Married"),
    gr.Radio(choices=['Private', 'Self-employed', 'Govt_job', 'children', 'Never_worked'], label="Work Type"),
    gr.Radio(choices=['Yes', 'No'], label="Heart Disease"),
    gr.Number(label="Average Glucose Level"),
    gr.Slider(minimum=10, maximum=50, label="BMI"),
    gr.Radio(choices=['formerly smoked', 'never smoked', 'smokes', 'Unknown'], label="Smoking Status"),
    gr.Radio(choices=['Urban', 'Rural'], label="Residence Type"),
]

iface = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs='text',
    title='Stroke Probability Predictor',
    description='Predicts the probability of having a stroke based on input features.',
)

# Start the web server only when the file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()