ranimeree committed on
Commit
f7d6afd
·
verified ·
1 Parent(s): 964b462

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -125
app.py CHANGED
@@ -2,141 +2,75 @@ import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import pickle
5
- from sklearn.preprocessing import StandardScaler
6
- import sklearn
7
- import os
8
-
9
- print(f"Prediction environment scikit-learn version: {sklearn.__version__}")
10
 
def decode_file(file_path):
    """Load and return the pickled object stored at ``file_path``.

    Returns ``None`` (after printing a diagnostic) when the file does not
    exist or cannot be unpickled.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # files that come from a trusted source.
    try:
        with open(file_path, 'rb') as file:
            # pickle.load() accepts no ``mmap`` keyword; passing it raised
            # TypeError on every call, so the model was never loaded.
            return pickle.load(file)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as e:
        print(f"Error loading pickle file: {e}")
        return None
21
-
22
# Load the model a single time at start-up; ``model`` stays None on any failure
try:
    model = decode_file('model.pkl')
    load_status = (
        "Model loaded successfully"
        if model is not None
        else "Model loading failed, None value"
    )
    print(load_status)
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    model = None
32
-
33
def preprocess_input(data_dict, scaler=None):
    """Convert one raw record into the single-row feature frame passed to the model.

    Args:
        data_dict: Mapping with the keys 'gender', 'age', 'hypertension',
            'heart_disease', 'ever_married', 'work_type', 'Residence_type',
            'avg_glucose_level', 'bmi' and 'smoking_status'.
        scaler: Optional, already-fitted scaler exposing ``transform``. Pass
            the scaler fitted on the training data so numeric features are
            standardised with the training statistics. When omitted, the
            legacy behaviour is kept: a fresh ``StandardScaler`` is fitted on
            this one row, which turns every numeric feature into 0.0.

    Returns:
        A one-row DataFrame holding the ``num__*`` and ``cat__*`` columns in
        the fixed order of ``feature_columns``.

    Raises:
        KeyError: If ``data_dict`` lacks one of the required keys.
    """
    numeric_features = ['age', 'avg_glucose_level', 'bmi']
    numeric_frame = pd.DataFrame(
        [[data_dict[name] for name in numeric_features]],
        columns=numeric_features,
    )

    if scaler is None:
        # Legacy path, kept for backward compatibility. Standardising a single
        # sample against its own mean always yields zeros, so the prediction
        # ignores age, glucose level and BMI. Make that visible.
        import warnings

        warnings.warn(
            "preprocess_input: no fitted scaler supplied; scaling a single "
            "row in isolation maps every numeric feature to 0.0",
            RuntimeWarning,
            stacklevel=2,
        )
        scaled = StandardScaler().fit_transform(numeric_frame)
    else:
        scaled = scaler.transform(numeric_frame)
    scaled = np.asarray(scaled, dtype=float)

    features = {
        f'num__{name}': scaled[0, position]
        for position, name in enumerate(numeric_features)
    }

    def one_hot(column, category):
        """Return 1.0 when the record's ``column`` equals ``category``, else 0.0."""
        return float(data_dict[column] == category)

    # Gender ('Other' is hard-coded to 0.0, as in the original implementation)
    features['cat__gender_Male'] = one_hot('gender', 'Male')
    features['cat__gender_Other'] = 0.0

    # Binary flags arrive already encoded as 0/1
    features['cat__hypertension_1'] = float(data_dict['hypertension'])
    features['cat__heart_disease_1'] = float(data_dict['heart_disease'])

    features['cat__ever_married_Yes'] = one_hot('ever_married', 'Yes')

    # Work type
    for category in ('Never_worked', 'Private', 'Self-employed', 'children'):
        features[f'cat__work_type_{category}'] = one_hot('work_type', category)

    features['cat__Residence_type_Urban'] = one_hot('Residence_type', 'Urban')

    # Smoking status
    for category in ('formerly smoked', 'never smoked', 'smokes'):
        features[f'cat__smoking_status_{category}'] = one_hot('smoking_status', category)

    # Select only the transformed columns in the correct order
    feature_columns = [
        'num__age', 'num__avg_glucose_level', 'num__bmi',
        'cat__gender_Male', 'cat__gender_Other', 'cat__hypertension_1',
        'cat__heart_disease_1', 'cat__ever_married_Yes',
        'cat__work_type_Never_worked', 'cat__work_type_Private',
        'cat__work_type_Self-employed', 'cat__work_type_children',
        'cat__Residence_type_Urban', 'cat__smoking_status_formerly smoked',
        'cat__smoking_status_never smoked', 'cat__smoking_status_smokes',
    ]

    return pd.DataFrame([features], columns=feature_columns)
90
-
91
def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
            avg_glucose_level, bmi, smoking_status, Residence_type):
    """Return a stroke-probability message for one set of form values.

    Yields "Error: Model not loaded" when the module-level ``model`` is None,
    and an "Error making prediction: ..." message when ``predict_proba`` fails.
    """
    if model is None:
        return "Error: Model not loaded"

    def yes_no_flag(answer):
        """Encode a 'Yes' answer as 1 and anything else as 0."""
        return 1 if answer == 'Yes' else 0

    # Raw record; numeric fields are coerced to float
    record = dict(
        gender=gender,
        age=float(age),
        hypertension=yes_no_flag(hypertension),
        heart_disease=yes_no_flag(heart_disease),
        ever_married=ever_married,
        work_type=work_type,
        Residence_type=Residence_type,
        avg_glucose_level=float(avg_glucose_level),
        bmi=float(bmi),
        smoking_status=smoking_status,
    )

    # Build the feature frame outside the try block, as before
    features = preprocess_input(record)

    try:
        stroke_probability = model.predict_proba(features)[0][1]
        return f"The probability of stroke is {stroke_probability:.2%}"
    except Exception as err:
        return f"Error making prediction: {str(err)}, model is not valid"
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Form widgets, listed in the positional order of predict()'s parameters
_form_fields = [
    gr.Radio(choices=['Female', 'Male'], label="Gender"),
    gr.Slider(minimum=0, maximum=100, label="Age"),
    gr.Radio(choices=['Yes', 'No'], label="Hypertension"),
    gr.Radio(choices=['Yes', 'No'], label="Ever Married"),
    gr.Radio(
        choices=['Private', 'Self-employed', 'Govt_job', 'children', 'Never_worked'],
        label="Work Type",
    ),
    gr.Radio(choices=['Yes', 'No'], label="Heart Disease"),
    gr.Number(label="Average Glucose Level"),
    gr.Slider(minimum=10, maximum=50, label="BMI"),
    gr.Radio(
        choices=['formerly smoked', 'never smoked', 'smokes', 'Unknown'],
        label="Smoking Status",
    ),
    gr.Radio(choices=['Urban', 'Rural'], label="Residence Type"),
]

# Build the Gradio interface around predict()
iface = gr.Interface(
    fn=predict,
    inputs=_form_fields,
    outputs='text',
    title='Stroke Probability Predictor',
    description='Predicts the probability of having a stroke based on input features.',
)

# Start the web server only when executed as a script
if __name__ == "__main__":
    iface.launch()
 
2
  import pandas as pd
3
  import numpy as np
4
  import pickle
 
 
 
 
 
5
 
def decode_file(file_path):
    """Load and return the pickled object stored at ``file_path``.

    Errors from ``open`` (for example ``FileNotFoundError``) and from
    ``pickle.load`` propagate to the caller.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # files that come from a trusted source.
    with open(file_path, 'rb') as file:
        return pickle.load(file)
10
+
11
# Unpickle the model once at import time; predict() reads this module-level name
model = decode_file("model.pkl")
12
+
13
+
14
def predict(gender, age, hypertension, ever_married, work_type, heart_disease, avg_glucose_level, bmi, smoking_status, Residence_type):
    """Return a formatted stroke-probability message for one set of form values.

    Categorical answers are label-encoded with the tables below, numeric
    fields are passed through unchanged, and the resulting single-row
    DataFrame is given to the module-level ``model``'s ``predict_proba``.

    Returns:
        "The probability of stroke is NN.NN%" on success, or an "Error: ..."
        message when a field is empty or holds an unrecognised option.
    """
    # NOTE(review): these integer codes and the column order below must match
    # what the model was trained with; that cannot be verified from this file.
    encodings = {
        'gender': {'Male': 1, 'Female': 0},
        'hypertension': {'Yes': 1, 'No': 0},
        'ever_married': {'Yes': 1, 'No': 0},
        'work_type': {'Private': 2, 'Self-employed': 4, 'Govt_job': 3, 'children': 1, 'Never_worked': 0},
        'heart_disease': {'Yes': 1, 'No': 0},
        'smoking_status': {'formerly smoked': 3, 'smokes': 1, 'never smoked': 2, 'Unknown': 0},
        'Residence_type': {'Urban': 1, 'Rural': 0},
    }

    # Raw values keyed by column name, in the column order fed to the model
    raw_values = {
        'gender': gender,
        'age': age,
        'hypertension': hypertension,
        'ever_married': ever_married,
        'work_type': work_type,
        'heart_disease': heart_disease,
        'avg_glucose_level': avg_glucose_level,
        'bmi': bmi,
        'smoking_status': smoking_status,
        'Residence_type': Residence_type,
    }

    row = []
    for label, value in raw_values.items():
        # An unselected Radio or an empty Number field arrives as None;
        # report it instead of failing with a bare KeyError.
        if value is None:
            return f"Error: please provide a value for '{label}'"
        table = encodings.get(label)
        if table is None:
            row.append(value)  # numeric field, used as-is
        elif value in table:
            row.append(table[value])
        else:
            return f"Error: unsupported value {value!r} for '{label}'"

    # Convert the input into a pandas DataFrame
    input_df = pd.DataFrame([row], columns=list(raw_values))

    # Probability of the positive class (second column of predict_proba)
    prediction = model.predict_proba(input_df)[0][1]

    return "The probability of stroke is {:.2f}%".format(prediction * 100)
45
+
46
+
47
+
48
+
49
+
50
+
51
# Column names in the same order predict() uses for its DataFrame.
# Not referenced by any other code visible in this file.
input_labels = [
    'gender', 'age', 'hypertension', 'ever_married', 'work_type',
    'heart_disease', 'avg_glucose_level', 'bmi', 'smoking_status', 'Residence_type'
]

# Create the Gradio interface; widgets follow predict()'s positional parameter order
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.components.Radio(choices=['Female', 'Male'], label="Gender"),
        gr.components.Slider(label="Age"),
        gr.components.Radio(choices=['Yes', 'No'], label="Hypertension"),
        gr.components.Radio(choices=['Yes', 'No'], label="Ever Married"),
        gr.components.Radio(choices=['Private', 'Self-employed', 'Govt_job', 'children', 'Never_worked'], label="Work Type"),
        gr.components.Radio(choices=['Yes', 'No'], label="Heart Disease"),
        gr.components.Number(label="Average Glucose Level"),
        gr.components.Slider(label="BMI"),
        gr.components.Radio(choices=['formerly smoked', 'never smoked', 'smokes', 'Unknown'], label="Smoking Status"),
        gr.components.Radio(choices=['Urban', 'Rural'], label="Residence Type")
    ],
    outputs='text',
    title='Stroke Probability Predictor',
    description='Predicts the probability of having a stroke based on input features.'
)

# Launch only when run as a script, so importing this module (for example
# from a test) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()