ranimeree committed on
Commit
9b1573b
·
verified ·
1 Parent(s): 90a3dee

Update app.py features

Browse files
Files changed (1) hide show
  1. app.py +33 -11
app.py CHANGED
@@ -11,19 +11,45 @@ def preprocess_input(data_dict):
11
  """Preprocess input data to match the training format"""
12
  df = pd.DataFrame([data_dict])
13
 
14
- # Numeric features
15
  numeric_features = ['age', 'avg_glucose_level', 'bmi']
 
 
16
 
17
  # Scale numeric features
18
  scaler = StandardScaler()
19
- df[numeric_features] = scaler.fit_transform(df[numeric_features])
 
20
 
21
- # Create dummy variables for categorical features
22
- df = pd.get_dummies(df, columns=['gender', 'hypertension', 'heart_disease',
23
- 'ever_married', 'work_type', 'Residence_type',
24
- 'smoking_status'])
25
 
26
- # Ensure all expected columns are present
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  expected_columns = [
28
  'num__age', 'num__avg_glucose_level', 'num__bmi',
29
  'cat__gender_Male', 'cat__gender_Other', 'cat__hypertension_1',
@@ -34,10 +60,6 @@ def preprocess_input(data_dict):
34
  'cat__smoking_status_never smoked', 'cat__smoking_status_smokes'
35
  ]
36
 
37
- for col in expected_columns:
38
- if col not in df.columns:
39
- df[col] = 0
40
-
41
  return df[expected_columns]
42
 
43
  def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
 
11
  """Preprocess input data to match the training format"""
12
  df = pd.DataFrame([data_dict])
13
 
14
+ # Numeric features - add prefix 'num__'
15
  numeric_features = ['age', 'avg_glucose_level', 'bmi']
16
+ for feat in numeric_features:
17
+ df[f'num__{feat}'] = df[feat]
18
 
19
  # Scale numeric features
20
  scaler = StandardScaler()
21
+ for feat in numeric_features:
22
+ df[f'num__{feat}'] = scaler.fit_transform(df[[feat]])
23
 
24
+ # Create categorical features with proper prefixes
25
+ # gender
26
+ df['cat__gender_Male'] = (df['gender'] == 'Male').astype(float)
27
+ df['cat__gender_Other'] = 0.0 # Assuming no 'Other' gender in our interface
28
 
29
+ # hypertension
30
+ df['cat__hypertension_1'] = df['hypertension'].astype(float)
31
+
32
+ # heart_disease
33
+ df['cat__heart_disease_1'] = df['heart_disease'].astype(float)
34
+
35
+ # ever_married
36
+ df['cat__ever_married_Yes'] = (df['ever_married'] == 'Yes').astype(float)
37
+
38
+ # work_type
39
+ df['cat__work_type_Never_worked'] = (df['work_type'] == 'Never_worked').astype(float)
40
+ df['cat__work_type_Private'] = (df['work_type'] == 'Private').astype(float)
41
+ df['cat__work_type_Self-employed'] = (df['work_type'] == 'Self-employed').astype(float)
42
+ df['cat__work_type_children'] = (df['work_type'] == 'children').astype(float)
43
+
44
+ # Residence_type
45
+ df['cat__Residence_type_Urban'] = (df['Residence_type'] == 'Urban').astype(float)
46
+
47
+ # smoking_status
48
+ df['cat__smoking_status_formerly smoked'] = (df['smoking_status'] == 'formerly smoked').astype(float)
49
+ df['cat__smoking_status_never smoked'] = (df['smoking_status'] == 'never smoked').astype(float)
50
+ df['cat__smoking_status_smokes'] = (df['smoking_status'] == 'smokes').astype(float)
51
+
52
+ # Select only the transformed columns in the correct order
53
  expected_columns = [
54
  'num__age', 'num__avg_glucose_level', 'num__bmi',
55
  'cat__gender_Male', 'cat__gender_Other', 'cat__hypertension_1',
 
60
  'cat__smoking_status_never smoked', 'cat__smoking_status_smokes'
61
  ]
62
 
 
 
 
 
63
  return df[expected_columns]
64
 
65
  def predict(gender, age, hypertension, ever_married, work_type, heart_disease,