Spaces:
Runtime error
Update app.py features
Browse files
app.py
CHANGED
|
@@ -11,19 +11,45 @@ def preprocess_input(data_dict):
|
|
| 11 |
"""Preprocess input data to match the training format"""
|
| 12 |
df = pd.DataFrame([data_dict])
|
| 13 |
|
| 14 |
-
# Numeric features
|
| 15 |
numeric_features = ['age', 'avg_glucose_level', 'bmi']
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Scale numeric features
|
| 18 |
scaler = StandardScaler()
|
| 19 |
-
|
|
|
|
| 20 |
|
| 21 |
-
# Create
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
|
| 26 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
expected_columns = [
|
| 28 |
'num__age', 'num__avg_glucose_level', 'num__bmi',
|
| 29 |
'cat__gender_Male', 'cat__gender_Other', 'cat__hypertension_1',
|
|
@@ -34,10 +60,6 @@ def preprocess_input(data_dict):
|
|
| 34 |
'cat__smoking_status_never smoked', 'cat__smoking_status_smokes'
|
| 35 |
]
|
| 36 |
|
| 37 |
-
for col in expected_columns:
|
| 38 |
-
if col not in df.columns:
|
| 39 |
-
df[col] = 0
|
| 40 |
-
|
| 41 |
return df[expected_columns]
|
| 42 |
|
| 43 |
def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
|
|
|
|
| 11 |
"""Preprocess input data to match the training format"""
|
| 12 |
df = pd.DataFrame([data_dict])
|
| 13 |
|
| 14 |
+
# Numeric features - add prefix 'num__'
|
| 15 |
numeric_features = ['age', 'avg_glucose_level', 'bmi']
|
| 16 |
+
for feat in numeric_features:
|
| 17 |
+
df[f'num__{feat}'] = df[feat]
|
| 18 |
|
| 19 |
# Scale numeric features
|
| 20 |
scaler = StandardScaler()
|
| 21 |
+
for feat in numeric_features:
|
| 22 |
+
df[f'num__{feat}'] = scaler.fit_transform(df[[feat]])
|
| 23 |
|
| 24 |
+
# Create categorical features with proper prefixes
|
| 25 |
+
# gender
|
| 26 |
+
df['cat__gender_Male'] = (df['gender'] == 'Male').astype(float)
|
| 27 |
+
df['cat__gender_Other'] = 0.0 # Assuming no 'Other' gender in our interface
|
| 28 |
|
| 29 |
+
# hypertension
|
| 30 |
+
df['cat__hypertension_1'] = df['hypertension'].astype(float)
|
| 31 |
+
|
| 32 |
+
# heart_disease
|
| 33 |
+
df['cat__heart_disease_1'] = df['heart_disease'].astype(float)
|
| 34 |
+
|
| 35 |
+
# ever_married
|
| 36 |
+
df['cat__ever_married_Yes'] = (df['ever_married'] == 'Yes').astype(float)
|
| 37 |
+
|
| 38 |
+
# work_type
|
| 39 |
+
df['cat__work_type_Never_worked'] = (df['work_type'] == 'Never_worked').astype(float)
|
| 40 |
+
df['cat__work_type_Private'] = (df['work_type'] == 'Private').astype(float)
|
| 41 |
+
df['cat__work_type_Self-employed'] = (df['work_type'] == 'Self-employed').astype(float)
|
| 42 |
+
df['cat__work_type_children'] = (df['work_type'] == 'children').astype(float)
|
| 43 |
+
|
| 44 |
+
# Residence_type
|
| 45 |
+
df['cat__Residence_type_Urban'] = (df['Residence_type'] == 'Urban').astype(float)
|
| 46 |
+
|
| 47 |
+
# smoking_status
|
| 48 |
+
df['cat__smoking_status_formerly smoked'] = (df['smoking_status'] == 'formerly smoked').astype(float)
|
| 49 |
+
df['cat__smoking_status_never smoked'] = (df['smoking_status'] == 'never smoked').astype(float)
|
| 50 |
+
df['cat__smoking_status_smokes'] = (df['smoking_status'] == 'smokes').astype(float)
|
| 51 |
+
|
| 52 |
+
# Select only the transformed columns in the correct order
|
| 53 |
expected_columns = [
|
| 54 |
'num__age', 'num__avg_glucose_level', 'num__bmi',
|
| 55 |
'cat__gender_Male', 'cat__gender_Other', 'cat__hypertension_1',
|
|
|
|
| 60 |
'cat__smoking_status_never smoked', 'cat__smoking_status_smokes'
|
| 61 |
]
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
return df[expected_columns]
|
| 64 |
|
| 65 |
def predict(gender, age, hypertension, ever_married, work_type, heart_disease,
|