chkp-talexm committed on
Commit ·
d84704c
1
Parent(s): ed3683e
update
Browse files
app.py
CHANGED
|
@@ -222,53 +222,53 @@ if uploaded_file:
|
|
| 222 |
|
| 223 |
xgb_preds = xgb.predict(input_df[xgb_training_features])
|
| 224 |
|
| 225 |
-
# 🔥 List of features RandomForest was trained with
|
| 226 |
-
rf_training_features = [
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
]
|
| 235 |
-
|
| 236 |
-
# ✅ Ensure all training features exist in `input_df`
|
| 237 |
-
for col in rf_training_features:
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
# Get intersection of trained features and current input_df columns
|
| 242 |
-
common_features = list(set(rf.feature_names_in_) & set(input_df.columns))
|
| 243 |
-
|
| 244 |
-
# Select only the matching features
|
| 245 |
-
input_df_rf = input_df[common_features]
|
| 246 |
-
|
| 247 |
-
# Predict without needing to add missing features
|
| 248 |
-
rf_preds = rf.predict(input_df_rf)
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
print("RF Model Trained Features:", rf.feature_names_in_)
|
| 252 |
-
print("Input Data Features:", input_df_rf.columns.tolist())
|
| 253 |
-
|
| 254 |
-
# Debugging: Check for missing or extra features
|
| 255 |
-
missing_features = set(rf.feature_names_in_) - set(input_df_rf.columns)
|
| 256 |
-
extra_features = set(input_df_rf.columns) - set(rf.feature_names_in_)
|
| 257 |
-
|
| 258 |
-
print("Missing Features in Input:", missing_features)
|
| 259 |
-
print("Extra Features in Input:", extra_features)
|
| 260 |
-
# ✅ Make Predictions with RandomForest
|
| 261 |
-
rf_preds = rf.predict(input_df_rf)
|
| 262 |
|
| 263 |
catboost_probs = catboost.predict_proba(input_df)[:, 1]
|
| 264 |
xgb_probs = xgb.predict_proba(input_df)[:, 1]
|
| 265 |
-
rf_probs = rf.predict_proba(input_df)[:, 1]
|
| 266 |
|
| 267 |
# Combine results
|
| 268 |
predictions_df = pd.DataFrame({
|
| 269 |
"CatBoost": catboost_preds,
|
| 270 |
"XGBoost": xgb_preds,
|
| 271 |
-
|
| 272 |
})
|
| 273 |
|
| 274 |
# Apply "at least one model predicts 1" rule
|
|
@@ -278,7 +278,7 @@ if uploaded_file:
|
|
| 278 |
probabilities_df = pd.DataFrame({
|
| 279 |
"CatBoost_Prob": catboost_probs,
|
| 280 |
"XGBoost_Prob": xgb_probs,
|
| 281 |
-
|
| 282 |
})
|
| 283 |
|
| 284 |
# Save results
|
|
|
|
| 222 |
|
| 223 |
xgb_preds = xgb.predict(input_df[xgb_training_features])
|
| 224 |
|
| 225 |
+
# # 🔥 List of features RandomForest was trained with
|
| 226 |
+
# rf_training_features = [
|
| 227 |
+
# "age_level", "gender", "product", "campaign_id", "webpage_id",
|
| 228 |
+
# "product_category_1", "product_category_2", "user_group_id",
|
| 229 |
+
# "user_depth", "city_development_index", "var_1",
|
| 230 |
+
# "click_sum_age_sex_prod", "click_count_age_sex_prod",
|
| 231 |
+
# "unique_campaigns_age_sex_prod", "unique_webpages_age_sex_prod",
|
| 232 |
+
# "click_sum_city_age_prod", "click_count_city_age_prod",
|
| 233 |
+
# "unique_campaigns_city_age_prod", "unique_webpages_city_age_prod"
|
| 234 |
+
# ]
|
| 235 |
+
#
|
| 236 |
+
# # ✅ Ensure all training features exist in `input_df`
|
| 237 |
+
# for col in rf_training_features:
|
| 238 |
+
# if col not in input_df.columns:
|
| 239 |
+
# input_df[col] = 0 # Default missing columns to 0
|
| 240 |
+
#
|
| 241 |
+
# # Get intersection of trained features and current input_df columns
|
| 242 |
+
# common_features = list(set(rf.feature_names_in_) & set(input_df.columns))
|
| 243 |
+
#
|
| 244 |
+
# # Select only the matching features
|
| 245 |
+
# input_df_rf = input_df[common_features]
|
| 246 |
+
#
|
| 247 |
+
# # Predict without needing to add missing features
|
| 248 |
+
# rf_preds = rf.predict(input_df_rf)
|
| 249 |
+
#
|
| 250 |
+
#
|
| 251 |
+
# print("RF Model Trained Features:", rf.feature_names_in_)
|
| 252 |
+
# print("Input Data Features:", input_df_rf.columns.tolist())
|
| 253 |
+
#
|
| 254 |
+
# # Debugging: Check for missing or extra features
|
| 255 |
+
# missing_features = set(rf.feature_names_in_) - set(input_df_rf.columns)
|
| 256 |
+
# extra_features = set(input_df_rf.columns) - set(rf.feature_names_in_)
|
| 257 |
+
#
|
| 258 |
+
# print("Missing Features in Input:", missing_features)
|
| 259 |
+
# print("Extra Features in Input:", extra_features)
|
| 260 |
+
# # ✅ Make Predictions with RandomForest
|
| 261 |
+
# rf_preds = rf.predict(input_df_rf)
|
| 262 |
|
| 263 |
catboost_probs = catboost.predict_proba(input_df)[:, 1]
|
| 264 |
xgb_probs = xgb.predict_proba(input_df)[:, 1]
|
| 265 |
+
#rf_probs = rf.predict_proba(input_df)[:, 1]
|
| 266 |
|
| 267 |
# Combine results
|
| 268 |
predictions_df = pd.DataFrame({
|
| 269 |
"CatBoost": catboost_preds,
|
| 270 |
"XGBoost": xgb_preds,
|
| 271 |
+
# "RandomForest": rf_preds
|
| 272 |
})
|
| 273 |
|
| 274 |
# Apply "at least one model predicts 1" rule
|
|
|
|
| 278 |
probabilities_df = pd.DataFrame({
|
| 279 |
"CatBoost_Prob": catboost_probs,
|
| 280 |
"XGBoost_Prob": xgb_probs,
|
| 281 |
+
# "RandomForest_Prob": rf_probs
|
| 282 |
})
|
| 283 |
|
| 284 |
# Save results
|