chkp-talexm committed on
Commit
d84704c
·
1 Parent(s): ed3683e
Files changed (1) hide show
  1. app.py +40 -40
app.py CHANGED
@@ -222,53 +222,53 @@ if uploaded_file:
222
 
223
  xgb_preds = xgb.predict(input_df[xgb_training_features])
224
 
225
- # 🔥 List of features RandomForest was trained with
226
- rf_training_features = [
227
- "age_level", "gender", "product", "campaign_id", "webpage_id",
228
- "product_category_1", "product_category_2", "user_group_id",
229
- "user_depth", "city_development_index", "var_1",
230
- "click_sum_age_sex_prod", "click_count_age_sex_prod",
231
- "unique_campaigns_age_sex_prod", "unique_webpages_age_sex_prod",
232
- "click_sum_city_age_prod", "click_count_city_age_prod",
233
- "unique_campaigns_city_age_prod", "unique_webpages_city_age_prod"
234
- ]
235
-
236
- # ✅ Ensure all training features exist in `input_df`
237
- for col in rf_training_features:
238
- if col not in input_df.columns:
239
- input_df[col] = 0 # Default missing columns to 0
240
-
241
- # Get intersection of trained features and current input_df columns
242
- common_features = list(set(rf.feature_names_in_) & set(input_df.columns))
243
-
244
- # Select only the matching features
245
- input_df_rf = input_df[common_features]
246
-
247
- # Predict without needing to add missing features
248
- rf_preds = rf.predict(input_df_rf)
249
-
250
-
251
- print("RF Model Trained Features:", rf.feature_names_in_)
252
- print("Input Data Features:", input_df_rf.columns.tolist())
253
-
254
- # Debugging: Check for missing or extra features
255
- missing_features = set(rf.feature_names_in_) - set(input_df_rf.columns)
256
- extra_features = set(input_df_rf.columns) - set(rf.feature_names_in_)
257
-
258
- print("Missing Features in Input:", missing_features)
259
- print("Extra Features in Input:", extra_features)
260
- # ✅ Make Predictions with RandomForest
261
- rf_preds = rf.predict(input_df_rf)
262
 
263
  catboost_probs = catboost.predict_proba(input_df)[:, 1]
264
  xgb_probs = xgb.predict_proba(input_df)[:, 1]
265
- rf_probs = rf.predict_proba(input_df)[:, 1]
266
 
267
  # Combine results
268
  predictions_df = pd.DataFrame({
269
  "CatBoost": catboost_preds,
270
  "XGBoost": xgb_preds,
271
- "RandomForest": rf_preds
272
  })
273
 
274
  # Apply "at least one model predicts 1" rule
@@ -278,7 +278,7 @@ if uploaded_file:
278
  probabilities_df = pd.DataFrame({
279
  "CatBoost_Prob": catboost_probs,
280
  "XGBoost_Prob": xgb_probs,
281
- "RandomForest_Prob": rf_probs
282
  })
283
 
284
  # Save results
 
222
 
223
  xgb_preds = xgb.predict(input_df[xgb_training_features])
224
 
225
+ # # 🔥 List of features RandomForest was trained with
226
+ # rf_training_features = [
227
+ # "age_level", "gender", "product", "campaign_id", "webpage_id",
228
+ # "product_category_1", "product_category_2", "user_group_id",
229
+ # "user_depth", "city_development_index", "var_1",
230
+ # "click_sum_age_sex_prod", "click_count_age_sex_prod",
231
+ # "unique_campaigns_age_sex_prod", "unique_webpages_age_sex_prod",
232
+ # "click_sum_city_age_prod", "click_count_city_age_prod",
233
+ # "unique_campaigns_city_age_prod", "unique_webpages_city_age_prod"
234
+ # ]
235
+ #
236
+ # # ✅ Ensure all training features exist in `input_df`
237
+ # for col in rf_training_features:
238
+ # if col not in input_df.columns:
239
+ # input_df[col] = 0 # Default missing columns to 0
240
+ #
241
+ # # Get intersection of trained features and current input_df columns
242
+ # common_features = list(set(rf.feature_names_in_) & set(input_df.columns))
243
+ #
244
+ # # Select only the matching features
245
+ # input_df_rf = input_df[common_features]
246
+ #
247
+ # # Predict without needing to add missing features
248
+ # rf_preds = rf.predict(input_df_rf)
249
+ #
250
+ #
251
+ # print("RF Model Trained Features:", rf.feature_names_in_)
252
+ # print("Input Data Features:", input_df_rf.columns.tolist())
253
+ #
254
+ # # Debugging: Check for missing or extra features
255
+ # missing_features = set(rf.feature_names_in_) - set(input_df_rf.columns)
256
+ # extra_features = set(input_df_rf.columns) - set(rf.feature_names_in_)
257
+ #
258
+ # print("Missing Features in Input:", missing_features)
259
+ # print("Extra Features in Input:", extra_features)
260
+ # # ✅ Make Predictions with RandomForest
261
+ # rf_preds = rf.predict(input_df_rf)
262
 
263
  catboost_probs = catboost.predict_proba(input_df)[:, 1]
264
  xgb_probs = xgb.predict_proba(input_df)[:, 1]
265
+ #rf_probs = rf.predict_proba(input_df)[:, 1]
266
 
267
  # Combine results
268
  predictions_df = pd.DataFrame({
269
  "CatBoost": catboost_preds,
270
  "XGBoost": xgb_preds,
271
+ # "RandomForest": rf_preds
272
  })
273
 
274
  # Apply "at least one model predicts 1" rule
 
278
  probabilities_df = pd.DataFrame({
279
  "CatBoost_Prob": catboost_probs,
280
  "XGBoost_Prob": xgb_probs,
281
+ # "RandomForest_Prob": rf_probs
282
  })
283
 
284
  # Save results