catninja123 committed on
Commit
ebffd3c
·
verified ·
1 Parent(s): a90e56a

Upload train_v38_2_pro.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_v38_2_pro.py +5 -4
train_v38_2_pro.py CHANGED
@@ -1,5 +1,6 @@
1
  """
2
- V38.2-PRO MODEL - Optimized Residualization + Feature Selection
 
3
  ====================================================================
4
  Improvements over V38.2-resid:
5
  1. Two-stage training: Stage 1 = feature importance, Stage 2 = selected features only
@@ -386,7 +387,7 @@ def add_residualized_features(df, train_mask, cat_cols, selected_features=None):
386
 
387
  all_feat = list(set(num_cols + cat_cols))
388
  feature_cols = list(dict.fromkeys([c for c in all_feat if c in df.columns]))
389
- for remove in [TARGET, 'student_id', 'year', 'sid_str', 'Unnamed: 0']:
390
  if remove in feature_cols:
391
  feature_cols.remove(remove)
392
 
@@ -833,7 +834,7 @@ results = {
833
  'feature_importance': [[f, float(i)] for f, i in fi_pairs[:50]] if feature_cols_final and all_fi else [],
834
  }
835
 
836
- with open(os.path.join(OUTPUT_DIR, 'v38_2_pro_results.json'), 'w') as f:
837
  json.dump(results, f, indent=2)
838
 
839
  oof_df = df_base[['student_id', 'school', 'year', TARGET]].copy()
@@ -844,7 +845,7 @@ oof_df['final_pred'] = final_blend_prob
844
  oof_df.to_csv(os.path.join(OUTPUT_DIR, 'v38_2_pro_oof_predictions.csv'), index=False)
845
 
846
  print(f"\n{'='*70}")
847
- print(f" V38.2-PRO MODEL COMPLETE")
848
  print(f" Total time: {elapsed/60:.1f} minutes")
849
  print(f" Features: {len(feature_cols_final) if feature_cols_final else 'N/A'} (selected from {len(all_feature_names)})")
850
  print(f" GroupKFold AUC: {final_auc:.4f} (V37.3: 0.8697, V38.2-resid: 0.8682)")
 
1
  """
2
+ V38.2-PRO-ABLATION: No portfolio_size
3
+ Original: V38.2-PRO MODEL - Optimized Residualization + Feature Selection
4
  ====================================================================
5
  Improvements over V38.2-resid:
6
  1. Two-stage training: Stage 1 = feature importance, Stage 2 = selected features only
 
387
 
388
  all_feat = list(set(num_cols + cat_cols))
389
  feature_cols = list(dict.fromkeys([c for c in all_feat if c in df.columns]))
390
+ for remove in [TARGET, 'student_id', 'year', 'sid_str', 'Unnamed: 0', 'portfolio_size']:
391
  if remove in feature_cols:
392
  feature_cols.remove(remove)
393
 
 
834
  'feature_importance': [[f, float(i)] for f, i in fi_pairs[:50]] if feature_cols_final and all_fi else [],
835
  }
836
 
837
+ with open(os.path.join(OUTPUT_DIR, 'v38_2_pro_ablation_results.json'), 'w') as f:
838
  json.dump(results, f, indent=2)
839
 
840
  oof_df = df_base[['student_id', 'school', 'year', TARGET]].copy()
 
845
  oof_df.to_csv(os.path.join(OUTPUT_DIR, 'v38_2_pro_oof_predictions.csv'), index=False)
846
 
847
  print(f"\n{'='*70}")
848
+ print(f" V38.2-PRO ABLATION (no portfolio_size) COMPLETE")
849
  print(f" Total time: {elapsed/60:.1f} minutes")
850
  print(f" Features: {len(feature_cols_final) if feature_cols_final else 'N/A'} (selected from {len(all_feature_names)})")
851
  print(f" GroupKFold AUC: {final_auc:.4f} (V37.3: 0.8697, V38.2-resid: 0.8682)")