Abdourakib commited on
Commit
f480d0c
·
1 Parent(s): 4970d33

fix: healthcare as default demo, stronger synthetic signal, regenerate demos

Browse files
app.py CHANGED
@@ -1992,7 +1992,9 @@ with st.sidebar:
1992
  st.warning("datasets/titanic_demo_synth.csv or datasets/titanic.csv not found.")
1993
  if st.button("Healthcare", use_container_width=True):
1994
  st.session_state["demo_dataset"] = "healthcare"
1995
- p = Path("datasets/sample_healthcare_classification.csv")
 
 
1996
  if p.exists():
1997
  st.session_state.df = pd.read_csv(p)
1998
  st.session_state.filename = p.name
 
1992
  st.warning("datasets/titanic_demo_synth.csv or datasets/titanic.csv not found.")
1993
  if st.button("Healthcare", use_container_width=True):
1994
  st.session_state["demo_dataset"] = "healthcare"
1995
+ p = Path("datasets/healthcare_demo_synth.csv")
1996
+ if not p.exists():
1997
+ p = Path("datasets/sample_healthcare_classification.csv")
1998
  if p.exists():
1999
  st.session_state.df = pd.read_csv(p)
2000
  st.session_state.filename = p.name
demo_result.json CHANGED
The diff for this file is too large to render. See raw diff
 
demo_result_diabetes.json CHANGED
@@ -322,7 +322,7 @@
322
  "accuracy": 0.7415730337078652,
323
  "f1": 0.7409192020410919,
324
  "roc_auc": 0.8262626262626263,
325
- "train_time_s": 0.006,
326
  "train_score": 0.8554185927067283,
327
  "test_score": 0.8262626262626263,
328
  "generalization_gap": 0.02915596644410201,
@@ -353,7 +353,7 @@
353
  "CV Train Mean": 0.9716,
354
  "CV Overfit": "Yes",
355
  "Overfit": "No",
356
- "Train Time(s)": 0.12
357
  }
358
  ],
359
  "feature_importances": {
@@ -374,7 +374,7 @@
374
  " Training Logistic Regression...",
375
  " Logistic Regression: acc=0.742, f1=0.741, auc=0.826 [0.01s]",
376
  " Training Random Forest...",
377
- " Random Forest: acc=0.719, f1=0.717, auc=0.816 [0.12s]",
378
  "\nBest model: Logistic Regression (roc_auc=0.8263)",
379
  "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
380
  "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
@@ -391,7 +391,7 @@
391
  "accuracy": 0.7415730337078652,
392
  "f1": 0.7409192020410919,
393
  "roc_auc": 0.8262626262626263,
394
- "train_time_s": 0.006,
395
  "train_score": 0.8554185927067283,
396
  "test_score": 0.8262626262626263,
397
  "generalization_gap": 0.02915596644410201,
@@ -425,7 +425,7 @@
425
  "accuracy": 0.7191011235955056,
426
  "f1": 0.716527021635327,
427
  "roc_auc": 0.8161616161616162,
428
- "train_time_s": 0.123,
429
  "train_score": 0.9577555213148433,
430
  "test_score": 0.8161616161616162,
431
  "generalization_gap": 0.14159390515322712,
@@ -576,7 +576,7 @@
576
  "accuracy": 0.7415730337078652,
577
  "f1": 0.7409192020410919,
578
  "roc_auc": 0.8262626262626263,
579
- "train_time_s": 0.006,
580
  "train_score": 0.8554185927067283,
581
  "test_score": 0.8262626262626263,
582
  "generalization_gap": 0.02915596644410201,
@@ -605,7 +605,7 @@
605
  "CV Train Mean": 0.9716,
606
  "CV Overfit": "Yes",
607
  "Overfit": "No",
608
- "Train Time(s)": 0.12
609
  }
610
  ],
611
  "feature_importances": {
@@ -1183,7 +1183,7 @@
1183
  "accuracy": 0.7415730337078652,
1184
  "f1": 0.7409192020410919,
1185
  "roc_auc": 0.8262626262626263,
1186
- "train_time_s": 0.006,
1187
  "train_score": 0.8554185927067283,
1188
  "test_score": 0.8262626262626263,
1189
  "generalization_gap": 0.02915596644410201,
@@ -1214,7 +1214,7 @@
1214
  "CV Train Mean": 0.9716,
1215
  "CV Overfit": "Yes",
1216
  "Overfit": "No",
1217
- "Train Time(s)": 0.12
1218
  }
1219
  ],
1220
  "feature_importances": {
@@ -1235,7 +1235,7 @@
1235
  " Training Logistic Regression...",
1236
  " Logistic Regression: acc=0.742, f1=0.741, auc=0.826 [0.01s]",
1237
  " Training Random Forest...",
1238
- " Random Forest: acc=0.719, f1=0.717, auc=0.816 [0.12s]",
1239
  "\nBest model: Logistic Regression (roc_auc=0.8263)",
1240
  "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
1241
  "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
@@ -1252,7 +1252,7 @@
1252
  "accuracy": 0.7415730337078652,
1253
  "f1": 0.7409192020410919,
1254
  "roc_auc": 0.8262626262626263,
1255
- "train_time_s": 0.006,
1256
  "train_score": 0.8554185927067283,
1257
  "test_score": 0.8262626262626263,
1258
  "generalization_gap": 0.02915596644410201,
@@ -1286,7 +1286,7 @@
1286
  "accuracy": 0.7191011235955056,
1287
  "f1": 0.716527021635327,
1288
  "roc_auc": 0.8161616161616162,
1289
- "train_time_s": 0.123,
1290
  "train_score": 0.9577555213148433,
1291
  "test_score": 0.8161616161616162,
1292
  "generalization_gap": 0.14159390515322712,
@@ -1781,7 +1781,7 @@
1781
  "accuracy": 0.7415730337078652,
1782
  "f1": 0.7409192020410919,
1783
  "roc_auc": 0.8262626262626263,
1784
- "train_time_s": 0.006,
1785
  "train_score": 0.8554185927067283,
1786
  "test_score": 0.8262626262626263,
1787
  "generalization_gap": 0.02915596644410201,
@@ -1812,7 +1812,7 @@
1812
  "CV Train Mean": 0.9716,
1813
  "CV Overfit": "Yes",
1814
  "Overfit": "No",
1815
- "Train Time(s)": 0.12
1816
  }
1817
  ],
1818
  "feature_importances": {
@@ -1833,7 +1833,7 @@
1833
  " Training Logistic Regression...",
1834
  " Logistic Regression: acc=0.742, f1=0.741, auc=0.826 [0.01s]",
1835
  " Training Random Forest...",
1836
- " Random Forest: acc=0.719, f1=0.717, auc=0.816 [0.12s]",
1837
  "\nBest model: Logistic Regression (roc_auc=0.8263)",
1838
  "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
1839
  "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
@@ -1850,7 +1850,7 @@
1850
  "accuracy": 0.7415730337078652,
1851
  "f1": 0.7409192020410919,
1852
  "roc_auc": 0.8262626262626263,
1853
- "train_time_s": 0.006,
1854
  "train_score": 0.8554185927067283,
1855
  "test_score": 0.8262626262626263,
1856
  "generalization_gap": 0.02915596644410201,
@@ -1884,7 +1884,7 @@
1884
  "accuracy": 0.7191011235955056,
1885
  "f1": 0.716527021635327,
1886
  "roc_auc": 0.8161616161616162,
1887
- "train_time_s": 0.123,
1888
  "train_score": 0.9577555213148433,
1889
  "test_score": 0.8161616161616162,
1890
  "generalization_gap": 0.14159390515322712,
@@ -2035,7 +2035,7 @@
2035
  "accuracy": 0.7415730337078652,
2036
  "f1": 0.7409192020410919,
2037
  "roc_auc": 0.8262626262626263,
2038
- "train_time_s": 0.006,
2039
  "train_score": 0.8554185927067283,
2040
  "test_score": 0.8262626262626263,
2041
  "generalization_gap": 0.02915596644410201,
@@ -2064,7 +2064,7 @@
2064
  "CV Train Mean": 0.9716,
2065
  "CV Overfit": "Yes",
2066
  "Overfit": "No",
2067
- "Train Time(s)": 0.12
2068
  }
2069
  ],
2070
  "feature_importances": {
 
322
  "accuracy": 0.7415730337078652,
323
  "f1": 0.7409192020410919,
324
  "roc_auc": 0.8262626262626263,
325
+ "train_time_s": 0.012,
326
  "train_score": 0.8554185927067283,
327
  "test_score": 0.8262626262626263,
328
  "generalization_gap": 0.02915596644410201,
 
353
  "CV Train Mean": 0.9716,
354
  "CV Overfit": "Yes",
355
  "Overfit": "No",
356
+ "Train Time(s)": 0.34
357
  }
358
  ],
359
  "feature_importances": {
 
374
  " Training Logistic Regression...",
375
  " Logistic Regression: acc=0.742, f1=0.741, auc=0.826 [0.01s]",
376
  " Training Random Forest...",
377
+ " Random Forest: acc=0.719, f1=0.717, auc=0.816 [0.34s]",
378
  "\nBest model: Logistic Regression (roc_auc=0.8263)",
379
  "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
380
  "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
 
391
  "accuracy": 0.7415730337078652,
392
  "f1": 0.7409192020410919,
393
  "roc_auc": 0.8262626262626263,
394
+ "train_time_s": 0.012,
395
  "train_score": 0.8554185927067283,
396
  "test_score": 0.8262626262626263,
397
  "generalization_gap": 0.02915596644410201,
 
425
  "accuracy": 0.7191011235955056,
426
  "f1": 0.716527021635327,
427
  "roc_auc": 0.8161616161616162,
428
+ "train_time_s": 0.335,
429
  "train_score": 0.9577555213148433,
430
  "test_score": 0.8161616161616162,
431
  "generalization_gap": 0.14159390515322712,
 
576
  "accuracy": 0.7415730337078652,
577
  "f1": 0.7409192020410919,
578
  "roc_auc": 0.8262626262626263,
579
+ "train_time_s": 0.012,
580
  "train_score": 0.8554185927067283,
581
  "test_score": 0.8262626262626263,
582
  "generalization_gap": 0.02915596644410201,
 
605
  "CV Train Mean": 0.9716,
606
  "CV Overfit": "Yes",
607
  "Overfit": "No",
608
+ "Train Time(s)": 0.34
609
  }
610
  ],
611
  "feature_importances": {
 
1183
  "accuracy": 0.7415730337078652,
1184
  "f1": 0.7409192020410919,
1185
  "roc_auc": 0.8262626262626263,
1186
+ "train_time_s": 0.012,
1187
  "train_score": 0.8554185927067283,
1188
  "test_score": 0.8262626262626263,
1189
  "generalization_gap": 0.02915596644410201,
 
1214
  "CV Train Mean": 0.9716,
1215
  "CV Overfit": "Yes",
1216
  "Overfit": "No",
1217
+ "Train Time(s)": 0.34
1218
  }
1219
  ],
1220
  "feature_importances": {
 
1235
  " Training Logistic Regression...",
1236
  " Logistic Regression: acc=0.742, f1=0.741, auc=0.826 [0.01s]",
1237
  " Training Random Forest...",
1238
+ " Random Forest: acc=0.719, f1=0.717, auc=0.816 [0.34s]",
1239
  "\nBest model: Logistic Regression (roc_auc=0.8263)",
1240
  "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
1241
  "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
 
1252
  "accuracy": 0.7415730337078652,
1253
  "f1": 0.7409192020410919,
1254
  "roc_auc": 0.8262626262626263,
1255
+ "train_time_s": 0.012,
1256
  "train_score": 0.8554185927067283,
1257
  "test_score": 0.8262626262626263,
1258
  "generalization_gap": 0.02915596644410201,
 
1286
  "accuracy": 0.7191011235955056,
1287
  "f1": 0.716527021635327,
1288
  "roc_auc": 0.8161616161616162,
1289
+ "train_time_s": 0.335,
1290
  "train_score": 0.9577555213148433,
1291
  "test_score": 0.8161616161616162,
1292
  "generalization_gap": 0.14159390515322712,
 
1781
  "accuracy": 0.7415730337078652,
1782
  "f1": 0.7409192020410919,
1783
  "roc_auc": 0.8262626262626263,
1784
+ "train_time_s": 0.012,
1785
  "train_score": 0.8554185927067283,
1786
  "test_score": 0.8262626262626263,
1787
  "generalization_gap": 0.02915596644410201,
 
1812
  "CV Train Mean": 0.9716,
1813
  "CV Overfit": "Yes",
1814
  "Overfit": "No",
1815
+ "Train Time(s)": 0.34
1816
  }
1817
  ],
1818
  "feature_importances": {
 
1833
  " Training Logistic Regression...",
1834
  " Logistic Regression: acc=0.742, f1=0.741, auc=0.826 [0.01s]",
1835
  " Training Random Forest...",
1836
+ " Random Forest: acc=0.719, f1=0.717, auc=0.816 [0.34s]",
1837
  "\nBest model: Logistic Regression (roc_auc=0.8263)",
1838
  "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
1839
  "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
 
1850
  "accuracy": 0.7415730337078652,
1851
  "f1": 0.7409192020410919,
1852
  "roc_auc": 0.8262626262626263,
1853
+ "train_time_s": 0.012,
1854
  "train_score": 0.8554185927067283,
1855
  "test_score": 0.8262626262626263,
1856
  "generalization_gap": 0.02915596644410201,
 
1884
  "accuracy": 0.7191011235955056,
1885
  "f1": 0.716527021635327,
1886
  "roc_auc": 0.8161616161616162,
1887
+ "train_time_s": 0.335,
1888
  "train_score": 0.9577555213148433,
1889
  "test_score": 0.8161616161616162,
1890
  "generalization_gap": 0.14159390515322712,
 
2035
  "accuracy": 0.7415730337078652,
2036
  "f1": 0.7409192020410919,
2037
  "roc_auc": 0.8262626262626263,
2038
+ "train_time_s": 0.012,
2039
  "train_score": 0.8554185927067283,
2040
  "test_score": 0.8262626262626263,
2041
  "generalization_gap": 0.02915596644410201,
 
2064
  "CV Train Mean": 0.9716,
2065
  "CV Overfit": "Yes",
2066
  "Overfit": "No",
2067
+ "Train Time(s)": 0.34
2068
  }
2069
  ],
2070
  "feature_importances": {
demo_result_healthcare.json CHANGED
The diff for this file is too large to render. See raw diff
 
demo_result_housing.json CHANGED
@@ -276,7 +276,7 @@
276
  "rmse": 19244.30726602296,
277
  "mae": 15054.427702432535,
278
  "r2": 0.9704273023733059,
279
- "train_time_s": 0.001,
280
  "train_score": 0.9756910416636839,
281
  "test_score": 0.9704273023733059,
282
  "generalization_gap": 0.0052637392903780444,
@@ -307,7 +307,7 @@
307
  "CV Train Mean": 0.9671,
308
  "CV Overfit": "No",
309
  "Overfit": "No",
310
- "Train Time(s)": 0.1
311
  }
312
  ],
313
  "feature_importances": {
@@ -330,7 +330,7 @@
330
  " Training Linear Regression...",
331
  " Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43 [0.00s]",
332
  " Training Random Forest...",
333
- " Random Forest: r2=0.907, rmse=34041.31, mae=27600.14 [0.10s]",
334
  "\nBest model: Linear Regression (r2=0.9704)",
335
  "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
336
  ],
@@ -344,7 +344,7 @@
344
  "rmse": 19244.30726602296,
345
  "mae": 15054.427702432535,
346
  "r2": 0.9704273023733059,
347
- "train_time_s": 0.001,
348
  "train_score": 0.9756910416636839,
349
  "test_score": 0.9704273023733059,
350
  "generalization_gap": 0.0052637392903780444,
@@ -375,10 +375,10 @@
375
  {
376
  "name": "Random Forest",
377
  "metrics": {
378
- "rmse": 34041.31037179931,
379
  "mae": 27600.13834049763,
380
  "r2": 0.9074665155441051,
381
- "train_time_s": 0.099,
382
  "train_score": 0.9635729463513367,
383
  "test_score": 0.9074665155441051,
384
  "generalization_gap": 0.056106430807231655,
@@ -388,14 +388,14 @@
388
  "generalization_gap": 0.056106430807231655,
389
  "overfit": false,
390
  "cv_scores": [
391
- 0.9357359945589842,
392
- 0.9382614289622774,
393
- 0.9301892382602969,
394
  0.9268878476820184,
395
- 0.9318488429287123
396
  ],
397
  "cv_mean": 0.9325846704784577,
398
- "cv_std": 0.004022182442727396,
399
  "cv_train_scores": [
400
  0.967156970877803,
401
  0.967581448731281,
@@ -438,7 +438,7 @@
438
  "rmse": 19244.30726602296,
439
  "mae": 15054.427702432535,
440
  "r2": 0.9704273023733059,
441
- "train_time_s": 0.001,
442
  "train_score": 0.9756910416636839,
443
  "test_score": 0.9704273023733059,
444
  "generalization_gap": 0.0052637392903780444,
@@ -467,7 +467,7 @@
467
  "CV Train Mean": 0.9671,
468
  "CV Overfit": "No",
469
  "Overfit": "No",
470
- "Train Time(s)": 0.1
471
  }
472
  ],
473
  "feature_importances": {
@@ -910,7 +910,7 @@
910
  "rmse": 19244.30726602296,
911
  "mae": 15054.427702432535,
912
  "r2": 0.9704273023733059,
913
- "train_time_s": 0.001,
914
  "train_score": 0.9756910416636839,
915
  "test_score": 0.9704273023733059,
916
  "generalization_gap": 0.0052637392903780444,
@@ -941,7 +941,7 @@
941
  "CV Train Mean": 0.9671,
942
  "CV Overfit": "No",
943
  "Overfit": "No",
944
- "Train Time(s)": 0.1
945
  }
946
  ],
947
  "feature_importances": {
@@ -964,7 +964,7 @@
964
  " Training Linear Regression...",
965
  " Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43 [0.00s]",
966
  " Training Random Forest...",
967
- " Random Forest: r2=0.907, rmse=34041.31, mae=27600.14 [0.10s]",
968
  "\nBest model: Linear Regression (r2=0.9704)",
969
  "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
970
  ],
@@ -978,7 +978,7 @@
978
  "rmse": 19244.30726602296,
979
  "mae": 15054.427702432535,
980
  "r2": 0.9704273023733059,
981
- "train_time_s": 0.001,
982
  "train_score": 0.9756910416636839,
983
  "test_score": 0.9704273023733059,
984
  "generalization_gap": 0.0052637392903780444,
@@ -1009,24 +1009,24 @@
1009
  {
1010
  "name": "Random Forest",
1011
  "metrics": {
1012
- "rmse": 34041.31037179931,
1013
  "mae": 27600.13834049763,
1014
  "r2": 0.9074665155441051,
1015
- "train_time_s": 0.099,
1016
  "train_score": 0.9635729463513367,
1017
  "test_score": 0.9074665155441051,
1018
  "generalization_gap": 0.056106430807231655,
1019
  "overfit": false
1020
  },
1021
  "cv_scores": [
1022
- 0.9357359945589842,
1023
- 0.9382614289622774,
1024
- 0.9301892382602969,
1025
  0.9268878476820184,
1026
- 0.9318488429287123
1027
  ],
1028
  "cv_mean": 0.9325846704784577,
1029
- "cv_std": 0.004022182442727396,
1030
  "cv_train_scores": [
1031
  0.967156970877803,
1032
  0.967581448731281,
@@ -1370,7 +1370,7 @@
1370
  "rmse": 19244.30726602296,
1371
  "mae": 15054.427702432535,
1372
  "r2": 0.9704273023733059,
1373
- "train_time_s": 0.001,
1374
  "train_score": 0.9756910416636839,
1375
  "test_score": 0.9704273023733059,
1376
  "generalization_gap": 0.0052637392903780444,
@@ -1401,7 +1401,7 @@
1401
  "CV Train Mean": 0.9671,
1402
  "CV Overfit": "No",
1403
  "Overfit": "No",
1404
- "Train Time(s)": 0.1
1405
  }
1406
  ],
1407
  "feature_importances": {
@@ -1424,7 +1424,7 @@
1424
  " Training Linear Regression...",
1425
  " Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43 [0.00s]",
1426
  " Training Random Forest...",
1427
- " Random Forest: r2=0.907, rmse=34041.31, mae=27600.14 [0.10s]",
1428
  "\nBest model: Linear Regression (r2=0.9704)",
1429
  "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
1430
  ],
@@ -1438,7 +1438,7 @@
1438
  "rmse": 19244.30726602296,
1439
  "mae": 15054.427702432535,
1440
  "r2": 0.9704273023733059,
1441
- "train_time_s": 0.001,
1442
  "train_score": 0.9756910416636839,
1443
  "test_score": 0.9704273023733059,
1444
  "generalization_gap": 0.0052637392903780444,
@@ -1469,10 +1469,10 @@
1469
  {
1470
  "name": "Random Forest",
1471
  "metrics": {
1472
- "rmse": 34041.31037179931,
1473
  "mae": 27600.13834049763,
1474
  "r2": 0.9074665155441051,
1475
- "train_time_s": 0.099,
1476
  "train_score": 0.9635729463513367,
1477
  "test_score": 0.9074665155441051,
1478
  "generalization_gap": 0.056106430807231655,
@@ -1482,14 +1482,14 @@
1482
  "generalization_gap": 0.056106430807231655,
1483
  "overfit": false,
1484
  "cv_scores": [
1485
- 0.9357359945589842,
1486
- 0.9382614289622774,
1487
- 0.9301892382602969,
1488
  0.9268878476820184,
1489
- 0.9318488429287123
1490
  ],
1491
  "cv_mean": 0.9325846704784577,
1492
- "cv_std": 0.004022182442727396,
1493
  "cv_train_scores": [
1494
  0.967156970877803,
1495
  0.967581448731281,
@@ -1532,7 +1532,7 @@
1532
  "rmse": 19244.30726602296,
1533
  "mae": 15054.427702432535,
1534
  "r2": 0.9704273023733059,
1535
- "train_time_s": 0.001,
1536
  "train_score": 0.9756910416636839,
1537
  "test_score": 0.9704273023733059,
1538
  "generalization_gap": 0.0052637392903780444,
@@ -1561,7 +1561,7 @@
1561
  "CV Train Mean": 0.9671,
1562
  "CV Overfit": "No",
1563
  "Overfit": "No",
1564
- "Train Time(s)": 0.1
1565
  }
1566
  ],
1567
  "feature_importances": {
 
276
  "rmse": 19244.30726602296,
277
  "mae": 15054.427702432535,
278
  "r2": 0.9704273023733059,
279
+ "train_time_s": 0.002,
280
  "train_score": 0.9756910416636839,
281
  "test_score": 0.9704273023733059,
282
  "generalization_gap": 0.0052637392903780444,
 
307
  "CV Train Mean": 0.9671,
308
  "CV Overfit": "No",
309
  "Overfit": "No",
310
+ "Train Time(s)": 0.27
311
  }
312
  ],
313
  "feature_importances": {
 
330
  " Training Linear Regression...",
331
  " Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43 [0.00s]",
332
  " Training Random Forest...",
333
+ " Random Forest: r2=0.907, rmse=34041.31, mae=27600.14 [0.27s]",
334
  "\nBest model: Linear Regression (r2=0.9704)",
335
  "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
336
  ],
 
344
  "rmse": 19244.30726602296,
345
  "mae": 15054.427702432535,
346
  "r2": 0.9704273023733059,
347
+ "train_time_s": 0.002,
348
  "train_score": 0.9756910416636839,
349
  "test_score": 0.9704273023733059,
350
  "generalization_gap": 0.0052637392903780444,
 
375
  {
376
  "name": "Random Forest",
377
  "metrics": {
378
+ "rmse": 34041.3103717993,
379
  "mae": 27600.13834049763,
380
  "r2": 0.9074665155441051,
381
+ "train_time_s": 0.267,
382
  "train_score": 0.9635729463513367,
383
  "test_score": 0.9074665155441051,
384
  "generalization_gap": 0.056106430807231655,
 
388
  "generalization_gap": 0.056106430807231655,
389
  "overfit": false,
390
  "cv_scores": [
391
+ 0.9357359945589843,
392
+ 0.9382614289622773,
393
+ 0.9301892382602968,
394
  0.9268878476820184,
395
+ 0.9318488429287124
396
  ],
397
  "cv_mean": 0.9325846704784577,
398
+ "cv_std": 0.004022182442727391,
399
  "cv_train_scores": [
400
  0.967156970877803,
401
  0.967581448731281,
 
438
  "rmse": 19244.30726602296,
439
  "mae": 15054.427702432535,
440
  "r2": 0.9704273023733059,
441
+ "train_time_s": 0.002,
442
  "train_score": 0.9756910416636839,
443
  "test_score": 0.9704273023733059,
444
  "generalization_gap": 0.0052637392903780444,
 
467
  "CV Train Mean": 0.9671,
468
  "CV Overfit": "No",
469
  "Overfit": "No",
470
+ "Train Time(s)": 0.27
471
  }
472
  ],
473
  "feature_importances": {
 
910
  "rmse": 19244.30726602296,
911
  "mae": 15054.427702432535,
912
  "r2": 0.9704273023733059,
913
+ "train_time_s": 0.002,
914
  "train_score": 0.9756910416636839,
915
  "test_score": 0.9704273023733059,
916
  "generalization_gap": 0.0052637392903780444,
 
941
  "CV Train Mean": 0.9671,
942
  "CV Overfit": "No",
943
  "Overfit": "No",
944
+ "Train Time(s)": 0.27
945
  }
946
  ],
947
  "feature_importances": {
 
964
  " Training Linear Regression...",
965
  " Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43 [0.00s]",
966
  " Training Random Forest...",
967
+ " Random Forest: r2=0.907, rmse=34041.31, mae=27600.14 [0.27s]",
968
  "\nBest model: Linear Regression (r2=0.9704)",
969
  "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
970
  ],
 
978
  "rmse": 19244.30726602296,
979
  "mae": 15054.427702432535,
980
  "r2": 0.9704273023733059,
981
+ "train_time_s": 0.002,
982
  "train_score": 0.9756910416636839,
983
  "test_score": 0.9704273023733059,
984
  "generalization_gap": 0.0052637392903780444,
 
1009
  {
1010
  "name": "Random Forest",
1011
  "metrics": {
1012
+ "rmse": 34041.3103717993,
1013
  "mae": 27600.13834049763,
1014
  "r2": 0.9074665155441051,
1015
+ "train_time_s": 0.267,
1016
  "train_score": 0.9635729463513367,
1017
  "test_score": 0.9074665155441051,
1018
  "generalization_gap": 0.056106430807231655,
1019
  "overfit": false
1020
  },
1021
  "cv_scores": [
1022
+ 0.9357359945589843,
1023
+ 0.9382614289622773,
1024
+ 0.9301892382602968,
1025
  0.9268878476820184,
1026
+ 0.9318488429287124
1027
  ],
1028
  "cv_mean": 0.9325846704784577,
1029
+ "cv_std": 0.004022182442727391,
1030
  "cv_train_scores": [
1031
  0.967156970877803,
1032
  0.967581448731281,
 
1370
  "rmse": 19244.30726602296,
1371
  "mae": 15054.427702432535,
1372
  "r2": 0.9704273023733059,
1373
+ "train_time_s": 0.002,
1374
  "train_score": 0.9756910416636839,
1375
  "test_score": 0.9704273023733059,
1376
  "generalization_gap": 0.0052637392903780444,
 
1401
  "CV Train Mean": 0.9671,
1402
  "CV Overfit": "No",
1403
  "Overfit": "No",
1404
+ "Train Time(s)": 0.27
1405
  }
1406
  ],
1407
  "feature_importances": {
 
1424
  " Training Linear Regression...",
1425
  " Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43 [0.00s]",
1426
  " Training Random Forest...",
1427
+ " Random Forest: r2=0.907, rmse=34041.31, mae=27600.14 [0.27s]",
1428
  "\nBest model: Linear Regression (r2=0.9704)",
1429
  "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
1430
  ],
 
1438
  "rmse": 19244.30726602296,
1439
  "mae": 15054.427702432535,
1440
  "r2": 0.9704273023733059,
1441
+ "train_time_s": 0.002,
1442
  "train_score": 0.9756910416636839,
1443
  "test_score": 0.9704273023733059,
1444
  "generalization_gap": 0.0052637392903780444,
 
1469
  {
1470
  "name": "Random Forest",
1471
  "metrics": {
1472
+ "rmse": 34041.3103717993,
1473
  "mae": 27600.13834049763,
1474
  "r2": 0.9074665155441051,
1475
+ "train_time_s": 0.267,
1476
  "train_score": 0.9635729463513367,
1477
  "test_score": 0.9074665155441051,
1478
  "generalization_gap": 0.056106430807231655,
 
1482
  "generalization_gap": 0.056106430807231655,
1483
  "overfit": false,
1484
  "cv_scores": [
1485
+ 0.9357359945589843,
1486
+ 0.9382614289622773,
1487
+ 0.9301892382602968,
1488
  0.9268878476820184,
1489
+ 0.9318488429287124
1490
  ],
1491
  "cv_mean": 0.9325846704784577,
1492
+ "cv_std": 0.004022182442727391,
1493
  "cv_train_scores": [
1494
  0.967156970877803,
1495
  0.967581448731281,
 
1532
  "rmse": 19244.30726602296,
1533
  "mae": 15054.427702432535,
1534
  "r2": 0.9704273023733059,
1535
+ "train_time_s": 0.002,
1536
  "train_score": 0.9756910416636839,
1537
  "test_score": 0.9704273023733059,
1538
  "generalization_gap": 0.0052637392903780444,
 
1561
  "CV Train Mean": 0.9671,
1562
  "CV Overfit": "No",
1563
  "Overfit": "No",
1564
+ "Train Time(s)": 0.27
1565
  }
1566
  ],
1567
  "feature_importances": {
demo_result_titanic.json CHANGED
@@ -170,10 +170,10 @@
170
  ],
171
  "n_classes": 2,
172
  "class_distribution": {
173
- "0": 401,
174
- "1": 399
175
  },
176
- "imbalance_ratio": 1.01
177
  },
178
  "quality_flags": [],
179
  "recommendations": [
@@ -224,7 +224,8 @@
224
  "Categorical columns (2): mode imputation + one-hot encoding.",
225
  "Target encoded with LabelEncoder. Classes: ['0', '1']",
226
  "Train/test split: 640 train rows, 160 test rows (20% test).",
227
- "Class imbalance ratio (majority/minority): 1.01.",
 
228
  "Final feature matrix: 10 features."
229
  ],
230
  "num_cols": [
@@ -240,19 +241,19 @@
240
  ],
241
  "n_classes": 2,
242
  "log_transformed_cols": [],
243
- "smote_applied": false,
244
- "smote_log": ""
245
  },
246
  "train": {
247
- "best_name": "Logistic Regression",
248
  "best_metrics": {
249
- "accuracy": 0.50625,
250
- "f1": 0.5060763549685436,
251
- "roc_auc": 0.51390625,
252
- "train_time_s": 0.003,
253
- "train_score": 0.5505620172071993,
254
- "test_score": 0.51390625,
255
- "generalization_gap": 0.03665576720719932,
256
  "overfit": false
257
  },
258
  "metric_name": "roc_auc",
@@ -260,296 +261,292 @@
260
  "comparison_df": [
261
  {
262
  "Model": "Logistic Regression",
263
- "Train Score": 0.5506,
264
- "Test Score": 0.5139,
265
- "Gap": 0.0367,
266
- "CV Mean": 0.5025,
267
- "CV Std": 0.0343,
268
- "CV Train Mean": 0.5568,
269
  "CV Overfit": "No",
270
  "Overfit": "No",
271
- "Train Time(s)": 0.0
272
  },
273
  {
274
  "Model": "Random Forest",
275
- "Train Score": 0.8889,
276
- "Test Score": 0.5131,
277
- "Gap": 0.3758,
278
- "CV Mean": 0.4587,
279
- "CV Std": 0.023,
280
- "CV Train Mean": 0.9169,
281
- "CV Overfit": "Yes",
282
- "Overfit": "Yes",
283
- "Train Time(s)": 0.13
284
  }
285
  ],
286
  "feature_importances": {
287
- "embarked_C": 0.19427991337835848,
288
- "sex_female": 0.1795206881097836,
289
- "sex_male": 0.17217897553699751,
290
- "embarked_Q": 0.16121663300959552,
291
- "pclass": 0.10709936251807319,
292
- "sibsp": 0.06916449512778146,
293
- "embarked_S": 0.04040499294154858,
294
- "age": 0.032805348276750294,
295
- "parch": 0.02489251272846473,
296
- "fare": 0.01843707837264646
297
  },
298
  "training_log": [
299
  "Training 2 models for classification task.",
300
  " Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
301
  " Training Logistic Regression...",
302
- " Logistic Regression: acc=0.506, f1=0.506, auc=0.514 [0.00s]",
303
  " Training Random Forest...",
304
- " Random Forest: acc=0.531, f1=0.531, auc=0.513 [0.13s]",
305
- "\nBest model: Logistic Regression (roc_auc=0.5139)",
306
- "Overfitting warnings: Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758; Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587",
307
- "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139"
308
- ],
309
- "overfitting_warnings": [
310
- "Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758",
311
- "Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587"
312
  ],
313
- "cv_summary": "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139",
 
314
  "cv_folds_used": 5,
315
  "results": [
316
  {
317
  "name": "Logistic Regression",
318
  "metrics": {
319
- "accuracy": 0.50625,
320
- "f1": 0.5060763549685436,
321
- "roc_auc": 0.51390625,
322
- "train_time_s": 0.003,
323
- "train_score": 0.5505620172071993,
324
- "test_score": 0.51390625,
325
- "generalization_gap": 0.03665576720719932,
326
  "overfit": false
327
  },
328
- "train_score": 0.5505620172071993,
329
- "generalization_gap": 0.03665576720719932,
330
  "overfit": false,
331
  "cv_scores": [
332
- 0.5584859584859584,
333
- 0.467529296875,
334
- 0.525634765625,
335
- 0.48388671875,
336
- 0.476806640625
337
  ],
338
- "cv_mean": 0.5024686760721917,
339
- "cv_std": 0.034348778883955274,
340
  "cv_train_scores": [
341
- 0.541595458984375,
342
- 0.5711451895933471,
343
- 0.5545586327916381,
344
- 0.5557030594338903,
345
- 0.5610894941634241
346
  ],
347
- "cv_train_mean": 0.5568183669933349,
348
  "cv_overfit": false
349
  },
350
  {
351
  "name": "Random Forest",
352
  "metrics": {
353
- "accuracy": 0.53125,
354
- "f1": 0.5312316887378413,
355
- "roc_auc": 0.513125,
356
- "train_time_s": 0.133,
357
- "train_score": 0.8889442279709763,
358
- "test_score": 0.513125,
359
- "generalization_gap": 0.3758192279709762,
360
- "overfit": true
361
  },
362
- "train_score": 0.8889442279709763,
363
- "generalization_gap": 0.3758192279709762,
364
- "overfit": true,
365
  "cv_scores": [
366
- 0.43956043956043955,
367
- 0.447998046875,
368
- 0.50390625,
369
- 0.451416015625,
370
- 0.45068359375
371
  ],
372
- "cv_mean": 0.458712869162088,
373
- "cv_std": 0.022985986961779142,
374
  "cv_train_scores": [
375
- 0.9126129150390625,
376
- 0.9176317998016327,
377
- 0.9266803997863737,
378
- 0.9033646143282216,
379
- 0.9239642938887618
380
  ],
381
- "cv_train_mean": 0.9168508045688103,
382
- "cv_overfit": true
383
  }
384
  ]
385
  },
386
  "eval": {
387
  "metrics": {
388
- "accuracy": 0.50625,
389
- "f1": 0.5060763549685436,
390
- "classification_report": " precision recall f1-score support\n\n 0 0.51 0.53 0.52 80\n 1 0.51 0.49 0.50 80\n\n accuracy 0.51 160\n macro avg 0.51 0.51 0.51 160\nweighted avg 0.51 0.51 0.51 160\n",
391
- "roc_auc": 0.51390625,
392
  "y_prob": [
393
- 0.4685971934401446,
394
- 0.5880659890984261,
395
- 0.5304788776901366,
396
- 0.43957565151138467,
397
- 0.5038463626781632,
398
- 0.4652273077567498,
399
- 0.5847100993502347,
400
- 0.5104616697292005,
401
- 0.47131920968357843,
402
- 0.43799587982497334,
403
- 0.4960189710268829,
404
- 0.4351952524180606,
405
- 0.47823516872507077,
406
- 0.5225202731035682,
407
- 0.5049934839754789,
408
- 0.46348813612071,
409
- 0.4913152737170698,
410
- 0.4943560044658895,
411
- 0.4483125083684001,
412
- 0.4192829767113759,
413
- 0.4837760800877137,
414
- 0.5138495328227541,
415
- 0.46566274171564515,
416
- 0.46397203681287,
417
- 0.42296715645822186,
418
- 0.5057825082702295,
419
- 0.48229039510773697,
420
- 0.5515528516381715,
421
- 0.4130279404158634,
422
- 0.5110102562826957,
423
- 0.4378385985609114,
424
- 0.5049709338272942,
425
- 0.5491177241154352,
426
- 0.4734278641779235,
427
- 0.4903180760277518,
428
- 0.5024153385948759,
429
- 0.4550174125976565,
430
- 0.47408944756592075,
431
- 0.4811344974522715,
432
- 0.513304425049202,
433
- 0.5579353223774154,
434
- 0.5210717342560043,
435
- 0.5907744656741766,
436
- 0.46344911708752645,
437
- 0.43117875296291236,
438
- 0.533688758919175,
439
- 0.5290497502374126,
440
- 0.48607641955131375,
441
- 0.532881233702402,
442
- 0.5341292461861704,
443
- 0.5047550838230295,
444
- 0.45629185178585485,
445
- 0.5723741095853611,
446
- 0.5176362649318781,
447
- 0.576781843287271,
448
- 0.4565587333091604,
449
- 0.43425250970963175,
450
- 0.44864941542194103,
451
- 0.4814982228717,
452
- 0.5771946399699652,
453
- 0.5542313298335242,
454
- 0.5148651702981335,
455
- 0.578326993627849,
456
- 0.46219547798053273,
457
- 0.4662127930424256,
458
- 0.5167269442125124,
459
- 0.4545946129157852,
460
- 0.47285099433062533,
461
- 0.475602443239552,
462
- 0.5318046552682085,
463
- 0.4916093412756572,
464
- 0.4680847205841181,
465
- 0.5300286938315274,
466
- 0.47419383091533374,
467
- 0.4432574256271499,
468
- 0.5467691188026214,
469
- 0.4942200595017993,
470
- 0.491558726159705,
471
- 0.5551736674330323,
472
- 0.5360340244003253,
473
- 0.4663308126772837,
474
- 0.5341545691299238,
475
- 0.544787947349383,
476
- 0.5339643040098896,
477
- 0.5635104012484852,
478
- 0.42164008428541117,
479
- 0.5272913797609838,
480
- 0.49149085962047573,
481
- 0.4309241928600356,
482
- 0.5725259889932812,
483
- 0.45877791338922436,
484
- 0.5049566058663426,
485
- 0.5467499732880384,
486
- 0.4886064484052085,
487
- 0.4761842009707287,
488
- 0.5171460006825551,
489
- 0.5652706369027144,
490
- 0.5519353659368222,
491
- 0.48829304075863755,
492
- 0.49952031294625754,
493
- 0.5696952526061775,
494
- 0.39907090644230236,
495
- 0.5300417514346564,
496
- 0.5201368866376818,
497
- 0.4335442294896271,
498
- 0.441018930438655,
499
- 0.5727977177493875,
500
- 0.5055094496565438,
501
- 0.5485429940005087,
502
- 0.464300189927614,
503
- 0.46528676320969975,
504
- 0.4765960586458909,
505
- 0.5013951407428184,
506
- 0.4758746133195292,
507
- 0.574369326643904,
508
- 0.5448327519185213,
509
- 0.5313577820269861,
510
- 0.5796545232541171,
511
- 0.5408139880303958,
512
- 0.5758274204341941,
513
- 0.4433770335078927,
514
- 0.48199862880120214,
515
- 0.4681855101348399,
516
- 0.5105250331390581,
517
- 0.484520120241961,
518
- 0.45987999738308466,
519
- 0.4812982046752543,
520
- 0.4651369537670347,
521
- 0.4515971139618936,
522
- 0.5239026968610354,
523
- 0.5183456388828395,
524
- 0.4887243768576431,
525
- 0.4846139091445182,
526
- 0.480984884262246,
527
- 0.5113218050872491,
528
- 0.42950526584467874,
529
- 0.4980224655396546,
530
- 0.4853621308117529,
531
- 0.5006213435479155,
532
- 0.4464885510310222,
533
- 0.5825216599017382,
534
- 0.509750144605609,
535
- 0.5611443029239761,
536
- 0.48586792077872754,
537
- 0.5551391707724177,
538
- 0.48437951798583123,
539
- 0.4506087491103293,
540
- 0.49053821992973634,
541
- 0.4485156855641885,
542
- 0.5272347909195831,
543
- 0.4978957759625092,
544
- 0.4865222606967821,
545
- 0.5365014960658203,
546
- 0.46890723544338464,
547
- 0.5615897997035765,
548
- 0.553860453714982,
549
- 0.5373593965986853,
550
- 0.52350272912908,
551
- 0.5543178631028236,
552
- 0.5610670925103559
553
  ]
554
  },
555
  "plot_paths": {
@@ -570,54 +567,54 @@
570
  },
571
  "target_col": "survived",
572
  "task_type": "classification",
573
- "best_model_name": "Logistic Regression",
574
  "best_metrics": {
575
- "accuracy": 0.50625,
576
- "f1": 0.5060763549685436,
577
- "roc_auc": 0.51390625,
578
- "train_time_s": 0.003,
579
- "train_score": 0.5505620172071993,
580
- "test_score": 0.51390625,
581
- "generalization_gap": 0.03665576720719932,
582
  "overfit": false
583
  },
584
  "comparison_df": [
585
  {
586
  "Model": "Logistic Regression",
587
- "Train Score": 0.5506,
588
- "Test Score": 0.5139,
589
- "Gap": 0.0367,
590
- "CV Mean": 0.5025,
591
- "CV Std": 0.0343,
592
- "CV Train Mean": 0.5568,
593
  "CV Overfit": "No",
594
  "Overfit": "No",
595
- "Train Time(s)": 0.0
596
  },
597
  {
598
  "Model": "Random Forest",
599
- "Train Score": 0.8889,
600
- "Test Score": 0.5131,
601
- "Gap": 0.3758,
602
- "CV Mean": 0.4587,
603
- "CV Std": 0.023,
604
- "CV Train Mean": 0.9169,
605
- "CV Overfit": "Yes",
606
- "Overfit": "Yes",
607
- "Train Time(s)": 0.13
608
  }
609
  ],
610
  "feature_importances": {
611
- "embarked_C": 0.19427991337835848,
612
- "sex_female": 0.1795206881097836,
613
- "sex_male": 0.17217897553699751,
614
- "embarked_Q": 0.16121663300959552,
615
- "pclass": 0.10709936251807319,
616
- "sibsp": 0.06916449512778146,
617
- "embarked_S": 0.04040499294154858,
618
- "age": 0.032805348276750294,
619
- "parch": 0.02489251272846473,
620
- "fare": 0.01843707837264646
621
  },
622
  "plot_paths": {
623
  "confusion_matrix": "outputs/titanic_confusion_matrix.png",
@@ -625,171 +622,171 @@
625
  "feature_importance": "outputs/titanic_feature_importance.png"
626
  },
627
  "metrics": {
628
- "accuracy": 0.50625,
629
- "f1": 0.5060763549685436,
630
- "classification_report": " precision recall f1-score support\n\n 0 0.51 0.53 0.52 80\n 1 0.51 0.49 0.50 80\n\n accuracy 0.51 160\n macro avg 0.51 0.51 0.51 160\nweighted avg 0.51 0.51 0.51 160\n",
631
- "roc_auc": 0.51390625,
632
  "y_prob": [
633
- 0.4685971934401446,
634
- 0.5880659890984261,
635
- 0.5304788776901366,
636
- 0.43957565151138467,
637
- 0.5038463626781632,
638
- 0.4652273077567498,
639
- 0.5847100993502347,
640
- 0.5104616697292005,
641
- 0.47131920968357843,
642
- 0.43799587982497334,
643
- 0.4960189710268829,
644
- 0.4351952524180606,
645
- 0.47823516872507077,
646
- 0.5225202731035682,
647
- 0.5049934839754789,
648
- 0.46348813612071,
649
- 0.4913152737170698,
650
- 0.4943560044658895,
651
- 0.4483125083684001,
652
- 0.4192829767113759,
653
- 0.4837760800877137,
654
- 0.5138495328227541,
655
- 0.46566274171564515,
656
- 0.46397203681287,
657
- 0.42296715645822186,
658
- 0.5057825082702295,
659
- 0.48229039510773697,
660
- 0.5515528516381715,
661
- 0.4130279404158634,
662
- 0.5110102562826957,
663
- 0.4378385985609114,
664
- 0.5049709338272942,
665
- 0.5491177241154352,
666
- 0.4734278641779235,
667
- 0.4903180760277518,
668
- 0.5024153385948759,
669
- 0.4550174125976565,
670
- 0.47408944756592075,
671
- 0.4811344974522715,
672
- 0.513304425049202,
673
- 0.5579353223774154,
674
- 0.5210717342560043,
675
- 0.5907744656741766,
676
- 0.46344911708752645,
677
- 0.43117875296291236,
678
- 0.533688758919175,
679
- 0.5290497502374126,
680
- 0.48607641955131375,
681
- 0.532881233702402,
682
- 0.5341292461861704,
683
- 0.5047550838230295,
684
- 0.45629185178585485,
685
- 0.5723741095853611,
686
- 0.5176362649318781,
687
- 0.576781843287271,
688
- 0.4565587333091604,
689
- 0.43425250970963175,
690
- 0.44864941542194103,
691
- 0.4814982228717,
692
- 0.5771946399699652,
693
- 0.5542313298335242,
694
- 0.5148651702981335,
695
- 0.578326993627849,
696
- 0.46219547798053273,
697
- 0.4662127930424256,
698
- 0.5167269442125124,
699
- 0.4545946129157852,
700
- 0.47285099433062533,
701
- 0.475602443239552,
702
- 0.5318046552682085,
703
- 0.4916093412756572,
704
- 0.4680847205841181,
705
- 0.5300286938315274,
706
- 0.47419383091533374,
707
- 0.4432574256271499,
708
- 0.5467691188026214,
709
- 0.4942200595017993,
710
- 0.491558726159705,
711
- 0.5551736674330323,
712
- 0.5360340244003253,
713
- 0.4663308126772837,
714
- 0.5341545691299238,
715
- 0.544787947349383,
716
- 0.5339643040098896,
717
- 0.5635104012484852,
718
- 0.42164008428541117,
719
- 0.5272913797609838,
720
- 0.49149085962047573,
721
- 0.4309241928600356,
722
- 0.5725259889932812,
723
- 0.45877791338922436,
724
- 0.5049566058663426,
725
- 0.5467499732880384,
726
- 0.4886064484052085,
727
- 0.4761842009707287,
728
- 0.5171460006825551,
729
- 0.5652706369027144,
730
- 0.5519353659368222,
731
- 0.48829304075863755,
732
- 0.49952031294625754,
733
- 0.5696952526061775,
734
- 0.39907090644230236,
735
- 0.5300417514346564,
736
- 0.5201368866376818,
737
- 0.4335442294896271,
738
- 0.441018930438655,
739
- 0.5727977177493875,
740
- 0.5055094496565438,
741
- 0.5485429940005087,
742
- 0.464300189927614,
743
- 0.46528676320969975,
744
- 0.4765960586458909,
745
- 0.5013951407428184,
746
- 0.4758746133195292,
747
- 0.574369326643904,
748
- 0.5448327519185213,
749
- 0.5313577820269861,
750
- 0.5796545232541171,
751
- 0.5408139880303958,
752
- 0.5758274204341941,
753
- 0.4433770335078927,
754
- 0.48199862880120214,
755
- 0.4681855101348399,
756
- 0.5105250331390581,
757
- 0.484520120241961,
758
- 0.45987999738308466,
759
- 0.4812982046752543,
760
- 0.4651369537670347,
761
- 0.4515971139618936,
762
- 0.5239026968610354,
763
- 0.5183456388828395,
764
- 0.4887243768576431,
765
- 0.4846139091445182,
766
- 0.480984884262246,
767
- 0.5113218050872491,
768
- 0.42950526584467874,
769
- 0.4980224655396546,
770
- 0.4853621308117529,
771
- 0.5006213435479155,
772
- 0.4464885510310222,
773
- 0.5825216599017382,
774
- 0.509750144605609,
775
- 0.5611443029239761,
776
- 0.48586792077872754,
777
- 0.5551391707724177,
778
- 0.48437951798583123,
779
- 0.4506087491103293,
780
- 0.49053821992973634,
781
- 0.4485156855641885,
782
- 0.5272347909195831,
783
- 0.4978957759625092,
784
- 0.4865222606967821,
785
- 0.5365014960658203,
786
- 0.46890723544338464,
787
- 0.5615897997035765,
788
- 0.553860453714982,
789
- 0.5373593965986853,
790
- 0.52350272912908,
791
- 0.5543178631028236,
792
- 0.5610670925103559
793
  ]
794
  },
795
  "tune": {
@@ -838,8 +835,8 @@
838
  "is_large": false,
839
  "is_wide": false,
840
  "is_binary": true,
841
- "imbalance_ratio": 1.006269592476489,
842
- "smote_applied": false
843
  }
844
  }
845
  },
@@ -1016,10 +1013,10 @@
1016
  ],
1017
  "n_classes": 2,
1018
  "class_distribution": {
1019
- "0": 401,
1020
- "1": 399
1021
  },
1022
- "imbalance_ratio": 1.01
1023
  },
1024
  "quality_flags": [],
1025
  "recommendations": [
@@ -1088,7 +1085,8 @@
1088
  "Categorical columns (2): mode imputation + one-hot encoding.",
1089
  "Target encoded with LabelEncoder. Classes: ['0', '1']",
1090
  "Train/test split: 640 train rows, 160 test rows (20% test).",
1091
- "Class imbalance ratio (majority/minority): 1.01.",
 
1092
  "Final feature matrix: 10 features."
1093
  ],
1094
  "num_cols": [
@@ -1104,9 +1102,9 @@
1104
  ],
1105
  "n_classes": 2,
1106
  "log_transformed_cols": [],
1107
- "smote_applied": false,
1108
- "smote_log": "",
1109
- "train_size": 640,
1110
  "test_size": 160,
1111
  "final_feature_count": 10
1112
  }
@@ -1161,8 +1159,8 @@
1161
  "is_large": false,
1162
  "is_wide": false,
1163
  "is_binary": true,
1164
- "imbalance_ratio": 1.006269592476489,
1165
- "smote_applied": false
1166
  }
1167
  }
1168
  },
@@ -1175,15 +1173,15 @@
1175
  "status": "done",
1176
  "data": {
1177
  "train": {
1178
- "best_name": "Logistic Regression",
1179
  "best_metrics": {
1180
- "accuracy": 0.50625,
1181
- "f1": 0.5060763549685436,
1182
- "roc_auc": 0.51390625,
1183
- "train_time_s": 0.003,
1184
- "train_score": 0.5505620172071993,
1185
- "test_score": 0.51390625,
1186
- "generalization_gap": 0.03665576720719932,
1187
  "overfit": false
1188
  },
1189
  "metric_name": "roc_auc",
@@ -1191,126 +1189,122 @@
1191
  "comparison_df": [
1192
  {
1193
  "Model": "Logistic Regression",
1194
- "Train Score": 0.5506,
1195
- "Test Score": 0.5139,
1196
- "Gap": 0.0367,
1197
- "CV Mean": 0.5025,
1198
- "CV Std": 0.0343,
1199
- "CV Train Mean": 0.5568,
1200
  "CV Overfit": "No",
1201
  "Overfit": "No",
1202
- "Train Time(s)": 0.0
1203
  },
1204
  {
1205
  "Model": "Random Forest",
1206
- "Train Score": 0.8889,
1207
- "Test Score": 0.5131,
1208
- "Gap": 0.3758,
1209
- "CV Mean": 0.4587,
1210
- "CV Std": 0.023,
1211
- "CV Train Mean": 0.9169,
1212
- "CV Overfit": "Yes",
1213
- "Overfit": "Yes",
1214
- "Train Time(s)": 0.13
1215
  }
1216
  ],
1217
  "feature_importances": {
1218
- "embarked_C": 0.19427991337835848,
1219
- "sex_female": 0.1795206881097836,
1220
- "sex_male": 0.17217897553699751,
1221
- "embarked_Q": 0.16121663300959552,
1222
- "pclass": 0.10709936251807319,
1223
- "sibsp": 0.06916449512778146,
1224
- "embarked_S": 0.04040499294154858,
1225
- "age": 0.032805348276750294,
1226
- "parch": 0.02489251272846473,
1227
- "fare": 0.01843707837264646
1228
  },
1229
  "training_log": [
1230
  "Training 2 models for classification task.",
1231
  " Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
1232
  " Training Logistic Regression...",
1233
- " Logistic Regression: acc=0.506, f1=0.506, auc=0.514 [0.00s]",
1234
  " Training Random Forest...",
1235
- " Random Forest: acc=0.531, f1=0.531, auc=0.513 [0.13s]",
1236
- "\nBest model: Logistic Regression (roc_auc=0.5139)",
1237
- "Overfitting warnings: Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758; Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587",
1238
- "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139"
1239
- ],
1240
- "overfitting_warnings": [
1241
- "Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758",
1242
- "Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587"
1243
  ],
1244
- "cv_summary": "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139",
 
1245
  "cv_folds_used": 5,
1246
  "results": [
1247
  {
1248
  "name": "Logistic Regression",
1249
  "metrics": {
1250
- "accuracy": 0.50625,
1251
- "f1": 0.5060763549685436,
1252
- "roc_auc": 0.51390625,
1253
- "train_time_s": 0.003,
1254
- "train_score": 0.5505620172071993,
1255
- "test_score": 0.51390625,
1256
- "generalization_gap": 0.03665576720719932,
1257
  "overfit": false
1258
  },
1259
  "cv_scores": [
1260
- 0.5584859584859584,
1261
- 0.467529296875,
1262
- 0.525634765625,
1263
- 0.48388671875,
1264
- 0.476806640625
1265
  ],
1266
- "cv_mean": 0.5024686760721917,
1267
- "cv_std": 0.034348778883955274,
1268
  "cv_train_scores": [
1269
- 0.541595458984375,
1270
- 0.5711451895933471,
1271
- 0.5545586327916381,
1272
- 0.5557030594338903,
1273
- 0.5610894941634241
1274
  ],
1275
- "cv_train_mean": 0.5568183669933349,
1276
  "cv_overfit": false,
1277
- "train_score": 0.5505620172071993,
1278
- "generalization_gap": 0.03665576720719932,
1279
  "overfit": false
1280
  },
1281
  {
1282
  "name": "Random Forest",
1283
  "metrics": {
1284
- "accuracy": 0.53125,
1285
- "f1": 0.5312316887378413,
1286
- "roc_auc": 0.513125,
1287
- "train_time_s": 0.133,
1288
- "train_score": 0.8889442279709763,
1289
- "test_score": 0.513125,
1290
- "generalization_gap": 0.3758192279709762,
1291
- "overfit": true
1292
  },
1293
  "cv_scores": [
1294
- 0.43956043956043955,
1295
- 0.447998046875,
1296
- 0.50390625,
1297
- 0.451416015625,
1298
- 0.45068359375
1299
  ],
1300
- "cv_mean": 0.458712869162088,
1301
- "cv_std": 0.022985986961779142,
1302
  "cv_train_scores": [
1303
- 0.9126129150390625,
1304
- 0.9176317998016327,
1305
- 0.9266803997863737,
1306
- 0.9033646143282216,
1307
- 0.9239642938887618
1308
  ],
1309
- "cv_train_mean": 0.9168508045688103,
1310
- "cv_overfit": true,
1311
- "train_score": 0.8889442279709763,
1312
- "generalization_gap": 0.3758192279709762,
1313
- "overfit": true
1314
  }
1315
  ]
1316
  }
@@ -1326,7 +1320,7 @@
1326
  "tune": {
1327
  "success": false,
1328
  "error": "optuna not installed \u2014 run: pip install optuna",
1329
- "model_name": "Logistic Regression"
1330
  }
1331
  },
1332
  "error": null
@@ -1339,171 +1333,171 @@
1339
  "data": {
1340
  "eval": {
1341
  "metrics": {
1342
- "accuracy": 0.50625,
1343
- "f1": 0.5060763549685436,
1344
- "classification_report": " precision recall f1-score support\n\n 0 0.51 0.53 0.52 80\n 1 0.51 0.49 0.50 80\n\n accuracy 0.51 160\n macro avg 0.51 0.51 0.51 160\nweighted avg 0.51 0.51 0.51 160\n",
1345
- "roc_auc": 0.51390625,
1346
  "y_prob": [
1347
- 0.4685971934401446,
1348
- 0.5880659890984261,
1349
- 0.5304788776901366,
1350
- 0.43957565151138467,
1351
- 0.5038463626781632,
1352
- 0.4652273077567498,
1353
- 0.5847100993502347,
1354
- 0.5104616697292005,
1355
- 0.47131920968357843,
1356
- 0.43799587982497334,
1357
- 0.4960189710268829,
1358
- 0.4351952524180606,
1359
- 0.47823516872507077,
1360
- 0.5225202731035682,
1361
- 0.5049934839754789,
1362
- 0.46348813612071,
1363
- 0.4913152737170698,
1364
- 0.4943560044658895,
1365
- 0.4483125083684001,
1366
- 0.4192829767113759,
1367
- 0.4837760800877137,
1368
- 0.5138495328227541,
1369
- 0.46566274171564515,
1370
- 0.46397203681287,
1371
- 0.42296715645822186,
1372
- 0.5057825082702295,
1373
- 0.48229039510773697,
1374
- 0.5515528516381715,
1375
- 0.4130279404158634,
1376
- 0.5110102562826957,
1377
- 0.4378385985609114,
1378
- 0.5049709338272942,
1379
- 0.5491177241154352,
1380
- 0.4734278641779235,
1381
- 0.4903180760277518,
1382
- 0.5024153385948759,
1383
- 0.4550174125976565,
1384
- 0.47408944756592075,
1385
- 0.4811344974522715,
1386
- 0.513304425049202,
1387
- 0.5579353223774154,
1388
- 0.5210717342560043,
1389
- 0.5907744656741766,
1390
- 0.46344911708752645,
1391
- 0.43117875296291236,
1392
- 0.533688758919175,
1393
- 0.5290497502374126,
1394
- 0.48607641955131375,
1395
- 0.532881233702402,
1396
- 0.5341292461861704,
1397
- 0.5047550838230295,
1398
- 0.45629185178585485,
1399
- 0.5723741095853611,
1400
- 0.5176362649318781,
1401
- 0.576781843287271,
1402
- 0.4565587333091604,
1403
- 0.43425250970963175,
1404
- 0.44864941542194103,
1405
- 0.4814982228717,
1406
- 0.5771946399699652,
1407
- 0.5542313298335242,
1408
- 0.5148651702981335,
1409
- 0.578326993627849,
1410
- 0.46219547798053273,
1411
- 0.4662127930424256,
1412
- 0.5167269442125124,
1413
- 0.4545946129157852,
1414
- 0.47285099433062533,
1415
- 0.475602443239552,
1416
- 0.5318046552682085,
1417
- 0.4916093412756572,
1418
- 0.4680847205841181,
1419
- 0.5300286938315274,
1420
- 0.47419383091533374,
1421
- 0.4432574256271499,
1422
- 0.5467691188026214,
1423
- 0.4942200595017993,
1424
- 0.491558726159705,
1425
- 0.5551736674330323,
1426
- 0.5360340244003253,
1427
- 0.4663308126772837,
1428
- 0.5341545691299238,
1429
- 0.544787947349383,
1430
- 0.5339643040098896,
1431
- 0.5635104012484852,
1432
- 0.42164008428541117,
1433
- 0.5272913797609838,
1434
- 0.49149085962047573,
1435
- 0.4309241928600356,
1436
- 0.5725259889932812,
1437
- 0.45877791338922436,
1438
- 0.5049566058663426,
1439
- 0.5467499732880384,
1440
- 0.4886064484052085,
1441
- 0.4761842009707287,
1442
- 0.5171460006825551,
1443
- 0.5652706369027144,
1444
- 0.5519353659368222,
1445
- 0.48829304075863755,
1446
- 0.49952031294625754,
1447
- 0.5696952526061775,
1448
- 0.39907090644230236,
1449
- 0.5300417514346564,
1450
- 0.5201368866376818,
1451
- 0.4335442294896271,
1452
- 0.441018930438655,
1453
- 0.5727977177493875,
1454
- 0.5055094496565438,
1455
- 0.5485429940005087,
1456
- 0.464300189927614,
1457
- 0.46528676320969975,
1458
- 0.4765960586458909,
1459
- 0.5013951407428184,
1460
- 0.4758746133195292,
1461
- 0.574369326643904,
1462
- 0.5448327519185213,
1463
- 0.5313577820269861,
1464
- 0.5796545232541171,
1465
- 0.5408139880303958,
1466
- 0.5758274204341941,
1467
- 0.4433770335078927,
1468
- 0.48199862880120214,
1469
- 0.4681855101348399,
1470
- 0.5105250331390581,
1471
- 0.484520120241961,
1472
- 0.45987999738308466,
1473
- 0.4812982046752543,
1474
- 0.4651369537670347,
1475
- 0.4515971139618936,
1476
- 0.5239026968610354,
1477
- 0.5183456388828395,
1478
- 0.4887243768576431,
1479
- 0.4846139091445182,
1480
- 0.480984884262246,
1481
- 0.5113218050872491,
1482
- 0.42950526584467874,
1483
- 0.4980224655396546,
1484
- 0.4853621308117529,
1485
- 0.5006213435479155,
1486
- 0.4464885510310222,
1487
- 0.5825216599017382,
1488
- 0.509750144605609,
1489
- 0.5611443029239761,
1490
- 0.48586792077872754,
1491
- 0.5551391707724177,
1492
- 0.48437951798583123,
1493
- 0.4506087491103293,
1494
- 0.49053821992973634,
1495
- 0.4485156855641885,
1496
- 0.5272347909195831,
1497
- 0.4978957759625092,
1498
- 0.4865222606967821,
1499
- 0.5365014960658203,
1500
- 0.46890723544338464,
1501
- 0.5615897997035765,
1502
- 0.553860453714982,
1503
- 0.5373593965986853,
1504
- 0.52350272912908,
1505
- 0.5543178631028236,
1506
- 0.5610670925103559
1507
  ]
1508
  },
1509
  "plot_paths": {
@@ -1698,10 +1692,10 @@
1698
  ],
1699
  "n_classes": 2,
1700
  "class_distribution": {
1701
- "0": 401,
1702
- "1": 399
1703
  },
1704
- "imbalance_ratio": 1.01
1705
  },
1706
  "quality_flags": [],
1707
  "recommendations": [
@@ -1752,7 +1746,8 @@
1752
  "Categorical columns (2): mode imputation + one-hot encoding.",
1753
  "Target encoded with LabelEncoder. Classes: ['0', '1']",
1754
  "Train/test split: 640 train rows, 160 test rows (20% test).",
1755
- "Class imbalance ratio (majority/minority): 1.01.",
 
1756
  "Final feature matrix: 10 features."
1757
  ],
1758
  "num_cols": [
@@ -1768,19 +1763,19 @@
1768
  ],
1769
  "n_classes": 2,
1770
  "log_transformed_cols": [],
1771
- "smote_applied": false,
1772
- "smote_log": ""
1773
  },
1774
  "train": {
1775
- "best_name": "Logistic Regression",
1776
  "best_metrics": {
1777
- "accuracy": 0.50625,
1778
- "f1": 0.5060763549685436,
1779
- "roc_auc": 0.51390625,
1780
- "train_time_s": 0.003,
1781
- "train_score": 0.5505620172071993,
1782
- "test_score": 0.51390625,
1783
- "generalization_gap": 0.03665576720719932,
1784
  "overfit": false
1785
  },
1786
  "metric_name": "roc_auc",
@@ -1788,296 +1783,292 @@
1788
  "comparison_df": [
1789
  {
1790
  "Model": "Logistic Regression",
1791
- "Train Score": 0.5506,
1792
- "Test Score": 0.5139,
1793
- "Gap": 0.0367,
1794
- "CV Mean": 0.5025,
1795
- "CV Std": 0.0343,
1796
- "CV Train Mean": 0.5568,
1797
  "CV Overfit": "No",
1798
  "Overfit": "No",
1799
- "Train Time(s)": 0.0
1800
  },
1801
  {
1802
  "Model": "Random Forest",
1803
- "Train Score": 0.8889,
1804
- "Test Score": 0.5131,
1805
- "Gap": 0.3758,
1806
- "CV Mean": 0.4587,
1807
- "CV Std": 0.023,
1808
- "CV Train Mean": 0.9169,
1809
- "CV Overfit": "Yes",
1810
- "Overfit": "Yes",
1811
- "Train Time(s)": 0.13
1812
  }
1813
  ],
1814
  "feature_importances": {
1815
- "embarked_C": 0.19427991337835848,
1816
- "sex_female": 0.1795206881097836,
1817
- "sex_male": 0.17217897553699751,
1818
- "embarked_Q": 0.16121663300959552,
1819
- "pclass": 0.10709936251807319,
1820
- "sibsp": 0.06916449512778146,
1821
- "embarked_S": 0.04040499294154858,
1822
- "age": 0.032805348276750294,
1823
- "parch": 0.02489251272846473,
1824
- "fare": 0.01843707837264646
1825
  },
1826
  "training_log": [
1827
  "Training 2 models for classification task.",
1828
  " Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
1829
  " Training Logistic Regression...",
1830
- " Logistic Regression: acc=0.506, f1=0.506, auc=0.514 [0.00s]",
1831
  " Training Random Forest...",
1832
- " Random Forest: acc=0.531, f1=0.531, auc=0.513 [0.13s]",
1833
- "\nBest model: Logistic Regression (roc_auc=0.5139)",
1834
- "Overfitting warnings: Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758; Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587",
1835
- "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139"
1836
- ],
1837
- "overfitting_warnings": [
1838
- "Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758",
1839
- "Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587"
1840
  ],
1841
- "cv_summary": "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139",
 
1842
  "cv_folds_used": 5,
1843
  "results": [
1844
  {
1845
  "name": "Logistic Regression",
1846
  "metrics": {
1847
- "accuracy": 0.50625,
1848
- "f1": 0.5060763549685436,
1849
- "roc_auc": 0.51390625,
1850
- "train_time_s": 0.003,
1851
- "train_score": 0.5505620172071993,
1852
- "test_score": 0.51390625,
1853
- "generalization_gap": 0.03665576720719932,
1854
  "overfit": false
1855
  },
1856
- "train_score": 0.5505620172071993,
1857
- "generalization_gap": 0.03665576720719932,
1858
  "overfit": false,
1859
  "cv_scores": [
1860
- 0.5584859584859584,
1861
- 0.467529296875,
1862
- 0.525634765625,
1863
- 0.48388671875,
1864
- 0.476806640625
1865
  ],
1866
- "cv_mean": 0.5024686760721917,
1867
- "cv_std": 0.034348778883955274,
1868
  "cv_train_scores": [
1869
- 0.541595458984375,
1870
- 0.5711451895933471,
1871
- 0.5545586327916381,
1872
- 0.5557030594338903,
1873
- 0.5610894941634241
1874
  ],
1875
- "cv_train_mean": 0.5568183669933349,
1876
  "cv_overfit": false
1877
  },
1878
  {
1879
  "name": "Random Forest",
1880
  "metrics": {
1881
- "accuracy": 0.53125,
1882
- "f1": 0.5312316887378413,
1883
- "roc_auc": 0.513125,
1884
- "train_time_s": 0.133,
1885
- "train_score": 0.8889442279709763,
1886
- "test_score": 0.513125,
1887
- "generalization_gap": 0.3758192279709762,
1888
- "overfit": true
1889
  },
1890
- "train_score": 0.8889442279709763,
1891
- "generalization_gap": 0.3758192279709762,
1892
- "overfit": true,
1893
  "cv_scores": [
1894
- 0.43956043956043955,
1895
- 0.447998046875,
1896
- 0.50390625,
1897
- 0.451416015625,
1898
- 0.45068359375
1899
  ],
1900
- "cv_mean": 0.458712869162088,
1901
- "cv_std": 0.022985986961779142,
1902
  "cv_train_scores": [
1903
- 0.9126129150390625,
1904
- 0.9176317998016327,
1905
- 0.9266803997863737,
1906
- 0.9033646143282216,
1907
- 0.9239642938887618
1908
  ],
1909
- "cv_train_mean": 0.9168508045688103,
1910
- "cv_overfit": true
1911
  }
1912
  ]
1913
  },
1914
  "eval": {
1915
  "metrics": {
1916
- "accuracy": 0.50625,
1917
- "f1": 0.5060763549685436,
1918
- "classification_report": " precision recall f1-score support\n\n 0 0.51 0.53 0.52 80\n 1 0.51 0.49 0.50 80\n\n accuracy 0.51 160\n macro avg 0.51 0.51 0.51 160\nweighted avg 0.51 0.51 0.51 160\n",
1919
- "roc_auc": 0.51390625,
1920
  "y_prob": [
1921
- 0.4685971934401446,
1922
- 0.5880659890984261,
1923
- 0.5304788776901366,
1924
- 0.43957565151138467,
1925
- 0.5038463626781632,
1926
- 0.4652273077567498,
1927
- 0.5847100993502347,
1928
- 0.5104616697292005,
1929
- 0.47131920968357843,
1930
- 0.43799587982497334,
1931
- 0.4960189710268829,
1932
- 0.4351952524180606,
1933
- 0.47823516872507077,
1934
- 0.5225202731035682,
1935
- 0.5049934839754789,
1936
- 0.46348813612071,
1937
- 0.4913152737170698,
1938
- 0.4943560044658895,
1939
- 0.4483125083684001,
1940
- 0.4192829767113759,
1941
- 0.4837760800877137,
1942
- 0.5138495328227541,
1943
- 0.46566274171564515,
1944
- 0.46397203681287,
1945
- 0.42296715645822186,
1946
- 0.5057825082702295,
1947
- 0.48229039510773697,
1948
- 0.5515528516381715,
1949
- 0.4130279404158634,
1950
- 0.5110102562826957,
1951
- 0.4378385985609114,
1952
- 0.5049709338272942,
1953
- 0.5491177241154352,
1954
- 0.4734278641779235,
1955
- 0.4903180760277518,
1956
- 0.5024153385948759,
1957
- 0.4550174125976565,
1958
- 0.47408944756592075,
1959
- 0.4811344974522715,
1960
- 0.513304425049202,
1961
- 0.5579353223774154,
1962
- 0.5210717342560043,
1963
- 0.5907744656741766,
1964
- 0.46344911708752645,
1965
- 0.43117875296291236,
1966
- 0.533688758919175,
1967
- 0.5290497502374126,
1968
- 0.48607641955131375,
1969
- 0.532881233702402,
1970
- 0.5341292461861704,
1971
- 0.5047550838230295,
1972
- 0.45629185178585485,
1973
- 0.5723741095853611,
1974
- 0.5176362649318781,
1975
- 0.576781843287271,
1976
- 0.4565587333091604,
1977
- 0.43425250970963175,
1978
- 0.44864941542194103,
1979
- 0.4814982228717,
1980
- 0.5771946399699652,
1981
- 0.5542313298335242,
1982
- 0.5148651702981335,
1983
- 0.578326993627849,
1984
- 0.46219547798053273,
1985
- 0.4662127930424256,
1986
- 0.5167269442125124,
1987
- 0.4545946129157852,
1988
- 0.47285099433062533,
1989
- 0.475602443239552,
1990
- 0.5318046552682085,
1991
- 0.4916093412756572,
1992
- 0.4680847205841181,
1993
- 0.5300286938315274,
1994
- 0.47419383091533374,
1995
- 0.4432574256271499,
1996
- 0.5467691188026214,
1997
- 0.4942200595017993,
1998
- 0.491558726159705,
1999
- 0.5551736674330323,
2000
- 0.5360340244003253,
2001
- 0.4663308126772837,
2002
- 0.5341545691299238,
2003
- 0.544787947349383,
2004
- 0.5339643040098896,
2005
- 0.5635104012484852,
2006
- 0.42164008428541117,
2007
- 0.5272913797609838,
2008
- 0.49149085962047573,
2009
- 0.4309241928600356,
2010
- 0.5725259889932812,
2011
- 0.45877791338922436,
2012
- 0.5049566058663426,
2013
- 0.5467499732880384,
2014
- 0.4886064484052085,
2015
- 0.4761842009707287,
2016
- 0.5171460006825551,
2017
- 0.5652706369027144,
2018
- 0.5519353659368222,
2019
- 0.48829304075863755,
2020
- 0.49952031294625754,
2021
- 0.5696952526061775,
2022
- 0.39907090644230236,
2023
- 0.5300417514346564,
2024
- 0.5201368866376818,
2025
- 0.4335442294896271,
2026
- 0.441018930438655,
2027
- 0.5727977177493875,
2028
- 0.5055094496565438,
2029
- 0.5485429940005087,
2030
- 0.464300189927614,
2031
- 0.46528676320969975,
2032
- 0.4765960586458909,
2033
- 0.5013951407428184,
2034
- 0.4758746133195292,
2035
- 0.574369326643904,
2036
- 0.5448327519185213,
2037
- 0.5313577820269861,
2038
- 0.5796545232541171,
2039
- 0.5408139880303958,
2040
- 0.5758274204341941,
2041
- 0.4433770335078927,
2042
- 0.48199862880120214,
2043
- 0.4681855101348399,
2044
- 0.5105250331390581,
2045
- 0.484520120241961,
2046
- 0.45987999738308466,
2047
- 0.4812982046752543,
2048
- 0.4651369537670347,
2049
- 0.4515971139618936,
2050
- 0.5239026968610354,
2051
- 0.5183456388828395,
2052
- 0.4887243768576431,
2053
- 0.4846139091445182,
2054
- 0.480984884262246,
2055
- 0.5113218050872491,
2056
- 0.42950526584467874,
2057
- 0.4980224655396546,
2058
- 0.4853621308117529,
2059
- 0.5006213435479155,
2060
- 0.4464885510310222,
2061
- 0.5825216599017382,
2062
- 0.509750144605609,
2063
- 0.5611443029239761,
2064
- 0.48586792077872754,
2065
- 0.5551391707724177,
2066
- 0.48437951798583123,
2067
- 0.4506087491103293,
2068
- 0.49053821992973634,
2069
- 0.4485156855641885,
2070
- 0.5272347909195831,
2071
- 0.4978957759625092,
2072
- 0.4865222606967821,
2073
- 0.5365014960658203,
2074
- 0.46890723544338464,
2075
- 0.5615897997035765,
2076
- 0.553860453714982,
2077
- 0.5373593965986853,
2078
- 0.52350272912908,
2079
- 0.5543178631028236,
2080
- 0.5610670925103559
2081
  ]
2082
  },
2083
  "plot_paths": {
@@ -2098,54 +2089,54 @@
2098
  },
2099
  "target_col": "survived",
2100
  "task_type": "classification",
2101
- "best_model_name": "Logistic Regression",
2102
  "best_metrics": {
2103
- "accuracy": 0.50625,
2104
- "f1": 0.5060763549685436,
2105
- "roc_auc": 0.51390625,
2106
- "train_time_s": 0.003,
2107
- "train_score": 0.5505620172071993,
2108
- "test_score": 0.51390625,
2109
- "generalization_gap": 0.03665576720719932,
2110
  "overfit": false
2111
  },
2112
  "comparison_df": [
2113
  {
2114
  "Model": "Logistic Regression",
2115
- "Train Score": 0.5506,
2116
- "Test Score": 0.5139,
2117
- "Gap": 0.0367,
2118
- "CV Mean": 0.5025,
2119
- "CV Std": 0.0343,
2120
- "CV Train Mean": 0.5568,
2121
  "CV Overfit": "No",
2122
  "Overfit": "No",
2123
- "Train Time(s)": 0.0
2124
  },
2125
  {
2126
  "Model": "Random Forest",
2127
- "Train Score": 0.8889,
2128
- "Test Score": 0.5131,
2129
- "Gap": 0.3758,
2130
- "CV Mean": 0.4587,
2131
- "CV Std": 0.023,
2132
- "CV Train Mean": 0.9169,
2133
- "CV Overfit": "Yes",
2134
- "Overfit": "Yes",
2135
- "Train Time(s)": 0.13
2136
  }
2137
  ],
2138
  "feature_importances": {
2139
- "embarked_C": 0.19427991337835848,
2140
- "sex_female": 0.1795206881097836,
2141
- "sex_male": 0.17217897553699751,
2142
- "embarked_Q": 0.16121663300959552,
2143
- "pclass": 0.10709936251807319,
2144
- "sibsp": 0.06916449512778146,
2145
- "embarked_S": 0.04040499294154858,
2146
- "age": 0.032805348276750294,
2147
- "parch": 0.02489251272846473,
2148
- "fare": 0.01843707837264646
2149
  },
2150
  "plot_paths": {
2151
  "confusion_matrix": "outputs/titanic_confusion_matrix.png",
@@ -2153,171 +2144,171 @@
2153
  "feature_importance": "outputs/titanic_feature_importance.png"
2154
  },
2155
  "metrics": {
2156
- "accuracy": 0.50625,
2157
- "f1": 0.5060763549685436,
2158
- "classification_report": " precision recall f1-score support\n\n 0 0.51 0.53 0.52 80\n 1 0.51 0.49 0.50 80\n\n accuracy 0.51 160\n macro avg 0.51 0.51 0.51 160\nweighted avg 0.51 0.51 0.51 160\n",
2159
- "roc_auc": 0.51390625,
2160
  "y_prob": [
2161
- 0.4685971934401446,
2162
- 0.5880659890984261,
2163
- 0.5304788776901366,
2164
- 0.43957565151138467,
2165
- 0.5038463626781632,
2166
- 0.4652273077567498,
2167
- 0.5847100993502347,
2168
- 0.5104616697292005,
2169
- 0.47131920968357843,
2170
- 0.43799587982497334,
2171
- 0.4960189710268829,
2172
- 0.4351952524180606,
2173
- 0.47823516872507077,
2174
- 0.5225202731035682,
2175
- 0.5049934839754789,
2176
- 0.46348813612071,
2177
- 0.4913152737170698,
2178
- 0.4943560044658895,
2179
- 0.4483125083684001,
2180
- 0.4192829767113759,
2181
- 0.4837760800877137,
2182
- 0.5138495328227541,
2183
- 0.46566274171564515,
2184
- 0.46397203681287,
2185
- 0.42296715645822186,
2186
- 0.5057825082702295,
2187
- 0.48229039510773697,
2188
- 0.5515528516381715,
2189
- 0.4130279404158634,
2190
- 0.5110102562826957,
2191
- 0.4378385985609114,
2192
- 0.5049709338272942,
2193
- 0.5491177241154352,
2194
- 0.4734278641779235,
2195
- 0.4903180760277518,
2196
- 0.5024153385948759,
2197
- 0.4550174125976565,
2198
- 0.47408944756592075,
2199
- 0.4811344974522715,
2200
- 0.513304425049202,
2201
- 0.5579353223774154,
2202
- 0.5210717342560043,
2203
- 0.5907744656741766,
2204
- 0.46344911708752645,
2205
- 0.43117875296291236,
2206
- 0.533688758919175,
2207
- 0.5290497502374126,
2208
- 0.48607641955131375,
2209
- 0.532881233702402,
2210
- 0.5341292461861704,
2211
- 0.5047550838230295,
2212
- 0.45629185178585485,
2213
- 0.5723741095853611,
2214
- 0.5176362649318781,
2215
- 0.576781843287271,
2216
- 0.4565587333091604,
2217
- 0.43425250970963175,
2218
- 0.44864941542194103,
2219
- 0.4814982228717,
2220
- 0.5771946399699652,
2221
- 0.5542313298335242,
2222
- 0.5148651702981335,
2223
- 0.578326993627849,
2224
- 0.46219547798053273,
2225
- 0.4662127930424256,
2226
- 0.5167269442125124,
2227
- 0.4545946129157852,
2228
- 0.47285099433062533,
2229
- 0.475602443239552,
2230
- 0.5318046552682085,
2231
- 0.4916093412756572,
2232
- 0.4680847205841181,
2233
- 0.5300286938315274,
2234
- 0.47419383091533374,
2235
- 0.4432574256271499,
2236
- 0.5467691188026214,
2237
- 0.4942200595017993,
2238
- 0.491558726159705,
2239
- 0.5551736674330323,
2240
- 0.5360340244003253,
2241
- 0.4663308126772837,
2242
- 0.5341545691299238,
2243
- 0.544787947349383,
2244
- 0.5339643040098896,
2245
- 0.5635104012484852,
2246
- 0.42164008428541117,
2247
- 0.5272913797609838,
2248
- 0.49149085962047573,
2249
- 0.4309241928600356,
2250
- 0.5725259889932812,
2251
- 0.45877791338922436,
2252
- 0.5049566058663426,
2253
- 0.5467499732880384,
2254
- 0.4886064484052085,
2255
- 0.4761842009707287,
2256
- 0.5171460006825551,
2257
- 0.5652706369027144,
2258
- 0.5519353659368222,
2259
- 0.48829304075863755,
2260
- 0.49952031294625754,
2261
- 0.5696952526061775,
2262
- 0.39907090644230236,
2263
- 0.5300417514346564,
2264
- 0.5201368866376818,
2265
- 0.4335442294896271,
2266
- 0.441018930438655,
2267
- 0.5727977177493875,
2268
- 0.5055094496565438,
2269
- 0.5485429940005087,
2270
- 0.464300189927614,
2271
- 0.46528676320969975,
2272
- 0.4765960586458909,
2273
- 0.5013951407428184,
2274
- 0.4758746133195292,
2275
- 0.574369326643904,
2276
- 0.5448327519185213,
2277
- 0.5313577820269861,
2278
- 0.5796545232541171,
2279
- 0.5408139880303958,
2280
- 0.5758274204341941,
2281
- 0.4433770335078927,
2282
- 0.48199862880120214,
2283
- 0.4681855101348399,
2284
- 0.5105250331390581,
2285
- 0.484520120241961,
2286
- 0.45987999738308466,
2287
- 0.4812982046752543,
2288
- 0.4651369537670347,
2289
- 0.4515971139618936,
2290
- 0.5239026968610354,
2291
- 0.5183456388828395,
2292
- 0.4887243768576431,
2293
- 0.4846139091445182,
2294
- 0.480984884262246,
2295
- 0.5113218050872491,
2296
- 0.42950526584467874,
2297
- 0.4980224655396546,
2298
- 0.4853621308117529,
2299
- 0.5006213435479155,
2300
- 0.4464885510310222,
2301
- 0.5825216599017382,
2302
- 0.509750144605609,
2303
- 0.5611443029239761,
2304
- 0.48586792077872754,
2305
- 0.5551391707724177,
2306
- 0.48437951798583123,
2307
- 0.4506087491103293,
2308
- 0.49053821992973634,
2309
- 0.4485156855641885,
2310
- 0.5272347909195831,
2311
- 0.4978957759625092,
2312
- 0.4865222606967821,
2313
- 0.5365014960658203,
2314
- 0.46890723544338464,
2315
- 0.5615897997035765,
2316
- 0.553860453714982,
2317
- 0.5373593965986853,
2318
- 0.52350272912908,
2319
- 0.5543178631028236,
2320
- 0.5610670925103559
2321
  ]
2322
  },
2323
  "tune": {
@@ -2366,8 +2357,8 @@
2366
  "is_large": false,
2367
  "is_wide": false,
2368
  "is_binary": true,
2369
- "imbalance_ratio": 1.006269592476489,
2370
- "smote_applied": false
2371
  }
2372
  }
2373
  },
 
170
  ],
171
  "n_classes": 2,
172
  "class_distribution": {
173
+ "1": 535,
174
+ "0": 265
175
  },
176
+ "imbalance_ratio": 2.02
177
  },
178
  "quality_flags": [],
179
  "recommendations": [
 
224
  "Categorical columns (2): mode imputation + one-hot encoding.",
225
  "Target encoded with LabelEncoder. Classes: ['0', '1']",
226
  "Train/test split: 640 train rows, 160 test rows (20% test).",
227
+ "Class imbalance ratio (majority/minority): 2.02.",
228
+ "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
229
  "Final feature matrix: 10 features."
230
  ],
231
  "num_cols": [
 
241
  ],
242
  "n_classes": 2,
243
  "log_transformed_cols": [],
244
+ "smote_applied": true,
245
+ "smote_log": "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428."
246
  },
247
  "train": {
248
+ "best_name": "Random Forest",
249
  "best_metrics": {
250
+ "accuracy": 0.8,
251
+ "f1": 0.8017316017316019,
252
+ "roc_auc": 0.8416505025568682,
253
+ "train_time_s": 0.378,
254
+ "train_score": 0.926019739715259,
255
+ "test_score": 0.8416505025568682,
256
+ "generalization_gap": 0.08436923715839073,
257
  "overfit": false
258
  },
259
  "metric_name": "roc_auc",
 
261
  "comparison_df": [
262
  {
263
  "Model": "Logistic Regression",
264
+ "Train Score": 0.8272,
265
+ "Test Score": 0.8462,
266
+ "Gap": -0.019,
267
+ "CV Mean": 0.8203,
268
+ "CV Std": 0.0317,
269
+ "CV Train Mean": 0.8271,
270
  "CV Overfit": "No",
271
  "Overfit": "No",
272
+ "Train Time(s)": 0.01
273
  },
274
  {
275
  "Model": "Random Forest",
276
+ "Train Score": 0.926,
277
+ "Test Score": 0.8417,
278
+ "Gap": 0.0844,
279
+ "CV Mean": 0.872,
280
+ "CV Std": 0.0247,
281
+ "CV Train Mean": 0.9317,
282
+ "CV Overfit": "No",
283
+ "Overfit": "No",
284
+ "Train Time(s)": 0.38
285
  }
286
  ],
287
  "feature_importances": {
288
+ "sex_male": 0.28592870600873493,
289
+ "sex_female": 0.2421809277670601,
290
+ "pclass": 0.20077516929328645,
291
+ "fare": 0.06675033992695845,
292
+ "age": 0.06080316414257179,
293
+ "sibsp": 0.05905243894256141,
294
+ "parch": 0.04148880321022331,
295
+ "embarked_S": 0.016750117296253544,
296
+ "embarked_Q": 0.01568458564080758,
297
+ "embarked_C": 0.010585747771542543
298
  },
299
  "training_log": [
300
  "Training 2 models for classification task.",
301
  " Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
302
  " Training Logistic Regression...",
303
+ " Logistic Regression: acc=0.775, f1=0.779, auc=0.846 [0.01s]",
304
  " Training Random Forest...",
305
+ " Random Forest: acc=0.800, f1=0.802, auc=0.842 [0.38s]",
306
+ "\nBest model: Random Forest (roc_auc=0.8417)",
307
+ "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417"
 
 
 
 
 
308
  ],
309
+ "overfitting_warnings": [],
310
+ "cv_summary": "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417",
311
  "cv_folds_used": 5,
312
  "results": [
313
  {
314
  "name": "Logistic Regression",
315
  "metrics": {
316
+ "accuracy": 0.775,
317
+ "f1": 0.7791835699797159,
318
+ "roc_auc": 0.8462352318815024,
319
+ "train_time_s": 0.015,
320
+ "train_score": 0.827190147611145,
321
+ "test_score": 0.8462352318815024,
322
+ "generalization_gap": -0.01904508427035745,
323
  "overfit": false
324
  },
325
+ "train_score": 0.827190147611145,
326
+ "generalization_gap": -0.01904508427035745,
327
  "overfit": false,
328
  "cv_scores": [
329
+ 0.8080043266630612,
330
+ 0.7863201094391246,
331
+ 0.7939808481532147,
332
+ 0.8422708618331054,
333
+ 0.8707250341997264
334
  ],
335
+ "cv_mean": 0.8202602360576463,
336
+ "cv_std": 0.031698294077597756,
337
  "cv_train_scores": [
338
+ 0.8316063062138779,
339
+ 0.8339556373928018,
340
+ 0.8352002455117385,
341
+ 0.8209128262833956,
342
+ 0.8139907592109525
343
  ],
344
+ "cv_train_mean": 0.8271331549225532,
345
  "cv_overfit": false
346
  },
347
  {
348
  "name": "Random Forest",
349
  "metrics": {
350
+ "accuracy": 0.8,
351
+ "f1": 0.8017316017316019,
352
+ "roc_auc": 0.8416505025568682,
353
+ "train_time_s": 0.378,
354
+ "train_score": 0.926019739715259,
355
+ "test_score": 0.8416505025568682,
356
+ "generalization_gap": 0.08436923715839073,
357
+ "overfit": false
358
  },
359
+ "train_score": 0.926019739715259,
360
+ "generalization_gap": 0.08436923715839073,
361
+ "overfit": false,
362
  "cv_scores": [
363
+ 0.8795294753921038,
364
+ 0.840218878248974,
365
+ 0.8588235294117647,
366
+ 0.9143638850889193,
367
+ 0.8670314637482901
368
  ],
369
+ "cv_mean": 0.8719934463780105,
370
+ "cv_std": 0.024737570511909102,
371
  "cv_train_scores": [
372
+ 0.9302947915597962,
373
+ 0.9364823623685063,
374
+ 0.9327400132985526,
375
+ 0.9295432458697765,
376
+ 0.9296881659932144
377
  ],
378
+ "cv_train_mean": 0.9317497158179693,
379
+ "cv_overfit": false
380
  }
381
  ]
382
  },
383
  "eval": {
384
  "metrics": {
385
+ "accuracy": 0.8,
386
+ "f1": 0.8017316017316019,
387
+ "classification_report": " precision recall f1-score support\n\n 0 0.68 0.74 0.71 53\n 1 0.86 0.83 0.85 107\n\n accuracy 0.80 160\n macro avg 0.77 0.78 0.78 160\nweighted avg 0.80 0.80 0.80 160\n",
388
+ "roc_auc": 0.8416505025568682,
389
  "y_prob": [
390
+ 0.13410941654770053,
391
+ 0.5294946471795305,
392
+ 0.6734029805133361,
393
+ 0.48276513165091095,
394
+ 0.537887729131407,
395
+ 0.14991155807168366,
396
+ 0.6170905519960105,
397
+ 0.4784058790521681,
398
+ 0.8375867266088654,
399
+ 0.7617669897727407,
400
+ 0.1351365003484392,
401
+ 0.44334998820869886,
402
+ 0.45918298156738113,
403
+ 0.18086772747653274,
404
+ 0.7363322360597586,
405
+ 0.11657189382719757,
406
+ 0.23649619273968361,
407
+ 0.5901773420747848,
408
+ 0.134558348190491,
409
+ 0.4254133833278052,
410
+ 0.1296011320209188,
411
+ 0.7314644444555719,
412
+ 0.7853934325845645,
413
+ 0.9071377805346806,
414
+ 0.6899050304790696,
415
+ 0.7018366024600355,
416
+ 0.8197685771383121,
417
+ 0.5198317417706617,
418
+ 0.12630032800342134,
419
+ 0.6473881223230429,
420
+ 0.832648823582342,
421
+ 0.6085633474110343,
422
+ 0.8113142318198139,
423
+ 0.9074706641382,
424
+ 0.8459766165607369,
425
+ 0.8201738289640232,
426
+ 0.9003680409586162,
427
+ 0.10016718457369138,
428
+ 0.8585346197416688,
429
+ 0.47744576120787313,
430
+ 0.7118155973614765,
431
+ 0.8079398173505976,
432
+ 0.4835688710616073,
433
+ 0.15232883334919356,
434
+ 0.7564268826362109,
435
+ 0.8738909667369577,
436
+ 0.8978781232217746,
437
+ 0.8629139348922269,
438
+ 0.9210750298694106,
439
+ 0.8622476596460283,
440
+ 0.5280584146401254,
441
+ 0.43175724148640166,
442
+ 0.8612268450998052,
443
+ 0.5192093060535821,
444
+ 0.4631429141776544,
445
+ 0.41349421421165006,
446
+ 0.1842717233199732,
447
+ 0.5393021880249185,
448
+ 0.4611023953979327,
449
+ 0.8127362992442425,
450
+ 0.8939691379496485,
451
+ 0.4164787102909156,
452
+ 0.8978878589569886,
453
+ 0.7983424128761862,
454
+ 0.1173182901761418,
455
+ 0.7542514691828953,
456
+ 0.8448152300020084,
457
+ 0.7353745795727059,
458
+ 0.874243708197828,
459
+ 0.1495537468741798,
460
+ 0.4608257845629732,
461
+ 0.723666302124413,
462
+ 0.8963689192492247,
463
+ 0.5718324254113771,
464
+ 0.4597956607545386,
465
+ 0.8732235494170898,
466
+ 0.7727295226970254,
467
+ 0.730516200696798,
468
+ 0.09548329031034873,
469
+ 0.8629109388984001,
470
+ 0.46263304800868466,
471
+ 0.7151887661777987,
472
+ 0.40230852321056754,
473
+ 0.8930814394148957,
474
+ 0.8189750833489609,
475
+ 0.7948906670433182,
476
+ 0.16253862850672068,
477
+ 0.7762547028088542,
478
+ 0.8334832356172626,
479
+ 0.8004830705990988,
480
+ 0.21099149923900418,
481
+ 0.8841702218788182,
482
+ 0.8712799607086796,
483
+ 0.5813945785177388,
484
+ 0.8052927113899669,
485
+ 0.49664824783559597,
486
+ 0.43547420910148493,
487
+ 0.9093300332802987,
488
+ 0.8271809777802208,
489
+ 0.6483594924481736,
490
+ 0.9153820268559258,
491
+ 0.8316891109823481,
492
+ 0.5172451679903641,
493
+ 0.10690068213440551,
494
+ 0.14482053455947552,
495
+ 0.6514137175182347,
496
+ 0.8437537672599027,
497
+ 0.7675893615600448,
498
+ 0.7953030977589493,
499
+ 0.23188048345069834,
500
+ 0.8320099927549827,
501
+ 0.8411414013434237,
502
+ 0.49552967074078597,
503
+ 0.10795590031797207,
504
+ 0.6603705695450834,
505
+ 0.905216945402769,
506
+ 0.9017388468431307,
507
+ 0.7038598916174742,
508
+ 0.46749836573145026,
509
+ 0.10980764687631323,
510
+ 0.4241936068254756,
511
+ 0.7869237725733111,
512
+ 0.4176645200689225,
513
+ 0.41597565326122093,
514
+ 0.1840375192714269,
515
+ 0.09239177436740872,
516
+ 0.5634089283154317,
517
+ 0.8672817898245639,
518
+ 0.859787536713618,
519
+ 0.823755567069053,
520
+ 0.09332562760407467,
521
+ 0.5269654560034531,
522
+ 0.3282944545355915,
523
+ 0.12656165619337742,
524
+ 0.8996144986378454,
525
+ 0.8794629731543243,
526
+ 0.2259360459237031,
527
+ 0.8677342141160906,
528
+ 0.7065381808535922,
529
+ 0.5087308414616077,
530
+ 0.7361387452605942,
531
+ 0.9175622001046994,
532
+ 0.6768295210930064,
533
+ 0.8217479267611849,
534
+ 0.8186363655380218,
535
+ 0.7713282338574033,
536
+ 0.8650744660561319,
537
+ 0.44798639555019343,
538
+ 0.6891437990464381,
539
+ 0.8957915611217553,
540
+ 0.13099882437253405,
541
+ 0.9100589282983074,
542
+ 0.5699927016156656,
543
+ 0.8848101584325896,
544
+ 0.833215993303308,
545
+ 0.806074199994562,
546
+ 0.15238050377532514,
547
+ 0.10376629963015432,
548
+ 0.1557889773023143,
549
+ 0.11296197259042451
550
  ]
551
  },
552
  "plot_paths": {
 
567
  },
568
  "target_col": "survived",
569
  "task_type": "classification",
570
+ "best_model_name": "Random Forest",
571
  "best_metrics": {
572
+ "accuracy": 0.8,
573
+ "f1": 0.8017316017316019,
574
+ "roc_auc": 0.8416505025568682,
575
+ "train_time_s": 0.378,
576
+ "train_score": 0.926019739715259,
577
+ "test_score": 0.8416505025568682,
578
+ "generalization_gap": 0.08436923715839073,
579
  "overfit": false
580
  },
581
  "comparison_df": [
582
  {
583
  "Model": "Logistic Regression",
584
+ "Train Score": 0.8272,
585
+ "Test Score": 0.8462,
586
+ "Gap": -0.019,
587
+ "CV Mean": 0.8203,
588
+ "CV Std": 0.0317,
589
+ "CV Train Mean": 0.8271,
590
  "CV Overfit": "No",
591
  "Overfit": "No",
592
+ "Train Time(s)": 0.01
593
  },
594
  {
595
  "Model": "Random Forest",
596
+ "Train Score": 0.926,
597
+ "Test Score": 0.8417,
598
+ "Gap": 0.0844,
599
+ "CV Mean": 0.872,
600
+ "CV Std": 0.0247,
601
+ "CV Train Mean": 0.9317,
602
+ "CV Overfit": "No",
603
+ "Overfit": "No",
604
+ "Train Time(s)": 0.38
605
  }
606
  ],
607
  "feature_importances": {
608
+ "sex_male": 0.28592870600873493,
609
+ "sex_female": 0.2421809277670601,
610
+ "pclass": 0.20077516929328645,
611
+ "fare": 0.06675033992695845,
612
+ "age": 0.06080316414257179,
613
+ "sibsp": 0.05905243894256141,
614
+ "parch": 0.04148880321022331,
615
+ "embarked_S": 0.016750117296253544,
616
+ "embarked_Q": 0.01568458564080758,
617
+ "embarked_C": 0.010585747771542543
618
  },
619
  "plot_paths": {
620
  "confusion_matrix": "outputs/titanic_confusion_matrix.png",
 
622
  "feature_importance": "outputs/titanic_feature_importance.png"
623
  },
624
  "metrics": {
625
+ "accuracy": 0.8,
626
+ "f1": 0.8017316017316019,
627
+ "classification_report": " precision recall f1-score support\n\n 0 0.68 0.74 0.71 53\n 1 0.86 0.83 0.85 107\n\n accuracy 0.80 160\n macro avg 0.77 0.78 0.78 160\nweighted avg 0.80 0.80 0.80 160\n",
628
+ "roc_auc": 0.8416505025568682,
629
  "y_prob": [
630
+ 0.13410941654770053,
631
+ 0.5294946471795305,
632
+ 0.6734029805133361,
633
+ 0.48276513165091095,
634
+ 0.537887729131407,
635
+ 0.14991155807168366,
636
+ 0.6170905519960105,
637
+ 0.4784058790521681,
638
+ 0.8375867266088654,
639
+ 0.7617669897727407,
640
+ 0.1351365003484392,
641
+ 0.44334998820869886,
642
+ 0.45918298156738113,
643
+ 0.18086772747653274,
644
+ 0.7363322360597586,
645
+ 0.11657189382719757,
646
+ 0.23649619273968361,
647
+ 0.5901773420747848,
648
+ 0.134558348190491,
649
+ 0.4254133833278052,
650
+ 0.1296011320209188,
651
+ 0.7314644444555719,
652
+ 0.7853934325845645,
653
+ 0.9071377805346806,
654
+ 0.6899050304790696,
655
+ 0.7018366024600355,
656
+ 0.8197685771383121,
657
+ 0.5198317417706617,
658
+ 0.12630032800342134,
659
+ 0.6473881223230429,
660
+ 0.832648823582342,
661
+ 0.6085633474110343,
662
+ 0.8113142318198139,
663
+ 0.9074706641382,
664
+ 0.8459766165607369,
665
+ 0.8201738289640232,
666
+ 0.9003680409586162,
667
+ 0.10016718457369138,
668
+ 0.8585346197416688,
669
+ 0.47744576120787313,
670
+ 0.7118155973614765,
671
+ 0.8079398173505976,
672
+ 0.4835688710616073,
673
+ 0.15232883334919356,
674
+ 0.7564268826362109,
675
+ 0.8738909667369577,
676
+ 0.8978781232217746,
677
+ 0.8629139348922269,
678
+ 0.9210750298694106,
679
+ 0.8622476596460283,
680
+ 0.5280584146401254,
681
+ 0.43175724148640166,
682
+ 0.8612268450998052,
683
+ 0.5192093060535821,
684
+ 0.4631429141776544,
685
+ 0.41349421421165006,
686
+ 0.1842717233199732,
687
+ 0.5393021880249185,
688
+ 0.4611023953979327,
689
+ 0.8127362992442425,
690
+ 0.8939691379496485,
691
+ 0.4164787102909156,
692
+ 0.8978878589569886,
693
+ 0.7983424128761862,
694
+ 0.1173182901761418,
695
+ 0.7542514691828953,
696
+ 0.8448152300020084,
697
+ 0.7353745795727059,
698
+ 0.874243708197828,
699
+ 0.1495537468741798,
700
+ 0.4608257845629732,
701
+ 0.723666302124413,
702
+ 0.8963689192492247,
703
+ 0.5718324254113771,
704
+ 0.4597956607545386,
705
+ 0.8732235494170898,
706
+ 0.7727295226970254,
707
+ 0.730516200696798,
708
+ 0.09548329031034873,
709
+ 0.8629109388984001,
710
+ 0.46263304800868466,
711
+ 0.7151887661777987,
712
+ 0.40230852321056754,
713
+ 0.8930814394148957,
714
+ 0.8189750833489609,
715
+ 0.7948906670433182,
716
+ 0.16253862850672068,
717
+ 0.7762547028088542,
718
+ 0.8334832356172626,
719
+ 0.8004830705990988,
720
+ 0.21099149923900418,
721
+ 0.8841702218788182,
722
+ 0.8712799607086796,
723
+ 0.5813945785177388,
724
+ 0.8052927113899669,
725
+ 0.49664824783559597,
726
+ 0.43547420910148493,
727
+ 0.9093300332802987,
728
+ 0.8271809777802208,
729
+ 0.6483594924481736,
730
+ 0.9153820268559258,
731
+ 0.8316891109823481,
732
+ 0.5172451679903641,
733
+ 0.10690068213440551,
734
+ 0.14482053455947552,
735
+ 0.6514137175182347,
736
+ 0.8437537672599027,
737
+ 0.7675893615600448,
738
+ 0.7953030977589493,
739
+ 0.23188048345069834,
740
+ 0.8320099927549827,
741
+ 0.8411414013434237,
742
+ 0.49552967074078597,
743
+ 0.10795590031797207,
744
+ 0.6603705695450834,
745
+ 0.905216945402769,
746
+ 0.9017388468431307,
747
+ 0.7038598916174742,
748
+ 0.46749836573145026,
749
+ 0.10980764687631323,
750
+ 0.4241936068254756,
751
+ 0.7869237725733111,
752
+ 0.4176645200689225,
753
+ 0.41597565326122093,
754
+ 0.1840375192714269,
755
+ 0.09239177436740872,
756
+ 0.5634089283154317,
757
+ 0.8672817898245639,
758
+ 0.859787536713618,
759
+ 0.823755567069053,
760
+ 0.09332562760407467,
761
+ 0.5269654560034531,
762
+ 0.3282944545355915,
763
+ 0.12656165619337742,
764
+ 0.8996144986378454,
765
+ 0.8794629731543243,
766
+ 0.2259360459237031,
767
+ 0.8677342141160906,
768
+ 0.7065381808535922,
769
+ 0.5087308414616077,
770
+ 0.7361387452605942,
771
+ 0.9175622001046994,
772
+ 0.6768295210930064,
773
+ 0.8217479267611849,
774
+ 0.8186363655380218,
775
+ 0.7713282338574033,
776
+ 0.8650744660561319,
777
+ 0.44798639555019343,
778
+ 0.6891437990464381,
779
+ 0.8957915611217553,
780
+ 0.13099882437253405,
781
+ 0.9100589282983074,
782
+ 0.5699927016156656,
783
+ 0.8848101584325896,
784
+ 0.833215993303308,
785
+ 0.806074199994562,
786
+ 0.15238050377532514,
787
+ 0.10376629963015432,
788
+ 0.1557889773023143,
789
+ 0.11296197259042451
790
  ]
791
  },
792
  "tune": {
 
835
  "is_large": false,
836
  "is_wide": false,
837
  "is_binary": true,
838
+ "imbalance_ratio": 1.0,
839
+ "smote_applied": true
840
  }
841
  }
842
  },
 
1013
  ],
1014
  "n_classes": 2,
1015
  "class_distribution": {
1016
+ "1": 535,
1017
+ "0": 265
1018
  },
1019
+ "imbalance_ratio": 2.02
1020
  },
1021
  "quality_flags": [],
1022
  "recommendations": [
 
1085
  "Categorical columns (2): mode imputation + one-hot encoding.",
1086
  "Target encoded with LabelEncoder. Classes: ['0', '1']",
1087
  "Train/test split: 640 train rows, 160 test rows (20% test).",
1088
+ "Class imbalance ratio (majority/minority): 2.02.",
1089
+ "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
1090
  "Final feature matrix: 10 features."
1091
  ],
1092
  "num_cols": [
 
1102
  ],
1103
  "n_classes": 2,
1104
  "log_transformed_cols": [],
1105
+ "smote_applied": true,
1106
+ "smote_log": "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
1107
+ "train_size": 856,
1108
  "test_size": 160,
1109
  "final_feature_count": 10
1110
  }
 
1159
  "is_large": false,
1160
  "is_wide": false,
1161
  "is_binary": true,
1162
+ "imbalance_ratio": 1.0,
1163
+ "smote_applied": true
1164
  }
1165
  }
1166
  },
 
1173
  "status": "done",
1174
  "data": {
1175
  "train": {
1176
+ "best_name": "Random Forest",
1177
  "best_metrics": {
1178
+ "accuracy": 0.8,
1179
+ "f1": 0.8017316017316019,
1180
+ "roc_auc": 0.8416505025568682,
1181
+ "train_time_s": 0.378,
1182
+ "train_score": 0.926019739715259,
1183
+ "test_score": 0.8416505025568682,
1184
+ "generalization_gap": 0.08436923715839073,
1185
  "overfit": false
1186
  },
1187
  "metric_name": "roc_auc",
 
1189
  "comparison_df": [
1190
  {
1191
  "Model": "Logistic Regression",
1192
+ "Train Score": 0.8272,
1193
+ "Test Score": 0.8462,
1194
+ "Gap": -0.019,
1195
+ "CV Mean": 0.8203,
1196
+ "CV Std": 0.0317,
1197
+ "CV Train Mean": 0.8271,
1198
  "CV Overfit": "No",
1199
  "Overfit": "No",
1200
+ "Train Time(s)": 0.01
1201
  },
1202
  {
1203
  "Model": "Random Forest",
1204
+ "Train Score": 0.926,
1205
+ "Test Score": 0.8417,
1206
+ "Gap": 0.0844,
1207
+ "CV Mean": 0.872,
1208
+ "CV Std": 0.0247,
1209
+ "CV Train Mean": 0.9317,
1210
+ "CV Overfit": "No",
1211
+ "Overfit": "No",
1212
+ "Train Time(s)": 0.38
1213
  }
1214
  ],
1215
  "feature_importances": {
1216
+ "sex_male": 0.28592870600873493,
1217
+ "sex_female": 0.2421809277670601,
1218
+ "pclass": 0.20077516929328645,
1219
+ "fare": 0.06675033992695845,
1220
+ "age": 0.06080316414257179,
1221
+ "sibsp": 0.05905243894256141,
1222
+ "parch": 0.04148880321022331,
1223
+ "embarked_S": 0.016750117296253544,
1224
+ "embarked_Q": 0.01568458564080758,
1225
+ "embarked_C": 0.010585747771542543
1226
  },
1227
  "training_log": [
1228
  "Training 2 models for classification task.",
1229
  " Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
1230
  " Training Logistic Regression...",
1231
+ " Logistic Regression: acc=0.775, f1=0.779, auc=0.846 [0.01s]",
1232
  " Training Random Forest...",
1233
+ " Random Forest: acc=0.800, f1=0.802, auc=0.842 [0.38s]",
1234
+ "\nBest model: Random Forest (roc_auc=0.8417)",
1235
+ "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417"
 
 
 
 
 
1236
  ],
1237
+ "overfitting_warnings": [],
1238
+ "cv_summary": "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417",
1239
  "cv_folds_used": 5,
1240
  "results": [
1241
  {
1242
  "name": "Logistic Regression",
1243
  "metrics": {
1244
+ "accuracy": 0.775,
1245
+ "f1": 0.7791835699797159,
1246
+ "roc_auc": 0.8462352318815024,
1247
+ "train_time_s": 0.015,
1248
+ "train_score": 0.827190147611145,
1249
+ "test_score": 0.8462352318815024,
1250
+ "generalization_gap": -0.01904508427035745,
1251
  "overfit": false
1252
  },
1253
  "cv_scores": [
1254
+ 0.8080043266630612,
1255
+ 0.7863201094391246,
1256
+ 0.7939808481532147,
1257
+ 0.8422708618331054,
1258
+ 0.8707250341997264
1259
  ],
1260
+ "cv_mean": 0.8202602360576463,
1261
+ "cv_std": 0.031698294077597756,
1262
  "cv_train_scores": [
1263
+ 0.8316063062138779,
1264
+ 0.8339556373928018,
1265
+ 0.8352002455117385,
1266
+ 0.8209128262833956,
1267
+ 0.8139907592109525
1268
  ],
1269
+ "cv_train_mean": 0.8271331549225532,
1270
  "cv_overfit": false,
1271
+ "train_score": 0.827190147611145,
1272
+ "generalization_gap": -0.01904508427035745,
1273
  "overfit": false
1274
  },
1275
  {
1276
  "name": "Random Forest",
1277
  "metrics": {
1278
+ "accuracy": 0.8,
1279
+ "f1": 0.8017316017316019,
1280
+ "roc_auc": 0.8416505025568682,
1281
+ "train_time_s": 0.378,
1282
+ "train_score": 0.926019739715259,
1283
+ "test_score": 0.8416505025568682,
1284
+ "generalization_gap": 0.08436923715839073,
1285
+ "overfit": false
1286
  },
1287
  "cv_scores": [
1288
+ 0.8795294753921038,
1289
+ 0.840218878248974,
1290
+ 0.8588235294117647,
1291
+ 0.9143638850889193,
1292
+ 0.8670314637482901
1293
  ],
1294
+ "cv_mean": 0.8719934463780105,
1295
+ "cv_std": 0.024737570511909102,
1296
  "cv_train_scores": [
1297
+ 0.9302947915597962,
1298
+ 0.9364823623685063,
1299
+ 0.9327400132985526,
1300
+ 0.9295432458697765,
1301
+ 0.9296881659932144
1302
  ],
1303
+ "cv_train_mean": 0.9317497158179693,
1304
+ "cv_overfit": false,
1305
+ "train_score": 0.926019739715259,
1306
+ "generalization_gap": 0.08436923715839073,
1307
+ "overfit": false
1308
  }
1309
  ]
1310
  }
 
1320
  "tune": {
1321
  "success": false,
1322
  "error": "optuna not installed \u2014 run: pip install optuna",
1323
+ "model_name": "Random Forest"
1324
  }
1325
  },
1326
  "error": null
 
1333
  "data": {
1334
  "eval": {
1335
  "metrics": {
1336
+ "accuracy": 0.8,
1337
+ "f1": 0.8017316017316019,
1338
+ "classification_report": " precision recall f1-score support\n\n 0 0.68 0.74 0.71 53\n 1 0.86 0.83 0.85 107\n\n accuracy 0.80 160\n macro avg 0.77 0.78 0.78 160\nweighted avg 0.80 0.80 0.80 160\n",
1339
+ "roc_auc": 0.8416505025568682,
1340
  "y_prob": [
1341
+ 0.13410941654770053,
1342
+ 0.5294946471795305,
1343
+ 0.6734029805133361,
1344
+ 0.48276513165091095,
1345
+ 0.537887729131407,
1346
+ 0.14991155807168366,
1347
+ 0.6170905519960105,
1348
+ 0.4784058790521681,
1349
+ 0.8375867266088654,
1350
+ 0.7617669897727407,
1351
+ 0.1351365003484392,
1352
+ 0.44334998820869886,
1353
+ 0.45918298156738113,
1354
+ 0.18086772747653274,
1355
+ 0.7363322360597586,
1356
+ 0.11657189382719757,
1357
+ 0.23649619273968361,
1358
+ 0.5901773420747848,
1359
+ 0.134558348190491,
1360
+ 0.4254133833278052,
1361
+ 0.1296011320209188,
1362
+ 0.7314644444555719,
1363
+ 0.7853934325845645,
1364
+ 0.9071377805346806,
1365
+ 0.6899050304790696,
1366
+ 0.7018366024600355,
1367
+ 0.8197685771383121,
1368
+ 0.5198317417706617,
1369
+ 0.12630032800342134,
1370
+ 0.6473881223230429,
1371
+ 0.832648823582342,
1372
+ 0.6085633474110343,
1373
+ 0.8113142318198139,
1374
+ 0.9074706641382,
1375
+ 0.8459766165607369,
1376
+ 0.8201738289640232,
1377
+ 0.9003680409586162,
1378
+ 0.10016718457369138,
1379
+ 0.8585346197416688,
1380
+ 0.47744576120787313,
1381
+ 0.7118155973614765,
1382
+ 0.8079398173505976,
1383
+ 0.4835688710616073,
1384
+ 0.15232883334919356,
1385
+ 0.7564268826362109,
1386
+ 0.8738909667369577,
1387
+ 0.8978781232217746,
1388
+ 0.8629139348922269,
1389
+ 0.9210750298694106,
1390
+ 0.8622476596460283,
1391
+ 0.5280584146401254,
1392
+ 0.43175724148640166,
1393
+ 0.8612268450998052,
1394
+ 0.5192093060535821,
1395
+ 0.4631429141776544,
1396
+ 0.41349421421165006,
1397
+ 0.1842717233199732,
1398
+ 0.5393021880249185,
1399
+ 0.4611023953979327,
1400
+ 0.8127362992442425,
1401
+ 0.8939691379496485,
1402
+ 0.4164787102909156,
1403
+ 0.8978878589569886,
1404
+ 0.7983424128761862,
1405
+ 0.1173182901761418,
1406
+ 0.7542514691828953,
1407
+ 0.8448152300020084,
1408
+ 0.7353745795727059,
1409
+ 0.874243708197828,
1410
+ 0.1495537468741798,
1411
+ 0.4608257845629732,
1412
+ 0.723666302124413,
1413
+ 0.8963689192492247,
1414
+ 0.5718324254113771,
1415
+ 0.4597956607545386,
1416
+ 0.8732235494170898,
1417
+ 0.7727295226970254,
1418
+ 0.730516200696798,
1419
+ 0.09548329031034873,
1420
+ 0.8629109388984001,
1421
+ 0.46263304800868466,
1422
+ 0.7151887661777987,
1423
+ 0.40230852321056754,
1424
+ 0.8930814394148957,
1425
+ 0.8189750833489609,
1426
+ 0.7948906670433182,
1427
+ 0.16253862850672068,
1428
+ 0.7762547028088542,
1429
+ 0.8334832356172626,
1430
+ 0.8004830705990988,
1431
+ 0.21099149923900418,
1432
+ 0.8841702218788182,
1433
+ 0.8712799607086796,
1434
+ 0.5813945785177388,
1435
+ 0.8052927113899669,
1436
+ 0.49664824783559597,
1437
+ 0.43547420910148493,
1438
+ 0.9093300332802987,
1439
+ 0.8271809777802208,
1440
+ 0.6483594924481736,
1441
+ 0.9153820268559258,
1442
+ 0.8316891109823481,
1443
+ 0.5172451679903641,
1444
+ 0.10690068213440551,
1445
+ 0.14482053455947552,
1446
+ 0.6514137175182347,
1447
+ 0.8437537672599027,
1448
+ 0.7675893615600448,
1449
+ 0.7953030977589493,
1450
+ 0.23188048345069834,
1451
+ 0.8320099927549827,
1452
+ 0.8411414013434237,
1453
+ 0.49552967074078597,
1454
+ 0.10795590031797207,
1455
+ 0.6603705695450834,
1456
+ 0.905216945402769,
1457
+ 0.9017388468431307,
1458
+ 0.7038598916174742,
1459
+ 0.46749836573145026,
1460
+ 0.10980764687631323,
1461
+ 0.4241936068254756,
1462
+ 0.7869237725733111,
1463
+ 0.4176645200689225,
1464
+ 0.41597565326122093,
1465
+ 0.1840375192714269,
1466
+ 0.09239177436740872,
1467
+ 0.5634089283154317,
1468
+ 0.8672817898245639,
1469
+ 0.859787536713618,
1470
+ 0.823755567069053,
1471
+ 0.09332562760407467,
1472
+ 0.5269654560034531,
1473
+ 0.3282944545355915,
1474
+ 0.12656165619337742,
1475
+ 0.8996144986378454,
1476
+ 0.8794629731543243,
1477
+ 0.2259360459237031,
1478
+ 0.8677342141160906,
1479
+ 0.7065381808535922,
1480
+ 0.5087308414616077,
1481
+ 0.7361387452605942,
1482
+ 0.9175622001046994,
1483
+ 0.6768295210930064,
1484
+ 0.8217479267611849,
1485
+ 0.8186363655380218,
1486
+ 0.7713282338574033,
1487
+ 0.8650744660561319,
1488
+ 0.44798639555019343,
1489
+ 0.6891437990464381,
1490
+ 0.8957915611217553,
1491
+ 0.13099882437253405,
1492
+ 0.9100589282983074,
1493
+ 0.5699927016156656,
1494
+ 0.8848101584325896,
1495
+ 0.833215993303308,
1496
+ 0.806074199994562,
1497
+ 0.15238050377532514,
1498
+ 0.10376629963015432,
1499
+ 0.1557889773023143,
1500
+ 0.11296197259042451
1501
  ]
1502
  },
1503
  "plot_paths": {
 
1692
  ],
1693
  "n_classes": 2,
1694
  "class_distribution": {
1695
+ "1": 535,
1696
+ "0": 265
1697
  },
1698
+ "imbalance_ratio": 2.02
1699
  },
1700
  "quality_flags": [],
1701
  "recommendations": [
 
1746
  "Categorical columns (2): mode imputation + one-hot encoding.",
1747
  "Target encoded with LabelEncoder. Classes: ['0', '1']",
1748
  "Train/test split: 640 train rows, 160 test rows (20% test).",
1749
+ "Class imbalance ratio (majority/minority): 2.02.",
1750
+ "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
1751
  "Final feature matrix: 10 features."
1752
  ],
1753
  "num_cols": [
 
1763
  ],
1764
  "n_classes": 2,
1765
  "log_transformed_cols": [],
1766
+ "smote_applied": true,
1767
+ "smote_log": "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428."
1768
  },
1769
  "train": {
1770
+ "best_name": "Random Forest",
1771
  "best_metrics": {
1772
+ "accuracy": 0.8,
1773
+ "f1": 0.8017316017316019,
1774
+ "roc_auc": 0.8416505025568682,
1775
+ "train_time_s": 0.378,
1776
+ "train_score": 0.926019739715259,
1777
+ "test_score": 0.8416505025568682,
1778
+ "generalization_gap": 0.08436923715839073,
1779
  "overfit": false
1780
  },
1781
  "metric_name": "roc_auc",
 
1783
  "comparison_df": [
1784
  {
1785
  "Model": "Logistic Regression",
1786
+ "Train Score": 0.8272,
1787
+ "Test Score": 0.8462,
1788
+ "Gap": -0.019,
1789
+ "CV Mean": 0.8203,
1790
+ "CV Std": 0.0317,
1791
+ "CV Train Mean": 0.8271,
1792
  "CV Overfit": "No",
1793
  "Overfit": "No",
1794
+ "Train Time(s)": 0.01
1795
  },
1796
  {
1797
  "Model": "Random Forest",
1798
+ "Train Score": 0.926,
1799
+ "Test Score": 0.8417,
1800
+ "Gap": 0.0844,
1801
+ "CV Mean": 0.872,
1802
+ "CV Std": 0.0247,
1803
+ "CV Train Mean": 0.9317,
1804
+ "CV Overfit": "No",
1805
+ "Overfit": "No",
1806
+ "Train Time(s)": 0.38
1807
  }
1808
  ],
1809
  "feature_importances": {
1810
+ "sex_male": 0.28592870600873493,
1811
+ "sex_female": 0.2421809277670601,
1812
+ "pclass": 0.20077516929328645,
1813
+ "fare": 0.06675033992695845,
1814
+ "age": 0.06080316414257179,
1815
+ "sibsp": 0.05905243894256141,
1816
+ "parch": 0.04148880321022331,
1817
+ "embarked_S": 0.016750117296253544,
1818
+ "embarked_Q": 0.01568458564080758,
1819
+ "embarked_C": 0.010585747771542543
1820
  },
1821
  "training_log": [
1822
  "Training 2 models for classification task.",
1823
  " Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
1824
  " Training Logistic Regression...",
1825
+ " Logistic Regression: acc=0.775, f1=0.779, auc=0.846 [0.01s]",
1826
  " Training Random Forest...",
1827
+ " Random Forest: acc=0.800, f1=0.802, auc=0.842 [0.38s]",
1828
+ "\nBest model: Random Forest (roc_auc=0.8417)",
1829
+ "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417"
 
 
 
 
 
1830
  ],
1831
+ "overfitting_warnings": [],
1832
+ "cv_summary": "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417",
1833
  "cv_folds_used": 5,
1834
  "results": [
1835
  {
1836
  "name": "Logistic Regression",
1837
  "metrics": {
1838
+ "accuracy": 0.775,
1839
+ "f1": 0.7791835699797159,
1840
+ "roc_auc": 0.8462352318815024,
1841
+ "train_time_s": 0.015,
1842
+ "train_score": 0.827190147611145,
1843
+ "test_score": 0.8462352318815024,
1844
+ "generalization_gap": -0.01904508427035745,
1845
  "overfit": false
1846
  },
1847
+ "train_score": 0.827190147611145,
1848
+ "generalization_gap": -0.01904508427035745,
1849
  "overfit": false,
1850
  "cv_scores": [
1851
+ 0.8080043266630612,
1852
+ 0.7863201094391246,
1853
+ 0.7939808481532147,
1854
+ 0.8422708618331054,
1855
+ 0.8707250341997264
1856
  ],
1857
+ "cv_mean": 0.8202602360576463,
1858
+ "cv_std": 0.031698294077597756,
1859
  "cv_train_scores": [
1860
+ 0.8316063062138779,
1861
+ 0.8339556373928018,
1862
+ 0.8352002455117385,
1863
+ 0.8209128262833956,
1864
+ 0.8139907592109525
1865
  ],
1866
+ "cv_train_mean": 0.8271331549225532,
1867
  "cv_overfit": false
1868
  },
1869
  {
1870
  "name": "Random Forest",
1871
  "metrics": {
1872
+ "accuracy": 0.8,
1873
+ "f1": 0.8017316017316019,
1874
+ "roc_auc": 0.8416505025568682,
1875
+ "train_time_s": 0.378,
1876
+ "train_score": 0.926019739715259,
1877
+ "test_score": 0.8416505025568682,
1878
+ "generalization_gap": 0.08436923715839073,
1879
+ "overfit": false
1880
  },
1881
+ "train_score": 0.926019739715259,
1882
+ "generalization_gap": 0.08436923715839073,
1883
+ "overfit": false,
1884
  "cv_scores": [
1885
+ 0.8795294753921038,
1886
+ 0.840218878248974,
1887
+ 0.8588235294117647,
1888
+ 0.9143638850889193,
1889
+ 0.8670314637482901
1890
  ],
1891
+ "cv_mean": 0.8719934463780105,
1892
+ "cv_std": 0.024737570511909102,
1893
  "cv_train_scores": [
1894
+ 0.9302947915597962,
1895
+ 0.9364823623685063,
1896
+ 0.9327400132985526,
1897
+ 0.9295432458697765,
1898
+ 0.9296881659932144
1899
  ],
1900
+ "cv_train_mean": 0.9317497158179693,
1901
+ "cv_overfit": false
1902
  }
1903
  ]
1904
  },
1905
  "eval": {
1906
  "metrics": {
1907
+ "accuracy": 0.8,
1908
+ "f1": 0.8017316017316019,
1909
+ "classification_report": " precision recall f1-score support\n\n 0 0.68 0.74 0.71 53\n 1 0.86 0.83 0.85 107\n\n accuracy 0.80 160\n macro avg 0.77 0.78 0.78 160\nweighted avg 0.80 0.80 0.80 160\n",
1910
+ "roc_auc": 0.8416505025568682,
1911
  "y_prob": [
1912
+ 0.13410941654770053,
1913
+ 0.5294946471795305,
1914
+ 0.6734029805133361,
1915
+ 0.48276513165091095,
1916
+ 0.537887729131407,
1917
+ 0.14991155807168366,
1918
+ 0.6170905519960105,
1919
+ 0.4784058790521681,
1920
+ 0.8375867266088654,
1921
+ 0.7617669897727407,
1922
+ 0.1351365003484392,
1923
+ 0.44334998820869886,
1924
+ 0.45918298156738113,
1925
+ 0.18086772747653274,
1926
+ 0.7363322360597586,
1927
+ 0.11657189382719757,
1928
+ 0.23649619273968361,
1929
+ 0.5901773420747848,
1930
+ 0.134558348190491,
1931
+ 0.4254133833278052,
1932
+ 0.1296011320209188,
1933
+ 0.7314644444555719,
1934
+ 0.7853934325845645,
1935
+ 0.9071377805346806,
1936
+ 0.6899050304790696,
1937
+ 0.7018366024600355,
1938
+ 0.8197685771383121,
1939
+ 0.5198317417706617,
1940
+ 0.12630032800342134,
1941
+ 0.6473881223230429,
1942
+ 0.832648823582342,
1943
+ 0.6085633474110343,
1944
+ 0.8113142318198139,
1945
+ 0.9074706641382,
1946
+ 0.8459766165607369,
1947
+ 0.8201738289640232,
1948
+ 0.9003680409586162,
1949
+ 0.10016718457369138,
1950
+ 0.8585346197416688,
1951
+ 0.47744576120787313,
1952
+ 0.7118155973614765,
1953
+ 0.8079398173505976,
1954
+ 0.4835688710616073,
1955
+ 0.15232883334919356,
1956
+ 0.7564268826362109,
1957
+ 0.8738909667369577,
1958
+ 0.8978781232217746,
1959
+ 0.8629139348922269,
1960
+ 0.9210750298694106,
1961
+ 0.8622476596460283,
1962
+ 0.5280584146401254,
1963
+ 0.43175724148640166,
1964
+ 0.8612268450998052,
1965
+ 0.5192093060535821,
1966
+ 0.4631429141776544,
1967
+ 0.41349421421165006,
1968
+ 0.1842717233199732,
1969
+ 0.5393021880249185,
1970
+ 0.4611023953979327,
1971
+ 0.8127362992442425,
1972
+ 0.8939691379496485,
1973
+ 0.4164787102909156,
1974
+ 0.8978878589569886,
1975
+ 0.7983424128761862,
1976
+ 0.1173182901761418,
1977
+ 0.7542514691828953,
1978
+ 0.8448152300020084,
1979
+ 0.7353745795727059,
1980
+ 0.874243708197828,
1981
+ 0.1495537468741798,
1982
+ 0.4608257845629732,
1983
+ 0.723666302124413,
1984
+ 0.8963689192492247,
1985
+ 0.5718324254113771,
1986
+ 0.4597956607545386,
1987
+ 0.8732235494170898,
1988
+ 0.7727295226970254,
1989
+ 0.730516200696798,
1990
+ 0.09548329031034873,
1991
+ 0.8629109388984001,
1992
+ 0.46263304800868466,
1993
+ 0.7151887661777987,
1994
+ 0.40230852321056754,
1995
+ 0.8930814394148957,
1996
+ 0.8189750833489609,
1997
+ 0.7948906670433182,
1998
+ 0.16253862850672068,
1999
+ 0.7762547028088542,
2000
+ 0.8334832356172626,
2001
+ 0.8004830705990988,
2002
+ 0.21099149923900418,
2003
+ 0.8841702218788182,
2004
+ 0.8712799607086796,
2005
+ 0.5813945785177388,
2006
+ 0.8052927113899669,
2007
+ 0.49664824783559597,
2008
+ 0.43547420910148493,
2009
+ 0.9093300332802987,
2010
+ 0.8271809777802208,
2011
+ 0.6483594924481736,
2012
+ 0.9153820268559258,
2013
+ 0.8316891109823481,
2014
+ 0.5172451679903641,
2015
+ 0.10690068213440551,
2016
+ 0.14482053455947552,
2017
+ 0.6514137175182347,
2018
+ 0.8437537672599027,
2019
+ 0.7675893615600448,
2020
+ 0.7953030977589493,
2021
+ 0.23188048345069834,
2022
+ 0.8320099927549827,
2023
+ 0.8411414013434237,
2024
+ 0.49552967074078597,
2025
+ 0.10795590031797207,
2026
+ 0.6603705695450834,
2027
+ 0.905216945402769,
2028
+ 0.9017388468431307,
2029
+ 0.7038598916174742,
2030
+ 0.46749836573145026,
2031
+ 0.10980764687631323,
2032
+ 0.4241936068254756,
2033
+ 0.7869237725733111,
2034
+ 0.4176645200689225,
2035
+ 0.41597565326122093,
2036
+ 0.1840375192714269,
2037
+ 0.09239177436740872,
2038
+ 0.5634089283154317,
2039
+ 0.8672817898245639,
2040
+ 0.859787536713618,
2041
+ 0.823755567069053,
2042
+ 0.09332562760407467,
2043
+ 0.5269654560034531,
2044
+ 0.3282944545355915,
2045
+ 0.12656165619337742,
2046
+ 0.8996144986378454,
2047
+ 0.8794629731543243,
2048
+ 0.2259360459237031,
2049
+ 0.8677342141160906,
2050
+ 0.7065381808535922,
2051
+ 0.5087308414616077,
2052
+ 0.7361387452605942,
2053
+ 0.9175622001046994,
2054
+ 0.6768295210930064,
2055
+ 0.8217479267611849,
2056
+ 0.8186363655380218,
2057
+ 0.7713282338574033,
2058
+ 0.8650744660561319,
2059
+ 0.44798639555019343,
2060
+ 0.6891437990464381,
2061
+ 0.8957915611217553,
2062
+ 0.13099882437253405,
2063
+ 0.9100589282983074,
2064
+ 0.5699927016156656,
2065
+ 0.8848101584325896,
2066
+ 0.833215993303308,
2067
+ 0.806074199994562,
2068
+ 0.15238050377532514,
2069
+ 0.10376629963015432,
2070
+ 0.1557889773023143,
2071
+ 0.11296197259042451
2072
  ]
2073
  },
2074
  "plot_paths": {
 
2089
  },
2090
  "target_col": "survived",
2091
  "task_type": "classification",
2092
+ "best_model_name": "Random Forest",
2093
  "best_metrics": {
2094
+ "accuracy": 0.8,
2095
+ "f1": 0.8017316017316019,
2096
+ "roc_auc": 0.8416505025568682,
2097
+ "train_time_s": 0.378,
2098
+ "train_score": 0.926019739715259,
2099
+ "test_score": 0.8416505025568682,
2100
+ "generalization_gap": 0.08436923715839073,
2101
  "overfit": false
2102
  },
2103
  "comparison_df": [
2104
  {
2105
  "Model": "Logistic Regression",
2106
+ "Train Score": 0.8272,
2107
+ "Test Score": 0.8462,
2108
+ "Gap": -0.019,
2109
+ "CV Mean": 0.8203,
2110
+ "CV Std": 0.0317,
2111
+ "CV Train Mean": 0.8271,
2112
  "CV Overfit": "No",
2113
  "Overfit": "No",
2114
+ "Train Time(s)": 0.01
2115
  },
2116
  {
2117
  "Model": "Random Forest",
2118
+ "Train Score": 0.926,
2119
+ "Test Score": 0.8417,
2120
+ "Gap": 0.0844,
2121
+ "CV Mean": 0.872,
2122
+ "CV Std": 0.0247,
2123
+ "CV Train Mean": 0.9317,
2124
+ "CV Overfit": "No",
2125
+ "Overfit": "No",
2126
+ "Train Time(s)": 0.38
2127
  }
2128
  ],
2129
  "feature_importances": {
2130
+ "sex_male": 0.28592870600873493,
2131
+ "sex_female": 0.2421809277670601,
2132
+ "pclass": 0.20077516929328645,
2133
+ "fare": 0.06675033992695845,
2134
+ "age": 0.06080316414257179,
2135
+ "sibsp": 0.05905243894256141,
2136
+ "parch": 0.04148880321022331,
2137
+ "embarked_S": 0.016750117296253544,
2138
+ "embarked_Q": 0.01568458564080758,
2139
+ "embarked_C": 0.010585747771542543
2140
  },
2141
  "plot_paths": {
2142
  "confusion_matrix": "outputs/titanic_confusion_matrix.png",
 
2144
  "feature_importance": "outputs/titanic_feature_importance.png"
2145
  },
2146
  "metrics": {
2147
+ "accuracy": 0.8,
2148
+ "f1": 0.8017316017316019,
2149
+ "classification_report": " precision recall f1-score support\n\n 0 0.68 0.74 0.71 53\n 1 0.86 0.83 0.85 107\n\n accuracy 0.80 160\n macro avg 0.77 0.78 0.78 160\nweighted avg 0.80 0.80 0.80 160\n",
2150
+ "roc_auc": 0.8416505025568682,
2151
  "y_prob": [
2152
+ 0.13410941654770053,
2153
+ 0.5294946471795305,
2154
+ 0.6734029805133361,
2155
+ 0.48276513165091095,
2156
+ 0.537887729131407,
2157
+ 0.14991155807168366,
2158
+ 0.6170905519960105,
2159
+ 0.4784058790521681,
2160
+ 0.8375867266088654,
2161
+ 0.7617669897727407,
2162
+ 0.1351365003484392,
2163
+ 0.44334998820869886,
2164
+ 0.45918298156738113,
2165
+ 0.18086772747653274,
2166
+ 0.7363322360597586,
2167
+ 0.11657189382719757,
2168
+ 0.23649619273968361,
2169
+ 0.5901773420747848,
2170
+ 0.134558348190491,
2171
+ 0.4254133833278052,
2172
+ 0.1296011320209188,
2173
+ 0.7314644444555719,
2174
+ 0.7853934325845645,
2175
+ 0.9071377805346806,
2176
+ 0.6899050304790696,
2177
+ 0.7018366024600355,
2178
+ 0.8197685771383121,
2179
+ 0.5198317417706617,
2180
+ 0.12630032800342134,
2181
+ 0.6473881223230429,
2182
+ 0.832648823582342,
2183
+ 0.6085633474110343,
2184
+ 0.8113142318198139,
2185
+ 0.9074706641382,
2186
+ 0.8459766165607369,
2187
+ 0.8201738289640232,
2188
+ 0.9003680409586162,
2189
+ 0.10016718457369138,
2190
+ 0.8585346197416688,
2191
+ 0.47744576120787313,
2192
+ 0.7118155973614765,
2193
+ 0.8079398173505976,
2194
+ 0.4835688710616073,
2195
+ 0.15232883334919356,
2196
+ 0.7564268826362109,
2197
+ 0.8738909667369577,
2198
+ 0.8978781232217746,
2199
+ 0.8629139348922269,
2200
+ 0.9210750298694106,
2201
+ 0.8622476596460283,
2202
+ 0.5280584146401254,
2203
+ 0.43175724148640166,
2204
+ 0.8612268450998052,
2205
+ 0.5192093060535821,
2206
+ 0.4631429141776544,
2207
+ 0.41349421421165006,
2208
+ 0.1842717233199732,
2209
+ 0.5393021880249185,
2210
+ 0.4611023953979327,
2211
+ 0.8127362992442425,
2212
+ 0.8939691379496485,
2213
+ 0.4164787102909156,
2214
+ 0.8978878589569886,
2215
+ 0.7983424128761862,
2216
+ 0.1173182901761418,
2217
+ 0.7542514691828953,
2218
+ 0.8448152300020084,
2219
+ 0.7353745795727059,
2220
+ 0.874243708197828,
2221
+ 0.1495537468741798,
2222
+ 0.4608257845629732,
2223
+ 0.723666302124413,
2224
+ 0.8963689192492247,
2225
+ 0.5718324254113771,
2226
+ 0.4597956607545386,
2227
+ 0.8732235494170898,
2228
+ 0.7727295226970254,
2229
+ 0.730516200696798,
2230
+ 0.09548329031034873,
2231
+ 0.8629109388984001,
2232
+ 0.46263304800868466,
2233
+ 0.7151887661777987,
2234
+ 0.40230852321056754,
2235
+ 0.8930814394148957,
2236
+ 0.8189750833489609,
2237
+ 0.7948906670433182,
2238
+ 0.16253862850672068,
2239
+ 0.7762547028088542,
2240
+ 0.8334832356172626,
2241
+ 0.8004830705990988,
2242
+ 0.21099149923900418,
2243
+ 0.8841702218788182,
2244
+ 0.8712799607086796,
2245
+ 0.5813945785177388,
2246
+ 0.8052927113899669,
2247
+ 0.49664824783559597,
2248
+ 0.43547420910148493,
2249
+ 0.9093300332802987,
2250
+ 0.8271809777802208,
2251
+ 0.6483594924481736,
2252
+ 0.9153820268559258,
2253
+ 0.8316891109823481,
2254
+ 0.5172451679903641,
2255
+ 0.10690068213440551,
2256
+ 0.14482053455947552,
2257
+ 0.6514137175182347,
2258
+ 0.8437537672599027,
2259
+ 0.7675893615600448,
2260
+ 0.7953030977589493,
2261
+ 0.23188048345069834,
2262
+ 0.8320099927549827,
2263
+ 0.8411414013434237,
2264
+ 0.49552967074078597,
2265
+ 0.10795590031797207,
2266
+ 0.6603705695450834,
2267
+ 0.905216945402769,
2268
+ 0.9017388468431307,
2269
+ 0.7038598916174742,
2270
+ 0.46749836573145026,
2271
+ 0.10980764687631323,
2272
+ 0.4241936068254756,
2273
+ 0.7869237725733111,
2274
+ 0.4176645200689225,
2275
+ 0.41597565326122093,
2276
+ 0.1840375192714269,
2277
+ 0.09239177436740872,
2278
+ 0.5634089283154317,
2279
+ 0.8672817898245639,
2280
+ 0.859787536713618,
2281
+ 0.823755567069053,
2282
+ 0.09332562760407467,
2283
+ 0.5269654560034531,
2284
+ 0.3282944545355915,
2285
+ 0.12656165619337742,
2286
+ 0.8996144986378454,
2287
+ 0.8794629731543243,
2288
+ 0.2259360459237031,
2289
+ 0.8677342141160906,
2290
+ 0.7065381808535922,
2291
+ 0.5087308414616077,
2292
+ 0.7361387452605942,
2293
+ 0.9175622001046994,
2294
+ 0.6768295210930064,
2295
+ 0.8217479267611849,
2296
+ 0.8186363655380218,
2297
+ 0.7713282338574033,
2298
+ 0.8650744660561319,
2299
+ 0.44798639555019343,
2300
+ 0.6891437990464381,
2301
+ 0.8957915611217553,
2302
+ 0.13099882437253405,
2303
+ 0.9100589282983074,
2304
+ 0.5699927016156656,
2305
+ 0.8848101584325896,
2306
+ 0.833215993303308,
2307
+ 0.806074199994562,
2308
+ 0.15238050377532514,
2309
+ 0.10376629963015432,
2310
+ 0.1557889773023143,
2311
+ 0.11296197259042451
2312
  ]
2313
  },
2314
  "tune": {
 
2357
  "is_large": false,
2358
  "is_wide": false,
2359
  "is_binary": true,
2360
+ "imbalance_ratio": 1.0,
2361
+ "smote_applied": true
2362
  }
2363
  }
2364
  },
generate_all_demos.py CHANGED
@@ -121,7 +121,7 @@ _TITANIC_TARGET_COL = "survived"
121
 
122
 
123
  def make_titanic_like(n: int = 800) -> pd.DataFrame:
124
- """Synthetic Titanic-style data: seven feature columns, then target (never in feature block)."""
125
  rng = np.random.default_rng(7)
126
  features = pd.DataFrame(
127
  {
@@ -134,11 +134,57 @@ def make_titanic_like(n: int = 800) -> pd.DataFrame:
134
  "embarked": rng.choice(["C", "Q", "S"], n),
135
  }
136
  )
137
- target = pd.Series(rng.integers(0, 2, n), name=_TITANIC_TARGET_COL)
 
 
 
 
 
 
 
 
 
 
138
  out = pd.concat([features, target], axis=1)
139
  return out.loc[:, list(_TITANIC_FEATURE_COLS) + [_TITANIC_TARGET_COL]]
140
 
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  def make_diabetes_binary() -> pd.DataFrame:
143
  bunch = load_diabetes()
144
  X, y = bunch.data, bunch.target
@@ -307,16 +353,20 @@ def main() -> None:
307
  diabetes_csv = datasets_dir / "diabetes_sklearn_demo.csv"
308
  diabetes_df.to_csv(diabetes_csv, index=False)
309
 
 
 
 
 
310
  configs = [
311
  {
312
  "key": "healthcare",
313
  "label": "healthcare",
314
- "df": pd.read_csv(datasets_dir / "sample_healthcare_classification.csv"),
315
  "message": "predict whether the patient will be readmitted",
316
  "target_col": "readmitted",
317
  "task_type": "classification",
318
  "run_id": "healthcare",
319
- "demo_dataset_path": "datasets/sample_healthcare_classification.csv",
320
  "demo_goal": "Predict hospital readmission from patient features (demo)",
321
  },
322
  {
 
121
 
122
 
123
  def make_titanic_like(n: int = 800) -> pd.DataFrame:
124
+ """Synthetic Titanic-style data: survival depends strongly on sex and pclass."""
125
  rng = np.random.default_rng(7)
126
  features = pd.DataFrame(
127
  {
 
134
  "embarked": rng.choice(["C", "Q", "S"], n),
135
  }
136
  )
137
+ female = features["sex"].eq("female")
138
+ male = ~female
139
+ p_surv = np.zeros(n, dtype=float)
140
+ # Nominal pattern: female ~80%; male by class ~60% / ~40% / ~15%. The values below are sharpened (upper groups raised, male 3rd class lowered) so demo ROC-AUC clears 0.75
141
+ p_surv[female] = 0.88
142
+ pc = features["pclass"].to_numpy()
143
+ p_surv[male & (pc == 1)] = 0.72
144
+ p_surv[male & (pc == 2)] = 0.48
145
+ p_surv[male & (pc == 3)] = 0.10
146
+ survived = (rng.random(n) < p_surv).astype(np.int64)
147
+ target = pd.Series(survived, name=_TITANIC_TARGET_COL)
148
  out = pd.concat([features, target], axis=1)
149
  return out.loc[:, list(_TITANIC_FEATURE_COLS) + [_TITANIC_TARGET_COL]]
150
 
151
 
152
def make_healthcare_like(n: int = 500) -> pd.DataFrame:
    """
    Build a synthetic hospital-readmission dataset with a learnable signal.

    Glucose, BMI and age are drawn independently and uniformly. Each row falls
    into exactly one of four risk tiers, and ``readmitted`` is a Bernoulli draw
    using that tier's probability. All remaining columns are independent noise.

    The nominal demo spec for the tiers is 75% / 55% / 45% / 20%; the rates
    used here are pushed further apart (top two raised, bottom two lowered) so
    that the demo models reach ROC-AUC > 0.75.

    Args:
        n: Number of rows to generate.

    Returns:
        DataFrame with nine feature columns followed by the binary
        ``readmitted`` target. The generator is seeded (42), so output is
        deterministic for a given ``n``.
    """
    rng = np.random.default_rng(42)

    # NOTE: the order of rng calls below determines the generated values;
    # keep it stable so regenerated demo artifacts stay reproducible.
    glucose = rng.uniform(70.0, 200.0, n)
    bmi = rng.uniform(18.0, 45.0, n)
    age = rng.uniform(22.0, 90.0, n)

    # Mutually exclusive risk tiers, highest risk first.
    tier1 = (glucose > 140) & (bmi > 30)
    tier2 = ((glucose > 140) | (bmi > 35)) & ~tier1
    low_metabolic_risk = ~(tier1 | tier2)
    tier3 = low_metabolic_risk & (age > 65)
    tier4 = low_metabolic_risk & (age <= 65)

    p = np.zeros(n, dtype=float)
    p[tier1] = 0.92
    p[tier2] = 0.72
    p[tier3] = 0.38
    p[tier4] = 0.08
    readmitted = (rng.random(n) < p).astype(int)

    return pd.DataFrame(
        {
            "age": np.round(age, 1),
            "bmi": np.round(bmi, 1),
            "blood_pressure": np.round(rng.uniform(60.0, 140.0, n), 1),
            "glucose": np.round(glucose, 1),
            "num_medications": rng.integers(0, 12, n),
            "days_in_hospital": rng.integers(1, 15, n),
            "gender": rng.choice(["Female", "Male"], n),
            "smoker": rng.choice(["Yes", "No"], n),
            # "Uninsured" rather than the literal string "None": pandas.read_csv
            # treats "None" as a missing value by default, so the CSV written
            # from this frame would not round-trip to the same categories.
            "insurance": rng.choice(
                ["Uninsured", "Medicare", "Medicaid", "Private"], n
            ),
            "readmitted": readmitted,
        }
    )
186
+
187
+
188
  def make_diabetes_binary() -> pd.DataFrame:
189
  bunch = load_diabetes()
190
  X, y = bunch.data, bunch.target
 
353
  diabetes_csv = datasets_dir / "diabetes_sklearn_demo.csv"
354
  diabetes_df.to_csv(diabetes_csv, index=False)
355
 
356
+ healthcare_df = make_healthcare_like(500)
357
+ healthcare_csv = datasets_dir / "healthcare_demo_synth.csv"
358
+ healthcare_df.to_csv(healthcare_csv, index=False)
359
+
360
  configs = [
361
  {
362
  "key": "healthcare",
363
  "label": "healthcare",
364
+ "df": healthcare_df,
365
  "message": "predict whether the patient will be readmitted",
366
  "target_col": "readmitted",
367
  "task_type": "classification",
368
  "run_id": "healthcare",
369
+ "demo_dataset_path": "datasets/healthcare_demo_synth.csv",
370
  "demo_goal": "Predict hospital readmission from patient features (demo)",
371
  },
372
  {