Spaces:

Abdourakib
/

ai-data-science-agent

Running

App Files Files Community

Abdourakib committed 17 days ago

Commit

f480d0c

1 Parent(s): 4970d33

fix: healthcare as default demo, stronger synthetic signal, regenerate demos

Browse files

Files changed (7) hide show

app.py +3 -1
demo_result.json +0 -0
demo_result_diabetes.json +19 -19
demo_result_healthcare.json +0 -0
demo_result_housing.json +37 -37
demo_result_titanic.json +1178 -1187
generate_all_demos.py +54 -4

app.py CHANGED Viewed

@@ -1992,7 +1992,9 @@ with st.sidebar:
                 st.warning("datasets/titanic_demo_synth.csv or datasets/titanic.csv not found.")
         if st.button("Healthcare", use_container_width=True):
             st.session_state["demo_dataset"] = "healthcare"
-            p = Path("datasets/sample_healthcare_classification.csv")
             if p.exists():
                 st.session_state.df = pd.read_csv(p)
                 st.session_state.filename = p.name

                 st.warning("datasets/titanic_demo_synth.csv or datasets/titanic.csv not found.")
         if st.button("Healthcare", use_container_width=True):
             st.session_state["demo_dataset"] = "healthcare"
+            p = Path("datasets/healthcare_demo_synth.csv")
+            if not p.exists():
+                p = Path("datasets/sample_healthcare_classification.csv")
             if p.exists():
                 st.session_state.df = pd.read_csv(p)
                 st.session_state.filename = p.name

demo_result.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

demo_result_diabetes.json CHANGED Viewed

@@ -322,7 +322,7 @@
         "accuracy": 0.7415730337078652,
         "f1": 0.7409192020410919,
         "roc_auc": 0.8262626262626263,
-        "train_time_s": 0.006,
         "train_score": 0.8554185927067283,
         "test_score": 0.8262626262626263,
         "generalization_gap": 0.02915596644410201,
@@ -353,7 +353,7 @@
           "CV Train Mean": 0.9716,
           "CV Overfit": "Yes",
           "Overfit": "No",
-          "Train Time(s)": 0.12
         }
       ],
       "feature_importances": {
@@ -374,7 +374,7 @@
         "  Training Logistic Regression...",
         "    Logistic Regression: acc=0.742, f1=0.741, auc=0.826  [0.01s]",
         "  Training Random Forest...",
-        "    Random Forest: acc=0.719, f1=0.717, auc=0.816  [0.12s]",
         "\nBest model: Logistic Regression (roc_auc=0.8263)",
         "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
         "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
@@ -391,7 +391,7 @@
             "accuracy": 0.7415730337078652,
             "f1": 0.7409192020410919,
             "roc_auc": 0.8262626262626263,
-            "train_time_s": 0.006,
             "train_score": 0.8554185927067283,
             "test_score": 0.8262626262626263,
             "generalization_gap": 0.02915596644410201,
@@ -425,7 +425,7 @@
             "accuracy": 0.7191011235955056,
             "f1": 0.716527021635327,
             "roc_auc": 0.8161616161616162,
-            "train_time_s": 0.123,
             "train_score": 0.9577555213148433,
             "test_score": 0.8161616161616162,
             "generalization_gap": 0.14159390515322712,
@@ -576,7 +576,7 @@
       "accuracy": 0.7415730337078652,
       "f1": 0.7409192020410919,
       "roc_auc": 0.8262626262626263,
-      "train_time_s": 0.006,
       "train_score": 0.8554185927067283,
       "test_score": 0.8262626262626263,
       "generalization_gap": 0.02915596644410201,
@@ -605,7 +605,7 @@
         "CV Train Mean": 0.9716,
         "CV Overfit": "Yes",
         "Overfit": "No",
-        "Train Time(s)": 0.12
       }
     ],
     "feature_importances": {
@@ -1183,7 +1183,7 @@
             "accuracy": 0.7415730337078652,
             "f1": 0.7409192020410919,
             "roc_auc": 0.8262626262626263,
-            "train_time_s": 0.006,
             "train_score": 0.8554185927067283,
             "test_score": 0.8262626262626263,
             "generalization_gap": 0.02915596644410201,
@@ -1214,7 +1214,7 @@
               "CV Train Mean": 0.9716,
               "CV Overfit": "Yes",
               "Overfit": "No",
-              "Train Time(s)": 0.12
             }
           ],
           "feature_importances": {
@@ -1235,7 +1235,7 @@
             "  Training Logistic Regression...",
             "    Logistic Regression: acc=0.742, f1=0.741, auc=0.826  [0.01s]",
             "  Training Random Forest...",
-            "    Random Forest: acc=0.719, f1=0.717, auc=0.816  [0.12s]",
             "\nBest model: Logistic Regression (roc_auc=0.8263)",
             "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
             "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
@@ -1252,7 +1252,7 @@
                 "accuracy": 0.7415730337078652,
                 "f1": 0.7409192020410919,
                 "roc_auc": 0.8262626262626263,
-                "train_time_s": 0.006,
                 "train_score": 0.8554185927067283,
                 "test_score": 0.8262626262626263,
                 "generalization_gap": 0.02915596644410201,
@@ -1286,7 +1286,7 @@
                 "accuracy": 0.7191011235955056,
                 "f1": 0.716527021635327,
                 "roc_auc": 0.8161616161616162,
-                "train_time_s": 0.123,
                 "train_score": 0.9577555213148433,
                 "test_score": 0.8161616161616162,
                 "generalization_gap": 0.14159390515322712,
@@ -1781,7 +1781,7 @@
             "accuracy": 0.7415730337078652,
             "f1": 0.7409192020410919,
             "roc_auc": 0.8262626262626263,
-            "train_time_s": 0.006,
             "train_score": 0.8554185927067283,
             "test_score": 0.8262626262626263,
             "generalization_gap": 0.02915596644410201,
@@ -1812,7 +1812,7 @@
               "CV Train Mean": 0.9716,
               "CV Overfit": "Yes",
               "Overfit": "No",
-              "Train Time(s)": 0.12
             }
           ],
           "feature_importances": {
@@ -1833,7 +1833,7 @@
             "  Training Logistic Regression...",
             "    Logistic Regression: acc=0.742, f1=0.741, auc=0.826  [0.01s]",
             "  Training Random Forest...",
-            "    Random Forest: acc=0.719, f1=0.717, auc=0.816  [0.12s]",
             "\nBest model: Logistic Regression (roc_auc=0.8263)",
             "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
             "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
@@ -1850,7 +1850,7 @@
                 "accuracy": 0.7415730337078652,
                 "f1": 0.7409192020410919,
                 "roc_auc": 0.8262626262626263,
-                "train_time_s": 0.006,
                 "train_score": 0.8554185927067283,
                 "test_score": 0.8262626262626263,
                 "generalization_gap": 0.02915596644410201,
@@ -1884,7 +1884,7 @@
                 "accuracy": 0.7191011235955056,
                 "f1": 0.716527021635327,
                 "roc_auc": 0.8161616161616162,
-                "train_time_s": 0.123,
                 "train_score": 0.9577555213148433,
                 "test_score": 0.8161616161616162,
                 "generalization_gap": 0.14159390515322712,
@@ -2035,7 +2035,7 @@
           "accuracy": 0.7415730337078652,
           "f1": 0.7409192020410919,
           "roc_auc": 0.8262626262626263,
-          "train_time_s": 0.006,
           "train_score": 0.8554185927067283,
           "test_score": 0.8262626262626263,
           "generalization_gap": 0.02915596644410201,
@@ -2064,7 +2064,7 @@
             "CV Train Mean": 0.9716,
             "CV Overfit": "Yes",
             "Overfit": "No",
-            "Train Time(s)": 0.12
           }
         ],
         "feature_importances": {

         "accuracy": 0.7415730337078652,
         "f1": 0.7409192020410919,
         "roc_auc": 0.8262626262626263,
+        "train_time_s": 0.012,
         "train_score": 0.8554185927067283,
         "test_score": 0.8262626262626263,
         "generalization_gap": 0.02915596644410201,
           "CV Train Mean": 0.9716,
           "CV Overfit": "Yes",
           "Overfit": "No",
+          "Train Time(s)": 0.34
         }
       ],
       "feature_importances": {
         "  Training Logistic Regression...",
         "    Logistic Regression: acc=0.742, f1=0.741, auc=0.826  [0.01s]",
         "  Training Random Forest...",
+        "    Random Forest: acc=0.719, f1=0.717, auc=0.816  [0.34s]",
         "\nBest model: Logistic Regression (roc_auc=0.8263)",
         "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
         "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
             "accuracy": 0.7415730337078652,
             "f1": 0.7409192020410919,
             "roc_auc": 0.8262626262626263,
+            "train_time_s": 0.012,
             "train_score": 0.8554185927067283,
             "test_score": 0.8262626262626263,
             "generalization_gap": 0.02915596644410201,
             "accuracy": 0.7191011235955056,
             "f1": 0.716527021635327,
             "roc_auc": 0.8161616161616162,
+            "train_time_s": 0.335,
             "train_score": 0.9577555213148433,
             "test_score": 0.8161616161616162,
             "generalization_gap": 0.14159390515322712,
       "accuracy": 0.7415730337078652,
       "f1": 0.7409192020410919,
       "roc_auc": 0.8262626262626263,
+      "train_time_s": 0.012,
       "train_score": 0.8554185927067283,
       "test_score": 0.8262626262626263,
       "generalization_gap": 0.02915596644410201,
         "CV Train Mean": 0.9716,
         "CV Overfit": "Yes",
         "Overfit": "No",
+        "Train Time(s)": 0.34
       }
     ],
     "feature_importances": {
             "accuracy": 0.7415730337078652,
             "f1": 0.7409192020410919,
             "roc_auc": 0.8262626262626263,
+            "train_time_s": 0.012,
             "train_score": 0.8554185927067283,
             "test_score": 0.8262626262626263,
             "generalization_gap": 0.02915596644410201,
               "CV Train Mean": 0.9716,
               "CV Overfit": "Yes",
               "Overfit": "No",
+              "Train Time(s)": 0.34
             }
           ],
           "feature_importances": {
             "  Training Logistic Regression...",
             "    Logistic Regression: acc=0.742, f1=0.741, auc=0.826  [0.01s]",
             "  Training Random Forest...",
+            "    Random Forest: acc=0.719, f1=0.717, auc=0.816  [0.34s]",
             "\nBest model: Logistic Regression (roc_auc=0.8263)",
             "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
             "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
                 "accuracy": 0.7415730337078652,
                 "f1": 0.7409192020410919,
                 "roc_auc": 0.8262626262626263,
+                "train_time_s": 0.012,
                 "train_score": 0.8554185927067283,
                 "test_score": 0.8262626262626263,
                 "generalization_gap": 0.02915596644410201,
                 "accuracy": 0.7191011235955056,
                 "f1": 0.716527021635327,
                 "roc_auc": 0.8161616161616162,
+                "train_time_s": 0.335,
                 "train_score": 0.9577555213148433,
                 "test_score": 0.8161616161616162,
                 "generalization_gap": 0.14159390515322712,
             "accuracy": 0.7415730337078652,
             "f1": 0.7409192020410919,
             "roc_auc": 0.8262626262626263,
+            "train_time_s": 0.012,
             "train_score": 0.8554185927067283,
             "test_score": 0.8262626262626263,
             "generalization_gap": 0.02915596644410201,
               "CV Train Mean": 0.9716,
               "CV Overfit": "Yes",
               "Overfit": "No",
+              "Train Time(s)": 0.34
             }
           ],
           "feature_importances": {
             "  Training Logistic Regression...",
             "    Logistic Regression: acc=0.742, f1=0.741, auc=0.826  [0.01s]",
             "  Training Random Forest...",
+            "    Random Forest: acc=0.719, f1=0.717, auc=0.816  [0.34s]",
             "\nBest model: Logistic Regression (roc_auc=0.8263)",
             "Overfitting warnings: Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9716 vs CV test mean 0.8072",
             "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.8329 \u00b1 0.0368 vs single test score 0.8263"
                 "accuracy": 0.7415730337078652,
                 "f1": 0.7409192020410919,
                 "roc_auc": 0.8262626262626263,
+                "train_time_s": 0.012,
                 "train_score": 0.8554185927067283,
                 "test_score": 0.8262626262626263,
                 "generalization_gap": 0.02915596644410201,
                 "accuracy": 0.7191011235955056,
                 "f1": 0.716527021635327,
                 "roc_auc": 0.8161616161616162,
+                "train_time_s": 0.335,
                 "train_score": 0.9577555213148433,
                 "test_score": 0.8161616161616162,
                 "generalization_gap": 0.14159390515322712,
           "accuracy": 0.7415730337078652,
           "f1": 0.7409192020410919,
           "roc_auc": 0.8262626262626263,
+          "train_time_s": 0.012,
           "train_score": 0.8554185927067283,
           "test_score": 0.8262626262626263,
           "generalization_gap": 0.02915596644410201,
             "CV Train Mean": 0.9716,
             "CV Overfit": "Yes",
             "Overfit": "No",
+            "Train Time(s)": 0.34
           }
         ],
         "feature_importances": {

demo_result_healthcare.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

demo_result_housing.json CHANGED Viewed

@@ -276,7 +276,7 @@
         "rmse": 19244.30726602296,
         "mae": 15054.427702432535,
         "r2": 0.9704273023733059,
-        "train_time_s": 0.001,
         "train_score": 0.9756910416636839,
         "test_score": 0.9704273023733059,
         "generalization_gap": 0.0052637392903780444,
@@ -307,7 +307,7 @@
           "CV Train Mean": 0.9671,
           "CV Overfit": "No",
           "Overfit": "No",
-          "Train Time(s)": 0.1
         }
       ],
       "feature_importances": {
@@ -330,7 +330,7 @@
         "  Training Linear Regression...",
         "    Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43  [0.00s]",
         "  Training Random Forest...",
-        "    Random Forest: r2=0.907, rmse=34041.31, mae=27600.14  [0.10s]",
         "\nBest model: Linear Regression (r2=0.9704)",
         "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
       ],
@@ -344,7 +344,7 @@
             "rmse": 19244.30726602296,
             "mae": 15054.427702432535,
             "r2": 0.9704273023733059,
-            "train_time_s": 0.001,
             "train_score": 0.9756910416636839,
             "test_score": 0.9704273023733059,
             "generalization_gap": 0.0052637392903780444,
@@ -375,10 +375,10 @@
         {
           "name": "Random Forest",
           "metrics": {
-            "rmse": 34041.31037179931,
             "mae": 27600.13834049763,
             "r2": 0.9074665155441051,
-            "train_time_s": 0.099,
             "train_score": 0.9635729463513367,
             "test_score": 0.9074665155441051,
             "generalization_gap": 0.056106430807231655,
@@ -388,14 +388,14 @@
           "generalization_gap": 0.056106430807231655,
           "overfit": false,
           "cv_scores": [
-            0.9357359945589842,
-            0.9382614289622774,
-            0.9301892382602969,
             0.9268878476820184,
-            0.9318488429287123
           ],
           "cv_mean": 0.9325846704784577,
-          "cv_std": 0.004022182442727396,
           "cv_train_scores": [
             0.967156970877803,
             0.967581448731281,
@@ -438,7 +438,7 @@
       "rmse": 19244.30726602296,
       "mae": 15054.427702432535,
       "r2": 0.9704273023733059,
-      "train_time_s": 0.001,
       "train_score": 0.9756910416636839,
       "test_score": 0.9704273023733059,
       "generalization_gap": 0.0052637392903780444,
@@ -467,7 +467,7 @@
         "CV Train Mean": 0.9671,
         "CV Overfit": "No",
         "Overfit": "No",
-        "Train Time(s)": 0.1
       }
     ],
     "feature_importances": {
@@ -910,7 +910,7 @@
             "rmse": 19244.30726602296,
             "mae": 15054.427702432535,
             "r2": 0.9704273023733059,
-            "train_time_s": 0.001,
             "train_score": 0.9756910416636839,
             "test_score": 0.9704273023733059,
             "generalization_gap": 0.0052637392903780444,
@@ -941,7 +941,7 @@
               "CV Train Mean": 0.9671,
               "CV Overfit": "No",
               "Overfit": "No",
-              "Train Time(s)": 0.1
             }
           ],
           "feature_importances": {
@@ -964,7 +964,7 @@
             "  Training Linear Regression...",
             "    Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43  [0.00s]",
             "  Training Random Forest...",
-            "    Random Forest: r2=0.907, rmse=34041.31, mae=27600.14  [0.10s]",
             "\nBest model: Linear Regression (r2=0.9704)",
             "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
           ],
@@ -978,7 +978,7 @@
                 "rmse": 19244.30726602296,
                 "mae": 15054.427702432535,
                 "r2": 0.9704273023733059,
-                "train_time_s": 0.001,
                 "train_score": 0.9756910416636839,
                 "test_score": 0.9704273023733059,
                 "generalization_gap": 0.0052637392903780444,
@@ -1009,24 +1009,24 @@
             {
               "name": "Random Forest",
               "metrics": {
-                "rmse": 34041.31037179931,
                 "mae": 27600.13834049763,
                 "r2": 0.9074665155441051,
-                "train_time_s": 0.099,
                 "train_score": 0.9635729463513367,
                 "test_score": 0.9074665155441051,
                 "generalization_gap": 0.056106430807231655,
                 "overfit": false
               },
               "cv_scores": [
-                0.9357359945589842,
-                0.9382614289622774,
-                0.9301892382602969,
                 0.9268878476820184,
-                0.9318488429287123
               ],
               "cv_mean": 0.9325846704784577,
-              "cv_std": 0.004022182442727396,
               "cv_train_scores": [
                 0.967156970877803,
                 0.967581448731281,
@@ -1370,7 +1370,7 @@
             "rmse": 19244.30726602296,
             "mae": 15054.427702432535,
             "r2": 0.9704273023733059,
-            "train_time_s": 0.001,
             "train_score": 0.9756910416636839,
             "test_score": 0.9704273023733059,
             "generalization_gap": 0.0052637392903780444,
@@ -1401,7 +1401,7 @@
               "CV Train Mean": 0.9671,
               "CV Overfit": "No",
               "Overfit": "No",
-              "Train Time(s)": 0.1
             }
           ],
           "feature_importances": {
@@ -1424,7 +1424,7 @@
             "  Training Linear Regression...",
             "    Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43  [0.00s]",
             "  Training Random Forest...",
-            "    Random Forest: r2=0.907, rmse=34041.31, mae=27600.14  [0.10s]",
             "\nBest model: Linear Regression (r2=0.9704)",
             "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
           ],
@@ -1438,7 +1438,7 @@
                 "rmse": 19244.30726602296,
                 "mae": 15054.427702432535,
                 "r2": 0.9704273023733059,
-                "train_time_s": 0.001,
                 "train_score": 0.9756910416636839,
                 "test_score": 0.9704273023733059,
                 "generalization_gap": 0.0052637392903780444,
@@ -1469,10 +1469,10 @@
             {
               "name": "Random Forest",
               "metrics": {
-                "rmse": 34041.31037179931,
                 "mae": 27600.13834049763,
                 "r2": 0.9074665155441051,
-                "train_time_s": 0.099,
                 "train_score": 0.9635729463513367,
                 "test_score": 0.9074665155441051,
                 "generalization_gap": 0.056106430807231655,
@@ -1482,14 +1482,14 @@
               "generalization_gap": 0.056106430807231655,
               "overfit": false,
               "cv_scores": [
-                0.9357359945589842,
-                0.9382614289622774,
-                0.9301892382602969,
                 0.9268878476820184,
-                0.9318488429287123
               ],
               "cv_mean": 0.9325846704784577,
-              "cv_std": 0.004022182442727396,
               "cv_train_scores": [
                 0.967156970877803,
                 0.967581448731281,
@@ -1532,7 +1532,7 @@
           "rmse": 19244.30726602296,
           "mae": 15054.427702432535,
           "r2": 0.9704273023733059,
-          "train_time_s": 0.001,
           "train_score": 0.9756910416636839,
           "test_score": 0.9704273023733059,
           "generalization_gap": 0.0052637392903780444,
@@ -1561,7 +1561,7 @@
             "CV Train Mean": 0.9671,
             "CV Overfit": "No",
             "Overfit": "No",
-            "Train Time(s)": 0.1
           }
         ],
         "feature_importances": {

         "rmse": 19244.30726602296,
         "mae": 15054.427702432535,
         "r2": 0.9704273023733059,
+        "train_time_s": 0.002,
         "train_score": 0.9756910416636839,
         "test_score": 0.9704273023733059,
         "generalization_gap": 0.0052637392903780444,
           "CV Train Mean": 0.9671,
           "CV Overfit": "No",
           "Overfit": "No",
+          "Train Time(s)": 0.27
         }
       ],
       "feature_importances": {
         "  Training Linear Regression...",
         "    Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43  [0.00s]",
         "  Training Random Forest...",
+        "    Random Forest: r2=0.907, rmse=34041.31, mae=27600.14  [0.27s]",
         "\nBest model: Linear Regression (r2=0.9704)",
         "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
       ],
             "rmse": 19244.30726602296,
             "mae": 15054.427702432535,
             "r2": 0.9704273023733059,
+            "train_time_s": 0.002,
             "train_score": 0.9756910416636839,
             "test_score": 0.9704273023733059,
             "generalization_gap": 0.0052637392903780444,
         {
           "name": "Random Forest",
           "metrics": {
+            "rmse": 34041.3103717993,
             "mae": 27600.13834049763,
             "r2": 0.9074665155441051,
+            "train_time_s": 0.267,
             "train_score": 0.9635729463513367,
             "test_score": 0.9074665155441051,
             "generalization_gap": 0.056106430807231655,
           "generalization_gap": 0.056106430807231655,
           "overfit": false,
           "cv_scores": [
+            0.9357359945589843,
+            0.9382614289622773,
+            0.9301892382602968,
             0.9268878476820184,
+            0.9318488429287124
           ],
           "cv_mean": 0.9325846704784577,
+          "cv_std": 0.004022182442727391,
           "cv_train_scores": [
             0.967156970877803,
             0.967581448731281,
       "rmse": 19244.30726602296,
       "mae": 15054.427702432535,
       "r2": 0.9704273023733059,
+      "train_time_s": 0.002,
       "train_score": 0.9756910416636839,
       "test_score": 0.9704273023733059,
       "generalization_gap": 0.0052637392903780444,
         "CV Train Mean": 0.9671,
         "CV Overfit": "No",
         "Overfit": "No",
+        "Train Time(s)": 0.27
       }
     ],
     "feature_importances": {
             "rmse": 19244.30726602296,
             "mae": 15054.427702432535,
             "r2": 0.9704273023733059,
+            "train_time_s": 0.002,
             "train_score": 0.9756910416636839,
             "test_score": 0.9704273023733059,
             "generalization_gap": 0.0052637392903780444,
               "CV Train Mean": 0.9671,
               "CV Overfit": "No",
               "Overfit": "No",
+              "Train Time(s)": 0.27
             }
           ],
           "feature_importances": {
             "  Training Linear Regression...",
             "    Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43  [0.00s]",
             "  Training Random Forest...",
+            "    Random Forest: r2=0.907, rmse=34041.31, mae=27600.14  [0.27s]",
             "\nBest model: Linear Regression (r2=0.9704)",
             "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
           ],
                 "rmse": 19244.30726602296,
                 "mae": 15054.427702432535,
                 "r2": 0.9704273023733059,
+                "train_time_s": 0.002,
                 "train_score": 0.9756910416636839,
                 "test_score": 0.9704273023733059,
                 "generalization_gap": 0.0052637392903780444,
             {
               "name": "Random Forest",
               "metrics": {
+                "rmse": 34041.3103717993,
                 "mae": 27600.13834049763,
                 "r2": 0.9074665155441051,
+                "train_time_s": 0.267,
                 "train_score": 0.9635729463513367,
                 "test_score": 0.9074665155441051,
                 "generalization_gap": 0.056106430807231655,
                 "overfit": false
               },
               "cv_scores": [
+                0.9357359945589843,
+                0.9382614289622773,
+                0.9301892382602968,
                 0.9268878476820184,
+                0.9318488429287124
               ],
               "cv_mean": 0.9325846704784577,
+              "cv_std": 0.004022182442727391,
               "cv_train_scores": [
                 0.967156970877803,
                 0.967581448731281,
             "rmse": 19244.30726602296,
             "mae": 15054.427702432535,
             "r2": 0.9704273023733059,
+            "train_time_s": 0.002,
             "train_score": 0.9756910416636839,
             "test_score": 0.9704273023733059,
             "generalization_gap": 0.0052637392903780444,
               "CV Train Mean": 0.9671,
               "CV Overfit": "No",
               "Overfit": "No",
+              "Train Time(s)": 0.27
             }
           ],
           "feature_importances": {
             "  Training Linear Regression...",
             "    Linear Regression: r2=0.970, rmse=19244.31, mae=15054.43  [0.00s]",
             "  Training Random Forest...",
+            "    Random Forest: r2=0.907, rmse=34041.31, mae=27600.14  [0.27s]",
             "\nBest model: Linear Regression (r2=0.9704)",
             "5-fold cross-validation results: best model Linear Regression achieved CV mean 0.9741 \u00b1 0.0031 vs single test score 0.9704"
           ],
                 "rmse": 19244.30726602296,
                 "mae": 15054.427702432535,
                 "r2": 0.9704273023733059,
+                "train_time_s": 0.002,
                 "train_score": 0.9756910416636839,
                 "test_score": 0.9704273023733059,
                 "generalization_gap": 0.0052637392903780444,
             {
               "name": "Random Forest",
               "metrics": {
+                "rmse": 34041.3103717993,
                 "mae": 27600.13834049763,
                 "r2": 0.9074665155441051,
+                "train_time_s": 0.267,
                 "train_score": 0.9635729463513367,
                 "test_score": 0.9074665155441051,
                 "generalization_gap": 0.056106430807231655,
               "generalization_gap": 0.056106430807231655,
               "overfit": false,
               "cv_scores": [
+                0.9357359945589843,
+                0.9382614289622773,
+                0.9301892382602968,
                 0.9268878476820184,
+                0.9318488429287124
               ],
               "cv_mean": 0.9325846704784577,
+              "cv_std": 0.004022182442727391,
               "cv_train_scores": [
                 0.967156970877803,
                 0.967581448731281,
           "rmse": 19244.30726602296,
           "mae": 15054.427702432535,
           "r2": 0.9704273023733059,
+          "train_time_s": 0.002,
           "train_score": 0.9756910416636839,
           "test_score": 0.9704273023733059,
           "generalization_gap": 0.0052637392903780444,
             "CV Train Mean": 0.9671,
             "CV Overfit": "No",
             "Overfit": "No",
+            "Train Time(s)": 0.27
           }
         ],
         "feature_importances": {

demo_result_titanic.json CHANGED Viewed

@@ -170,10 +170,10 @@
         ],
         "n_classes": 2,
         "class_distribution": {
-          "0": 401,
-          "1": 399
         },
-        "imbalance_ratio": 1.01
       },
       "quality_flags": [],
       "recommendations": [
@@ -224,7 +224,8 @@
         "Categorical columns (2): mode imputation + one-hot encoding.",
         "Target encoded with LabelEncoder. Classes: ['0', '1']",
         "Train/test split: 640 train rows, 160 test rows (20% test).",
-        "Class imbalance ratio (majority/minority): 1.01.",
         "Final feature matrix: 10 features."
       ],
       "num_cols": [
@@ -240,19 +241,19 @@
       ],
       "n_classes": 2,
       "log_transformed_cols": [],
-      "smote_applied": false,
-      "smote_log": ""
     },
     "train": {
-      "best_name": "Logistic Regression",
       "best_metrics": {
-        "accuracy": 0.50625,
-        "f1": 0.5060763549685436,
-        "roc_auc": 0.51390625,
-        "train_time_s": 0.003,
-        "train_score": 0.5505620172071993,
-        "test_score": 0.51390625,
-        "generalization_gap": 0.03665576720719932,
         "overfit": false
       },
       "metric_name": "roc_auc",
@@ -260,296 +261,292 @@
       "comparison_df": [
         {
           "Model": "Logistic Regression",
-          "Train Score": 0.5506,
-          "Test Score": 0.5139,
-          "Gap": 0.0367,
-          "CV Mean": 0.5025,
-          "CV Std": 0.0343,
-          "CV Train Mean": 0.5568,
           "CV Overfit": "No",
           "Overfit": "No",
-          "Train Time(s)": 0.0
         },
         {
           "Model": "Random Forest",
-          "Train Score": 0.8889,
-          "Test Score": 0.5131,
-          "Gap": 0.3758,
-          "CV Mean": 0.4587,
-          "CV Std": 0.023,
-          "CV Train Mean": 0.9169,
-          "CV Overfit": "Yes",
-          "Overfit": "Yes",
-          "Train Time(s)": 0.13
         }
       ],
       "feature_importances": {
-        "embarked_C": 0.19427991337835848,
-        "sex_female": 0.1795206881097836,
-        "sex_male": 0.17217897553699751,
-        "embarked_Q": 0.16121663300959552,
-        "pclass": 0.10709936251807319,
-        "sibsp": 0.06916449512778146,
-        "embarked_S": 0.04040499294154858,
-        "age": 0.032805348276750294,
-        "parch": 0.02489251272846473,
-        "fare": 0.01843707837264646
       },
       "training_log": [
         "Training 2 models for classification task.",
         "  Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
         "  Training Logistic Regression...",
-        "    Logistic Regression: acc=0.506, f1=0.506, auc=0.514  [0.00s]",
         "  Training Random Forest...",
-        "    Random Forest: acc=0.531, f1=0.531, auc=0.513  [0.13s]",
-        "\nBest model: Logistic Regression (roc_auc=0.5139)",
-        "Overfitting warnings: Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758; Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587",
-        "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139"
-      ],
-      "overfitting_warnings": [
-        "Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758",
-        "Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587"
       ],
-      "cv_summary": "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139",
       "cv_folds_used": 5,
       "results": [
         {
           "name": "Logistic Regression",
           "metrics": {
-            "accuracy": 0.50625,
-            "f1": 0.5060763549685436,
-            "roc_auc": 0.51390625,
-            "train_time_s": 0.003,
-            "train_score": 0.5505620172071993,
-            "test_score": 0.51390625,
-            "generalization_gap": 0.03665576720719932,
             "overfit": false
           },
-          "train_score": 0.5505620172071993,
-          "generalization_gap": 0.03665576720719932,
           "overfit": false,
           "cv_scores": [
-            0.5584859584859584,
-            0.467529296875,
-            0.525634765625,
-            0.48388671875,
-            0.476806640625
           ],
-          "cv_mean": 0.5024686760721917,
-          "cv_std": 0.034348778883955274,
           "cv_train_scores": [
-            0.541595458984375,
-            0.5711451895933471,
-            0.5545586327916381,
-            0.5557030594338903,
-            0.5610894941634241
           ],
-          "cv_train_mean": 0.5568183669933349,
           "cv_overfit": false
         },
         {
           "name": "Random Forest",
           "metrics": {
-            "accuracy": 0.53125,
-            "f1": 0.5312316887378413,
-            "roc_auc": 0.513125,
-            "train_time_s": 0.133,
-            "train_score": 0.8889442279709763,
-            "test_score": 0.513125,
-            "generalization_gap": 0.3758192279709762,
-            "overfit": true
           },
-          "train_score": 0.8889442279709763,
-          "generalization_gap": 0.3758192279709762,
-          "overfit": true,
           "cv_scores": [
-            0.43956043956043955,
-            0.447998046875,
-            0.50390625,
-            0.451416015625,
-            0.45068359375
           ],
-          "cv_mean": 0.458712869162088,
-          "cv_std": 0.022985986961779142,
           "cv_train_scores": [
-            0.9126129150390625,
-            0.9176317998016327,
-            0.9266803997863737,
-            0.9033646143282216,
-            0.9239642938887618
           ],
-          "cv_train_mean": 0.9168508045688103,
-          "cv_overfit": true
         }
       ]
     },
     "eval": {
       "metrics": {
-        "accuracy": 0.50625,
-        "f1": 0.5060763549685436,
-        "classification_report": "              precision    recall  f1-score   support\n\n           0       0.51      0.53      0.52        80\n           1       0.51      0.49      0.50        80\n\n    accuracy                           0.51       160\n   macro avg       0.51      0.51      0.51       160\nweighted avg       0.51      0.51      0.51       160\n",
-        "roc_auc": 0.51390625,
         "y_prob": [
-          0.4685971934401446,
-          0.5880659890984261,
-          0.5304788776901366,
-          0.43957565151138467,
-          0.5038463626781632,
-          0.4652273077567498,
-          0.5847100993502347,
-          0.5104616697292005,
-          0.47131920968357843,
-          0.43799587982497334,
-          0.4960189710268829,
-          0.4351952524180606,
-          0.47823516872507077,
-          0.5225202731035682,
-          0.5049934839754789,
-          0.46348813612071,
-          0.4913152737170698,
-          0.4943560044658895,
-          0.4483125083684001,
-          0.4192829767113759,
-          0.4837760800877137,
-          0.5138495328227541,
-          0.46566274171564515,
-          0.46397203681287,
-          0.42296715645822186,
-          0.5057825082702295,
-          0.48229039510773697,
-          0.5515528516381715,
-          0.4130279404158634,
-          0.5110102562826957,
-          0.4378385985609114,
-          0.5049709338272942,
-          0.5491177241154352,
-          0.4734278641779235,
-          0.4903180760277518,
-          0.5024153385948759,
-          0.4550174125976565,
-          0.47408944756592075,
-          0.4811344974522715,
-          0.513304425049202,
-          0.5579353223774154,
-          0.5210717342560043,
-          0.5907744656741766,
-          0.46344911708752645,
-          0.43117875296291236,
-          0.533688758919175,
-          0.5290497502374126,
-          0.48607641955131375,
-          0.532881233702402,
-          0.5341292461861704,
-          0.5047550838230295,
-          0.45629185178585485,
-          0.5723741095853611,
-          0.5176362649318781,
-          0.576781843287271,
-          0.4565587333091604,
-          0.43425250970963175,
-          0.44864941542194103,
-          0.4814982228717,
-          0.5771946399699652,
-          0.5542313298335242,
-          0.5148651702981335,
-          0.578326993627849,
-          0.46219547798053273,
-          0.4662127930424256,
-          0.5167269442125124,
-          0.4545946129157852,
-          0.47285099433062533,
-          0.475602443239552,
-          0.5318046552682085,
-          0.4916093412756572,
-          0.4680847205841181,
-          0.5300286938315274,
-          0.47419383091533374,
-          0.4432574256271499,
-          0.5467691188026214,
-          0.4942200595017993,
-          0.491558726159705,
-          0.5551736674330323,
-          0.5360340244003253,
-          0.4663308126772837,
-          0.5341545691299238,
-          0.544787947349383,
-          0.5339643040098896,
-          0.5635104012484852,
-          0.42164008428541117,
-          0.5272913797609838,
-          0.49149085962047573,
-          0.4309241928600356,
-          0.5725259889932812,
-          0.45877791338922436,
-          0.5049566058663426,
-          0.5467499732880384,
-          0.4886064484052085,
-          0.4761842009707287,
-          0.5171460006825551,
-          0.5652706369027144,
-          0.5519353659368222,
-          0.48829304075863755,
-          0.49952031294625754,
-          0.5696952526061775,
-          0.39907090644230236,
-          0.5300417514346564,
-          0.5201368866376818,
-          0.4335442294896271,
-          0.441018930438655,
-          0.5727977177493875,
-          0.5055094496565438,
-          0.5485429940005087,
-          0.464300189927614,
-          0.46528676320969975,
-          0.4765960586458909,
-          0.5013951407428184,
-          0.4758746133195292,
-          0.574369326643904,
-          0.5448327519185213,
-          0.5313577820269861,
-          0.5796545232541171,
-          0.5408139880303958,
-          0.5758274204341941,
-          0.4433770335078927,
-          0.48199862880120214,
-          0.4681855101348399,
-          0.5105250331390581,
-          0.484520120241961,
-          0.45987999738308466,
-          0.4812982046752543,
-          0.4651369537670347,
-          0.4515971139618936,
-          0.5239026968610354,
-          0.5183456388828395,
-          0.4887243768576431,
-          0.4846139091445182,
-          0.480984884262246,
-          0.5113218050872491,
-          0.42950526584467874,
-          0.4980224655396546,
-          0.4853621308117529,
-          0.5006213435479155,
-          0.4464885510310222,
-          0.5825216599017382,
-          0.509750144605609,
-          0.5611443029239761,
-          0.48586792077872754,
-          0.5551391707724177,
-          0.48437951798583123,
-          0.4506087491103293,
-          0.49053821992973634,
-          0.4485156855641885,
-          0.5272347909195831,
-          0.4978957759625092,
-          0.4865222606967821,
-          0.5365014960658203,
-          0.46890723544338464,
-          0.5615897997035765,
-          0.553860453714982,
-          0.5373593965986853,
-          0.52350272912908,
-          0.5543178631028236,
-          0.5610670925103559
         ]
       },
       "plot_paths": {
@@ -570,54 +567,54 @@
     },
     "target_col": "survived",
     "task_type": "classification",
-    "best_model_name": "Logistic Regression",
     "best_metrics": {
-      "accuracy": 0.50625,
-      "f1": 0.5060763549685436,
-      "roc_auc": 0.51390625,
-      "train_time_s": 0.003,
-      "train_score": 0.5505620172071993,
-      "test_score": 0.51390625,
-      "generalization_gap": 0.03665576720719932,
       "overfit": false
     },
     "comparison_df": [
       {
         "Model": "Logistic Regression",
-        "Train Score": 0.5506,
-        "Test Score": 0.5139,
-        "Gap": 0.0367,
-        "CV Mean": 0.5025,
-        "CV Std": 0.0343,
-        "CV Train Mean": 0.5568,
         "CV Overfit": "No",
         "Overfit": "No",
-        "Train Time(s)": 0.0
       },
       {
         "Model": "Random Forest",
-        "Train Score": 0.8889,
-        "Test Score": 0.5131,
-        "Gap": 0.3758,
-        "CV Mean": 0.4587,
-        "CV Std": 0.023,
-        "CV Train Mean": 0.9169,
-        "CV Overfit": "Yes",
-        "Overfit": "Yes",
-        "Train Time(s)": 0.13
       }
     ],
     "feature_importances": {
-      "embarked_C": 0.19427991337835848,
-      "sex_female": 0.1795206881097836,
-      "sex_male": 0.17217897553699751,
-      "embarked_Q": 0.16121663300959552,
-      "pclass": 0.10709936251807319,
-      "sibsp": 0.06916449512778146,
-      "embarked_S": 0.04040499294154858,
-      "age": 0.032805348276750294,
-      "parch": 0.02489251272846473,
-      "fare": 0.01843707837264646
     },
     "plot_paths": {
       "confusion_matrix": "outputs/titanic_confusion_matrix.png",
@@ -625,171 +622,171 @@
       "feature_importance": "outputs/titanic_feature_importance.png"
     },
     "metrics": {
-      "accuracy": 0.50625,
-      "f1": 0.5060763549685436,
-      "classification_report": "              precision    recall  f1-score   support\n\n           0       0.51      0.53      0.52        80\n           1       0.51      0.49      0.50        80\n\n    accuracy                           0.51       160\n   macro avg       0.51      0.51      0.51       160\nweighted avg       0.51      0.51      0.51       160\n",
-      "roc_auc": 0.51390625,
       "y_prob": [
-        0.4685971934401446,
-        0.5880659890984261,
-        0.5304788776901366,
-        0.43957565151138467,
-        0.5038463626781632,
-        0.4652273077567498,
-        0.5847100993502347,
-        0.5104616697292005,
-        0.47131920968357843,
-        0.43799587982497334,
-        0.4960189710268829,
-        0.4351952524180606,
-        0.47823516872507077,
-        0.5225202731035682,
-        0.5049934839754789,
-        0.46348813612071,
-        0.4913152737170698,
-        0.4943560044658895,
-        0.4483125083684001,
-        0.4192829767113759,
-        0.4837760800877137,
-        0.5138495328227541,
-        0.46566274171564515,
-        0.46397203681287,
-        0.42296715645822186,
-        0.5057825082702295,
-        0.48229039510773697,
-        0.5515528516381715,
-        0.4130279404158634,
-        0.5110102562826957,
-        0.4378385985609114,
-        0.5049709338272942,
-        0.5491177241154352,
-        0.4734278641779235,
-        0.4903180760277518,
-        0.5024153385948759,
-        0.4550174125976565,
-        0.47408944756592075,
-        0.4811344974522715,
-        0.513304425049202,
-        0.5579353223774154,
-        0.5210717342560043,
-        0.5907744656741766,
-        0.46344911708752645,
-        0.43117875296291236,
-        0.533688758919175,
-        0.5290497502374126,
-        0.48607641955131375,
-        0.532881233702402,
-        0.5341292461861704,
-        0.5047550838230295,
-        0.45629185178585485,
-        0.5723741095853611,
-        0.5176362649318781,
-        0.576781843287271,
-        0.4565587333091604,
-        0.43425250970963175,
-        0.44864941542194103,
-        0.4814982228717,
-        0.5771946399699652,
-        0.5542313298335242,
-        0.5148651702981335,
-        0.578326993627849,
-        0.46219547798053273,
-        0.4662127930424256,
-        0.5167269442125124,
-        0.4545946129157852,
-        0.47285099433062533,
-        0.475602443239552,
-        0.5318046552682085,
-        0.4916093412756572,
-        0.4680847205841181,
-        0.5300286938315274,
-        0.47419383091533374,
-        0.4432574256271499,
-        0.5467691188026214,
-        0.4942200595017993,
-        0.491558726159705,
-        0.5551736674330323,
-        0.5360340244003253,
-        0.4663308126772837,
-        0.5341545691299238,
-        0.544787947349383,
-        0.5339643040098896,
-        0.5635104012484852,
-        0.42164008428541117,
-        0.5272913797609838,
-        0.49149085962047573,
-        0.4309241928600356,
-        0.5725259889932812,
-        0.45877791338922436,
-        0.5049566058663426,
-        0.5467499732880384,
-        0.4886064484052085,
-        0.4761842009707287,
-        0.5171460006825551,
-        0.5652706369027144,
-        0.5519353659368222,
-        0.48829304075863755,
-        0.49952031294625754,
-        0.5696952526061775,
-        0.39907090644230236,
-        0.5300417514346564,
-        0.5201368866376818,
-        0.4335442294896271,
-        0.441018930438655,
-        0.5727977177493875,
-        0.5055094496565438,
-        0.5485429940005087,
-        0.464300189927614,
-        0.46528676320969975,
-        0.4765960586458909,
-        0.5013951407428184,
-        0.4758746133195292,
-        0.574369326643904,
-        0.5448327519185213,
-        0.5313577820269861,
-        0.5796545232541171,
-        0.5408139880303958,
-        0.5758274204341941,
-        0.4433770335078927,
-        0.48199862880120214,
-        0.4681855101348399,
-        0.5105250331390581,
-        0.484520120241961,
-        0.45987999738308466,
-        0.4812982046752543,
-        0.4651369537670347,
-        0.4515971139618936,
-        0.5239026968610354,
-        0.5183456388828395,
-        0.4887243768576431,
-        0.4846139091445182,
-        0.480984884262246,
-        0.5113218050872491,
-        0.42950526584467874,
-        0.4980224655396546,
-        0.4853621308117529,
-        0.5006213435479155,
-        0.4464885510310222,
-        0.5825216599017382,
-        0.509750144605609,
-        0.5611443029239761,
-        0.48586792077872754,
-        0.5551391707724177,
-        0.48437951798583123,
-        0.4506087491103293,
-        0.49053821992973634,
-        0.4485156855641885,
-        0.5272347909195831,
-        0.4978957759625092,
-        0.4865222606967821,
-        0.5365014960658203,
-        0.46890723544338464,
-        0.5615897997035765,
-        0.553860453714982,
-        0.5373593965986853,
-        0.52350272912908,
-        0.5543178631028236,
-        0.5610670925103559
       ]
     },
     "tune": {
@@ -838,8 +835,8 @@
         "is_large": false,
         "is_wide": false,
         "is_binary": true,
-        "imbalance_ratio": 1.006269592476489,
-        "smote_applied": false
       }
     }
   },
@@ -1016,10 +1013,10 @@
             ],
             "n_classes": 2,
             "class_distribution": {
-              "0": 401,
-              "1": 399
             },
-            "imbalance_ratio": 1.01
           },
           "quality_flags": [],
           "recommendations": [
@@ -1088,7 +1085,8 @@
             "Categorical columns (2): mode imputation + one-hot encoding.",
             "Target encoded with LabelEncoder. Classes: ['0', '1']",
             "Train/test split: 640 train rows, 160 test rows (20% test).",
-            "Class imbalance ratio (majority/minority): 1.01.",
             "Final feature matrix: 10 features."
           ],
           "num_cols": [
@@ -1104,9 +1102,9 @@
           ],
           "n_classes": 2,
           "log_transformed_cols": [],
-          "smote_applied": false,
-          "smote_log": "",
-          "train_size": 640,
           "test_size": 160,
           "final_feature_count": 10
         }
@@ -1161,8 +1159,8 @@
             "is_large": false,
             "is_wide": false,
             "is_binary": true,
-            "imbalance_ratio": 1.006269592476489,
-            "smote_applied": false
           }
         }
       },
@@ -1175,15 +1173,15 @@
       "status": "done",
       "data": {
         "train": {
-          "best_name": "Logistic Regression",
           "best_metrics": {
-            "accuracy": 0.50625,
-            "f1": 0.5060763549685436,
-            "roc_auc": 0.51390625,
-            "train_time_s": 0.003,
-            "train_score": 0.5505620172071993,
-            "test_score": 0.51390625,
-            "generalization_gap": 0.03665576720719932,
             "overfit": false
           },
           "metric_name": "roc_auc",
@@ -1191,126 +1189,122 @@
           "comparison_df": [
             {
               "Model": "Logistic Regression",
-              "Train Score": 0.5506,
-              "Test Score": 0.5139,
-              "Gap": 0.0367,
-              "CV Mean": 0.5025,
-              "CV Std": 0.0343,
-              "CV Train Mean": 0.5568,
               "CV Overfit": "No",
               "Overfit": "No",
-              "Train Time(s)": 0.0
             },
             {
               "Model": "Random Forest",
-              "Train Score": 0.8889,
-              "Test Score": 0.5131,
-              "Gap": 0.3758,
-              "CV Mean": 0.4587,
-              "CV Std": 0.023,
-              "CV Train Mean": 0.9169,
-              "CV Overfit": "Yes",
-              "Overfit": "Yes",
-              "Train Time(s)": 0.13
             }
           ],
           "feature_importances": {
-            "embarked_C": 0.19427991337835848,
-            "sex_female": 0.1795206881097836,
-            "sex_male": 0.17217897553699751,
-            "embarked_Q": 0.16121663300959552,
-            "pclass": 0.10709936251807319,
-            "sibsp": 0.06916449512778146,
-            "embarked_S": 0.04040499294154858,
-            "age": 0.032805348276750294,
-            "parch": 0.02489251272846473,
-            "fare": 0.01843707837264646
           },
           "training_log": [
             "Training 2 models for classification task.",
             "  Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
             "  Training Logistic Regression...",
-            "    Logistic Regression: acc=0.506, f1=0.506, auc=0.514  [0.00s]",
             "  Training Random Forest...",
-            "    Random Forest: acc=0.531, f1=0.531, auc=0.513  [0.13s]",
-            "\nBest model: Logistic Regression (roc_auc=0.5139)",
-            "Overfitting warnings: Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758; Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587",
-            "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139"
-          ],
-          "overfitting_warnings": [
-            "Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758",
-            "Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587"
           ],
-          "cv_summary": "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139",
           "cv_folds_used": 5,
           "results": [
             {
               "name": "Logistic Regression",
               "metrics": {
-                "accuracy": 0.50625,
-                "f1": 0.5060763549685436,
-                "roc_auc": 0.51390625,
-                "train_time_s": 0.003,
-                "train_score": 0.5505620172071993,
-                "test_score": 0.51390625,
-                "generalization_gap": 0.03665576720719932,
                 "overfit": false
               },
               "cv_scores": [
-                0.5584859584859584,
-                0.467529296875,
-                0.525634765625,
-                0.48388671875,
-                0.476806640625
               ],
-              "cv_mean": 0.5024686760721917,
-              "cv_std": 0.034348778883955274,
               "cv_train_scores": [
-                0.541595458984375,
-                0.5711451895933471,
-                0.5545586327916381,
-                0.5557030594338903,
-                0.5610894941634241
               ],
-              "cv_train_mean": 0.5568183669933349,
               "cv_overfit": false,
-              "train_score": 0.5505620172071993,
-              "generalization_gap": 0.03665576720719932,
               "overfit": false
             },
             {
               "name": "Random Forest",
               "metrics": {
-                "accuracy": 0.53125,
-                "f1": 0.5312316887378413,
-                "roc_auc": 0.513125,
-                "train_time_s": 0.133,
-                "train_score": 0.8889442279709763,
-                "test_score": 0.513125,
-                "generalization_gap": 0.3758192279709762,
-                "overfit": true
               },
               "cv_scores": [
-                0.43956043956043955,
-                0.447998046875,
-                0.50390625,
-                0.451416015625,
-                0.45068359375
               ],
-              "cv_mean": 0.458712869162088,
-              "cv_std": 0.022985986961779142,
               "cv_train_scores": [
-                0.9126129150390625,
-                0.9176317998016327,
-                0.9266803997863737,
-                0.9033646143282216,
-                0.9239642938887618
               ],
-              "cv_train_mean": 0.9168508045688103,
-              "cv_overfit": true,
-              "train_score": 0.8889442279709763,
-              "generalization_gap": 0.3758192279709762,
-              "overfit": true
             }
           ]
         }
@@ -1326,7 +1320,7 @@
         "tune": {
           "success": false,
           "error": "optuna not installed \u2014 run: pip install optuna",
-          "model_name": "Logistic Regression"
         }
       },
       "error": null
@@ -1339,171 +1333,171 @@
       "data": {
         "eval": {
           "metrics": {
-            "accuracy": 0.50625,
-            "f1": 0.5060763549685436,
-            "classification_report": "              precision    recall  f1-score   support\n\n           0       0.51      0.53      0.52        80\n           1       0.51      0.49      0.50        80\n\n    accuracy                           0.51       160\n   macro avg       0.51      0.51      0.51       160\nweighted avg       0.51      0.51      0.51       160\n",
-            "roc_auc": 0.51390625,
             "y_prob": [
-              0.4685971934401446,
-              0.5880659890984261,
-              0.5304788776901366,
-              0.43957565151138467,
-              0.5038463626781632,
-              0.4652273077567498,
-              0.5847100993502347,
-              0.5104616697292005,
-              0.47131920968357843,
-              0.43799587982497334,
-              0.4960189710268829,
-              0.4351952524180606,
-              0.47823516872507077,
-              0.5225202731035682,
-              0.5049934839754789,
-              0.46348813612071,
-              0.4913152737170698,
-              0.4943560044658895,
-              0.4483125083684001,
-              0.4192829767113759,
-              0.4837760800877137,
-              0.5138495328227541,
-              0.46566274171564515,
-              0.46397203681287,
-              0.42296715645822186,
-              0.5057825082702295,
-              0.48229039510773697,
-              0.5515528516381715,
-              0.4130279404158634,
-              0.5110102562826957,
-              0.4378385985609114,
-              0.5049709338272942,
-              0.5491177241154352,
-              0.4734278641779235,
-              0.4903180760277518,
-              0.5024153385948759,
-              0.4550174125976565,
-              0.47408944756592075,
-              0.4811344974522715,
-              0.513304425049202,
-              0.5579353223774154,
-              0.5210717342560043,
-              0.5907744656741766,
-              0.46344911708752645,
-              0.43117875296291236,
-              0.533688758919175,
-              0.5290497502374126,
-              0.48607641955131375,
-              0.532881233702402,
-              0.5341292461861704,
-              0.5047550838230295,
-              0.45629185178585485,
-              0.5723741095853611,
-              0.5176362649318781,
-              0.576781843287271,
-              0.4565587333091604,
-              0.43425250970963175,
-              0.44864941542194103,
-              0.4814982228717,
-              0.5771946399699652,
-              0.5542313298335242,
-              0.5148651702981335,
-              0.578326993627849,
-              0.46219547798053273,
-              0.4662127930424256,
-              0.5167269442125124,
-              0.4545946129157852,
-              0.47285099433062533,
-              0.475602443239552,
-              0.5318046552682085,
-              0.4916093412756572,
-              0.4680847205841181,
-              0.5300286938315274,
-              0.47419383091533374,
-              0.4432574256271499,
-              0.5467691188026214,
-              0.4942200595017993,
-              0.491558726159705,
-              0.5551736674330323,
-              0.5360340244003253,
-              0.4663308126772837,
-              0.5341545691299238,
-              0.544787947349383,
-              0.5339643040098896,
-              0.5635104012484852,
-              0.42164008428541117,
-              0.5272913797609838,
-              0.49149085962047573,
-              0.4309241928600356,
-              0.5725259889932812,
-              0.45877791338922436,
-              0.5049566058663426,
-              0.5467499732880384,
-              0.4886064484052085,
-              0.4761842009707287,
-              0.5171460006825551,
-              0.5652706369027144,
-              0.5519353659368222,
-              0.48829304075863755,
-              0.49952031294625754,
-              0.5696952526061775,
-              0.39907090644230236,
-              0.5300417514346564,
-              0.5201368866376818,
-              0.4335442294896271,
-              0.441018930438655,
-              0.5727977177493875,
-              0.5055094496565438,
-              0.5485429940005087,
-              0.464300189927614,
-              0.46528676320969975,
-              0.4765960586458909,
-              0.5013951407428184,
-              0.4758746133195292,
-              0.574369326643904,
-              0.5448327519185213,
-              0.5313577820269861,
-              0.5796545232541171,
-              0.5408139880303958,
-              0.5758274204341941,
-              0.4433770335078927,
-              0.48199862880120214,
-              0.4681855101348399,
-              0.5105250331390581,
-              0.484520120241961,
-              0.45987999738308466,
-              0.4812982046752543,
-              0.4651369537670347,
-              0.4515971139618936,
-              0.5239026968610354,
-              0.5183456388828395,
-              0.4887243768576431,
-              0.4846139091445182,
-              0.480984884262246,
-              0.5113218050872491,
-              0.42950526584467874,
-              0.4980224655396546,
-              0.4853621308117529,
-              0.5006213435479155,
-              0.4464885510310222,
-              0.5825216599017382,
-              0.509750144605609,
-              0.5611443029239761,
-              0.48586792077872754,
-              0.5551391707724177,
-              0.48437951798583123,
-              0.4506087491103293,
-              0.49053821992973634,
-              0.4485156855641885,
-              0.5272347909195831,
-              0.4978957759625092,
-              0.4865222606967821,
-              0.5365014960658203,
-              0.46890723544338464,
-              0.5615897997035765,
-              0.553860453714982,
-              0.5373593965986853,
-              0.52350272912908,
-              0.5543178631028236,
-              0.5610670925103559
             ]
           },
           "plot_paths": {
@@ -1698,10 +1692,10 @@
             ],
             "n_classes": 2,
             "class_distribution": {
-              "0": 401,
-              "1": 399
             },
-            "imbalance_ratio": 1.01
           },
           "quality_flags": [],
           "recommendations": [
@@ -1752,7 +1746,8 @@
             "Categorical columns (2): mode imputation + one-hot encoding.",
             "Target encoded with LabelEncoder. Classes: ['0', '1']",
             "Train/test split: 640 train rows, 160 test rows (20% test).",
-            "Class imbalance ratio (majority/minority): 1.01.",
             "Final feature matrix: 10 features."
           ],
           "num_cols": [
@@ -1768,19 +1763,19 @@
           ],
           "n_classes": 2,
           "log_transformed_cols": [],
-          "smote_applied": false,
-          "smote_log": ""
         },
         "train": {
-          "best_name": "Logistic Regression",
           "best_metrics": {
-            "accuracy": 0.50625,
-            "f1": 0.5060763549685436,
-            "roc_auc": 0.51390625,
-            "train_time_s": 0.003,
-            "train_score": 0.5505620172071993,
-            "test_score": 0.51390625,
-            "generalization_gap": 0.03665576720719932,
             "overfit": false
           },
           "metric_name": "roc_auc",
@@ -1788,296 +1783,292 @@
           "comparison_df": [
             {
               "Model": "Logistic Regression",
-              "Train Score": 0.5506,
-              "Test Score": 0.5139,
-              "Gap": 0.0367,
-              "CV Mean": 0.5025,
-              "CV Std": 0.0343,
-              "CV Train Mean": 0.5568,
               "CV Overfit": "No",
               "Overfit": "No",
-              "Train Time(s)": 0.0
             },
             {
               "Model": "Random Forest",
-              "Train Score": 0.8889,
-              "Test Score": 0.5131,
-              "Gap": 0.3758,
-              "CV Mean": 0.4587,
-              "CV Std": 0.023,
-              "CV Train Mean": 0.9169,
-              "CV Overfit": "Yes",
-              "Overfit": "Yes",
-              "Train Time(s)": 0.13
             }
           ],
           "feature_importances": {
-            "embarked_C": 0.19427991337835848,
-            "sex_female": 0.1795206881097836,
-            "sex_male": 0.17217897553699751,
-            "embarked_Q": 0.16121663300959552,
-            "pclass": 0.10709936251807319,
-            "sibsp": 0.06916449512778146,
-            "embarked_S": 0.04040499294154858,
-            "age": 0.032805348276750294,
-            "parch": 0.02489251272846473,
-            "fare": 0.01843707837264646
           },
           "training_log": [
             "Training 2 models for classification task.",
             "  Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
             "  Training Logistic Regression...",
-            "    Logistic Regression: acc=0.506, f1=0.506, auc=0.514  [0.00s]",
             "  Training Random Forest...",
-            "    Random Forest: acc=0.531, f1=0.531, auc=0.513  [0.13s]",
-            "\nBest model: Logistic Regression (roc_auc=0.5139)",
-            "Overfitting warnings: Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758; Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587",
-            "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139"
-          ],
-          "overfitting_warnings": [
-            "Random Forest is overfitting \u2014 train ROC-AUC 0.8889 vs test ROC-AUC 0.5131, gap 0.3758",
-            "Random Forest shows consistent overfitting across CV folds \u2014 CV train mean 0.9169 vs CV test mean 0.4587"
           ],
-          "cv_summary": "5-fold cross-validation results: best model Logistic Regression achieved CV mean 0.5025 \u00b1 0.0343 vs single test score 0.5139",
           "cv_folds_used": 5,
           "results": [
             {
               "name": "Logistic Regression",
               "metrics": {
-                "accuracy": 0.50625,
-                "f1": 0.5060763549685436,
-                "roc_auc": 0.51390625,
-                "train_time_s": 0.003,
-                "train_score": 0.5505620172071993,
-                "test_score": 0.51390625,
-                "generalization_gap": 0.03665576720719932,
                 "overfit": false
               },
-              "train_score": 0.5505620172071993,
-              "generalization_gap": 0.03665576720719932,
               "overfit": false,
               "cv_scores": [
-                0.5584859584859584,
-                0.467529296875,
-                0.525634765625,
-                0.48388671875,
-                0.476806640625
               ],
-              "cv_mean": 0.5024686760721917,
-              "cv_std": 0.034348778883955274,
               "cv_train_scores": [
-                0.541595458984375,
-                0.5711451895933471,
-                0.5545586327916381,
-                0.5557030594338903,
-                0.5610894941634241
               ],
-              "cv_train_mean": 0.5568183669933349,
               "cv_overfit": false
             },
             {
               "name": "Random Forest",
               "metrics": {
-                "accuracy": 0.53125,
-                "f1": 0.5312316887378413,
-                "roc_auc": 0.513125,
-                "train_time_s": 0.133,
-                "train_score": 0.8889442279709763,
-                "test_score": 0.513125,
-                "generalization_gap": 0.3758192279709762,
-                "overfit": true
               },
-              "train_score": 0.8889442279709763,
-              "generalization_gap": 0.3758192279709762,
-              "overfit": true,
               "cv_scores": [
-                0.43956043956043955,
-                0.447998046875,
-                0.50390625,
-                0.451416015625,
-                0.45068359375
               ],
-              "cv_mean": 0.458712869162088,
-              "cv_std": 0.022985986961779142,
               "cv_train_scores": [
-                0.9126129150390625,
-                0.9176317998016327,
-                0.9266803997863737,
-                0.9033646143282216,
-                0.9239642938887618
               ],
-              "cv_train_mean": 0.9168508045688103,
-              "cv_overfit": true
             }
           ]
         },
         "eval": {
           "metrics": {
-            "accuracy": 0.50625,
-            "f1": 0.5060763549685436,
-            "classification_report": "              precision    recall  f1-score   support\n\n           0       0.51      0.53      0.52        80\n           1       0.51      0.49      0.50        80\n\n    accuracy                           0.51       160\n   macro avg       0.51      0.51      0.51       160\nweighted avg       0.51      0.51      0.51       160\n",
-            "roc_auc": 0.51390625,
             "y_prob": [
-              0.4685971934401446,
-              0.5880659890984261,
-              0.5304788776901366,
-              0.43957565151138467,
-              0.5038463626781632,
-              0.4652273077567498,
-              0.5847100993502347,
-              0.5104616697292005,
-              0.47131920968357843,
-              0.43799587982497334,
-              0.4960189710268829,
-              0.4351952524180606,
-              0.47823516872507077,
-              0.5225202731035682,
-              0.5049934839754789,
-              0.46348813612071,
-              0.4913152737170698,
-              0.4943560044658895,
-              0.4483125083684001,
-              0.4192829767113759,
-              0.4837760800877137,
-              0.5138495328227541,
-              0.46566274171564515,
-              0.46397203681287,
-              0.42296715645822186,
-              0.5057825082702295,
-              0.48229039510773697,
-              0.5515528516381715,
-              0.4130279404158634,
-              0.5110102562826957,
-              0.4378385985609114,
-              0.5049709338272942,
-              0.5491177241154352,
-              0.4734278641779235,
-              0.4903180760277518,
-              0.5024153385948759,
-              0.4550174125976565,
-              0.47408944756592075,
-              0.4811344974522715,
-              0.513304425049202,
-              0.5579353223774154,
-              0.5210717342560043,
-              0.5907744656741766,
-              0.46344911708752645,
-              0.43117875296291236,
-              0.533688758919175,
-              0.5290497502374126,
-              0.48607641955131375,
-              0.532881233702402,
-              0.5341292461861704,
-              0.5047550838230295,
-              0.45629185178585485,
-              0.5723741095853611,
-              0.5176362649318781,
-              0.576781843287271,
-              0.4565587333091604,
-              0.43425250970963175,
-              0.44864941542194103,
-              0.4814982228717,
-              0.5771946399699652,
-              0.5542313298335242,
-              0.5148651702981335,
-              0.578326993627849,
-              0.46219547798053273,
-              0.4662127930424256,
-              0.5167269442125124,
-              0.4545946129157852,
-              0.47285099433062533,
-              0.475602443239552,
-              0.5318046552682085,
-              0.4916093412756572,
-              0.4680847205841181,
-              0.5300286938315274,
-              0.47419383091533374,
-              0.4432574256271499,
-              0.5467691188026214,
-              0.4942200595017993,
-              0.491558726159705,
-              0.5551736674330323,
-              0.5360340244003253,
-              0.4663308126772837,
-              0.5341545691299238,
-              0.544787947349383,
-              0.5339643040098896,
-              0.5635104012484852,
-              0.42164008428541117,
-              0.5272913797609838,
-              0.49149085962047573,
-              0.4309241928600356,
-              0.5725259889932812,
-              0.45877791338922436,
-              0.5049566058663426,
-              0.5467499732880384,
-              0.4886064484052085,
-              0.4761842009707287,
-              0.5171460006825551,
-              0.5652706369027144,
-              0.5519353659368222,
-              0.48829304075863755,
-              0.49952031294625754,
-              0.5696952526061775,
-              0.39907090644230236,
-              0.5300417514346564,
-              0.5201368866376818,
-              0.4335442294896271,
-              0.441018930438655,
-              0.5727977177493875,
-              0.5055094496565438,
-              0.5485429940005087,
-              0.464300189927614,
-              0.46528676320969975,
-              0.4765960586458909,
-              0.5013951407428184,
-              0.4758746133195292,
-              0.574369326643904,
-              0.5448327519185213,
-              0.5313577820269861,
-              0.5796545232541171,
-              0.5408139880303958,
-              0.5758274204341941,
-              0.4433770335078927,
-              0.48199862880120214,
-              0.4681855101348399,
-              0.5105250331390581,
-              0.484520120241961,
-              0.45987999738308466,
-              0.4812982046752543,
-              0.4651369537670347,
-              0.4515971139618936,
-              0.5239026968610354,
-              0.5183456388828395,
-              0.4887243768576431,
-              0.4846139091445182,
-              0.480984884262246,
-              0.5113218050872491,
-              0.42950526584467874,
-              0.4980224655396546,
-              0.4853621308117529,
-              0.5006213435479155,
-              0.4464885510310222,
-              0.5825216599017382,
-              0.509750144605609,
-              0.5611443029239761,
-              0.48586792077872754,
-              0.5551391707724177,
-              0.48437951798583123,
-              0.4506087491103293,
-              0.49053821992973634,
-              0.4485156855641885,
-              0.5272347909195831,
-              0.4978957759625092,
-              0.4865222606967821,
-              0.5365014960658203,
-              0.46890723544338464,
-              0.5615897997035765,
-              0.553860453714982,
-              0.5373593965986853,
-              0.52350272912908,
-              0.5543178631028236,
-              0.5610670925103559
             ]
           },
           "plot_paths": {
@@ -2098,54 +2089,54 @@
         },
         "target_col": "survived",
         "task_type": "classification",
-        "best_model_name": "Logistic Regression",
         "best_metrics": {
-          "accuracy": 0.50625,
-          "f1": 0.5060763549685436,
-          "roc_auc": 0.51390625,
-          "train_time_s": 0.003,
-          "train_score": 0.5505620172071993,
-          "test_score": 0.51390625,
-          "generalization_gap": 0.03665576720719932,
           "overfit": false
         },
         "comparison_df": [
           {
             "Model": "Logistic Regression",
-            "Train Score": 0.5506,
-            "Test Score": 0.5139,
-            "Gap": 0.0367,
-            "CV Mean": 0.5025,
-            "CV Std": 0.0343,
-            "CV Train Mean": 0.5568,
             "CV Overfit": "No",
             "Overfit": "No",
-            "Train Time(s)": 0.0
           },
           {
             "Model": "Random Forest",
-            "Train Score": 0.8889,
-            "Test Score": 0.5131,
-            "Gap": 0.3758,
-            "CV Mean": 0.4587,
-            "CV Std": 0.023,
-            "CV Train Mean": 0.9169,
-            "CV Overfit": "Yes",
-            "Overfit": "Yes",
-            "Train Time(s)": 0.13
           }
         ],
         "feature_importances": {
-          "embarked_C": 0.19427991337835848,
-          "sex_female": 0.1795206881097836,
-          "sex_male": 0.17217897553699751,
-          "embarked_Q": 0.16121663300959552,
-          "pclass": 0.10709936251807319,
-          "sibsp": 0.06916449512778146,
-          "embarked_S": 0.04040499294154858,
-          "age": 0.032805348276750294,
-          "parch": 0.02489251272846473,
-          "fare": 0.01843707837264646
         },
         "plot_paths": {
           "confusion_matrix": "outputs/titanic_confusion_matrix.png",
@@ -2153,171 +2144,171 @@
           "feature_importance": "outputs/titanic_feature_importance.png"
         },
         "metrics": {
-          "accuracy": 0.50625,
-          "f1": 0.5060763549685436,
-          "classification_report": "              precision    recall  f1-score   support\n\n           0       0.51      0.53      0.52        80\n           1       0.51      0.49      0.50        80\n\n    accuracy                           0.51       160\n   macro avg       0.51      0.51      0.51       160\nweighted avg       0.51      0.51      0.51       160\n",
-          "roc_auc": 0.51390625,
           "y_prob": [
-            0.4685971934401446,
-            0.5880659890984261,
-            0.5304788776901366,
-            0.43957565151138467,
-            0.5038463626781632,
-            0.4652273077567498,
-            0.5847100993502347,
-            0.5104616697292005,
-            0.47131920968357843,
-            0.43799587982497334,
-            0.4960189710268829,
-            0.4351952524180606,
-            0.47823516872507077,
-            0.5225202731035682,
-            0.5049934839754789,
-            0.46348813612071,
-            0.4913152737170698,
-            0.4943560044658895,
-            0.4483125083684001,
-            0.4192829767113759,
-            0.4837760800877137,
-            0.5138495328227541,
-            0.46566274171564515,
-            0.46397203681287,
-            0.42296715645822186,
-            0.5057825082702295,
-            0.48229039510773697,
-            0.5515528516381715,
-            0.4130279404158634,
-            0.5110102562826957,
-            0.4378385985609114,
-            0.5049709338272942,
-            0.5491177241154352,
-            0.4734278641779235,
-            0.4903180760277518,
-            0.5024153385948759,
-            0.4550174125976565,
-            0.47408944756592075,
-            0.4811344974522715,
-            0.513304425049202,
-            0.5579353223774154,
-            0.5210717342560043,
-            0.5907744656741766,
-            0.46344911708752645,
-            0.43117875296291236,
-            0.533688758919175,
-            0.5290497502374126,
-            0.48607641955131375,
-            0.532881233702402,
-            0.5341292461861704,
-            0.5047550838230295,
-            0.45629185178585485,
-            0.5723741095853611,
-            0.5176362649318781,
-            0.576781843287271,
-            0.4565587333091604,
-            0.43425250970963175,
-            0.44864941542194103,
-            0.4814982228717,
-            0.5771946399699652,
-            0.5542313298335242,
-            0.5148651702981335,
-            0.578326993627849,
-            0.46219547798053273,
-            0.4662127930424256,
-            0.5167269442125124,
-            0.4545946129157852,
-            0.47285099433062533,
-            0.475602443239552,
-            0.5318046552682085,
-            0.4916093412756572,
-            0.4680847205841181,
-            0.5300286938315274,
-            0.47419383091533374,
-            0.4432574256271499,
-            0.5467691188026214,
-            0.4942200595017993,
-            0.491558726159705,
-            0.5551736674330323,
-            0.5360340244003253,
-            0.4663308126772837,
-            0.5341545691299238,
-            0.544787947349383,
-            0.5339643040098896,
-            0.5635104012484852,
-            0.42164008428541117,
-            0.5272913797609838,
-            0.49149085962047573,
-            0.4309241928600356,
-            0.5725259889932812,
-            0.45877791338922436,
-            0.5049566058663426,
-            0.5467499732880384,
-            0.4886064484052085,
-            0.4761842009707287,
-            0.5171460006825551,
-            0.5652706369027144,
-            0.5519353659368222,
-            0.48829304075863755,
-            0.49952031294625754,
-            0.5696952526061775,
-            0.39907090644230236,
-            0.5300417514346564,
-            0.5201368866376818,
-            0.4335442294896271,
-            0.441018930438655,
-            0.5727977177493875,
-            0.5055094496565438,
-            0.5485429940005087,
-            0.464300189927614,
-            0.46528676320969975,
-            0.4765960586458909,
-            0.5013951407428184,
-            0.4758746133195292,
-            0.574369326643904,
-            0.5448327519185213,
-            0.5313577820269861,
-            0.5796545232541171,
-            0.5408139880303958,
-            0.5758274204341941,
-            0.4433770335078927,
-            0.48199862880120214,
-            0.4681855101348399,
-            0.5105250331390581,
-            0.484520120241961,
-            0.45987999738308466,
-            0.4812982046752543,
-            0.4651369537670347,
-            0.4515971139618936,
-            0.5239026968610354,
-            0.5183456388828395,
-            0.4887243768576431,
-            0.4846139091445182,
-            0.480984884262246,
-            0.5113218050872491,
-            0.42950526584467874,
-            0.4980224655396546,
-            0.4853621308117529,
-            0.5006213435479155,
-            0.4464885510310222,
-            0.5825216599017382,
-            0.509750144605609,
-            0.5611443029239761,
-            0.48586792077872754,
-            0.5551391707724177,
-            0.48437951798583123,
-            0.4506087491103293,
-            0.49053821992973634,
-            0.4485156855641885,
-            0.5272347909195831,
-            0.4978957759625092,
-            0.4865222606967821,
-            0.5365014960658203,
-            0.46890723544338464,
-            0.5615897997035765,
-            0.553860453714982,
-            0.5373593965986853,
-            0.52350272912908,
-            0.5543178631028236,
-            0.5610670925103559
           ]
         },
         "tune": {
@@ -2366,8 +2357,8 @@
             "is_large": false,
             "is_wide": false,
             "is_binary": true,
-            "imbalance_ratio": 1.006269592476489,
-            "smote_applied": false
           }
         }
       },

         ],
         "n_classes": 2,
         "class_distribution": {
+          "1": 535,
+          "0": 265
         },
+        "imbalance_ratio": 2.02
       },
       "quality_flags": [],
       "recommendations": [
         "Categorical columns (2): mode imputation + one-hot encoding.",
         "Target encoded with LabelEncoder. Classes: ['0', '1']",
         "Train/test split: 640 train rows, 160 test rows (20% test).",
+        "Class imbalance ratio (majority/minority): 2.02.",
+        "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
         "Final feature matrix: 10 features."
       ],
       "num_cols": [
       ],
       "n_classes": 2,
       "log_transformed_cols": [],
+      "smote_applied": true,
+      "smote_log": "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428."
     },
     "train": {
+      "best_name": "Random Forest",
       "best_metrics": {
+        "accuracy": 0.8,
+        "f1": 0.8017316017316019,
+        "roc_auc": 0.8416505025568682,
+        "train_time_s": 0.378,
+        "train_score": 0.926019739715259,
+        "test_score": 0.8416505025568682,
+        "generalization_gap": 0.08436923715839073,
         "overfit": false
       },
       "metric_name": "roc_auc",
       "comparison_df": [
         {
           "Model": "Logistic Regression",
+          "Train Score": 0.8272,
+          "Test Score": 0.8462,
+          "Gap": -0.019,
+          "CV Mean": 0.8203,
+          "CV Std": 0.0317,
+          "CV Train Mean": 0.8271,
           "CV Overfit": "No",
           "Overfit": "No",
+          "Train Time(s)": 0.01
         },
         {
           "Model": "Random Forest",
+          "Train Score": 0.926,
+          "Test Score": 0.8417,
+          "Gap": 0.0844,
+          "CV Mean": 0.872,
+          "CV Std": 0.0247,
+          "CV Train Mean": 0.9317,
+          "CV Overfit": "No",
+          "Overfit": "No",
+          "Train Time(s)": 0.38
         }
       ],
       "feature_importances": {
+        "sex_male": 0.28592870600873493,
+        "sex_female": 0.2421809277670601,
+        "pclass": 0.20077516929328645,
+        "fare": 0.06675033992695845,
+        "age": 0.06080316414257179,
+        "sibsp": 0.05905243894256141,
+        "parch": 0.04148880321022331,
+        "embarked_S": 0.016750117296253544,
+        "embarked_Q": 0.01568458564080758,
+        "embarked_C": 0.010585747771542543
       },
       "training_log": [
         "Training 2 models for classification task.",
         "  Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
         "  Training Logistic Regression...",
+        "    Logistic Regression: acc=0.775, f1=0.779, auc=0.846  [0.01s]",
         "  Training Random Forest...",
+        "    Random Forest: acc=0.800, f1=0.802, auc=0.842  [0.38s]",
+        "\nBest model: Random Forest (roc_auc=0.8417)",
+        "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417"
       ],
+      "overfitting_warnings": [],
+      "cv_summary": "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417",
       "cv_folds_used": 5,
       "results": [
         {
           "name": "Logistic Regression",
           "metrics": {
+            "accuracy": 0.775,
+            "f1": 0.7791835699797159,
+            "roc_auc": 0.8462352318815024,
+            "train_time_s": 0.015,
+            "train_score": 0.827190147611145,
+            "test_score": 0.8462352318815024,
+            "generalization_gap": -0.01904508427035745,
             "overfit": false
           },
+          "train_score": 0.827190147611145,
+          "generalization_gap": -0.01904508427035745,
           "overfit": false,
           "cv_scores": [
+            0.8080043266630612,
+            0.7863201094391246,
+            0.7939808481532147,
+            0.8422708618331054,
+            0.8707250341997264
           ],
+          "cv_mean": 0.8202602360576463,
+          "cv_std": 0.031698294077597756,
           "cv_train_scores": [
+            0.8316063062138779,
+            0.8339556373928018,
+            0.8352002455117385,
+            0.8209128262833956,
+            0.8139907592109525
           ],
+          "cv_train_mean": 0.8271331549225532,
           "cv_overfit": false
         },
         {
           "name": "Random Forest",
           "metrics": {
+            "accuracy": 0.8,
+            "f1": 0.8017316017316019,
+            "roc_auc": 0.8416505025568682,
+            "train_time_s": 0.378,
+            "train_score": 0.926019739715259,
+            "test_score": 0.8416505025568682,
+            "generalization_gap": 0.08436923715839073,
+            "overfit": false
           },
+          "train_score": 0.926019739715259,
+          "generalization_gap": 0.08436923715839073,
+          "overfit": false,
           "cv_scores": [
+            0.8795294753921038,
+            0.840218878248974,
+            0.8588235294117647,
+            0.9143638850889193,
+            0.8670314637482901
           ],
+          "cv_mean": 0.8719934463780105,
+          "cv_std": 0.024737570511909102,
           "cv_train_scores": [
+            0.9302947915597962,
+            0.9364823623685063,
+            0.9327400132985526,
+            0.9295432458697765,
+            0.9296881659932144
           ],
+          "cv_train_mean": 0.9317497158179693,
+          "cv_overfit": false
         }
       ]
     },
     "eval": {
       "metrics": {
+        "accuracy": 0.8,
+        "f1": 0.8017316017316019,
+        "classification_report": "              precision    recall  f1-score   support\n\n           0       0.68      0.74      0.71        53\n           1       0.86      0.83      0.85       107\n\n    accuracy                           0.80       160\n   macro avg       0.77      0.78      0.78       160\nweighted avg       0.80      0.80      0.80       160\n",
+        "roc_auc": 0.8416505025568682,
         "y_prob": [
+          0.13410941654770053,
+          0.5294946471795305,
+          0.6734029805133361,
+          0.48276513165091095,
+          0.537887729131407,
+          0.14991155807168366,
+          0.6170905519960105,
+          0.4784058790521681,
+          0.8375867266088654,
+          0.7617669897727407,
+          0.1351365003484392,
+          0.44334998820869886,
+          0.45918298156738113,
+          0.18086772747653274,
+          0.7363322360597586,
+          0.11657189382719757,
+          0.23649619273968361,
+          0.5901773420747848,
+          0.134558348190491,
+          0.4254133833278052,
+          0.1296011320209188,
+          0.7314644444555719,
+          0.7853934325845645,
+          0.9071377805346806,
+          0.6899050304790696,
+          0.7018366024600355,
+          0.8197685771383121,
+          0.5198317417706617,
+          0.12630032800342134,
+          0.6473881223230429,
+          0.832648823582342,
+          0.6085633474110343,
+          0.8113142318198139,
+          0.9074706641382,
+          0.8459766165607369,
+          0.8201738289640232,
+          0.9003680409586162,
+          0.10016718457369138,
+          0.8585346197416688,
+          0.47744576120787313,
+          0.7118155973614765,
+          0.8079398173505976,
+          0.4835688710616073,
+          0.15232883334919356,
+          0.7564268826362109,
+          0.8738909667369577,
+          0.8978781232217746,
+          0.8629139348922269,
+          0.9210750298694106,
+          0.8622476596460283,
+          0.5280584146401254,
+          0.43175724148640166,
+          0.8612268450998052,
+          0.5192093060535821,
+          0.4631429141776544,
+          0.41349421421165006,
+          0.1842717233199732,
+          0.5393021880249185,
+          0.4611023953979327,
+          0.8127362992442425,
+          0.8939691379496485,
+          0.4164787102909156,
+          0.8978878589569886,
+          0.7983424128761862,
+          0.1173182901761418,
+          0.7542514691828953,
+          0.8448152300020084,
+          0.7353745795727059,
+          0.874243708197828,
+          0.1495537468741798,
+          0.4608257845629732,
+          0.723666302124413,
+          0.8963689192492247,
+          0.5718324254113771,
+          0.4597956607545386,
+          0.8732235494170898,
+          0.7727295226970254,
+          0.730516200696798,
+          0.09548329031034873,
+          0.8629109388984001,
+          0.46263304800868466,
+          0.7151887661777987,
+          0.40230852321056754,
+          0.8930814394148957,
+          0.8189750833489609,
+          0.7948906670433182,
+          0.16253862850672068,
+          0.7762547028088542,
+          0.8334832356172626,
+          0.8004830705990988,
+          0.21099149923900418,
+          0.8841702218788182,
+          0.8712799607086796,
+          0.5813945785177388,
+          0.8052927113899669,
+          0.49664824783559597,
+          0.43547420910148493,
+          0.9093300332802987,
+          0.8271809777802208,
+          0.6483594924481736,
+          0.9153820268559258,
+          0.8316891109823481,
+          0.5172451679903641,
+          0.10690068213440551,
+          0.14482053455947552,
+          0.6514137175182347,
+          0.8437537672599027,
+          0.7675893615600448,
+          0.7953030977589493,
+          0.23188048345069834,
+          0.8320099927549827,
+          0.8411414013434237,
+          0.49552967074078597,
+          0.10795590031797207,
+          0.6603705695450834,
+          0.905216945402769,
+          0.9017388468431307,
+          0.7038598916174742,
+          0.46749836573145026,
+          0.10980764687631323,
+          0.4241936068254756,
+          0.7869237725733111,
+          0.4176645200689225,
+          0.41597565326122093,
+          0.1840375192714269,
+          0.09239177436740872,
+          0.5634089283154317,
+          0.8672817898245639,
+          0.859787536713618,
+          0.823755567069053,
+          0.09332562760407467,
+          0.5269654560034531,
+          0.3282944545355915,
+          0.12656165619337742,
+          0.8996144986378454,
+          0.8794629731543243,
+          0.2259360459237031,
+          0.8677342141160906,
+          0.7065381808535922,
+          0.5087308414616077,
+          0.7361387452605942,
+          0.9175622001046994,
+          0.6768295210930064,
+          0.8217479267611849,
+          0.8186363655380218,
+          0.7713282338574033,
+          0.8650744660561319,
+          0.44798639555019343,
+          0.6891437990464381,
+          0.8957915611217553,
+          0.13099882437253405,
+          0.9100589282983074,
+          0.5699927016156656,
+          0.8848101584325896,
+          0.833215993303308,
+          0.806074199994562,
+          0.15238050377532514,
+          0.10376629963015432,
+          0.1557889773023143,
+          0.11296197259042451
         ]
       },
       "plot_paths": {
     },
     "target_col": "survived",
     "task_type": "classification",
+    "best_model_name": "Random Forest",
     "best_metrics": {
+      "accuracy": 0.8,
+      "f1": 0.8017316017316019,
+      "roc_auc": 0.8416505025568682,
+      "train_time_s": 0.378,
+      "train_score": 0.926019739715259,
+      "test_score": 0.8416505025568682,
+      "generalization_gap": 0.08436923715839073,
       "overfit": false
     },
     "comparison_df": [
       {
         "Model": "Logistic Regression",
+        "Train Score": 0.8272,
+        "Test Score": 0.8462,
+        "Gap": -0.019,
+        "CV Mean": 0.8203,
+        "CV Std": 0.0317,
+        "CV Train Mean": 0.8271,
         "CV Overfit": "No",
         "Overfit": "No",
+        "Train Time(s)": 0.01
       },
       {
         "Model": "Random Forest",
+        "Train Score": 0.926,
+        "Test Score": 0.8417,
+        "Gap": 0.0844,
+        "CV Mean": 0.872,
+        "CV Std": 0.0247,
+        "CV Train Mean": 0.9317,
+        "CV Overfit": "No",
+        "Overfit": "No",
+        "Train Time(s)": 0.38
       }
     ],
     "feature_importances": {
+      "sex_male": 0.28592870600873493,
+      "sex_female": 0.2421809277670601,
+      "pclass": 0.20077516929328645,
+      "fare": 0.06675033992695845,
+      "age": 0.06080316414257179,
+      "sibsp": 0.05905243894256141,
+      "parch": 0.04148880321022331,
+      "embarked_S": 0.016750117296253544,
+      "embarked_Q": 0.01568458564080758,
+      "embarked_C": 0.010585747771542543
     },
     "plot_paths": {
       "confusion_matrix": "outputs/titanic_confusion_matrix.png",
       "feature_importance": "outputs/titanic_feature_importance.png"
     },
     "metrics": {
+      "accuracy": 0.8,
+      "f1": 0.8017316017316019,
+      "classification_report": "              precision    recall  f1-score   support\n\n           0       0.68      0.74      0.71        53\n           1       0.86      0.83      0.85       107\n\n    accuracy                           0.80       160\n   macro avg       0.77      0.78      0.78       160\nweighted avg       0.80      0.80      0.80       160\n",
+      "roc_auc": 0.8416505025568682,
       "y_prob": [
+        0.13410941654770053,
+        0.5294946471795305,
+        0.6734029805133361,
+        0.48276513165091095,
+        0.537887729131407,
+        0.14991155807168366,
+        0.6170905519960105,
+        0.4784058790521681,
+        0.8375867266088654,
+        0.7617669897727407,
+        0.1351365003484392,
+        0.44334998820869886,
+        0.45918298156738113,
+        0.18086772747653274,
+        0.7363322360597586,
+        0.11657189382719757,
+        0.23649619273968361,
+        0.5901773420747848,
+        0.134558348190491,
+        0.4254133833278052,
+        0.1296011320209188,
+        0.7314644444555719,
+        0.7853934325845645,
+        0.9071377805346806,
+        0.6899050304790696,
+        0.7018366024600355,
+        0.8197685771383121,
+        0.5198317417706617,
+        0.12630032800342134,
+        0.6473881223230429,
+        0.832648823582342,
+        0.6085633474110343,
+        0.8113142318198139,
+        0.9074706641382,
+        0.8459766165607369,
+        0.8201738289640232,
+        0.9003680409586162,
+        0.10016718457369138,
+        0.8585346197416688,
+        0.47744576120787313,
+        0.7118155973614765,
+        0.8079398173505976,
+        0.4835688710616073,
+        0.15232883334919356,
+        0.7564268826362109,
+        0.8738909667369577,
+        0.8978781232217746,
+        0.8629139348922269,
+        0.9210750298694106,
+        0.8622476596460283,
+        0.5280584146401254,
+        0.43175724148640166,
+        0.8612268450998052,
+        0.5192093060535821,
+        0.4631429141776544,
+        0.41349421421165006,
+        0.1842717233199732,
+        0.5393021880249185,
+        0.4611023953979327,
+        0.8127362992442425,
+        0.8939691379496485,
+        0.4164787102909156,
+        0.8978878589569886,
+        0.7983424128761862,
+        0.1173182901761418,
+        0.7542514691828953,
+        0.8448152300020084,
+        0.7353745795727059,
+        0.874243708197828,
+        0.1495537468741798,
+        0.4608257845629732,
+        0.723666302124413,
+        0.8963689192492247,
+        0.5718324254113771,
+        0.4597956607545386,
+        0.8732235494170898,
+        0.7727295226970254,
+        0.730516200696798,
+        0.09548329031034873,
+        0.8629109388984001,
+        0.46263304800868466,
+        0.7151887661777987,
+        0.40230852321056754,
+        0.8930814394148957,
+        0.8189750833489609,
+        0.7948906670433182,
+        0.16253862850672068,
+        0.7762547028088542,
+        0.8334832356172626,
+        0.8004830705990988,
+        0.21099149923900418,
+        0.8841702218788182,
+        0.8712799607086796,
+        0.5813945785177388,
+        0.8052927113899669,
+        0.49664824783559597,
+        0.43547420910148493,
+        0.9093300332802987,
+        0.8271809777802208,
+        0.6483594924481736,
+        0.9153820268559258,
+        0.8316891109823481,
+        0.5172451679903641,
+        0.10690068213440551,
+        0.14482053455947552,
+        0.6514137175182347,
+        0.8437537672599027,
+        0.7675893615600448,
+        0.7953030977589493,
+        0.23188048345069834,
+        0.8320099927549827,
+        0.8411414013434237,
+        0.49552967074078597,
+        0.10795590031797207,
+        0.6603705695450834,
+        0.905216945402769,
+        0.9017388468431307,
+        0.7038598916174742,
+        0.46749836573145026,
+        0.10980764687631323,
+        0.4241936068254756,
+        0.7869237725733111,
+        0.4176645200689225,
+        0.41597565326122093,
+        0.1840375192714269,
+        0.09239177436740872,
+        0.5634089283154317,
+        0.8672817898245639,
+        0.859787536713618,
+        0.823755567069053,
+        0.09332562760407467,
+        0.5269654560034531,
+        0.3282944545355915,
+        0.12656165619337742,
+        0.8996144986378454,
+        0.8794629731543243,
+        0.2259360459237031,
+        0.8677342141160906,
+        0.7065381808535922,
+        0.5087308414616077,
+        0.7361387452605942,
+        0.9175622001046994,
+        0.6768295210930064,
+        0.8217479267611849,
+        0.8186363655380218,
+        0.7713282338574033,
+        0.8650744660561319,
+        0.44798639555019343,
+        0.6891437990464381,
+        0.8957915611217553,
+        0.13099882437253405,
+        0.9100589282983074,
+        0.5699927016156656,
+        0.8848101584325896,
+        0.833215993303308,
+        0.806074199994562,
+        0.15238050377532514,
+        0.10376629963015432,
+        0.1557889773023143,
+        0.11296197259042451
       ]
     },
     "tune": {
         "is_large": false,
         "is_wide": false,
         "is_binary": true,
+        "imbalance_ratio": 1.0,
+        "smote_applied": true
       }
     }
   },
             ],
             "n_classes": 2,
             "class_distribution": {
+              "1": 535,
+              "0": 265
             },
+            "imbalance_ratio": 2.02
           },
           "quality_flags": [],
           "recommendations": [
             "Categorical columns (2): mode imputation + one-hot encoding.",
             "Target encoded with LabelEncoder. Classes: ['0', '1']",
             "Train/test split: 640 train rows, 160 test rows (20% test).",
+            "Class imbalance ratio (majority/minority): 2.02.",
+            "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
             "Final feature matrix: 10 features."
           ],
           "num_cols": [
           ],
           "n_classes": 2,
           "log_transformed_cols": [],
+          "smote_applied": true,
+          "smote_log": "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
+          "train_size": 856,
           "test_size": 160,
           "final_feature_count": 10
         }
             "is_large": false,
             "is_wide": false,
             "is_binary": true,
+            "imbalance_ratio": 1.0,
+            "smote_applied": true
           }
         }
       },
       "status": "done",
       "data": {
         "train": {
+          "best_name": "Random Forest",
           "best_metrics": {
+            "accuracy": 0.8,
+            "f1": 0.8017316017316019,
+            "roc_auc": 0.8416505025568682,
+            "train_time_s": 0.378,
+            "train_score": 0.926019739715259,
+            "test_score": 0.8416505025568682,
+            "generalization_gap": 0.08436923715839073,
             "overfit": false
           },
           "metric_name": "roc_auc",
           "comparison_df": [
             {
               "Model": "Logistic Regression",
+              "Train Score": 0.8272,
+              "Test Score": 0.8462,
+              "Gap": -0.019,
+              "CV Mean": 0.8203,
+              "CV Std": 0.0317,
+              "CV Train Mean": 0.8271,
               "CV Overfit": "No",
               "Overfit": "No",
+              "Train Time(s)": 0.01
             },
             {
               "Model": "Random Forest",
+              "Train Score": 0.926,
+              "Test Score": 0.8417,
+              "Gap": 0.0844,
+              "CV Mean": 0.872,
+              "CV Std": 0.0247,
+              "CV Train Mean": 0.9317,
+              "CV Overfit": "No",
+              "Overfit": "No",
+              "Train Time(s)": 0.38
             }
           ],
           "feature_importances": {
+            "sex_male": 0.28592870600873493,
+            "sex_female": 0.2421809277670601,
+            "pclass": 0.20077516929328645,
+            "fare": 0.06675033992695845,
+            "age": 0.06080316414257179,
+            "sibsp": 0.05905243894256141,
+            "parch": 0.04148880321022331,
+            "embarked_S": 0.016750117296253544,
+            "embarked_Q": 0.01568458564080758,
+            "embarked_C": 0.010585747771542543
           },
           "training_log": [
             "Training 2 models for classification task.",
             "  Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
             "  Training Logistic Regression...",
+            "    Logistic Regression: acc=0.775, f1=0.779, auc=0.846  [0.01s]",
             "  Training Random Forest...",
+            "    Random Forest: acc=0.800, f1=0.802, auc=0.842  [0.38s]",
+            "\nBest model: Random Forest (roc_auc=0.8417)",
+            "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417"
           ],
+          "overfitting_warnings": [],
+          "cv_summary": "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417",
           "cv_folds_used": 5,
           "results": [
             {
               "name": "Logistic Regression",
               "metrics": {
+                "accuracy": 0.775,
+                "f1": 0.7791835699797159,
+                "roc_auc": 0.8462352318815024,
+                "train_time_s": 0.015,
+                "train_score": 0.827190147611145,
+                "test_score": 0.8462352318815024,
+                "generalization_gap": -0.01904508427035745,
                 "overfit": false
               },
               "cv_scores": [
+                0.8080043266630612,
+                0.7863201094391246,
+                0.7939808481532147,
+                0.8422708618331054,
+                0.8707250341997264
               ],
+              "cv_mean": 0.8202602360576463,
+              "cv_std": 0.031698294077597756,
               "cv_train_scores": [
+                0.8316063062138779,
+                0.8339556373928018,
+                0.8352002455117385,
+                0.8209128262833956,
+                0.8139907592109525
               ],
+              "cv_train_mean": 0.8271331549225532,
               "cv_overfit": false,
+              "train_score": 0.827190147611145,
+              "generalization_gap": -0.01904508427035745,
               "overfit": false
             },
             {
               "name": "Random Forest",
               "metrics": {
+                "accuracy": 0.8,
+                "f1": 0.8017316017316019,
+                "roc_auc": 0.8416505025568682,
+                "train_time_s": 0.378,
+                "train_score": 0.926019739715259,
+                "test_score": 0.8416505025568682,
+                "generalization_gap": 0.08436923715839073,
+                "overfit": false
               },
               "cv_scores": [
+                0.8795294753921038,
+                0.840218878248974,
+                0.8588235294117647,
+                0.9143638850889193,
+                0.8670314637482901
               ],
+              "cv_mean": 0.8719934463780105,
+              "cv_std": 0.024737570511909102,
               "cv_train_scores": [
+                0.9302947915597962,
+                0.9364823623685063,
+                0.9327400132985526,
+                0.9295432458697765,
+                0.9296881659932144
               ],
+              "cv_train_mean": 0.9317497158179693,
+              "cv_overfit": false,
+              "train_score": 0.926019739715259,
+              "generalization_gap": 0.08436923715839073,
+              "overfit": false
             }
           ]
         }
         "tune": {
           "success": false,
           "error": "optuna not installed \u2014 run: pip install optuna",
+          "model_name": "Random Forest"
         }
       },
       "error": null
       "data": {
         "eval": {
           "metrics": {
+            "accuracy": 0.8,
+            "f1": 0.8017316017316019,
+            "classification_report": "              precision    recall  f1-score   support\n\n           0       0.68      0.74      0.71        53\n           1       0.86      0.83      0.85       107\n\n    accuracy                           0.80       160\n   macro avg       0.77      0.78      0.78       160\nweighted avg       0.80      0.80      0.80       160\n",
+            "roc_auc": 0.8416505025568682,
             "y_prob": [
+              0.13410941654770053,
+              0.5294946471795305,
+              0.6734029805133361,
+              0.48276513165091095,
+              0.537887729131407,
+              0.14991155807168366,
+              0.6170905519960105,
+              0.4784058790521681,
+              0.8375867266088654,
+              0.7617669897727407,
+              0.1351365003484392,
+              0.44334998820869886,
+              0.45918298156738113,
+              0.18086772747653274,
+              0.7363322360597586,
+              0.11657189382719757,
+              0.23649619273968361,
+              0.5901773420747848,
+              0.134558348190491,
+              0.4254133833278052,
+              0.1296011320209188,
+              0.7314644444555719,
+              0.7853934325845645,
+              0.9071377805346806,
+              0.6899050304790696,
+              0.7018366024600355,
+              0.8197685771383121,
+              0.5198317417706617,
+              0.12630032800342134,
+              0.6473881223230429,
+              0.832648823582342,
+              0.6085633474110343,
+              0.8113142318198139,
+              0.9074706641382,
+              0.8459766165607369,
+              0.8201738289640232,
+              0.9003680409586162,
+              0.10016718457369138,
+              0.8585346197416688,
+              0.47744576120787313,
+              0.7118155973614765,
+              0.8079398173505976,
+              0.4835688710616073,
+              0.15232883334919356,
+              0.7564268826362109,
+              0.8738909667369577,
+              0.8978781232217746,
+              0.8629139348922269,
+              0.9210750298694106,
+              0.8622476596460283,
+              0.5280584146401254,
+              0.43175724148640166,
+              0.8612268450998052,
+              0.5192093060535821,
+              0.4631429141776544,
+              0.41349421421165006,
+              0.1842717233199732,
+              0.5393021880249185,
+              0.4611023953979327,
+              0.8127362992442425,
+              0.8939691379496485,
+              0.4164787102909156,
+              0.8978878589569886,
+              0.7983424128761862,
+              0.1173182901761418,
+              0.7542514691828953,
+              0.8448152300020084,
+              0.7353745795727059,
+              0.874243708197828,
+              0.1495537468741798,
+              0.4608257845629732,
+              0.723666302124413,
+              0.8963689192492247,
+              0.5718324254113771,
+              0.4597956607545386,
+              0.8732235494170898,
+              0.7727295226970254,
+              0.730516200696798,
+              0.09548329031034873,
+              0.8629109388984001,
+              0.46263304800868466,
+              0.7151887661777987,
+              0.40230852321056754,
+              0.8930814394148957,
+              0.8189750833489609,
+              0.7948906670433182,
+              0.16253862850672068,
+              0.7762547028088542,
+              0.8334832356172626,
+              0.8004830705990988,
+              0.21099149923900418,
+              0.8841702218788182,
+              0.8712799607086796,
+              0.5813945785177388,
+              0.8052927113899669,
+              0.49664824783559597,
+              0.43547420910148493,
+              0.9093300332802987,
+              0.8271809777802208,
+              0.6483594924481736,
+              0.9153820268559258,
+              0.8316891109823481,
+              0.5172451679903641,
+              0.10690068213440551,
+              0.14482053455947552,
+              0.6514137175182347,
+              0.8437537672599027,
+              0.7675893615600448,
+              0.7953030977589493,
+              0.23188048345069834,
+              0.8320099927549827,
+              0.8411414013434237,
+              0.49552967074078597,
+              0.10795590031797207,
+              0.6603705695450834,
+              0.905216945402769,
+              0.9017388468431307,
+              0.7038598916174742,
+              0.46749836573145026,
+              0.10980764687631323,
+              0.4241936068254756,
+              0.7869237725733111,
+              0.4176645200689225,
+              0.41597565326122093,
+              0.1840375192714269,
+              0.09239177436740872,
+              0.5634089283154317,
+              0.8672817898245639,
+              0.859787536713618,
+              0.823755567069053,
+              0.09332562760407467,
+              0.5269654560034531,
+              0.3282944545355915,
+              0.12656165619337742,
+              0.8996144986378454,
+              0.8794629731543243,
+              0.2259360459237031,
+              0.8677342141160906,
+              0.7065381808535922,
+              0.5087308414616077,
+              0.7361387452605942,
+              0.9175622001046994,
+              0.6768295210930064,
+              0.8217479267611849,
+              0.8186363655380218,
+              0.7713282338574033,
+              0.8650744660561319,
+              0.44798639555019343,
+              0.6891437990464381,
+              0.8957915611217553,
+              0.13099882437253405,
+              0.9100589282983074,
+              0.5699927016156656,
+              0.8848101584325896,
+              0.833215993303308,
+              0.806074199994562,
+              0.15238050377532514,
+              0.10376629963015432,
+              0.1557889773023143,
+              0.11296197259042451
             ]
           },
           "plot_paths": {
             ],
             "n_classes": 2,
             "class_distribution": {
+              "1": 535,
+              "0": 265
             },
+            "imbalance_ratio": 2.02
           },
           "quality_flags": [],
           "recommendations": [
             "Categorical columns (2): mode imputation + one-hot encoding.",
             "Target encoded with LabelEncoder. Classes: ['0', '1']",
             "Train/test split: 640 train rows, 160 test rows (20% test).",
+            "Class imbalance ratio (majority/minority): 2.02.",
+            "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428.",
             "Final feature matrix: 10 features."
           ],
           "num_cols": [
           ],
           "n_classes": 2,
           "log_transformed_cols": [],
+          "smote_applied": true,
+          "smote_log": "Applied SMOTE (imbalance ratio was 2.02). New class distribution: class 0: 428, class 1: 428."
         },
         "train": {
+          "best_name": "Random Forest",
           "best_metrics": {
+            "accuracy": 0.8,
+            "f1": 0.8017316017316019,
+            "roc_auc": 0.8416505025568682,
+            "train_time_s": 0.378,
+            "train_score": 0.926019739715259,
+            "test_score": 0.8416505025568682,
+            "generalization_gap": 0.08436923715839073,
             "overfit": false
           },
           "metric_name": "roc_auc",
           "comparison_df": [
             {
               "Model": "Logistic Regression",
+              "Train Score": 0.8272,
+              "Test Score": 0.8462,
+              "Gap": -0.019,
+              "CV Mean": 0.8203,
+              "CV Std": 0.0317,
+              "CV Train Mean": 0.8271,
               "CV Overfit": "No",
               "Overfit": "No",
+              "Train Time(s)": 0.01
             },
             {
               "Model": "Random Forest",
+              "Train Score": 0.926,
+              "Test Score": 0.8417,
+              "Gap": 0.0844,
+              "CV Mean": 0.872,
+              "CV Std": 0.0247,
+              "CV Train Mean": 0.9317,
+              "CV Overfit": "No",
+              "Overfit": "No",
+              "Train Time(s)": 0.38
             }
           ],
           "feature_importances": {
+            "sex_male": 0.28592870600873493,
+            "sex_female": 0.2421809277670601,
+            "pclass": 0.20077516929328645,
+            "fare": 0.06675033992695845,
+            "age": 0.06080316414257179,
+            "sibsp": 0.05905243894256141,
+            "parch": 0.04148880321022331,
+            "embarked_S": 0.016750117296253544,
+            "embarked_Q": 0.01568458564080758,
+            "embarked_C": 0.010585747771542543
           },
           "training_log": [
             "Training 2 models for classification task.",
             "  Parameter overrides applied for: LightGBM, Random Forest, XGBoost",
             "  Training Logistic Regression...",
+            "    Logistic Regression: acc=0.775, f1=0.779, auc=0.846  [0.01s]",
             "  Training Random Forest...",
+            "    Random Forest: acc=0.800, f1=0.802, auc=0.842  [0.38s]",
+            "\nBest model: Random Forest (roc_auc=0.8417)",
+            "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417"
           ],
+          "overfitting_warnings": [],
+          "cv_summary": "5-fold cross-validation results: best model Random Forest achieved CV mean 0.8720 \u00b1 0.0247 vs single test score 0.8417",
           "cv_folds_used": 5,
           "results": [
             {
               "name": "Logistic Regression",
               "metrics": {
+                "accuracy": 0.775,
+                "f1": 0.7791835699797159,
+                "roc_auc": 0.8462352318815024,
+                "train_time_s": 0.015,
+                "train_score": 0.827190147611145,
+                "test_score": 0.8462352318815024,
+                "generalization_gap": -0.01904508427035745,
                 "overfit": false
               },
+              "train_score": 0.827190147611145,
+              "generalization_gap": -0.01904508427035745,
               "overfit": false,
               "cv_scores": [
+                0.8080043266630612,
+                0.7863201094391246,
+                0.7939808481532147,
+                0.8422708618331054,
+                0.8707250341997264
               ],
+              "cv_mean": 0.8202602360576463,
+              "cv_std": 0.031698294077597756,
               "cv_train_scores": [
+                0.8316063062138779,
+                0.8339556373928018,
+                0.8352002455117385,
+                0.8209128262833956,
+                0.8139907592109525
               ],
+              "cv_train_mean": 0.8271331549225532,
               "cv_overfit": false
             },
             {
               "name": "Random Forest",
               "metrics": {
+                "accuracy": 0.8,
+                "f1": 0.8017316017316019,
+                "roc_auc": 0.8416505025568682,
+                "train_time_s": 0.378,
+                "train_score": 0.926019739715259,
+                "test_score": 0.8416505025568682,
+                "generalization_gap": 0.08436923715839073,
+                "overfit": false
               },
+              "train_score": 0.926019739715259,
+              "generalization_gap": 0.08436923715839073,
+              "overfit": false,
               "cv_scores": [
+                0.8795294753921038,
+                0.840218878248974,
+                0.8588235294117647,
+                0.9143638850889193,
+                0.8670314637482901
               ],
+              "cv_mean": 0.8719934463780105,
+              "cv_std": 0.024737570511909102,
               "cv_train_scores": [
+                0.9302947915597962,
+                0.9364823623685063,
+                0.9327400132985526,
+                0.9295432458697765,
+                0.9296881659932144
               ],
+              "cv_train_mean": 0.9317497158179693,
+              "cv_overfit": false
             }
           ]
         },
         "eval": {
           "metrics": {
+            "accuracy": 0.8,
+            "f1": 0.8017316017316019,
+            "classification_report": "              precision    recall  f1-score   support\n\n           0       0.68      0.74      0.71        53\n           1       0.86      0.83      0.85       107\n\n    accuracy                           0.80       160\n   macro avg       0.77      0.78      0.78       160\nweighted avg       0.80      0.80      0.80       160\n",
+            "roc_auc": 0.8416505025568682,
             "y_prob": [
+              0.13410941654770053,
+              0.5294946471795305,
+              0.6734029805133361,
+              0.48276513165091095,
+              0.537887729131407,
+              0.14991155807168366,
+              0.6170905519960105,
+              0.4784058790521681,
+              0.8375867266088654,
+              0.7617669897727407,
+              0.1351365003484392,
+              0.44334998820869886,
+              0.45918298156738113,
+              0.18086772747653274,
+              0.7363322360597586,
+              0.11657189382719757,
+              0.23649619273968361,
+              0.5901773420747848,
+              0.134558348190491,
+              0.4254133833278052,
+              0.1296011320209188,
+              0.7314644444555719,
+              0.7853934325845645,
+              0.9071377805346806,
+              0.6899050304790696,
+              0.7018366024600355,
+              0.8197685771383121,
+              0.5198317417706617,
+              0.12630032800342134,
+              0.6473881223230429,
+              0.832648823582342,
+              0.6085633474110343,
+              0.8113142318198139,
+              0.9074706641382,
+              0.8459766165607369,
+              0.8201738289640232,
+              0.9003680409586162,
+              0.10016718457369138,
+              0.8585346197416688,
+              0.47744576120787313,
+              0.7118155973614765,
+              0.8079398173505976,
+              0.4835688710616073,
+              0.15232883334919356,
+              0.7564268826362109,
+              0.8738909667369577,
+              0.8978781232217746,
+              0.8629139348922269,
+              0.9210750298694106,
+              0.8622476596460283,
+              0.5280584146401254,
+              0.43175724148640166,
+              0.8612268450998052,
+              0.5192093060535821,
+              0.4631429141776544,
+              0.41349421421165006,
+              0.1842717233199732,
+              0.5393021880249185,
+              0.4611023953979327,
+              0.8127362992442425,
+              0.8939691379496485,
+              0.4164787102909156,
+              0.8978878589569886,
+              0.7983424128761862,
+              0.1173182901761418,
+              0.7542514691828953,
+              0.8448152300020084,
+              0.7353745795727059,
+              0.874243708197828,
+              0.1495537468741798,
+              0.4608257845629732,
+              0.723666302124413,
+              0.8963689192492247,
+              0.5718324254113771,
+              0.4597956607545386,
+              0.8732235494170898,
+              0.7727295226970254,
+              0.730516200696798,
+              0.09548329031034873,
+              0.8629109388984001,
+              0.46263304800868466,
+              0.7151887661777987,
+              0.40230852321056754,
+              0.8930814394148957,
+              0.8189750833489609,
+              0.7948906670433182,
+              0.16253862850672068,
+              0.7762547028088542,
+              0.8334832356172626,
+              0.8004830705990988,
+              0.21099149923900418,
+              0.8841702218788182,
+              0.8712799607086796,
+              0.5813945785177388,
+              0.8052927113899669,
+              0.49664824783559597,
+              0.43547420910148493,
+              0.9093300332802987,
+              0.8271809777802208,
+              0.6483594924481736,
+              0.9153820268559258,
+              0.8316891109823481,
+              0.5172451679903641,
+              0.10690068213440551,
+              0.14482053455947552,
+              0.6514137175182347,
+              0.8437537672599027,
+              0.7675893615600448,
+              0.7953030977589493,
+              0.23188048345069834,
+              0.8320099927549827,
+              0.8411414013434237,
+              0.49552967074078597,
+              0.10795590031797207,
+              0.6603705695450834,
+              0.905216945402769,
+              0.9017388468431307,
+              0.7038598916174742,
+              0.46749836573145026,
+              0.10980764687631323,
+              0.4241936068254756,
+              0.7869237725733111,
+              0.4176645200689225,
+              0.41597565326122093,
+              0.1840375192714269,
+              0.09239177436740872,
+              0.5634089283154317,
+              0.8672817898245639,
+              0.859787536713618,
+              0.823755567069053,
+              0.09332562760407467,
+              0.5269654560034531,
+              0.3282944545355915,
+              0.12656165619337742,
+              0.8996144986378454,
+              0.8794629731543243,
+              0.2259360459237031,
+              0.8677342141160906,
+              0.7065381808535922,
+              0.5087308414616077,
+              0.7361387452605942,
+              0.9175622001046994,
+              0.6768295210930064,
+              0.8217479267611849,
+              0.8186363655380218,
+              0.7713282338574033,
+              0.8650744660561319,
+              0.44798639555019343,
+              0.6891437990464381,
+              0.8957915611217553,
+              0.13099882437253405,
+              0.9100589282983074,
+              0.5699927016156656,
+              0.8848101584325896,
+              0.833215993303308,
+              0.806074199994562,
+              0.15238050377532514,
+              0.10376629963015432,
+              0.1557889773023143,
+              0.11296197259042451
             ]
           },
           "plot_paths": {
         },
         "target_col": "survived",
         "task_type": "classification",
+        "best_model_name": "Random Forest",
         "best_metrics": {
+          "accuracy": 0.8,
+          "f1": 0.8017316017316019,
+          "roc_auc": 0.8416505025568682,
+          "train_time_s": 0.378,
+          "train_score": 0.926019739715259,
+          "test_score": 0.8416505025568682,
+          "generalization_gap": 0.08436923715839073,
           "overfit": false
         },
         "comparison_df": [
           {
             "Model": "Logistic Regression",
+            "Train Score": 0.8272,
+            "Test Score": 0.8462,
+            "Gap": -0.019,
+            "CV Mean": 0.8203,
+            "CV Std": 0.0317,
+            "CV Train Mean": 0.8271,
             "CV Overfit": "No",
             "Overfit": "No",
+            "Train Time(s)": 0.01
           },
           {
             "Model": "Random Forest",
+            "Train Score": 0.926,
+            "Test Score": 0.8417,
+            "Gap": 0.0844,
+            "CV Mean": 0.872,
+            "CV Std": 0.0247,
+            "CV Train Mean": 0.9317,
+            "CV Overfit": "No",
+            "Overfit": "No",
+            "Train Time(s)": 0.38
           }
         ],
         "feature_importances": {
+          "sex_male": 0.28592870600873493,
+          "sex_female": 0.2421809277670601,
+          "pclass": 0.20077516929328645,
+          "fare": 0.06675033992695845,
+          "age": 0.06080316414257179,
+          "sibsp": 0.05905243894256141,
+          "parch": 0.04148880321022331,
+          "embarked_S": 0.016750117296253544,
+          "embarked_Q": 0.01568458564080758,
+          "embarked_C": 0.010585747771542543
         },
         "plot_paths": {
           "confusion_matrix": "outputs/titanic_confusion_matrix.png",
           "feature_importance": "outputs/titanic_feature_importance.png"
         },
         "metrics": {
+          "accuracy": 0.8,
+          "f1": 0.8017316017316019,
+          "classification_report": "              precision    recall  f1-score   support\n\n           0       0.68      0.74      0.71        53\n           1       0.86      0.83      0.85       107\n\n    accuracy                           0.80       160\n   macro avg       0.77      0.78      0.78       160\nweighted avg       0.80      0.80      0.80       160\n",
+          "roc_auc": 0.8416505025568682,
           "y_prob": [
+            0.13410941654770053,
+            0.5294946471795305,
+            0.6734029805133361,
+            0.48276513165091095,
+            0.537887729131407,
+            0.14991155807168366,
+            0.6170905519960105,
+            0.4784058790521681,
+            0.8375867266088654,
+            0.7617669897727407,
+            0.1351365003484392,
+            0.44334998820869886,
+            0.45918298156738113,
+            0.18086772747653274,
+            0.7363322360597586,
+            0.11657189382719757,
+            0.23649619273968361,
+            0.5901773420747848,
+            0.134558348190491,
+            0.4254133833278052,
+            0.1296011320209188,
+            0.7314644444555719,
+            0.7853934325845645,
+            0.9071377805346806,
+            0.6899050304790696,
+            0.7018366024600355,
+            0.8197685771383121,
+            0.5198317417706617,
+            0.12630032800342134,
+            0.6473881223230429,
+            0.832648823582342,
+            0.6085633474110343,
+            0.8113142318198139,
+            0.9074706641382,
+            0.8459766165607369,
+            0.8201738289640232,
+            0.9003680409586162,
+            0.10016718457369138,
+            0.8585346197416688,
+            0.47744576120787313,
+            0.7118155973614765,
+            0.8079398173505976,
+            0.4835688710616073,
+            0.15232883334919356,
+            0.7564268826362109,
+            0.8738909667369577,
+            0.8978781232217746,
+            0.8629139348922269,
+            0.9210750298694106,
+            0.8622476596460283,
+            0.5280584146401254,
+            0.43175724148640166,
+            0.8612268450998052,
+            0.5192093060535821,
+            0.4631429141776544,
+            0.41349421421165006,
+            0.1842717233199732,
+            0.5393021880249185,
+            0.4611023953979327,
+            0.8127362992442425,
+            0.8939691379496485,
+            0.4164787102909156,
+            0.8978878589569886,
+            0.7983424128761862,
+            0.1173182901761418,
+            0.7542514691828953,
+            0.8448152300020084,
+            0.7353745795727059,
+            0.874243708197828,
+            0.1495537468741798,
+            0.4608257845629732,
+            0.723666302124413,
+            0.8963689192492247,
+            0.5718324254113771,
+            0.4597956607545386,
+            0.8732235494170898,
+            0.7727295226970254,
+            0.730516200696798,
+            0.09548329031034873,
+            0.8629109388984001,
+            0.46263304800868466,
+            0.7151887661777987,
+            0.40230852321056754,
+            0.8930814394148957,
+            0.8189750833489609,
+            0.7948906670433182,
+            0.16253862850672068,
+            0.7762547028088542,
+            0.8334832356172626,
+            0.8004830705990988,
+            0.21099149923900418,
+            0.8841702218788182,
+            0.8712799607086796,
+            0.5813945785177388,
+            0.8052927113899669,
+            0.49664824783559597,
+            0.43547420910148493,
+            0.9093300332802987,
+            0.8271809777802208,
+            0.6483594924481736,
+            0.9153820268559258,
+            0.8316891109823481,
+            0.5172451679903641,
+            0.10690068213440551,
+            0.14482053455947552,
+            0.6514137175182347,
+            0.8437537672599027,
+            0.7675893615600448,
+            0.7953030977589493,
+            0.23188048345069834,
+            0.8320099927549827,
+            0.8411414013434237,
+            0.49552967074078597,
+            0.10795590031797207,
+            0.6603705695450834,
+            0.905216945402769,
+            0.9017388468431307,
+            0.7038598916174742,
+            0.46749836573145026,
+            0.10980764687631323,
+            0.4241936068254756,
+            0.7869237725733111,
+            0.4176645200689225,
+            0.41597565326122093,
+            0.1840375192714269,
+            0.09239177436740872,
+            0.5634089283154317,
+            0.8672817898245639,
+            0.859787536713618,
+            0.823755567069053,
+            0.09332562760407467,
+            0.5269654560034531,
+            0.3282944545355915,
+            0.12656165619337742,
+            0.8996144986378454,
+            0.8794629731543243,
+            0.2259360459237031,
+            0.8677342141160906,
+            0.7065381808535922,
+            0.5087308414616077,
+            0.7361387452605942,
+            0.9175622001046994,
+            0.6768295210930064,
+            0.8217479267611849,
+            0.8186363655380218,
+            0.7713282338574033,
+            0.8650744660561319,
+            0.44798639555019343,
+            0.6891437990464381,
+            0.8957915611217553,
+            0.13099882437253405,
+            0.9100589282983074,
+            0.5699927016156656,
+            0.8848101584325896,
+            0.833215993303308,
+            0.806074199994562,
+            0.15238050377532514,
+            0.10376629963015432,
+            0.1557889773023143,
+            0.11296197259042451
           ]
         },
         "tune": {
             "is_large": false,
             "is_wide": false,
             "is_binary": true,
+            "imbalance_ratio": 1.0,
+            "smote_applied": true
           }
         }
       },

generate_all_demos.py CHANGED Viewed

@@ -121,7 +121,7 @@ _TITANIC_TARGET_COL = "survived"
 def make_titanic_like(n: int = 800) -> pd.DataFrame:
-    """Synthetic Titanic-style data: seven feature columns, then target (never in feature block)."""
     rng = np.random.default_rng(7)
     features = pd.DataFrame(
         {
@@ -134,11 +134,57 @@ def make_titanic_like(n: int = 800) -> pd.DataFrame:
             "embarked": rng.choice(["C", "Q", "S"], n),
         }
     )
-    target = pd.Series(rng.integers(0, 2, n), name=_TITANIC_TARGET_COL)
     out = pd.concat([features, target], axis=1)
     return out.loc[:, list(_TITANIC_FEATURE_COLS) + [_TITANIC_TARGET_COL]]
 def make_diabetes_binary() -> pd.DataFrame:
     bunch = load_diabetes()
     X, y = bunch.data, bunch.target
@@ -307,16 +353,20 @@ def main() -> None:
     diabetes_csv = datasets_dir / "diabetes_sklearn_demo.csv"
     diabetes_df.to_csv(diabetes_csv, index=False)
     configs = [
         {
             "key": "healthcare",
             "label": "healthcare",
-            "df": pd.read_csv(datasets_dir / "sample_healthcare_classification.csv"),
             "message": "predict whether the patient will be readmitted",
             "target_col": "readmitted",
             "task_type": "classification",
             "run_id": "healthcare",
-            "demo_dataset_path": "datasets/sample_healthcare_classification.csv",
             "demo_goal": "Predict hospital readmission from patient features (demo)",
         },
         {

 def make_titanic_like(n: int = 800) -> pd.DataFrame:
+    """Synthetic Titanic-style data: survival depends strongly on sex and pclass."""
     rng = np.random.default_rng(7)
     features = pd.DataFrame(
         {
             "embarked": rng.choice(["C", "Q", "S"], n),
         }
     )
+    female = features["sex"].eq("female")
+    male = ~female
+    p_surv = np.zeros(n, dtype=float)
+    # Nominal pattern: female ~80%; male by class ~60% / ~40% / ~15%. The coded rates below widen that spread (upper groups raised, 3rd-class male lowered) so demo ROC-AUC clears 0.75.
+    p_surv[female] = 0.88
+    pc = features["pclass"].to_numpy()
+    p_surv[male & (pc == 1)] = 0.72
+    p_surv[male & (pc == 2)] = 0.48
+    p_surv[male & (pc == 3)] = 0.10
+    survived = (rng.random(n) < p_surv).astype(np.int64)
+    target = pd.Series(survived, name=_TITANIC_TARGET_COL)
     out = pd.concat([features, target], axis=1)
     return out.loc[:, list(_TITANIC_FEATURE_COLS) + [_TITANIC_TARGET_COL]]
+def make_healthcare_like(n: int = 500) -> pd.DataFrame:
+    """
+    Build ``n`` synthetic patient rows with a tier-driven binary ``readmitted`` target.
+
+    glucose, bmi and age are drawn independently and uniformly. Every row falls into
+    exactly one of four risk tiers, and the tier alone sets its readmission probability:
+
+    * tier1: glucose > 140 and bmi > 30                 -> 0.92
+    * tier2: glucose > 140 or bmi > 35, but not tier1   -> 0.72
+    * tier3: neither of the above, age > 65             -> 0.38
+    * tier4: neither of the above, age <= 65            -> 0.08
+
+    The coded rates are intentionally more extreme than the nominal
+    75% / 55% / 45% / 20% pattern; they were rescaled so the demo ROC-AUC exceeds 0.75.
+    All other columns are drawn without reference to the target.
+
+    The generator is seeded with 42, so the output is reproducible for a given ``n``.
+    """
+    # Every draw below consumes the same seeded stream in sequence; reordering any
+    # rng call (including the ones inside the returned dict) changes the generated data.
+    rng = np.random.default_rng(42)
+    glucose = rng.uniform(70.0, 200.0, n)
+    bmi = rng.uniform(18.0, 45.0, n)
+    age = rng.uniform(22.0, 90.0, n)
+    # Boolean masks over the n rows; the four tiers are mutually exclusive and cover every row.
+    tier1 = (glucose > 140) & (bmi > 30)
+    tier2 = ((glucose > 140) | (bmi > 35)) & ~tier1
+    tier3 = ~(tier1 | tier2) & (age > 65)
+    tier4 = ~(tier1 | tier2) & (age <= 65)
+    p = np.zeros(n, dtype=float)
+    # Nominal tier pattern is 75% / 55% / 45% / 20%; the values assigned here are the
+    # rescaled rates actually used (spread widened so demo ROC-AUC > 0.75).
+    p[tier1] = 0.92
+    p[tier2] = 0.72
+    p[tier3] = 0.38
+    p[tier4] = 0.08
+    # One Bernoulli draw per row: readmitted is 1 when the uniform sample falls below p.
+    readmitted = (rng.random(n) < p).astype(int)
+    # The thresholds above were applied to the unrounded values; the frame stores
+    # glucose/bmi/age rounded to one decimal.
+    return pd.DataFrame(
+        {
+            "age": np.round(age, 1),
+            "bmi": np.round(bmi, 1),
+            "blood_pressure": np.round(rng.uniform(60.0, 140.0, n), 1),
+            "glucose": np.round(glucose, 1),
+            # Generator.integers excludes the upper bound by default: values are 0..11 and 1..14.
+            "num_medications": rng.integers(0, 12, n),
+            "days_in_hospital": rng.integers(1, 15, n),
+            "gender": rng.choice(["Female", "Male"], n),
+            "smoker": rng.choice(["Yes", "No"], n),
+            # NOTE(review): the literal "None" is among pandas' default NA strings for
+            # read_csv in recent versions, so this category may come back as NaN once the
+            # frame is written to CSV and reloaded - confirm against the app's loader.
+            "insurance": rng.choice(["None", "Medicare", "Medicaid", "Private"], n),
+            "readmitted": readmitted,
+        }
+    )
 def make_diabetes_binary() -> pd.DataFrame:
     bunch = load_diabetes()
     X, y = bunch.data, bunch.target
     diabetes_csv = datasets_dir / "diabetes_sklearn_demo.csv"
     diabetes_df.to_csv(diabetes_csv, index=False)
+    healthcare_df = make_healthcare_like(500)
+    healthcare_csv = datasets_dir / "healthcare_demo_synth.csv"
+    healthcare_df.to_csv(healthcare_csv, index=False)
     configs = [
         {
             "key": "healthcare",
             "label": "healthcare",
+            "df": healthcare_df,
             "message": "predict whether the patient will be readmitted",
             "target_col": "readmitted",
             "task_type": "classification",
             "run_id": "healthcare",
+            "demo_dataset_path": "datasets/healthcare_demo_synth.csv",
             "demo_goal": "Predict hospital readmission from patient features (demo)",
         },
         {