Bawil committed 29 days ago

Commit

5199058

verified ·

1 Parent(s): 4266ba2

Upload 31 files

Browse files

Files changed (31) hide show

models/for_WMH_Vent/class_weights/class_weights_fold0_standard_3class.json +27 -0
models/for_WMH_Vent/class_weights/class_weights_fold1_standard_3class.json +27 -0
models/for_WMH_Vent/class_weights/class_weights_fold2_standard_3class.json +27 -0
models/for_WMH_Vent/class_weights/class_weights_fold3_standard_3class.json +27 -0
models/for_WMH_Vent/data_splits/concat_fold_assignments.json +475 -0
models/for_WMH_Vent/data_splits/fold_assignments.json +543 -0
models/for_WMH_Vent/data_splits/for_assignment.py +234 -0
models/for_WMH_Vent/data_splits/local_fold_assignments.json +421 -0
models/for_WMH_Vent/data_splits/public_fold_assignments.json +102 -0
models/for_WMH_Vent/download_models.txt +1 -0
models/for_WMH_Vent/folds_results_zscore2_all/per_class_summary.csv +9 -0
models/for_WMH_Vent/folds_results_zscore2_all/test_metrics_all_variants_folds.csv +27 -0
models/for_WMH_Vent/folds_results_zscore2_all/training_info_all_variants_folds.csv +17 -0
models/for_WMH_Vent/folds_results_zscore2_all/variant_comparison_test.csv +5 -0
models/for_WMH_Vent/folds_results_zscore2_all/variant_comparison_training.csv +5 -0
models/for_WMH_Vent/model_training_scripts/attn_unet_model.py +85 -0
models/for_WMH_Vent/model_training_scripts/base_runner_all.py +23 -0
models/for_WMH_Vent/model_training_scripts/dlv3_unet_model.py +198 -0
models/for_WMH_Vent/model_training_scripts/dlv3_unet_model_GN.py +247 -0
models/for_WMH_Vent/model_training_scripts/p4_compute_class_weights.py +353 -0
models/for_WMH_Vent/model_training_scripts/p4_data_loader.py +912 -0
models/for_WMH_Vent/model_training_scripts/p4_error_analysis.py +1033 -0
models/for_WMH_Vent/model_training_scripts/p4_folds_results_aggregator.py +611 -0
models/for_WMH_Vent/model_training_scripts/p4_inference.py +1146 -0
models/for_WMH_Vent/model_training_scripts/p4_run_experiments_all.py +576 -0
models/for_WMH_Vent/model_training_scripts/p4_unet_viz.py +640 -0
models/for_WMH_Vent/model_training_scripts/p4_variant_all_net.py +1051 -0
models/for_WMH_Vent/model_training_scripts/trans_unet_model.py +125 -0
models/for_WMH_Vent/model_training_scripts/unet_model.py +87 -0
models/for_WMH_Vent/model_training_scripts/utility_functions.py +96 -0
models/for_WMH_Vent/results_fold_avg_var_1_zscore2/models/standard_3class/download_models.txt +1 -0

models/for_WMH_Vent/class_weights/class_weights_fold0_standard_3class.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "fold_id": 0,
+  "class_scenario": "3class",
+  "preprocessing": "standard",
+  "num_classes": 3,
+  "total_pixels": 119144448,
+  "class_pixel_counts": [
+    118420367,
+    496384,
+    227697
+  ],
+  "class_frequencies": [
+    0.993922662682528,
+    0.004166236936193619,
+    0.0019111003812783622
+  ],
+  "class_weights": [
+    0.003950922707703595,
+    0.9423307646632635,
+    2.0537183126290333
+  ],
+  "class_names": [
+    "Background",
+    "Ventricles",
+    "Abnormal WMH"
+  ]
+}

models/for_WMH_Vent/class_weights/class_weights_fold1_standard_3class.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "fold_id": 1,
+  "class_scenario": "3class",
+  "preprocessing": "standard",
+  "num_classes": 3,
+  "total_pixels": 119341056,
+  "class_pixel_counts": [
+    118646442,
+    470627,
+    223987
+  ],
+  "class_frequencies": [
+    0.994179588958891,
+    0.003943546469037445,
+    0.0018768645720714924
+  ],
+  "class_weights": [
+    0.003834061426337229,
+    0.96633402011123,
+    2.029831918462433
+  ],
+  "class_names": [
+    "Background",
+    "Ventricles",
+    "Abnormal WMH"
+  ]
+}

models/for_WMH_Vent/class_weights/class_weights_fold2_standard_3class.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "fold_id": 2,
+  "class_scenario": "3class",
+  "preprocessing": "standard",
+  "num_classes": 3,
+  "total_pixels": 119472128,
+  "class_pixel_counts": [
+    118787277,
+    464952,
+    219899
+  ],
+  "class_frequencies": [
+    0.994267692293888,
+    0.0038917194142553484,
+    0.001840588291856658
+  ],
+  "class_weights": [
+    0.0037673539050414257,
+    0.9622481463361134,
+    2.033984499758845
+  ],
+  "class_names": [
+    "Background",
+    "Ventricles",
+    "Abnormal WMH"
+  ]
+}

models/for_WMH_Vent/class_weights/class_weights_fold3_standard_3class.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "fold_id": 3,
+  "class_scenario": "3class",
+  "preprocessing": "standard",
+  "num_classes": 3,
+  "total_pixels": 119734272,
+  "class_pixel_counts": [
+    118973104,
+    509903,
+    251265
+  ],
+  "class_frequencies": [
+    0.9936428560738232,
+    0.004258621959132971,
+    0.0020985219670438216
+  ],
+  "class_weights": [
+    0.004240031541573218,
+    0.9890739908996539,
+    2.006685977558773
+  ],
+  "class_names": [
+    "Background",
+    "Ventricles",
+    "Abnormal WMH"
+  ]
+}

models/for_WMH_Vent/data_splits/concat_fold_assignments.json ADDED Viewed

	@@ -0,0 +1,475 @@

+{
+  "metadata": {
+    "datasets": [
+      "Local_SAI",
+      "Public_MSSEG"
+    ],
+    "total_patients": 115,
+    "test_patients": 13,
+    "trainval_patients": 102,
+    "local_split": "70/10/20",
+    "public_split": "60/20/20",
+    "n_folds": 4,
+    "random_seed": 42
+  },
+  "test_set": {
+    "patients": [
+      "110012",
+      "105549",
+      "109816",
+      "105074",
+      "106780",
+      "107680",
+      "108807",
+      "106063",
+      "114585",
+      "111489",
+      "c01p04",
+      "c07p05",
+      "c08p04"
+    ],
+    "n_patients": 13
+  },
+  "folds": {
+    "fold_0": {
+      "train_patients": [
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266",
+        "c08p03",
+        "c01p01",
+        "c08p02",
+        "c07p03",
+        "c07p04",
+        "c01p02",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "val_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "c01p05",
+        "c08p01",
+        "c01p03"
+      ],
+      "n_train": 89,
+      "n_val": 13
+    },
+    "fold_1": {
+      "train_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266",
+        "c01p05",
+        "c08p01",
+        "c01p03",
+        "c07p03",
+        "c07p04",
+        "c01p02",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "val_patients": [
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "c08p03",
+        "c01p01",
+        "c08p02"
+      ],
+      "n_train": 89,
+      "n_val": 13
+    },
+    "fold_2": {
+      "train_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266",
+        "c01p05",
+        "c08p01",
+        "c01p03",
+        "c08p03",
+        "c01p01",
+        "c08p02",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "val_patients": [
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "c07p03",
+        "c07p04",
+        "c01p02"
+      ],
+      "n_train": 89,
+      "n_val": 13
+    },
+    "fold_3": {
+      "train_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266",
+        "c01p05",
+        "c08p01",
+        "c01p03",
+        "c08p03",
+        "c01p01",
+        "c08p02",
+        "c07p03",
+        "c07p04",
+        "c01p02"
+      ],
+      "val_patients": [
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "n_train": 89,
+      "n_val": 13
+    }
+  }
+}

models/for_WMH_Vent/data_splits/fold_assignments.json ADDED Viewed

	@@ -0,0 +1,543 @@

+{
+  "metadata": {
+    "total_patients": 115,
+    "test_patients": 23,
+    "trainval_patients": 92,
+    "n_folds": 5,
+    "random_seed": 42,
+    "datasets": [
+      "Local_SAI",
+      "Public_MSSEG"
+    ]
+  },
+  "test_set": {
+    "patients": [
+      "112776",
+      "104252",
+      "107539",
+      "111140",
+      "104518",
+      "107997",
+      "111189",
+      "110543",
+      "108344",
+      "104520",
+      "c01p01",
+      "107130",
+      "113394",
+      "c08p04",
+      "105074",
+      "101228",
+      "111691",
+      "105978",
+      "c07p01",
+      "109267",
+      "114836",
+      "c08p03",
+      "104670"
+    ],
+    "n_patients": 23
+  },
+  "folds": {
+    "fold_0": {
+      "train_patients": [
+        "102035",
+        "102313",
+        "104280",
+        "104447",
+        "104453",
+        "104474",
+        "104797",
+        "104810",
+        "104899",
+        "105302",
+        "105465",
+        "105549",
+        "105597",
+        "105755",
+        "105917",
+        "106063",
+        "106200",
+        "106506",
+        "106536",
+        "106639",
+        "106905",
+        "107233",
+        "107455",
+        "107508",
+        "107630",
+        "107680",
+        "107739",
+        "108295",
+        "108444",
+        "108726",
+        "109141",
+        "109395",
+        "109654",
+        "109923",
+        "109944",
+        "110012",
+        "110157",
+        "110280",
+        "110327",
+        "110497",
+        "110540",
+        "110784",
+        "111489",
+        "111852",
+        "112055",
+        "112378",
+        "112414",
+        "112657",
+        "112730",
+        "112765",
+        "112997",
+        "113046",
+        "114058",
+        "114128",
+        "114266",
+        "114304",
+        "114525",
+        "114585",
+        "114770",
+        "114903",
+        "114990",
+        "115588",
+        "115628",
+        "115788",
+        "c01p02",
+        "c01p03",
+        "c01p05",
+        "c07p02",
+        "c07p03",
+        "c07p04",
+        "c07p05",
+        "c08p02",
+        "c08p05"
+      ],
+      "val_patients": [
+        "101627",
+        "104420",
+        "104871",
+        "104937",
+        "105911",
+        "106270",
+        "106780",
+        "106976",
+        "107966",
+        "108807",
+        "108975",
+        "109816",
+        "110218",
+        "111008",
+        "112659",
+        "113845",
+        "114454",
+        "c01p04",
+        "c08p01"
+      ],
+      "n_train": 73,
+      "n_val": 19
+    },
+    "fold_1": {
+      "train_patients": [
+        "101627",
+        "102035",
+        "102313",
+        "104280",
+        "104420",
+        "104453",
+        "104474",
+        "104797",
+        "104871",
+        "104937",
+        "105302",
+        "105465",
+        "105755",
+        "105911",
+        "105917",
+        "106063",
+        "106200",
+        "106270",
+        "106506",
+        "106536",
+        "106639",
+        "106780",
+        "106905",
+        "106976",
+        "107233",
+        "107630",
+        "107966",
+        "108295",
+        "108444",
+        "108726",
+        "108807",
+        "108975",
+        "109141",
+        "109654",
+        "109816",
+        "109944",
+        "110157",
+        "110218",
+        "110280",
+        "110327",
+        "110497",
+        "110540",
+        "110784",
+        "111008",
+        "111489",
+        "111852",
+        "112055",
+        "112378",
+        "112414",
+        "112657",
+        "112659",
+        "112730",
+        "112765",
+        "113845",
+        "114304",
+        "114454",
+        "114525",
+        "114585",
+        "114770",
+        "114903",
+        "115628",
+        "115788",
+        "c01p02",
+        "c01p03",
+        "c01p04",
+        "c01p05",
+        "c07p02",
+        "c07p03",
+        "c07p04",
+        "c07p05",
+        "c08p01",
+        "c08p02",
+        "c08p05"
+      ],
+      "val_patients": [
+        "104447",
+        "104810",
+        "104899",
+        "105549",
+        "105597",
+        "107455",
+        "107508",
+        "107680",
+        "107739",
+        "109395",
+        "109923",
+        "110012",
+        "112997",
+        "113046",
+        "114058",
+        "114128",
+        "114266",
+        "114990",
+        "115588"
+      ],
+      "n_train": 73,
+      "n_val": 19
+    },
+    "fold_2": {
+      "train_patients": [
+        "101627",
+        "102035",
+        "102313",
+        "104420",
+        "104447",
+        "104810",
+        "104871",
+        "104899",
+        "104937",
+        "105465",
+        "105549",
+        "105597",
+        "105911",
+        "106063",
+        "106200",
+        "106270",
+        "106506",
+        "106780",
+        "106976",
+        "107233",
+        "107455",
+        "107508",
+        "107630",
+        "107680",
+        "107739",
+        "107966",
+        "108444",
+        "108807",
+        "108975",
+        "109141",
+        "109395",
+        "109654",
+        "109816",
+        "109923",
+        "109944",
+        "110012",
+        "110157",
+        "110218",
+        "110280",
+        "110327",
+        "110497",
+        "110784",
+        "111008",
+        "111489",
+        "111852",
+        "112055",
+        "112378",
+        "112414",
+        "112657",
+        "112659",
+        "112730",
+        "112765",
+        "112997",
+        "113046",
+        "113845",
+        "114058",
+        "114128",
+        "114266",
+        "114304",
+        "114454",
+        "114585",
+        "114770",
+        "114990",
+        "115588",
+        "115628",
+        "c01p02",
+        "c01p03",
+        "c01p04",
+        "c01p05",
+        "c07p02",
+        "c07p04",
+        "c08p01",
+        "c08p02",
+        "c08p05"
+      ],
+      "val_patients": [
+        "104280",
+        "104453",
+        "104474",
+        "104797",
+        "105302",
+        "105755",
+        "105917",
+        "106536",
+        "106639",
+        "106905",
+        "108295",
+        "108726",
+        "110540",
+        "114525",
+        "114903",
+        "115788",
+        "c07p03",
+        "c07p05"
+      ],
+      "n_train": 74,
+      "n_val": 18
+    },
+    "fold_3": {
+      "train_patients": [
+        "101627",
+        "102035",
+        "102313",
+        "104280",
+        "104420",
+        "104447",
+        "104453",
+        "104474",
+        "104797",
+        "104810",
+        "104871",
+        "104899",
+        "104937",
+        "105302",
+        "105465",
+        "105549",
+        "105597",
+        "105755",
+        "105911",
+        "105917",
+        "106063",
+        "106200",
+        "106270",
+        "106506",
+        "106536",
+        "106639",
+        "106780",
+        "106905",
+        "106976",
+        "107233",
+        "107455",
+        "107508",
+        "107680",
+        "107739",
+        "107966",
+        "108295",
+        "108444",
+        "108726",
+        "108807",
+        "108975",
+        "109395",
+        "109816",
+        "109923",
+        "110012",
+        "110218",
+        "110327",
+        "110497",
+        "110540",
+        "111008",
+        "112378",
+        "112414",
+        "112659",
+        "112730",
+        "112997",
+        "113046",
+        "113845",
+        "114058",
+        "114128",
+        "114266",
+        "114304",
+        "114454",
+        "114525",
+        "114585",
+        "114903",
+        "114990",
+        "115588",
+        "115628",
+        "115788",
+        "c01p03",
+        "c01p04",
+        "c07p02",
+        "c07p03",
+        "c07p05",
+        "c08p01"
+      ],
+      "val_patients": [
+        "107630",
+        "109141",
+        "109654",
+        "109944",
+        "110157",
+        "110280",
+        "110784",
+        "111489",
+        "111852",
+        "112055",
+        "112657",
+        "112765",
+        "114770",
+        "c01p02",
+        "c01p05",
+        "c07p04",
+        "c08p02",
+        "c08p05"
+      ],
+      "n_train": 74,
+      "n_val": 18
+    },
+    "fold_4": {
+      "train_patients": [
+        "101627",
+        "104280",
+        "104420",
+        "104447",
+        "104453",
+        "104474",
+        "104797",
+        "104810",
+        "104871",
+        "104899",
+        "104937",
+        "105302",
+        "105549",
+        "105597",
+        "105755",
+        "105911",
+        "105917",
+        "106270",
+        "106536",
+        "106639",
+        "106780",
+        "106905",
+        "106976",
+        "107455",
+        "107508",
+        "107630",
+        "107680",
+        "107739",
+        "107966",
+        "108295",
+        "108726",
+        "108807",
+        "108975",
+        "109141",
+        "109395",
+        "109654",
+        "109816",
+        "109923",
+        "109944",
+        "110012",
+        "110157",
+        "110218",
+        "110280",
+        "110540",
+        "110784",
+        "111008",
+        "111489",
+        "111852",
+        "112055",
+        "112657",
+        "112659",
+        "112765",
+        "112997",
+        "113046",
+        "113845",
+        "114058",
+        "114128",
+        "114266",
+        "114454",
+        "114525",
+        "114770",
+        "114903",
+        "114990",
+        "115588",
+        "115788",
+        "c01p02",
+        "c01p04",
+        "c01p05",
+        "c07p03",
+        "c07p04",
+        "c07p05",
+        "c08p01",
+        "c08p02",
+        "c08p05"
+      ],
+      "val_patients": [
+        "102035",
+        "102313",
+        "105465",
+        "106063",
+        "106200",
+        "106506",
+        "107233",
+        "108444",
+        "110327",
+        "110497",
+        "112378",
+        "112414",
+        "112730",
+        "114304",
+        "114585",
+        "115628",
+        "c01p03",
+        "c07p02"
+      ],
+      "n_train": 74,
+      "n_val": 18
+    }
+  }
+}

models/for_WMH_Vent/data_splits/for_assignment.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import os
+import json
+import numpy as np
+from sklearn.model_selection import KFold
+# ─────────────────────────────────────────────
+# Patient IDs
+# ─────────────────────────────────────────────
+# IDs of the local cohort (100 entries); the split metadata further down labels
+# this dataset "Local_SAI".
+local_patients_id = [
+    '101228', '101627', '102035', '102313', '104252', '104280', '104420',
+    '104447', '104453', '104474', '104518', '104520', '104670', '104797',
+    '104810', '104871', '104899', '104937', '105074', '105302', '105465',
+    '105549', '105597', '105755', '105911', '105917', '105978', '106063',
+    '106200', '106270', '106506', '106536', '106639', '106780', '106905',
+    '106976', '107130', '107233', '107455', '107508', '107539', '107630',
+    '107680', '107739', '107966', '107997', '108295', '108344', '108444',
+    '108726', '108807', '108975', '109141', '109267', '109395', '109654',
+    '109816', '109923', '109944', '110012', '110157', '110218', '110280',
+    '110327', '110497', '110540', '110543', '110784', '111008', '111140',
+    '111189', '111489', '111691', '111852', '112055', '112378', '112414',
+    '112657', '112659', '112730', '112765', '112776', '112997', '113046',
+    '113394', '113845', '114058', '114128', '114266', '114304', '114454',
+    '114525', '114585', '114770', '114836', '114903', '114990', '115588',
+    '115628', '115788',
+]
+# IDs of the public cohort (15 entries, labeled "Public_MSSEG" in the metadata).
+# The first three characters of each ID (e.g. 'c01') are used below as the
+# center key when the test set is balanced across centers.
+public_patients_id = [
+    'c01p01', 'c01p02', 'c01p03', 'c01p04', 'c01p05',
+    'c07p01', 'c07p02', 'c07p03', 'c07p04', 'c07p05',
+    'c08p01', 'c08p02', 'c08p03', 'c08p04', 'c08p05',
+]
+# Base seed: the test shuffle uses it directly, the fold builders are seeded
+# with RANDOM_SEED + 1 (local) and RANDOM_SEED + 2 (public).
+RANDOM_SEED = 42
+# Number of cross-validation folds built for both datasets.
+N_FOLDS = 4
+# ─────────────────────────────────────────────────────────────────────────────
+# make_folds_exact  (LOCAL)
+#   Carves n_val_per_fold * n_folds patients as an exclusive val pool,
+#   then rotates the val window.  Val sets are perfectly non-overlapping.
+# ─────────────────────────────────────────────────────────────────────────────
+def make_folds_exact(trainval, n_val_per_fold, n_folds, rng):
+    """Build ``n_folds`` folds with disjoint, equally sized validation sets.
+
+    A shuffled copy of ``trainval`` is cut into a validation pool of
+    ``n_folds * n_val_per_fold`` patients and a remainder that belongs to the
+    training set of every fold. Fold ``k`` validates on the k-th consecutive
+    window of the pool and trains on the rest of the pool plus the remainder.
+
+    Args:
+        trainval: Sequence of patient IDs available for training/validation.
+        n_val_per_fold: Number of validation patients in each fold.
+        n_folds: Number of folds to build.
+        rng: Object providing ``shuffle`` (this script passes a
+            ``numpy.random.Generator``); it shuffles the copied array in place.
+
+    Returns:
+        Dict mapping ``"fold_<k>"`` to a dict with the lists
+        ``train_patients`` / ``val_patients`` and their sizes
+        ``n_train`` / ``n_val``.
+
+    Raises:
+        AssertionError: If ``trainval`` holds fewer than
+            ``n_folds * n_val_per_fold`` patients.
+    """
+    arr = np.array(trainval)
+    rng.shuffle(arr)
+    total_val_pool = n_folds * n_val_per_fold           # size of the dedicated validation pool
+    assert total_val_pool <= len(arr), (
+        f"Not enough trainval ({len(arr)}) for {n_folds} x {n_val_per_fold} val = {total_val_pool}"
+    )
+    val_pool   = arr[:total_val_pool]                   # patients that validate in exactly one fold
+    train_base = arr[total_val_pool:]                   # patients that train in every fold
+    folds = {}
+    for fold_idx in range(n_folds):
+        # Validation window of this fold: the fold_idx-th slice of the pool.
+        val_pts = val_pool[fold_idx * n_val_per_fold:(fold_idx + 1) * n_val_per_fold].tolist()
+        # Pool patients outside the window are used for training in this fold.
+        other_val = np.concatenate([
+            val_pool[:fold_idx * n_val_per_fold],
+            val_pool[(fold_idx + 1) * n_val_per_fold:]
+        ])
+        train_pts = np.concatenate([other_val, train_base]).tolist()
+        folds[f"fold_{fold_idx}"] = {
+            "train_patients": train_pts,
+            "val_patients":   val_pts,
+            "n_train": len(train_pts),
+            "n_val":   len(val_pts),
+        }
+    return folds
+# ─────────────────────────────────────────────────────────────────────────────
+# make_folds_kfold  (PUBLIC)
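+#   KFold partitions the shuffled trainval patients into n_folds val sets that
+#   are strictly non-overlapping. With the current settings (12 trainval
+#   patients, N_FOLDS = 4) every fold gets 3 val / 9 train patients.
+#   (If n_folds does not divide the patient count, e.g. 5 folds on 12 patients,
+#    an exact 3 per fold is impossible without overlap (5 * 3 = 15 > 12); KFold
+#    then makes the first folds one patient larger: 3,3,2,2,2.)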
+# ─────────────────────────────────────────────────────────────────────────────
+def make_folds_kfold(trainval, n_folds, rng):
+    """Build ``n_folds`` folds by running ``KFold`` over a shuffled copy.
+
+    Args:
+        trainval: Sequence of patient IDs available for training/validation.
+        n_folds: Number of folds (passed to ``KFold`` as ``n_splits``).
+        rng: Object providing ``shuffle`` (this script passes a
+            ``numpy.random.Generator``); it shuffles the copied array in place.
+
+    Returns:
+        Dict mapping ``"fold_<k>"`` to a dict with the lists
+        ``train_patients`` / ``val_patients`` and their sizes
+        ``n_train`` / ``n_val``.
+    """
+    arr = np.array(trainval)
+    rng.shuffle(arr)
+    # Randomness comes only from the shuffle above, so KFold itself is not
+    # asked to shuffle again.
+    kf = KFold(n_splits=n_folds, shuffle=False)        # arr already shuffled
+    folds = {}
+    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(arr)):
+        # KFold yields index arrays; map them back to patient IDs.
+        folds[f"fold_{fold_idx}"] = {
+            "train_patients": arr[train_idx].tolist(),
+            "val_patients":   arr[val_idx].tolist(),
+            "n_train": len(train_idx),
+            "n_val":   len(val_idx),
+        }
+    return folds
+# ─────────────────────────────────────────────────────────────────────────────
+# LOCAL  --  70 / 10 / 20
+#   100 total  ->  test=20, val=10 per fold, train=70 per fold
+# ─────────────────────────────────────────────────────────────────────────────
+n_local = len(local_patients_id)                       # 99
+n_local_test         = round(n_local * 0.20)           # 20
+n_local_val_per_fold = round(n_local * 0.10)           # 10
+rng_local = np.random.default_rng(RANDOM_SEED)
+local_arr = np.array(local_patients_id)
+rng_local.shuffle(local_arr)
+local_test     = local_arr[:n_local_test].tolist()     # 20
+local_trainval = local_arr[n_local_test:].tolist()     # 79
+local_folds = make_folds_exact(
+    local_trainval,
+    n_val_per_fold=n_local_val_per_fold,
+    n_folds=N_FOLDS,
+    rng=np.random.default_rng(RANDOM_SEED + 1),
+)
+local_split = {
+    "metadata": {
+        "dataset": "Local_SAI",
+        "total_patients": n_local,
+        "test_patients": n_local_test,
+        "trainval_patients": len(local_trainval),
+        "target_split": "70/10/20 (train/val/test)",
+        "exact_counts": "train=69, val=10, test=20 per fold",
+        "n_folds": N_FOLDS,
+        "random_seed": RANDOM_SEED,
+    },
+    "test_set": {"patients": local_test, "n_patients": n_local_test},
+    "folds": local_folds,
+}
+# ─────────────────────────────────────────────────────────────────────────────
+# PUBLIC  --  60 / 20 / 20
+#   15 total  ->  test=3 (center-balanced), trainval=12
+#   KFold(N_FOLDS=4) on 12 -> val sizes: 3,3,3,3  (non-overlapping, 3/15 = 20%)
+#   train sizes:                        9,9,9,9
+# ─────────────────────────────────────────────────────────────────────────────
+n_public = len(public_patients_id)                     # 15
+# Center-balanced test: 1 patient per center
+centers = {}
+for pid in public_patients_id:
+    centers.setdefault(pid[:3], []).append(pid)
+public_test = []
+public_trainval = []
+for center, pids in sorted(centers.items()):
+    arr = np.array(pids)
+    np.random.default_rng(RANDOM_SEED + hash(center) % 1000).shuffle(arr)
+    public_test.append(arr[0])           # 1 test per center  -> 3 total
+    public_trainval += arr[1:].tolist()  # 4 trainval per center -> 12 total
+public_folds = make_folds_kfold(
+    public_trainval,
+    n_folds=N_FOLDS,
+    rng=np.random.default_rng(RANDOM_SEED + 2),
+)
+public_split = {
+    "metadata": {
+        "dataset": "Public_MSSEG",
+        "total_patients": n_public,
+        "test_patients": len(public_test),
+        "trainval_patients": len(public_trainval),
+        "target_split": "60/20/20 (train/val/test)",
+        "n_folds": N_FOLDS,
+        "random_seed": RANDOM_SEED,
+        "center_balanced_test": True,
+    },
+    "test_set": {"patients": public_test, "n_patients": len(public_test)},
+    "folds": public_folds,
+}
+# ─────────────────────────────────────────────────────────────────────────────
+# CONCATENATED
+# ─────────────────────────────────────────────────────────────────────────────
+concat_test = local_test + public_test
+concat_folds = {}
+for fold_key in local_folds:
+    lf = local_folds[fold_key]
+    pf = public_folds[fold_key]
+    concat_folds[fold_key] = {
+        "train_patients": lf["train_patients"] + pf["train_patients"],
+        "val_patients":   lf["val_patients"]   + pf["val_patients"],
+        "n_train": lf["n_train"] + pf["n_train"],
+        "n_val":   lf["n_val"]   + pf["n_val"],
+    }
+concat_split = {
+    "metadata": {
+        "datasets": ["Local_SAI", "Public_MSSEG"],
+        "total_patients": n_local + n_public,
+        "test_patients": len(concat_test),
+        "trainval_patients": len(local_trainval) + len(public_trainval),
+        "local_split": "70/10/20",
+        "public_split": "60/20/20",
+        "n_folds": N_FOLDS,
+        "random_seed": RANDOM_SEED,
+    },
+    "test_set": {"patients": concat_test, "n_patients": len(concat_test)},
+    "folds": concat_folds,
+}
+# ─────────────────────────────────────────────────────────────────────────────
+# Save
+# ─────────────────────────────────────────────────────────────────────────────
+output_dir = os.path.dirname(os.path.abspath(__file__))
+for name, data in [
+    ("local_fold_assignments.json",  local_split),
+    ("public_fold_assignments.json", public_split),
+    ("concat_fold_assignments.json", concat_split),
+]:
+    path = os.path.join(output_dir, name)
+    with open(path, "w") as f:
+        json.dump(data, f, indent=2)
+    print(f"Saved: {path}")
+# ─────────────────────────────────────────────────────────────────────────────
+# Sanity check
+# ─────────────────────────────────────────────────────────────────────────────
+print("\n=== SANITY CHECK ===")
+for label, split_data in [("LOCAL", local_split), ("PUBLIC", public_split), ("CONCAT", concat_split)]:
+    test_pts = set(split_data["test_set"]["patients"])
+    print(f"\n{label}  (test={len(test_pts)})")
+    val_sets = []
+    for fold_key, fold in split_data["folds"].items():
+        train_pts = set(fold["train_patients"])
+        val_pts   = set(fold["val_patients"])
+        val_sets.append(val_pts)
+        tv_overlap   = len(train_pts & val_pts)
+        tst_overlap  = len((train_pts | val_pts) & test_pts)
+        print(f"  {fold_key}: train={len(train_pts):3d}, val={len(val_pts):2d} | "
+              f"train/val overlap={tv_overlap} | (train+val)/test overlap={tst_overlap}")
+    bad = [f"f{i}&f{j}" for i in range(len(val_sets)) for j in range(i+1, len(val_sets)) if val_sets[i] & val_sets[j]]
+    print(f"  Val sets unique across folds: {'FAIL: ' + str(bad) if bad else 'OK'}")

models/for_WMH_Vent/data_splits/local_fold_assignments.json ADDED Viewed

	@@ -0,0 +1,421 @@

+{
+  "metadata": {
+    "dataset": "Local_SAI",
+    "total_patients": 100,
+    "test_patients": 10,
+    "trainval_patients": 90,
+    "target_split": "70/10/20 (train/val/test)",
+    "exact_counts": "train=70, val=10, test=20 per fold",
+    "n_folds": 4,
+    "random_seed": 42
+  },
+  "test_set": {
+    "patients": [
+      "110012",
+      "105549",
+      "109816",
+      "105074",
+      "106780",
+      "107680",
+      "108807",
+      "106063",
+      "114585",
+      "111489"
+    ],
+    "n_patients": 10
+  },
+  "folds": {
+    "fold_0": {
+      "train_patients": [
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266"
+      ],
+      "val_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630"
+      ],
+      "n_train": 80,
+      "n_val": 10
+    },
+    "fold_1": {
+      "train_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266"
+      ],
+      "val_patients": [
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270"
+      ],
+      "n_train": 80,
+      "n_val": 10
+    },
+    "fold_2": {
+      "train_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266"
+      ],
+      "val_patients": [
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420"
+      ],
+      "n_train": 80,
+      "n_val": 10
+    },
+    "fold_3": {
+      "train_patients": [
+        "108726",
+        "105917",
+        "105755",
+        "109141",
+        "110497",
+        "112997",
+        "104810",
+        "108975",
+        "107130",
+        "107630",
+        "109395",
+        "115788",
+        "113845",
+        "114770",
+        "102313",
+        "104797",
+        "111189",
+        "105597",
+        "111140",
+        "106270",
+        "114836",
+        "108295",
+        "104518",
+        "110218",
+        "110784",
+        "101627",
+        "104280",
+        "107966",
+        "101228",
+        "104420",
+        "110543",
+        "114058",
+        "113394",
+        "107739",
+        "112657",
+        "111008",
+        "105911",
+        "111852",
+        "105465",
+        "114128",
+        "110280",
+        "112414",
+        "105302",
+        "107455",
+        "110327",
+        "114990",
+        "112730",
+        "104453",
+        "111691",
+        "114454",
+        "104474",
+        "104252",
+        "109654",
+        "104937",
+        "104871",
+        "107508",
+        "114525",
+        "115588",
+        "110540",
+        "109267",
+        "107539",
+        "108344",
+        "112659",
+        "112776",
+        "113046",
+        "107233",
+        "102035",
+        "106905",
+        "107997",
+        "112378",
+        "104520",
+        "106639",
+        "104670",
+        "104899",
+        "115628",
+        "108444",
+        "109923",
+        "110157",
+        "114304",
+        "114266"
+      ],
+      "val_patients": [
+        "109944",
+        "114903",
+        "112765",
+        "106200",
+        "106506",
+        "106536",
+        "112055",
+        "104447",
+        "106976",
+        "105978"
+      ],
+      "n_train": 80,
+      "n_val": 10
+    }
+  }
+}

models/for_WMH_Vent/data_splits/public_fold_assignments.json ADDED Viewed

	@@ -0,0 +1,102 @@

+{
+  "metadata": {
+    "dataset": "Public_MSSEG",
+    "total_patients": 15,
+    "test_patients": 3,
+    "trainval_patients": 12,
+    "target_split": "60/20/20 (train/val/test)",
+    "n_folds": 4,
+    "random_seed": 42,
+    "center_balanced_test": true
+  },
+  "test_set": {
+    "patients": [
+      "c01p04",
+      "c07p05",
+      "c08p04"
+    ],
+    "n_patients": 3
+  },
+  "folds": {
+    "fold_0": {
+      "train_patients": [
+        "c08p03",
+        "c01p01",
+        "c08p02",
+        "c07p03",
+        "c07p04",
+        "c01p02",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "val_patients": [
+        "c01p05",
+        "c08p01",
+        "c01p03"
+      ],
+      "n_train": 9,
+      "n_val": 3
+    },
+    "fold_1": {
+      "train_patients": [
+        "c01p05",
+        "c08p01",
+        "c01p03",
+        "c07p03",
+        "c07p04",
+        "c01p02",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "val_patients": [
+        "c08p03",
+        "c01p01",
+        "c08p02"
+      ],
+      "n_train": 9,
+      "n_val": 3
+    },
+    "fold_2": {
+      "train_patients": [
+        "c01p05",
+        "c08p01",
+        "c01p03",
+        "c08p03",
+        "c01p01",
+        "c08p02",
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "val_patients": [
+        "c07p03",
+        "c07p04",
+        "c01p02"
+      ],
+      "n_train": 9,
+      "n_val": 3
+    },
+    "fold_3": {
+      "train_patients": [
+        "c01p05",
+        "c08p01",
+        "c01p03",
+        "c08p03",
+        "c01p01",
+        "c08p02",
+        "c07p03",
+        "c07p04",
+        "c01p02"
+      ],
+      "val_patients": [
+        "c07p01",
+        "c08p05",
+        "c07p02"
+      ],
+      "n_train": 9,
+      "n_val": 3
+    }
+  }
+}

models/for_WMH_Vent/download_models.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ Visit our Hugging Face link for downloading the trained models.

models/for_WMH_Vent/folds_results_zscore2_all/per_class_summary.csv ADDED Viewed

	@@ -0,0 +1,9 @@

+Variant,Variant_Name,Class,Class_Name,DICE_mean,DICE_std,DICE_min,DICE_max,PRECISION_mean,PRECISION_std,PRECISION_min,PRECISION_max,RECALL_mean,RECALL_std,RECALL_min,RECALL_max,IOU_mean,IOU_std,IOU_min,IOU_max,SPECIFICITY_mean,SPECIFICITY_std,SPECIFICITY_min,SPECIFICITY_max,HD95_mean,HD95_std,HD95_min,HD95_max,LESION_SENSITIVITY_mean,LESION_SENSITIVITY_std,LESION_PRECISION_mean,LESION_PRECISION_std,LESION_F1_mean,LESION_F1_std,LESION_N_GT_LESIONS_total,LESION_N_PRED_LESIONS_total,LESION_TP_LESIONS_total,LESION_FN_LESIONS_total,LESION_FP_LESIONS_total
+1,unet,1,Ventricles,0.9296308495604303,0.003051861083997252,0.9245313971595007,0.9325869622190041,0.937810327296536,0.004534371323946414,0.9299792648109558,0.9408155762340407,0.9221807114485115,0.002258280011483868,0.9198278289806433,0.9258708828514376,0.86883963293893,0.005257697310767231,0.8600597389216186,0.8739402369187041,0.9992060262932462,5.3781685628696937e-05,0.9991132143255909,0.9992439887040112,1.0,0.0,1.0,1.0,,,,,,,0.0,0.0,0.0,0.0,0.0
+1,unet,2,Abnormal_WMH,0.8471261192911104,0.006988603634009174,0.8380055641483046,0.8562749203494235,0.8861666894324636,0.004785959852918547,0.8819829027399184,0.8938984076631564,0.8156915305742668,0.008049307835491817,0.8038219006844454,0.8241991234390766,0.7363711260717487,0.01045759631012397,0.7227128240374371,0.7500107261976053,0.9992840254178714,1.4410851399106235e-05,0.9992605553058871,0.9992976281722222,4.579276208116416,0.9906935564361317,3.105706418326917,5.669535448758443,,,,,,,,,,,
+2,attnunet,1,Ventricles,0.9104890513851166,0.024899999222747722,0.8675609526332009,0.9278078258646835,0.9203443411150141,0.02293220806285698,0.8806633355974736,0.9350793260117669,0.9019219497921485,0.026912289770452267,0.8562340091217527,0.923007569878129,0.83718265247343,0.040705456147877725,0.7670303812669205,0.8657288299651152,0.9989795287466985,0.000273307129456502,0.9985083429568933,0.9991706542188458,1.2282992876459566,0.3954259655345788,1.0,1.913197150583827,,,,,,,0.0,0.0,0.0,0.0,0.0
+2,attnunet,2,Abnormal_WMH,0.826975751920205,0.015579775036519973,0.8023896495263613,0.8453085695774089,0.8886304925692461,0.009863625617703263,0.8773743681724526,0.9034203955732797,0.779984526149581,0.01966711140506486,0.7519818345459007,0.8069007799264991,0.7066411846722295,0.022442058844092443,0.6715032946218304,0.7335430311874717,0.9993673813767945,5.564253319795054e-05,0.9993140913333589,0.9994591017880485,5.868210623237481,1.1565233310098124,4.299965705388233,7.125643309860791,,,,,,,,,,,
+3,dlv3unet,1,Ventricles,0.9005661992435416,0.0020867923289419102,0.8974816472833985,0.9032637548534808,0.8997698242284116,0.002619190013214462,0.8961697010761339,0.9031854303942591,0.9018365317555639,0.00304923631516394,0.896838192141597,0.9048159553177055,0.8198187029362412,0.0034773428576675017,0.8147050743404023,0.8243681794688564,0.9987641261362938,3.990839508953154e-05,0.9987224004496448,0.9988270370474228,1.0,0.0,1.0,1.0,,,,,,,0.0,0.0,0.0,0.0,0.0
+3,dlv3unet,2,Abnormal_WMH,0.7763168733853871,0.003073255677872925,0.772352120446274,0.7808450853012623,0.7932948495860329,0.01339398897949029,0.775105014208068,0.8127526621991921,0.7653741420470819,0.01188742959950416,0.7489376058870139,0.7803045906209611,0.6370210758311682,0.003999380279290716,0.6319568324618485,0.6429538443669356,0.9985668433084038,0.00013274507182798207,0.9983882554846853,0.9987628711423673,4.7126929962683395,0.5556289444958085,4.095494923513843,5.423612659365294,,,,,,,,,,,
+4,transunet,1,Ventricles,0.9246872887842248,0.004597522753464204,0.917144392619005,0.9284374594079503,0.9320059959760637,0.011631626135529186,0.9158405109177434,0.9481783808085702,0.9184641125298365,0.004922383784531681,0.9100900076594562,0.9224872999070102,0.8603159951545595,0.007862485177144832,0.8474580455251116,0.8667238350341302,0.9991215386213639,0.00017857472387549319,0.9988581545201223,0.999358864513995,1.0,0.0,1.0,1.0,,,,,,,0.0,0.0,0.0,0.0,0.0
+4,transunet,2,Abnormal_WMH,0.8322919090444327,0.010816310171427137,0.81389122085058,0.8417282856836877,0.9035192038694635,0.003183241810633453,0.8989558813413554,0.9074719810187692,0.7761566255927599,0.015082099936625985,0.7515534359141638,0.7926254513986619,0.7142798166712054,0.01573238533985691,0.6875437726337009,0.7281151773663771,0.9994577459658872,1.5563988190790074e-05,0.9994339623025651,0.9994755539827085,5.929181221900818,1.9288286807668098,4.026591793193768,8.744998558832915,,,,,,,,,,,

models/for_WMH_Vent/folds_results_zscore2_all/test_metrics_all_variants_folds.csv ADDED Viewed

	@@ -0,0 +1,27 @@

+Variant,Variant_Name,Fold,Test_Samples,DICE_class_1,DICE_class_2,DICE_mean,PRECISION_class_1,PRECISION_class_2,PRECISION_mean,RECALL_class_1,RECALL_class_2,RECALL_mean,IOU_class_1,IOU_class_2,IOU_mean,SPECIFICITY_class_1,SPECIFICITY_class_2,SPECIFICITY_mean,HD95_class_1,HD95_class_2,HD95_mean,LESION_LESION_SENSITIVITY_class_0,LESION_LESION_PRECISION_class_0,LESION_LESION_F1_class_0,LESION_N_GT_LESIONS_class_0,LESION_N_PRED_LESIONS_class_0,LESION_TP_LESIONS_class_0,LESION_FN_LESIONS_class_0,LESION_FP_LESIONS_class_0,LESION_LESION_SENSITIVITY_class_1,LESION_LESION_PRECISION_class_1,LESION_LESION_F1_class_1,LESION_N_GT_LESIONS_class_1,LESION_N_PRED_LESIONS_class_1,LESION_TP_LESIONS_class_1,LESION_FN_LESIONS_class_1,LESION_FP_LESIONS_class_1,LESION_LESION_SENSITIVITY_mean,LESION_LESION_PRECISION_mean,LESION_LESION_F1_mean,LESION_N_GT_LESIONS_total,LESION_N_PRED_LESIONS_total,LESION_TP_LESIONS_total,LESION_FN_LESIONS_total,LESION_FP_LESIONS_total
+1,unet,0,70,0.924531397,0.843338613,0.883935005,0.929979265,0.882387206,0.906183236,0.919827829,0.812886653,0.866357241,0.860059739,0.730685865,0.795372802,0.999113214,0.999260555,0.999186885,1,5.669535449,3.334767724,,,,,,,,,,,,,,,,,0.810285987,0.717464393,0.753906356,275,309,226,49,84
+1,unet,1,70,0.931030738,0.850885379,0.890958059,0.940815576,0.886398241,0.913606909,0.921931425,0.821858445,0.871894935,0.87127183,0.742075089,0.806673459,0.999243989,0.999293762,0.999268875,1,3.105706418,2.052853209,,,,,,,,,,,,,,,,,0.831870007,0.757975006,0.788804562,275,308,230,45,76
+1,unet,2,70,0.932586962,0.85627492,0.894430941,0.939876251,0.893898408,0.916887329,0.925870883,0.824199123,0.875035003,0.873940237,0.750010726,0.811975482,0.999235735,0.999297628,0.999266682,1,4.274767062,2.637383531,,,,,,,,,,,,,,,,,0.8190735,0.761199505,0.785236019,275,299,227,48,69
+1,unet,3,70,0.930374301,0.838005564,0.884189933,0.940570217,0.881982903,0.91127656,0.921092709,0.803821901,0.862457305,0.870086726,0.722712824,0.796399775,0.999231167,0.999284157,0.999257662,1,5.267095904,3.133547952,,,,,,,,,,,,,,,,,0.803511136,0.755088192,0.768750434,275,312,221,54,92
+2,attnunet,0,70,0.927807826,0.84530857,0.886558198,0.933405023,0.891192522,0.912298773,0.92300757,0.80690078,0.864954175,0.86572883,0.733543031,0.799635931,0.9991422,0.99933414,0.99923817,1,6.817768379,3.90888419,,,,,,,,,,,,,,,,,0.805918858,0.737682024,0.763465574,275,311,224,51,81
+2,attnunet,1,70,0.921130442,0.827778912,0.874454677,0.935079326,0.882534684,0.908807005,0.908634865,0.784884088,0.846759477,0.854381693,0.707421007,0.78090135,0.999170654,0.999314091,0.999242373,1,4.299965705,2.649982853,,,,,,,,,,,,,,,,,0.797643669,0.741290568,0.757996731,275,306,221,54,83
+2,attnunet,2,70,0.925456985,0.832425877,0.878941431,0.93222968,0.903420396,0.917825038,0.919811356,0.776171402,0.847991379,0.861589705,0.714097406,0.787843556,0.999096918,0.999459102,0.99927801,1,5.229465099,3.114732549,,,,,,,,,,,,,,,,,0.800801564,0.781510442,0.783598099,275,291,222,53,66
+2,attnunet,3,70,0.867560953,0.80238965,0.834975301,0.880663336,0.877374368,0.879018852,0.856234009,0.751981835,0.804107922,0.767030381,0.671503295,0.719266838,0.998508343,0.999362192,0.998935268,1.913197151,7.12564331,4.51942023,,,,,,,,,,,,,,,,,0.800023323,0.639032382,0.699206596,275,339,222,53,112
+3,dlv3unet,0,70,0.900234027,0.780845085,0.840539556,0.896169701,0.794573359,0.84537153,0.904815955,0.772086706,0.838451331,0.819275677,0.642953844,0.731114761,0.9987224,0.998555632,0.998639016,1,5.423612659,3.21180633,,,,,,,,,,,,,,,,,0.753118133,0.708678143,0.719359836,275,287,209,66,85
+3,dlv3unet,1,70,0.903263755,0.776870874,0.840067314,0.90318543,0.812752662,0.857969046,0.90365004,0.748937606,0.826293823,0.824368179,0.63776127,0.731064725,0.998827037,0.998762871,0.998794954,1,4.251005732,2.625502866,,,,,,,,,,,,,,,,,0.730627383,0.773161114,0.746853388,275,257,200,75,65
+3,dlv3unet,2,70,0.897481647,0.77235212,0.834916884,0.898679638,0.790748363,0.844714001,0.896838192,0.760167666,0.828502929,0.814705074,0.631956832,0.723330953,0.998738575,0.998560615,0.998649595,1,5.08065867,3.040329335,,,,,,,,,,,,,,,,,0.710273969,0.713006921,0.702347611,275,273,196,79,86
+3,dlv3unet,3,70,0.901285368,0.775199414,0.838242391,0.901044527,0.775105014,0.838074771,0.902041939,0.780304591,0.841173265,0.820925881,0.635412356,0.728169118,0.998768492,0.998388255,0.998578374,1,4.095494924,2.547747462,,,,,,,,,,,,,,,,,0.686803058,0.707394828,0.686217762,275,264,189,86,84
+4,transunet,0,70,0.928372145,0.841728286,0.885050215,0.948178381,0.902428523,0.925303452,0.910090008,0.792625451,0.85135773,0.866710076,0.728115177,0.797412627,0.999358865,0.999433962,0.999396413,1,8.744998559,4.872499279,,,,,,,,,,,,,,,,,0.791318113,0.78846014,0.778279842,275,299,224,51,71
+4,transunet,1,70,0.928437459,0.837057166,0.882747312,0.935028265,0.907471981,0.921250123,0.9224873,0.779866141,0.851176721,0.866723835,0.721270863,0.793997349,0.999162149,0.99945489,0.999308519,1,6.680013727,3.840006863,,,,,,,,,,,,,,,,,0.769424443,0.762231589,0.753840342,275,297,214,61,74
+4,transunet,2,70,0.924795158,0.836490964,0.880643061,0.928976828,0.90522043,0.917098629,0.921395395,0.780581474,0.850988434,0.860372024,0.720189454,0.790280739,0.999106986,0.999466578,0.999286782,1,4.026591793,2.513295897,,,,,,,,,,,,,,,,,0.768415042,0.766517149,0.760036199,275,282,215,60,64
+4,transunet,3,70,0.917144393,0.813891221,0.865517807,0.915840511,0.898955881,0.907398196,0.919883748,0.751553436,0.835718592,0.847458046,0.687543773,0.767500909,0.998858155,0.999475554,0.999166854,1,4.265120809,2.632560404,,,,,,,,,,,,,,,,,0.810819711,0.695502554,0.740358673,275,330,225,50,98
+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+,unet - mean,,,0.9296,0.8471,0.8884,0.9378,0.8862,0.912,0.9222,0.8157,0.8689,0.8688,0.7364,0.8026,0.9992,0.9993,0.9992,1,4.6,2.8,,,,,,,,,,,,,,,,,0.8162,0.7479,0.7742,275,307,226,49,80.25
+,attn - mean,,,0.9105,0.827,0.8687,0.9203,0.8886,0.9045,0.9019,0.78,0.841,0.8372,0.7066,0.7719,0.999,0.9994,0.9992,1.2,5.9,3.5,,,,,,,,,,,,,,,,,0.8011,0.7249,0.7511,275,311.75,222.25,52.75,85.5
+,dlv3 - mean,,,0.9006,0.7763,0.8384,0.8998,0.7933,0.8465,0.9018,0.7654,0.8336,0.8198,0.637,0.7284,0.9988,0.9986,0.9987,1,4.7,2.9,,,,,,,,,,,,,,,,,0.7202,0.7256,0.7137,275,270.25,198.5,76.5,80
+,trans - mean,,,0.9247,0.8323,0.8785,0.932,0.9035,0.9178,0.9185,0.7762,0.8473,0.8603,0.7143,0.7873,0.9991,0.9995,0.9993,1,5.9,3.5,,,,,,,,,,,,,,,,,0.785,0.7532,0.7581,275,302,219.5,55.5,76.75
+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+,unet - std,,,0.0031,0.007,0.0045,0.0045,0.0048,0.0039,0.0023,0.008,0.0049,0.0053,0.0105,0.007,0.0001,0,0,0,1,0.5,,,,,,,,,,,,,,,,,0.0106,0.0177,0.0139,0,4.8477,3.2404,3.2404,8.6132
+,attn - std,,,0.0249,0.0156,0.02,0.0229,0.0099,0.0151,0.0269,0.0197,0.0225,0.0407,0.0224,0.0311,0.0003,0.0001,0.0001,0.4,1.2,0.7,,,,,,,,,,,,,,,,,0.003,0.0525,0.0314,0,17.3692,1.0897,1.0897,16.6508
+,dlv3 - std,,,0.0021,0.0031,0.0022,0.0026,0.0134,0.0072,0.003,0.0119,0.0063,0.0035,0.004,0.0032,0,0.0001,0.0001,0,0.6,0.3,,,,,,,,,,,,,,,,,0.0245,0.0276,0.0224,0,11.211,7.2284,7.2284,8.6891
+,trans - std,,,0.0046,0.0108,0.0076,0.0116,0.0032,0.0066,0.0049,0.0151,0.0067,0.0079,0.0157,0.0117,0.0002,0,0.0001,0,1.9,1,,,,,,,,,,,,,,,,,0.0175,0.0348,0.0136,0,17.4499,5.0249,5.0249,12.794

models/for_WMH_Vent/folds_results_zscore2_all/training_info_all_variants_folds.csv ADDED Viewed

	@@ -0,0 +1,17 @@

+Variant,Variant_Name,Fold,Best_Epoch,Composite_Score,Total_Epochs,First_Valid_Epoch,Total_Valid_Epochs,Best_Epoch_Val_Loss,Best_Epoch_Dice_Ventricles,Best_Epoch_Dice_Abnormal_WMH,Best_Epoch_Dice_Mean,Best_Abnormal_Epoch,Best_Abnormal_Dice,Best_Ventricles_Epoch,Best_Ventricles_Dice
+1,unet,0,49,0.837773437480731,60,1,60,0.24741753935813904,0.9308493801953847,0.8054339622632157,0.9115566193830764,43,0.8058087693216404,49,0.9308493801953847
+1,unet,1,45,0.8509202240606865,60,1,60,0.3080134391784668,0.9268369262837436,0.8394508168223501,0.9215010850395001,28,0.8441074580031014,38,0.9274915960857308
+1,unet,2,36,0.8128944361644407,60,1,60,0.27736401557922363,0.9342240045327603,0.7672727272708917,0.9000714500704411,32,0.7696575927137208,34,0.9378331718769447
+1,unet,3,41,0.8148548201069025,60,1,60,0.3056482672691345,0.9412208603997376,0.7717556478564912,0.9037425888471717,41,0.7717556478564912,44,0.9415513142951589
+2,attnunet,0,38,0.8465806985395226,60,1,60,0.2354777455329895,0.9361820594989245,0.8154564254052402,0.9167136699540254,38,0.8154564254052402,49,0.9369088654755128
+2,attnunet,1,42,0.8468065449382642,60,1,60,0.3282952904701233,0.9189075870475,0.8399396631183776,0.9189869898404228,42,0.8399396631183776,42,0.9189075870475
+2,attnunet,2,35,0.8082210232243792,60,1,60,0.2833690643310547,0.9301114433264854,0.7625408277658984,0.8971071685730373,35,0.7625408277658984,38,0.932546742487403
+2,attnunet,3,35,0.7675559444491301,60,1,60,0.3719373941421509,0.8997412800024551,0.7247121664376812,0.8740189336882455,35,0.7247121664376812,51,0.9082138618936411
+3,dlv3unet,0,41,0.7945477803722963,60,1,60,0.3116353750228882,0.8988588122221663,0.7600894570132255,0.8856122734890453,41,0.7600894570132255,54,0.9004052827384709
+3,dlv3unet,1,42,0.8150221762616997,60,1,60,0.3728603720664978,0.9037163637265287,0.8019888405839849,0.9011564065443626,42,0.8019888405839849,40,0.9049275398385249
+3,dlv3unet,2,28,0.7727672322932403,60,1,60,0.34316256642341614,0.9029322657428571,0.7270063486878747,0.8760687237787342,34,0.7281193622294404,40,0.9059795923856953
+3,dlv3unet,3,28,0.768303148626621,60,1,60,0.3877088725566864,0.9124585568837787,0.7222274480285933,0.8774333183819368,28,0.7222274480285933,37,0.916682545438336
+4,transunet,0,39,0.8410510311241638,60,1,60,0.24608999490737915,0.9346885813142664,0.808755760367703,0.9139696605000669,35,0.8090334741168289,45,0.9357508099451346
+4,transunet,1,48,0.8483119522767122,60,1,60,0.3149019777774811,0.9253444084272909,0.8369980458771218,0.9201816141429934,29,0.8392693984451316,50,0.9254004460337096
+4,transunet,2,35,0.8109694312756469,60,1,60,0.27634185552597046,0.9331867846270409,0.7644126357335528,0.89876925662194,35,0.7644126357335528,55,0.9336813537844856
+4,transunet,3,28,0.7773769906034641,60,1,60,0.34197694063186646,0.9332184349847285,0.7193485902853871,0.883513943096132,28,0.7193485902853871,50,0.9413152166406702

models/for_WMH_Vent/folds_results_zscore2_all/variant_comparison_test.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+Variant,Variant_Name,N_Folds,DICE_Mean,DICE_Std,DICE_Class1_Mean,DICE_Class1_Std,DICE_Class2_Mean,DICE_Class2_Std,PRECISION_Mean,PRECISION_Std,PRECISION_Class1_Mean,PRECISION_Class1_Std,PRECISION_Class2_Mean,PRECISION_Class2_Std,RECALL_Mean,RECALL_Std,RECALL_Class1_Mean,RECALL_Class1_Std,RECALL_Class2_Mean,RECALL_Class2_Std,IOU_Mean,IOU_Std,IOU_Class1_Mean,IOU_Class1_Std,IOU_Class2_Mean,IOU_Class2_Std,SPECIFICITY_Mean,SPECIFICITY_Std,SPECIFICITY_Class1_Mean,SPECIFICITY_Class1_Std,SPECIFICITY_Class2_Mean,SPECIFICITY_Class2_Std,HD95_Mean,HD95_Std,HD95_Class1_Mean,HD95_Class1_Std,HD95_Class2_Mean,HD95_Class2_Std,LESION_SENSITIVITY_Mean,LESION_SENSITIVITY_Std,LESION_PRECISION_Mean,LESION_PRECISION_Std,LESION_F1_Mean,LESION_F1_Std,LESION_N_GT_LESIONS_Total,LESION_N_PRED_LESIONS_Total,LESION_TP_LESIONS_Total,LESION_FN_LESIONS_Total,LESION_FP_LESIONS_Total
+1,unet,4,0.88837848442577,0.004488176438408171,0.9296308495604303,0.003051861083997252,0.8471261192911104,0.006988603634009174,0.9119885083644996,0.003899542620426997,0.937810327296536,0.004534371323946414,0.8861666894324636,0.004785959852918547,0.8689361210113892,0.004862525795740292,0.9221807114485115,0.002258280011483868,0.8156915305742668,0.008049307835491817,0.8026053795053394,0.006985123311053306,0.86883963293893,0.005257697310767231,0.7363711260717487,0.01045759631012397,0.9992450258555589,3.3829763475009536e-05,0.9992060262932462,5.3781685628696937e-05,0.9992840254178714,1.4410851399106235e-05,2.789638104058208,0.4953467782180657,1.0,0.0,4.579276208116416,0.9906935564361317,0.8161851575612329,0.010604103780010409,0.7479317737104494,0.01772268907341698,0.7741743428489778,0.01393389833458131,1100,1228,904,196,321
+2,attnunet,4,0.8687324016526609,0.019964152607882032,0.9104890513851166,0.024899999222747722,0.826975751920205,0.015579775036519973,0.9044874168421302,0.015051711319037623,0.9203443411150141,0.02293220806285698,0.8886304925692461,0.009863625617703263,0.8409532379708647,0.02245478860306288,0.9019219497921485,0.026912289770452267,0.779984526149581,0.01966711140506486,0.7719119185728298,0.031123754171912342,0.83718265247343,0.040705456147877725,0.7066411846722295,0.022442058844092443,0.9991734550617465,0.000138385919780625,0.9989795287466985,0.000273307129456502,0.9993673813767945,5.564253319795054e-05,3.5482549554417186,0.7190357923335028,1.2282992876459566,0.3954259655345788,5.868210623237481,1.1565233310098124,0.8010968535966527,0.0030172783603819066,0.7248788539521523,0.05246431861595214,0.7510667501711839,0.0314226022122775,1100,1247,889,211,342
+3,dlv3unet,4,0.8384415363144644,0.0022083747230999306,0.9005661992435416,0.0020867923289419102,0.7763168733853871,0.003073255677872925,0.8465323369072222,0.0071934443649559685,0.8997698242284116,0.002619190013214462,0.7932948495860329,0.01339398897949029,0.8336053369013228,0.0063294943822809775,0.9018365317555639,0.00304923631516394,0.7653741420470819,0.01188742959950416,0.7284198893837046,0.003170869073367662,0.8198187029362412,0.0034773428576675017,0.6370210758311682,0.003999380279290716,0.9986654847223488,7.953573517565171e-05,0.9987641261362938,3.990839508953154e-05,0.9985668433084038,0.00013274507182798207,2.8563464981341697,0.27781447224790423,1.0,0.0,4.7126929962683395,0.5556289444958085,0.720205635740483,0.024526594995177977,0.7255602516176558,0.02756091610984561,0.7136946491830041,0.022446218734487867,1100,1081,794,306,320
+4,transunet,4,0.8784895989143288,0.007649748417636039,0.9246872887842248,0.004597522753464204,0.8322919090444327,0.010816310171427137,0.9177625999227637,0.00664998101397249,0.9320059959760637,0.011631626135529186,0.9035192038694635,0.003183241810633453,0.8473103690612981,0.006693789315880423,0.9184641125298365,0.004922383784531681,0.7761566255927599,0.015082099936625985,0.7872979059128824,0.011704790584520298,0.8603159951545595,0.007862485177144832,0.7142798166712054,0.01573238533985691,0.9992896422936255,8.191687124816227e-05,0.9991215386213639,0.00017857472387549319,0.9994577459658872,1.5563988190790074e-05,3.4645906109504088,0.964414340383405,1.0,0.0,5.929181221900818,1.9288286807668098,0.784994327470758,0.01749453278425242,0.7531778580152171,0.03475336972283068,0.7581287639607504,0.013636999062794438,1100,1208,878,222,307

models/for_WMH_Vent/folds_results_zscore2_all/variant_comparison_training.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+Variant,Variant_Name,N_Folds,Best_Epoch_Mean,Best_Epoch_Std,Best_Epoch_Min,Best_Epoch_Max,Composite_Score_Mean,Composite_Score_Std,Best_Epoch_Val_Loss_Mean,Best_Epoch_Val_Loss_Std,Best_Epoch_Dice_Mean_Mean,Best_Epoch_Dice_Mean_Std,Best_Epoch_Dice_Ventricles_Mean,Best_Epoch_Dice_Ventricles_Std,Best_Epoch_Dice_Abnormal_WMH_Mean,Best_Epoch_Dice_Abnormal_WMH_Std
+1,unet,4,42.75,4.815340071064556,36,49,0.8291107294531901,0.0159444009351335,0.284610815346241,0.02462779461924618,0.9092179358350473,0.00821557699733339,0.9332827928529066,0.005276587173391204,0.7959782885532373,0.02911192217235299
+2,attnunet,4,37.5,2.8722813232690143,35,42,0.8172910527878241,0.03272955110785234,0.3047698736190796,0.05080431749397413,0.9017066905139327,0.018107910266314746,0.9212355924688412,0.013870856883885596,0.7856622706817994,0.044953429555710196
+3,dlv3unet,4,34.75,6.7592529172978875,28,42,0.7876600843884642,0.018658861388516038,0.35384179651737213,0.029172388598382663,0.8850676805485197,0.009980085098274465,0.9044914996438328,0.0049556335292024285,0.7528280235784197,0.03190873102426834
+4,transunet,4,37.5,7.22841614740048,28,48,0.8194273513199967,0.028025314220223092,0.2948276922106743,0.0365483168698032,0.904108618590283,0.01421469994866525,0.9316095523383316,0.0036677177073460732,0.7823787580659411,0.0446502980212195

models/for_WMH_Vent/model_training_scripts/attn_unet_model.py ADDED Viewed

	@@ -0,0 +1,85 @@

+###################### Libraries ######################
+# Deep Learning
+import keras
+from keras.models import Model
+from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
+def build_attention_unet_3class(input_shape=(256, 256, 1), num_classes=3):
+    """
+    Build an Attention U-Net with dropout regularisation.
+    Layout: four encoder stages (64/128/256/512 filters), a 1024-filter bridge,
+    and four decoder stages whose skip connections pass through attention gates.
+    Args:
+        input_shape: Shape of one input image, passed to ``Input``
+        num_classes: 1 gives a single sigmoid channel, anything else a
+            ``num_classes``-channel softmax
+    Returns:
+        Uncompiled ``Model`` mapping the input image to per-pixel predictions
+    """
+    def double_conv(tensor, filters):
+        """Apply two consecutive 3x3 ReLU convolutions with 'same' padding."""
+        for _ in range(2):
+            tensor = Conv2D(filters, 3, activation='relu', padding='same')(tensor)
+        return tensor
+    def attention_gate(gating, skip, inter_channels):
+        """Scale `skip` by a one-channel sigmoid map computed from `gating` and `skip`."""
+        gating_proj = Conv2D(inter_channels, 1, padding='same')(gating)
+        skip_proj = Conv2D(inter_channels, 1, padding='same')(skip)
+        attention = keras.layers.Add()([gating_proj, skip_proj])
+        attention = keras.layers.Activation('relu')(attention)
+        attention = Conv2D(1, 1, padding='same')(attention)
+        attention = keras.layers.Activation('sigmoid')(attention)
+        return keras.layers.Multiply()([skip, attention])
+    inputs = Input(input_shape)
+    # Encoder: (filters, dropout rate applied after pooling) per stage.
+    skips = []
+    x = inputs
+    for filters, drop_rate in ((64, 0.1), (128, 0.1), (256, 0.2), (512, 0.2)):
+        x = double_conv(x, filters)
+        skips.append(x)
+        x = MaxPooling2D(2)(x)
+        x = keras.layers.Dropout(drop_rate)(x)
+    # Bridge
+    x = double_conv(x, 1024)
+    x = keras.layers.Dropout(0.3)(x)
+    # Decoder: transposed-conv upsampling, gated skip (deepest first), then double conv.
+    for filters, drop_rate in ((512, 0.2), (256, 0.2), (128, 0.1), (64, 0.1)):
+        upsampled = Conv2DTranspose(filters, 2, strides=2, padding='same')(x)
+        gated_skip = attention_gate(upsampled, skips.pop(), filters // 2)
+        x = concatenate([upsampled, gated_skip])
+        x = keras.layers.Dropout(drop_rate)(x)
+        x = double_conv(x, filters)
+    # Output head: sigmoid for the single-channel case, softmax otherwise.
+    if num_classes == 1:
+        head = Conv2D(1, 1, activation='sigmoid')
+    else:
+        head = Conv2D(num_classes, 1, activation='softmax')
+    outputs = head(x)
+    return Model(inputs, outputs)

models/for_WMH_Vent/model_training_scripts/base_runner_all.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import subprocess
+import sys
+import numpy as np
+# Run the experiment script once per (fold, variant) pair, one after another.
+# Add ids to these sets to leave folds or variants out of a run.
+SKIP_FOLDS = set()
+SKIP_VARIANTS = {0}
+for fold in range(4):
+    if fold in SKIP_FOLDS:
+        continue
+    for variant in range(5):
+        if variant in SKIP_VARIANTS:
+            continue
+        # NOTE: this launch line had been commented out, leaving the runner a no-op.
+        completed = subprocess.run([sys.executable, "p4_run_experiments_all.py", "--variant", str(variant), "--fold", str(fold), "--scenario", "standard_3class"])
+        # Keep going after a failed run, but do not let the failure pass silently.
+        if completed.returncode != 0:
+            print(f"[WARN] fold={fold} variant={variant} exited with code {completed.returncode}", file=sys.stderr)

models/for_WMH_Vent/model_training_scripts/dlv3_unet_model.py ADDED Viewed

	@@ -0,0 +1,198 @@

+###################### Libraries ######################
+# Deep Learning
+import tensorflow as tf
+import keras
+from keras.models import Model, load_model
+from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
+from keras import backend as K
+from tensorflow.keras import layers, optimizers, callbacks
+from keras.utils import to_categorical
+def build_deeplabv3_unet_3class(input_shape=(256, 256, 1), num_classes=3, aspp_upsample_factor=2):
+    """
+    Build a DeepLabV3+-style encoder/decoder on a ResNet-50-shaped backbone.
+    Reference: "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
+    This variant has no normalization layers: every convolution is followed
+    directly by ReLU.
+    Resolution flow (relative to the input):
+      - stem conv (stride 2) + max-pool (stride 2)      -> 1/4
+      - conv2_x (stride 1), used as low-level features  -> 1/4
+      - conv3_x (first block stride 2)                  -> 1/8
+      - conv4_x / conv5_x (stride 1, dilation 2 / 4)    -> 1/8
+      - ASPP                                            -> 1/8
+    The ASPP output therefore needs a 2x upsample to line up with the
+    low-level features. Earlier revisions upsampled 4x and then cropped to the
+    low-level size, which kept only the top-left part of the high-level map.
+    Args:
+        input_shape: Shape of one input image, e.g. (256, 256, 1)
+        num_classes: 1 gives a single sigmoid channel, anything else a
+            ``num_classes``-channel softmax
+        aspp_upsample_factor: Upsampling factor applied to the ASPP output
+            before fusing with the low-level features. The default 2 matches
+            the encoder strides; pass 4 only to rebuild the earlier graph for
+            checkpoints that were trained with it.
+    Returns:
+        Uncompiled ``keras.Model`` named 'DeepLabV3Plus_ResNet50'
+    """
+    def bottleneck_residual_block(x, filters, strides=1, dilation_rate=1, projection_shortcut=False, name_prefix=""):
+        """ResNet bottleneck (1x1 -> 3x3 -> 1x1 with 4x channel expansion), optionally atrous."""
+        shortcut = x
+        # 1x1 projection so the shortcut matches the main path's channels and stride
+        if projection_shortcut:
+            shortcut = layers.Conv2D(filters * 4, 1, strides=strides, use_bias=False,
+                                   name=f"{name_prefix}_0_conv")(shortcut)
+        # Bottleneck layers
+        x = layers.Conv2D(filters, 1, use_bias=False, name=f"{name_prefix}_1_conv")(x)
+        x = layers.Activation('relu')(x)
+        x = layers.Conv2D(filters, 3, strides=strides, padding='same',
+                         dilation_rate=dilation_rate, use_bias=False, name=f"{name_prefix}_2_conv")(x)
+        x = layers.Activation('relu')(x)
+        x = layers.Conv2D(filters * 4, 1, use_bias=False, name=f"{name_prefix}_3_conv")(x)
+        x = layers.Add()([shortcut, x])
+        x = layers.Activation('relu')(x)
+        return x
+    def aspp_block(x, filters=256):
+        """Atrous Spatial Pyramid Pooling: 1x1, three dilated 3x3 branches and an image-level branch."""
+        # 1x1 convolution
+        b1 = layers.Conv2D(filters, 1, use_bias=False, name='aspp_1x1')(x)
+        b1 = layers.Activation('relu')(b1)
+        # 3x3 convolution with rate = 6
+        b2 = layers.Conv2D(filters, 3, padding='same', dilation_rate=6, use_bias=False, name='aspp_3x3_6')(x)
+        b2 = layers.Activation('relu')(b2)
+        # 3x3 convolution with rate = 12
+        b3 = layers.Conv2D(filters, 3, padding='same', dilation_rate=12, use_bias=False, name='aspp_3x3_12')(x)
+        b3 = layers.Activation('relu')(b3)
+        # 3x3 convolution with rate = 18
+        b4 = layers.Conv2D(filters, 3, padding='same', dilation_rate=18, use_bias=False, name='aspp_3x3_18')(x)
+        b4 = layers.Activation('relu')(b4)
+        # Image-level features: global average pool, 1x1 conv, then resize back
+        # to the spatial size of the ASPP input.
+        feature_shape = tf.shape(x)
+        h, w = feature_shape[1], feature_shape[2]
+        b5 = layers.GlobalAveragePooling2D(name='aspp_gap')(x)
+        b5 = layers.Reshape((1, 1, -1))(b5)
+        b5 = layers.Conv2D(filters, 1, use_bias=False, name='aspp_gap_conv')(b5)
+        b5 = layers.Activation('relu')(b5)
+        def resize_to_input_shape(args):
+            features, spatial_shape = args
+            return tf.image.resize(features, spatial_shape, method='bilinear')
+        b5 = layers.Lambda(resize_to_input_shape, name='aspp_gap_resize')([b5, [h, w]])
+        # Concatenate all branches
+        concat_features = layers.Concatenate(name='aspp_concat')([b1, b2, b3, b4, b5])
+        # Final 1x1 convolution
+        output = layers.Conv2D(filters, 1, use_bias=False, name='aspp_final_conv')(concat_features)
+        output = layers.Activation('relu')(output)
+        output = layers.Dropout(0.1, name='aspp_dropout')(output)
+        return output
+    # Input layer
+    inputs = layers.Input(input_shape, name='input')
+    # ==================== ENCODER (ResNet-50 Backbone) ====================
+    # Initial convolution
+    x = layers.Conv2D(64, 7, strides=2, padding='same', use_bias=False, name='conv1')(inputs)
+    x = layers.Activation('relu')(x)
+    x = layers.MaxPooling2D(3, strides=2, padding='same', name='pool1')(x)
+    # Stage 1 (conv2_x) - 1/4 resolution, low-level features for decoder
+    x = bottleneck_residual_block(x, 64, strides=1, projection_shortcut=True, name_prefix='conv2_block1')
+    x = bottleneck_residual_block(x, 64, name_prefix='conv2_block2')
+    low_level_features = bottleneck_residual_block(x, 64, name_prefix='conv2_block3')
+    # Stage 2 (conv3_x) - 1/8 resolution
+    x = bottleneck_residual_block(low_level_features, 128, strides=2, projection_shortcut=True, name_prefix='conv3_block1')
+    for block_id in range(2, 5):
+        x = bottleneck_residual_block(x, 128, name_prefix=f'conv3_block{block_id}')
+    # Stage 3 (conv4_x) - atrous rate 2, resolution stays at 1/8
+    x = bottleneck_residual_block(x, 256, strides=1, dilation_rate=2, projection_shortcut=True, name_prefix='conv4_block1')
+    for block_id in range(2, 7):
+        x = bottleneck_residual_block(x, 256, dilation_rate=2, name_prefix=f'conv4_block{block_id}')
+    # Stage 4 (conv5_x) - atrous rate 4, resolution stays at 1/8
+    x = bottleneck_residual_block(x, 512, strides=1, dilation_rate=4, projection_shortcut=True, name_prefix='conv5_block1')
+    for block_id in range(2, 4):
+        x = bottleneck_residual_block(x, 512, dilation_rate=4, name_prefix=f'conv5_block{block_id}')
+    # ==================== ASPP MODULE ====================
+    x = aspp_block(x, filters=256)
+    # ==================== DECODER ====================
+    # ASPP output is at 1/8 resolution, low_level_features at 1/4 resolution,
+    # so the default factor of 2 aligns the two feature maps.
+    x = layers.UpSampling2D(size=(aspp_upsample_factor, aspp_upsample_factor), interpolation='bilinear', name='decoder_upsample1')(x)
+    # Process low-level features
+    low_level_features = layers.Conv2D(48, 1, use_bias=False, name='decoder_low_level_conv')(low_level_features)
+    low_level_features = layers.Activation('relu')(low_level_features)
+    # Trim the upsampled map when it overshoots the low-level map (e.g. by one
+    # pixel when an encoder stage had an odd spatial size).
+    def match_spatial_dims(tensors):
+        high_level, low_level = tensors
+        low_shape = tf.shape(low_level)
+        high_level_matched = high_level[:, :low_shape[1], :low_shape[2], :]
+        return high_level_matched, low_level
+    x_matched, low_level_matched = layers.Lambda(match_spatial_dims, name='match_dims')([x, low_level_features])
+    # Concatenate high-level and low-level features
+    x = layers.Concatenate(name='decoder_concat')([x_matched, low_level_matched])
+    # Refine features
+    x = layers.Conv2D(256, 3, padding='same', use_bias=False, name='decoder_conv1')(x)
+    x = layers.Activation('relu')(x)
+    x = layers.Dropout(0.1, name='decoder_dropout1')(x)  # Light regularization
+    x = layers.Conv2D(256, 3, padding='same', use_bias=False, name='decoder_conv2')(x)
+    x = layers.Activation('relu')(x)
+    x = layers.Dropout(0.1, name='decoder_dropout2')(x)
+    # Final upsampling from 1/4 back to the input resolution (4x upsampling)
+    x = layers.UpSampling2D(size=(4, 4), interpolation='bilinear', name='decoder_upsample2')(x)
+    # ==================== OUTPUT ====================
+    # Sigmoid for the single-channel case, softmax otherwise
+    if num_classes == 1:
+        outputs = layers.Conv2D(1, 1, activation='sigmoid', name='output')(x)
+    else:
+        outputs = layers.Conv2D(num_classes, 1, activation='softmax', name='output')(x)
+    # Create model
+    model = keras.Model(inputs, outputs, name='DeepLabV3Plus_ResNet50')
+    return model

models/for_WMH_Vent/model_training_scripts/dlv3_unet_model_GN.py ADDED Viewed

	@@ -0,0 +1,247 @@

+###################### Libraries ######################
+# Deep Learning
+import tensorflow as tf
+import keras
+from keras.models import Model, load_model
+from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
+from keras import backend as K
+from tensorflow.keras import layers, optimizers, callbacks
+from keras.utils import to_categorical
+def build_deeplabv3_unet_3class(input_shape=(256, 256, 1), num_classes=3, aspp_upsample_factor=2):
+    """
+    DeepLabV3+ with ResNet-50 backbone and GroupNormalization.
+    Every convolution (except the output head) is followed by
+    GroupNormalization with groups=4. GroupNorm computes its statistics per
+    sample, so the normalization itself does not depend on batch size or on
+    the training flag.
+    Resolution flow (relative to the input):
+      - stem conv (stride 2) + max-pool (stride 2)      -> 1/4
+      - conv2_x (stride 1), used as low-level features  -> 1/4
+      - conv3_x (first block stride 2)                  -> 1/8
+      - conv4_x / conv5_x (stride 1, dilation 2 / 4)    -> 1/8
+      - ASPP                                            -> 1/8
+    The ASPP output therefore needs a 2x upsample to line up with the
+    low-level features. Earlier revisions upsampled 4x and then cropped to the
+    low-level size, which kept only the top-left part of the high-level map.
+    Input:  single-channel (grayscale) MRI images  →  (H, W, 1)
+    Output: per-pixel class probabilities           →  (H, W, num_classes)
+            or binary mask                          →  (H, W, 1)  when num_classes==1
+    Args:
+        input_shape: Shape of one input image, e.g. (256, 256, 1)
+        num_classes: 1 gives a single sigmoid channel, anything else a
+            ``num_classes``-channel softmax
+        aspp_upsample_factor: Upsampling factor applied to the ASPP output
+            before fusing with the low-level features. The default 2 matches
+            the encoder strides; pass 4 only to rebuild the earlier graph for
+            checkpoints that were trained with it.
+    Returns:
+        Uncompiled ``keras.Model`` named 'DeepLabV3Plus_ResNet50_GN'
+    Reference:
+        "Encoder-Decoder with Atrous Separable Convolution for
+         Semantic Image Segmentation", Chen et al. 2018.
+    """
+    # ------------------------------------------------------------------
+    # Helper: GroupNorm in every place the BatchNorm variant would normalize.
+    # groups=4 divides every channel count used below
+    # (48, 64, 128, 256, 512, 1024, 2048).
+    # ------------------------------------------------------------------
+    def group_norm(name=None):
+        return layers.GroupNormalization(groups=4, name=name)
+    # ------------------------------------------------------------------
+    def bottleneck_residual_block(x, filters, strides=1, dilation_rate=1,
+                                  projection_shortcut=False, name_prefix=""):
+        """ResNet-50 bottleneck block with optional atrous convolution."""
+        shortcut = x
+        # Projection shortcut if dimensions change
+        if projection_shortcut:
+            shortcut = layers.Conv2D(
+                filters * 4, 1, strides=strides, use_bias=False,
+                name=f"{name_prefix}_0_conv"
+            )(shortcut)
+            shortcut = group_norm(name=f"{name_prefix}_0_gn")(shortcut)
+        # 1×1 → 3×3 (possibly atrous) → 1×1  bottleneck
+        x = layers.Conv2D(filters, 1, use_bias=False,
+                          name=f"{name_prefix}_1_conv")(x)
+        x = group_norm(name=f"{name_prefix}_1_gn")(x)
+        x = layers.Activation('relu')(x)
+        x = layers.Conv2D(
+            filters, 3, strides=strides, padding='same',
+            dilation_rate=dilation_rate, use_bias=False,
+            name=f"{name_prefix}_2_conv"
+        )(x)
+        x = group_norm(name=f"{name_prefix}_2_gn")(x)
+        x = layers.Activation('relu')(x)
+        x = layers.Conv2D(filters * 4, 1, use_bias=False,
+                          name=f"{name_prefix}_3_conv")(x)
+        x = group_norm(name=f"{name_prefix}_3_gn")(x)
+        x = layers.Add()([shortcut, x])
+        x = layers.Activation('relu')(x)
+        return x
+    # ------------------------------------------------------------------
+    def aspp_block(x, filters=256):
+        """Atrous Spatial Pyramid Pooling."""
+        # Branch 1 — 1×1 conv
+        b1 = layers.Conv2D(filters, 1, use_bias=False, name='aspp_1x1')(x)
+        b1 = group_norm(name='aspp_1x1_gn')(b1)
+        b1 = layers.Activation('relu')(b1)
+        # Branch 2 — 3×3, rate=6
+        b2 = layers.Conv2D(filters, 3, padding='same', dilation_rate=6,
+                           use_bias=False, name='aspp_3x3_6')(x)
+        b2 = group_norm(name='aspp_3x3_6_gn')(b2)
+        b2 = layers.Activation('relu')(b2)
+        # Branch 3 — 3×3, rate=12
+        b3 = layers.Conv2D(filters, 3, padding='same', dilation_rate=12,
+                           use_bias=False, name='aspp_3x3_12')(x)
+        b3 = group_norm(name='aspp_3x3_12_gn')(b3)
+        b3 = layers.Activation('relu')(b3)
+        # Branch 4 — 3×3, rate=18
+        b4 = layers.Conv2D(filters, 3, padding='same', dilation_rate=18,
+                           use_bias=False, name='aspp_3x3_18')(x)
+        b4 = group_norm(name='aspp_3x3_18_gn')(b4)
+        b4 = layers.Activation('relu')(b4)
+        # Branch 5 — image-level global context via GAP + resize back to the
+        # spatial size of the ASPP input
+        input_shape_dyn = tf.shape(x)
+        h, w = input_shape_dyn[1], input_shape_dyn[2]
+        b5 = layers.GlobalAveragePooling2D(name='aspp_gap')(x)
+        b5 = layers.Reshape((1, 1, -1))(b5)
+        b5 = layers.Conv2D(filters, 1, use_bias=False,
+                           name='aspp_gap_conv')(b5)
+        b5 = group_norm(name='aspp_gap_gn')(b5)
+        b5 = layers.Activation('relu')(b5)
+        b5 = layers.Lambda(
+            lambda args: tf.image.resize(args[0], args[1], method='bilinear'),
+            name='aspp_gap_resize'
+        )([b5, [h, w]])
+        # Fuse all branches
+        concat = layers.Concatenate(name='aspp_concat')([b1, b2, b3, b4, b5])
+        out = layers.Conv2D(filters, 1, use_bias=False,
+                            name='aspp_final_conv')(concat)
+        out = group_norm(name='aspp_final_gn')(out)
+        out = layers.Activation('relu')(out)
+        out = layers.Dropout(0.1, name='aspp_dropout')(out)
+        return out
+    # ==================================================================
+    # INPUT — grayscale, single channel
+    # ==================================================================
+    inputs = layers.Input(input_shape, name='input')   # (H, W, 1)
+    # ==================================================================
+    # ENCODER — ResNet-50 backbone
+    # ==================================================================
+    # Stem
+    x = layers.Conv2D(64, 7, strides=2, padding='same',
+                      use_bias=False, name='conv1')(inputs)
+    x = group_norm(name='conv1_gn')(x)
+    x = layers.Activation('relu')(x)
+    x = layers.MaxPooling2D(3, strides=2, padding='same', name='pool1')(x)
+    # Stage 1 — conv2_x  (output stride 4 → low-level features for decoder)
+    x = bottleneck_residual_block(x, 64, strides=1,
+                                  projection_shortcut=True,
+                                  name_prefix='conv2_block1')
+    x = bottleneck_residual_block(x, 64, name_prefix='conv2_block2')
+    low_level_features = bottleneck_residual_block(x, 64,
+                                                   name_prefix='conv2_block3')
+    # Stage 2 — conv3_x  (output stride 8)
+    x = bottleneck_residual_block(low_level_features, 128, strides=2,
+                                  projection_shortcut=True,
+                                  name_prefix='conv3_block1')
+    x = bottleneck_residual_block(x, 128, name_prefix='conv3_block2')
+    x = bottleneck_residual_block(x, 128, name_prefix='conv3_block3')
+    x = bottleneck_residual_block(x, 128, name_prefix='conv3_block4')
+    # Stage 3 — conv4_x  (atrous rate=2, keeps stride at 8)
+    x = bottleneck_residual_block(x, 256, strides=1, dilation_rate=2,
+                                  projection_shortcut=True,
+                                  name_prefix='conv4_block1')
+    for i in range(2, 7):
+        x = bottleneck_residual_block(x, 256, dilation_rate=2,
+                                      name_prefix=f'conv4_block{i}')
+    # Stage 4 — conv5_x  (atrous rate=4, keeps stride at 8)
+    x = bottleneck_residual_block(x, 512, strides=1, dilation_rate=4,
+                                  projection_shortcut=True,
+                                  name_prefix='conv5_block1')
+    x = bottleneck_residual_block(x, 512, dilation_rate=4,
+                                  name_prefix='conv5_block2')
+    x = bottleneck_residual_block(x, 512, dilation_rate=4,
+                                  name_prefix='conv5_block3')
+    # ==================================================================
+    # ASPP MODULE
+    # ==================================================================
+    x = aspp_block(x, filters=256)
+    # ==================================================================
+    # DECODER
+    # ==================================================================
+    # ASPP output has stride 8 and the low-level features stride 4, so the
+    # default factor of 2 aligns the two feature maps.
+    x = layers.UpSampling2D(size=(aspp_upsample_factor, aspp_upsample_factor),
+                            interpolation='bilinear',
+                            name='decoder_upsample1')(x)
+    # Reduce low-level feature channels to 48
+    low_level_features = layers.Conv2D(
+        48, 1, use_bias=False, name='decoder_low_level_conv'
+    )(low_level_features)
+    low_level_features = group_norm(name='decoder_low_level_gn')(low_level_features)
+    low_level_features = layers.Activation('relu')(low_level_features)
+    # Trim the upsampled map when it overshoots the low-level map (e.g. by one
+    # pixel when an encoder stage had an odd spatial size).
+    def match_spatial_dims(tensors):
+        high_level, low_level = tensors
+        low_shape = tf.shape(low_level)
+        return high_level[:, :low_shape[1], :low_shape[2], :], low_level
+    x_matched, low_matched = layers.Lambda(
+        match_spatial_dims, name='match_dims'
+    )([x, low_level_features])
+    # Fuse high-level and low-level features
+    x = layers.Concatenate(name='decoder_concat')([x_matched, low_matched])
+    x = layers.Conv2D(256, 3, padding='same', use_bias=False,
+                      name='decoder_conv1')(x)
+    x = group_norm(name='decoder_conv1_gn')(x)
+    x = layers.Activation('relu')(x)
+    x = layers.Dropout(0.1, name='decoder_dropout1')(x)
+    x = layers.Conv2D(256, 3, padding='same', use_bias=False,
+                      name='decoder_conv2')(x)
+    x = group_norm(name='decoder_conv2_gn')(x)
+    x = layers.Activation('relu')(x)
+    x = layers.Dropout(0.1, name='decoder_dropout2')(x)
+    # Final 4× upsample from stride 4 back to the input resolution
+    x = layers.UpSampling2D(size=(4, 4), interpolation='bilinear',
+                            name='decoder_upsample2')(x)
+    # ==================================================================
+    # OUTPUT
+    # ==================================================================
+    if num_classes == 1:
+        # Binary segmentation  →  sigmoid, single-channel mask
+        outputs = layers.Conv2D(1, 1, activation='sigmoid', name='output')(x)
+    else:
+        # Multi-class segmentation  →  softmax over num_classes channels
+        outputs = layers.Conv2D(num_classes, 1, activation='softmax',
+                                name='output')(x)
+    model = keras.Model(inputs, outputs, name='DeepLabV3Plus_ResNet50_GN')
+    return model

models/for_WMH_Vent/model_training_scripts/p4_compute_class_weights.py ADDED Viewed

	@@ -0,0 +1,353 @@

+"""
+P4 - Utility script to calculate inverse frequency weights for class balancing
+Usage:
+    python p4_compute_class_weights.py --fold 0 --scenario 4class --preprocessing standard
+Output:
+    Saves class weights to JSON file for reproducibility
+    Prints weights for use in training
+Authors:
+"Mahdi Bashiri Bawil, Mousa Shamsi, Abolhassan Shakeri Bavil"
+Developer:
+"Mahdi Bashiri Bawil"
+"""
+import numpy as np
+import json
+from pathlib import Path
+from tqdm import tqdm
+import argparse
+# Import data loader
+from p4_data_loader import DataConfig, P2DataLoader
+def compute_class_frequencies(dataset, num_classes, total_samples=None):
+    """
+    Tally how many target-mask pixels carry each class id.
+    Args:
+        dataset: Iterable whose elements unpack into four items; only the
+            second one (the target mask batch, exposing ``.numpy()``) is used
+        num_classes: Class ids ``0 .. num_classes - 1`` are counted
+        total_samples: Length hint for the tqdm progress bar; when falsy the
+            dataset is iterated without a progress bar
+    Returns:
+        class_pixel_counts: int64 array with one pixel count per class
+        total_pixels: Number of mask elements visited (values outside the
+            counted class ids are included here)
+    """
+    counts = np.zeros(num_classes, dtype=np.int64)
+    pixels_seen = 0
+    print(f"Computing class frequencies for {num_classes}-class scenario...")
+    batches = dataset
+    if total_samples:
+        batches = tqdm(dataset, total=total_samples, desc="Processing")
+    for _, target_mask, _, _ in batches:
+        # Count over the whole batch at once instead of mask by mask.
+        batch_masks = target_mask.numpy()
+        counts += np.array(
+            [np.count_nonzero(batch_masks == class_id) for class_id in range(num_classes)],
+            dtype=np.int64,
+        )
+        pixels_seen += batch_masks.size
+    return counts, pixels_seen
+def compute_inverse_frequency_weights(class_pixel_counts, num_classes):
+    """
+    Compute inverse frequency weights with normalization
+    Args:
+        class_pixel_counts: Array-like of pixel counts per class
+        num_classes: Number of classes; the returned weights sum to this value
+    Returns:
+        class_weights: Normalized inverse frequency weights
+        class_frequencies: Class frequencies (for reference)
+    Raises:
+        ValueError: If the counts sum to zero, i.e. no pixels were counted
+    """
+    counts = np.asarray(class_pixel_counts, dtype=np.float64)
+    total_pixels = np.sum(counts)
+    # Without this guard an empty dataset yields 0/0 = NaN frequencies, and
+    # the NaN weights would be saved and used silently.
+    if total_pixels <= 0:
+        raise ValueError(
+            "Cannot compute class weights: class_pixel_counts sums to zero "
+            "(no pixels were counted)."
+        )
+    # Class frequencies
+    class_frequencies = counts / total_pixels
+    # Inverse frequency; epsilon keeps a class with zero pixels finite
+    epsilon = 1e-6
+    inverse_freq = 1.0 / (class_frequencies + epsilon)
+    # Normalize weights to sum = num_classes
+    # This keeps weights in a reasonable range while maintaining relative importance
+    class_weights = inverse_freq / np.sum(inverse_freq) * num_classes
+    return class_weights, class_frequencies
+def compute_and_save_class_weights(fold_id, class_scenario, preprocessing,
+                                   output_dir='class_weights'):
+    """
+    Compute class weights for a specific fold and scenario
+    The training split is iterated twice: once to count batches (used as the
+    progress-bar length) and once to count pixels per class.
+    Args:
+        fold_id: Fold number, forwarded to the data loader and used in the file name
+        class_scenario: '3class' or '4class'
+        preprocessing: 'standard' or 'zoomed' (forwarded to the data loader)
+        output_dir: Directory to save weights (created with parents if missing)
+    Returns:
+        Dictionary with weights and statistics (same content as the JSON file)
+    Raises:
+        ValueError: If class_scenario is neither '3class' nor '4class'
+    """
+    # Reject unknown scenarios up front; previously anything other than
+    # '3class' was silently treated as the 4-class case.
+    scenario_num_classes = {'3class': 3, '4class': 4}
+    if class_scenario not in scenario_num_classes:
+        raise ValueError(
+            f"Unknown class_scenario {class_scenario!r}; "
+            f"expected one of {sorted(scenario_num_classes)}"
+        )
+    num_classes = scenario_num_classes[class_scenario]
+    print("\n" + "="*70)
+    print("COMPUTING CLASS WEIGHTS")
+    print("="*70)
+    print(f"Fold: {fold_id}")
+    print(f"Scenario: {class_scenario}")
+    print(f"Preprocessing: {preprocessing}")
+    print("="*70 + "\n")
+    # Initialize data loader
+    config = DataConfig()
+    data_loader = P2DataLoader(config)
+    def make_train_dataset():
+        """Build the unshuffled training dataset for this fold and scenario."""
+        return data_loader.create_dataset_for_fold(
+            fold_id=fold_id,
+            split='train',
+            preprocessing=preprocessing,
+            class_scenario=class_scenario,
+            batch_size=8,  # Larger batch for faster processing
+            shuffle=False  # No need to shuffle for counting
+        )
+    # Load training dataset
+    print("Loading training dataset...")
+    # Each dataset element is one batch, so this counts batches, not samples.
+    train_size = sum(1 for _ in make_train_dataset())
+    print(f"Training batches: {train_size}")
+    # Compute class frequencies on a freshly created dataset
+    class_pixel_counts, total_pixels = compute_class_frequencies(
+        make_train_dataset(), num_classes, train_size
+    )
+    # Compute inverse frequency weights
+    class_weights, class_frequencies = compute_inverse_frequency_weights(
+        class_pixel_counts, num_classes
+    )
+    # Print results
+    print("\n" + "="*70)
+    print("RESULTS")
+    print("="*70)
+    class_names = {
+        3: ['Background', 'Ventricles', 'Abnormal WMH'],
+        4: ['Background', 'Ventricles', 'Normal WMH', 'Abnormal WMH']
+    }
+    print(f"\nTotal pixels analyzed: {total_pixels:,}")
+    print("\nClass Statistics:")
+    print("-" * 70)
+    for i in range(num_classes):
+        print(f"Class {i} ({class_names[num_classes][i]}):")
+        print(f"  Pixel count:  {class_pixel_counts[i]:,}")
+        print(f"  Frequency:    {class_frequencies[i]:.6f} ({class_frequencies[i]*100:.2f}%)")
+        print(f"  Weight:       {class_weights[i]:.4f}")
+        print()
+    # Save to JSON
+    output_path = Path(output_dir)
+    # parents=True so a nested output_dir does not fail on a missing parent
+    output_path.mkdir(parents=True, exist_ok=True)
+    results = {
+        'fold_id': fold_id,
+        'class_scenario': class_scenario,
+        'preprocessing': preprocessing,
+        'num_classes': num_classes,
+        'total_pixels': int(total_pixels),
+        'class_pixel_counts': class_pixel_counts.tolist(),
+        'class_frequencies': class_frequencies.tolist(),
+        'class_weights': class_weights.tolist(),
+        'class_names': class_names[num_classes]
+    }
+    filename = f"class_weights_fold{fold_id}_{preprocessing}_{class_scenario}.json"
+    filepath = output_path / filename
+    with open(filepath, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2)
+    print("="*70)
+    print(f"✅ Class weights saved to: {filepath}")
+    print("="*70)
+    # Print weights in format ready for code
+    print("\nFor use in training script:")
+    print("-" * 70)
+    print(f"class_weights = tf.constant({class_weights.tolist()}, dtype=tf.float32)")
+    print()
+    return results
+def compute_all_scenarios_for_fold(fold_id):
+    """
+    Compute and save class weights for every preprocessing/class-scenario pair of one fold
+    The four combinations are processed in the order standard/3class,
+    standard/4class, zoomed/3class, zoomed/4class.
+    Args:
+        fold_id: Fold number, forwarded to compute_and_save_class_weights
+    Returns:
+        Dictionary mapping "<preprocessing>_<class_scenario>" to the results
+        dictionary returned for that combination
+    """
+    all_results = {}
+    for preprocessing in ('standard', 'zoomed'):
+        for class_scenario in ('3class', '4class'):
+            all_results[f"{preprocessing}_{class_scenario}"] = compute_and_save_class_weights(
+                fold_id=fold_id,
+                class_scenario=class_scenario,
+                preprocessing=preprocessing
+            )
+            # Visual separator between scenario reports
+            print("\n" + "="*70 + "\n")
+    return all_results
def load_class_weights(fold_id, class_scenario, preprocessing, weights_dir='class_weights'):
    """
    Read back the class weights stored for one fold/scenario/preprocessing.

    Args:
        fold_id: Fold number used in the file name
        class_scenario: '3class' or '4class'
        preprocessing: 'standard' or 'zoomed'
        weights_dir: Directory containing the class-weight JSON files
    Returns:
        class_weights: float32 NumPy array taken from the 'class_weights' entry
    Raises:
        FileNotFoundError: If the expected JSON file does not exist
    """
    json_file = Path(weights_dir).joinpath(
        f"class_weights_fold{fold_id}_{preprocessing}_{class_scenario}.json"
    )
    if not json_file.exists():
        raise FileNotFoundError(
            f"Class weights not found: {json_file}\n"
            f"Run compute_and_save_class_weights() first."
        )
    with open(json_file, 'r') as handle:
        stored = json.load(handle)
    return np.array(stored['class_weights'], dtype=np.float32)
def main():
    """
    Command-line entry point: parse arguments and compute class weights.

    Three modes are supported:
      * --all-folds             every scenario of folds 0..4
      * --fold N --all          every scenario of fold N
      * --fold N --scenario S --preprocessing P   a single scenario

    Exits with an argparse error (status 2) when the arguments required by
    the selected mode are missing.
    """
    parser = argparse.ArgumentParser(
        description='Compute class weights from training data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # %(prog)s is expanded by argparse to the invoked script name, so the
        # examples stay correct if the file is renamed.
        epilog="""
Examples:
    # Single scenario
    python %(prog)s --fold 0 --scenario 4class --preprocessing standard
    # All scenarios for one fold
    python %(prog)s --fold 0 --all
    # All folds (for completeness)
    python %(prog)s --all-folds
        """
    )
    # NOTE(review): folds 0-4 are accepted here and --all-folds iterates over
    # five folds; confirm this matches the number of folds present in the
    # fold-assignment file used by compute_and_save_class_weights().
    parser.add_argument(
        '--fold',
        type=int,
        choices=[0, 1, 2, 3, 4],
        help='Fold number (0-4)'
    )
    parser.add_argument(
        '--scenario',
        type=str,
        choices=['3class', '4class'],
        help='Class scenario'
    )
    parser.add_argument(
        '--preprocessing',
        type=str,
        choices=['standard', 'zoomed'],
        help='Preprocessing type'
    )
    parser.add_argument(
        '--all',
        action='store_true',
        help='Compute for all scenarios of specified fold'
    )
    parser.add_argument(
        '--all-folds',
        action='store_true',
        help='Compute for all scenarios of all folds'
    )
    args = parser.parse_args()

    # --all-folds takes precedence over every other option
    if args.all_folds:
        for fold_id in range(5):
            print(f"\n{'='*70}")
            print(f"PROCESSING FOLD {fold_id}")
            print(f"{'='*70}\n")
            compute_all_scenarios_for_fold(fold_id)
        return

    # All scenarios of a single fold
    if args.all:
        if args.fold is None:
            parser.error("--fold is required when using --all")
        compute_all_scenarios_for_fold(args.fold)
        return

    # Single scenario: all three selectors must be present
    if args.fold is None or args.scenario is None or args.preprocessing is None:
        parser.error("--fold, --scenario, and --preprocessing are required")
    compute_and_save_class_weights(
        fold_id=args.fold,
        class_scenario=args.scenario,
        preprocessing=args.preprocessing
    )
+if __name__ == "__main__":
+    main()

models/for_WMH_Vent/model_training_scripts/p4_data_loader.py ADDED Viewed

	@@ -0,0 +1,912 @@

+"""
+P4 Article - Data Loading System
+Complete implementation for brain segmentation experiments
+WMH and Ventricles Segmentation with U-Net Models - Journal Paper Implementation
+Three-class segmentation: Background vs Ventricles vs Abnormal WMH
+Professional results saving and visualization for publication
+This relates to our article:
+"Deep Learning-Based Neuroanatomical Profiling Reveals Detailed Brain Changes:
+A Large-Scale Multiple Sclerosis Study"
+Features:
+- Load FLAIR images and individual mask files from Cohort directory
+- Support both Local_SAI (MS3SEG) and Public_MSSEG (MSSEG2016) datasets
+- Handle standard and zoomed preprocessing variants
+- Combine masks into 3-class or 4-class format
+- Create paired inputs: [FLAIR | mask] concatenated (256x512)
+- Patient-stratified K-fold cross-validation
+- TensorFlow dataset creation with proper batching
+Authors:
+"Mahdi Bashiri Bawil, Mousa Shamsi, Abolhassan Shakeri Bavil"
+Developer:
+"Mahdi Bashiri Bawil"
+"""
+import numpy as np
+import os
+from pathlib import Path
+from typing import Tuple, List, Dict, Optional
+import json
+from sklearn.model_selection import KFold
+from tqdm import tqdm
+import cv2 as cv
+# Deep Learning
+import tensorflow as tf
+###################### Configuration ######################
class DataConfig:
    """
    Data configuration for P4 experiments.

    Args:
        cohort_dir: Root folder that contains the dataset sub-folders
            ('Local_SAI_updated', 'Public_MSSEG'). When omitted, the original
            development path is used; pass your own location instead of
            editing this file.
        splits_dir: Folder holding the fold-assignment JSON. Defaults to
            'data_splits', resolved relative to the working directory.
    """
    def __init__(self, cohort_dir=None, splits_dir=None):
        # Base paths
        if cohort_dir is None:
            cohort_dir = "/mnt/e/MBashiri/ours_articles/Paper#2/Data/Cohort"
        self.cohort_dir = Path(cohort_dir)

        # Dataset configurations
        self.datasets = {
            'Local_SAI_updated': {
                'base_path': self.cohort_dir / 'Local_SAI_updated',
                'slice_range': (1, 20),  # inclusive range (original note: 9,15)
                'patient_prefix_length': 6  # "101228"
            },
            'Public_MSSEG': {
                'base_path': self.cohort_dir / 'Public_MSSEG',
                'slice_range': (1, 50),  # inclusive range (original note: 24,43)
                'patient_prefix_length': 6  # "c01p01"
            }
        }

        # Preprocessing variants
        self.preprocessing_types = ['standard', 'zoomed']

        # Class scenarios
        self.class_scenarios = {
            '3class': {
                'num_classes': 3,
                'class_names': ['Background', 'Ventricles', 'Abnormal WMH'],
                'description': 'Three-class: Background, Ventricles, Abnormal WMH',
                'class_mapping': {
                    'background': 0,
                    'ventricles': 1,
                    'abnormal_wmh': 2,
                }
            },
            '4class': {
                'num_classes': 4,
                'class_names': ['Background', 'Ventricles', 'Normal WMH', 'Abnormal WMH'],
                'description': 'Four-class: Background, Ventricles, Normal WMH, Abnormal WMH',
                'class_mapping': {
                    'background': 0,
                    'ventricles': 1,
                    'normal_wmh': 2,
                    'abnormal_wmh': 3
                }
            }
        }

        # K-fold parameters
        self.k_folds = 4
        self.test_split = 0.2  # 20% for test set
        self.random_state = 42

        # Image parameters
        self.target_size = (256, 256)
        self.paired_width = 512  # FLAIR (256) + mask (256)

        # Paths for splits
        self.splits_dir = Path("data_splits" if splits_dir is None else splits_dir)
        self.splits_file = self.splits_dir / "concat_fold_assignments.json"
+###################### Helper Functions ######################
def extract_patient_id(filename: str, prefix_length: int = 6) -> str:
    """
    Extract patient ID from filename
    Args:
        filename: e.g., "101228_5.npy" or "c01p01_25.png"; a full path is
            also accepted, only its last component is inspected
        prefix_length: Maximum number of characters kept from the patient ID
    Returns:
        Patient ID: e.g., "101228" or "c01p01"
    """
    # Work on the final path component so that an underscore in a parent
    # directory name cannot be mistaken for the ID/slice separator.
    name = Path(filename).name
    return name.split('_')[0][:prefix_length]
def extract_slice_number(filename: str) -> int:
    """
    Extract slice number from filename
    Args:
        filename: e.g., "101228_5.npy" or "c01p01_25.png"; a full path is
            also accepted, only its last component is inspected
    Returns:
        Slice number as integer
    Raises:
        ValueError: If the text after the last '_' is not an integer
    """
    # Drop directories first: a dot in a parent folder ("./", "v1.2/") would
    # otherwise truncate the name before the slice number is reached.
    name = Path(filename).name
    # Everything before the first dot of the file name is the stem
    stem = name.split('.', 1)[0]
    # The slice number is the part after the last underscore
    return int(stem.rsplit('_', 1)[-1])
def load_flair_image(flair_path: Path, normalize: bool = False, of_z_score: bool = False) -> np.ndarray:
    """
    Load one FLAIR slice as a float32 array with a trailing channel axis.
    Args:
        flair_path: Path to the .png file. With of_z_score=True the sibling
            file with the same name and a .npy suffix is read instead.
        normalize: If True, re-apply z-score normalization when the loaded
            values look un-normalized (std > 2 or |mean| > 1)
        of_z_score: If True, load the pre-computed array from the .npy file
            as-is; if False, read the PNG and rescale it to [-1, 1]
    Returns:
        FLAIR image (H, W, 1) as float32
    Raises:
        FileNotFoundError: If the PNG cannot be read
    """
    flair_path = Path(flair_path)
    if of_z_score:
        # Swap only the file suffix; a plain string replace would also rewrite
        # any ".png" occurring in a directory name.
        flair = np.load(flair_path.with_suffix('.npy')).astype(np.float32)
    else:
        # Load PNG as grayscale; imread signals failure by returning None
        raw = cv.imread(str(flair_path), cv.IMREAD_GRAYSCALE)
        if raw is None:
            raise FileNotFoundError(f"Could not load FLAIR image: {flair_path}")
        flair = raw.astype(np.float32)
        # Normalize to [-1, 1]
        lowest = np.min(flair)
        span = np.max(flair) - lowest
        if span > 0:
            flair = (flair - lowest) / span
            flair = (2 * flair) - 1
        else:
            # Constant image: min-max scaling is undefined (0/0), so map every
            # pixel to the lower bound instead of producing NaNs.
            flair = np.full_like(flair, -1.0)
    # Ensure correct shape
    if len(flair.shape) == 2:
        flair = np.expand_dims(flair, axis=-1)
    # Additional normalization if needed (should already be normalized)
    if normalize and (np.std(flair) > 2.0 or np.abs(np.mean(flair)) > 1.0):
        # Re-normalize if values seem off
        flair = (flair - np.mean(flair)) / (np.std(flair) + 1e-7)
    return flair
def load_mask_image(mask_path: Path) -> np.ndarray:
    """
    Read a mask PNG and binarize it.
    Args:
        mask_path: Path to .png file
    Returns:
        uint8 array holding 1 where the grayscale pixel is non-zero, else 0
    Raises:
        FileNotFoundError: If the image cannot be read
    """
    # Grayscale read; a None result means the file could not be loaded
    gray = cv.imread(str(mask_path), cv.IMREAD_GRAYSCALE)
    if gray is None:
        raise FileNotFoundError(f"Could not load mask: {mask_path}")
    # Any non-zero intensity counts as foreground
    foreground = gray > 0
    return foreground.astype(np.uint8)
def combine_masks(vent_mask: np.ndarray,
                  nwmh_mask: np.ndarray,
                  abwmh_mask: np.ndarray,
                  class_scenario: str,
                  preprocess: bool = False) -> np.ndarray:
    """
    Combine individual masks into multi-class format
    Args:
        vent_mask: Ventricles mask
        nwmh_mask: Normal WMH mask
        abwmh_mask: Abnormal WMH mask
        class_scenario: '3class' or '4class'
        preprocess: Boolean turning the morphological preprocessing on or off
    Returns:
        Combined uint8 mask with class labels:
          3class -> 0 background, 1 ventricles, 2 abnormal WMH
          4class -> 0 background, 1 ventricles, 2 normal WMH, 3 abnormal WMH
    Raises:
        ValueError: If class_scenario is not '3class' or '4class'
    """
    # Fail fast: reject an unknown scenario before any morphology is run
    if class_scenario not in ('3class', '4class'):
        raise ValueError(f"Unknown class_scenario: {class_scenario}")

    if preprocess:
        from skimage.morphology import remove_small_objects, binary_erosion, binary_closing, disk
        min_object_size = 5
        closing_radius = 2
        erosion_radius = 1

        def _clean(mask):
            """Close small gaps, erode by one pixel, then drop tiny components."""
            cleaned = binary_closing(mask > 0, disk(closing_radius))
            cleaned = binary_erosion(cleaned, disk(erosion_radius))
            return remove_small_objects(cleaned, min_size=min_object_size)

        vent_mask = _clean(vent_mask)
        nwmh_mask = _clean(nwmh_mask)
        abwmh_mask = _clean(abwmh_mask)
        # Make the cleaned masks mutually exclusive: ventricles win over both
        # WMH masks, and normal WMH wins over abnormal WMH.
        # NOTE(review): this precedence is the opposite of the label
        # overwrite order below (abnormal WMH painted last), and the normal-WMH
        # exclusion is applied even in the 3-class scenario -- confirm intent.
        abwmh_mask = abwmh_mask & ~vent_mask
        nwmh_mask = nwmh_mask & ~vent_mask
        abwmh_mask = abwmh_mask & ~nwmh_mask

    # Class 0 (background) is the default; later assignments overwrite
    # earlier ones wherever masks overlap.
    combined = np.zeros_like(vent_mask, dtype=np.uint8)
    combined[vent_mask > 0] = 1
    if class_scenario == '3class':
        combined[abwmh_mask > 0] = 2
    else:  # '4class'
        combined[nwmh_mask > 0] = 2
        combined[abwmh_mask > 0] = 3
    return combined
def is_valid_slice(vent_mask: np.ndarray,
                   nwmh_mask: np.ndarray,
                   abwmh_mask: np.ndarray) -> bool:
    """
    Decide whether a slice should be kept in the dataset.

    The content-based filter is currently disabled: every slice is accepted,
    including slices whose masks are all empty. The arguments are kept so
    the check can be re-enabled without touching callers.
    Args:
        vent_mask: Ventricles mask (unused while the filter is disabled)
        nwmh_mask: Normal WMH mask (unused while the filter is disabled)
        abwmh_mask: Abnormal WMH mask (unused while the filter is disabled)
    Returns:
        Always True
    """
    # NOTE(review): an earlier version computed np.sum(mask) > 50 for each
    # mask and the return expression was commented out; re-enable such a rule
    # here if slices without annotated structures must be skipped.
    return True
def create_paired_input(flair: np.ndarray,
                        mask: np.ndarray,
                        brain_mask: np.ndarray,
                        num_classes: int,
                        if_bet=False) -> Tuple[np.ndarray, np.ndarray]:
    """
    Create paired input: [FLAIR | mask] concatenated horizontally
    Args:
        flair: FLAIR image, (H, W) or (H, W, 1)
        mask: Combined integer label mask (H, W)
        brain_mask: Brain mask (H, W); non-zero pixels are inside the brain
        num_classes: Divisor used to scale the labels, i.e. the label value
            that maps to +1.0 (pass the highest class index, e.g. 2 for the
            3-class scenario)
        if_bet: If True, apply brain extraction: FLAIR pixels outside the
            brain mask are set to the image minimum and labels outside it
            are set to 0
    Returns:
        Tuple (paired, mask):
        - paired: (H, 2*W, 1) array, FLAIR on the left and the label mask
          rescaled to [-1, 1] on the right
        - mask: the integer label mask (brain-extracted when if_bet is True)
    """
    # Binarize (any non-zero value becomes True)
    brain_mask = brain_mask > 0
    # Brain extraction
    if if_bet:
        # Work on copies so the caller's arrays are never modified in place
        flair = flair.copy()
        mask = mask.copy()
        outside_brain = ~brain_mask
        flair[outside_brain] = np.min(flair)
        mask[outside_brain] = 0
    # Ensure flair is 3D
    if len(flair.shape) == 2:
        flair = np.expand_dims(flair, axis=-1)
    # Convert labels to float and rescale to [-1, 1]:
    #   divisor 2: 0, 1, 2    -> -1, 0, 1
    #   divisor 3: 0, 1, 2, 3 -> -1, -0.333, 0.333, 1
    mask_normalized = mask.astype(np.float32)
    if num_classes > 0:
        mask_normalized = mask_normalized / num_classes
        mask_normalized = (2 * mask_normalized) - 1
    mask_3d = np.expand_dims(mask_normalized, axis=-1)
    # Concatenate horizontally: [FLAIR | mask]
    paired = np.concatenate([flair, mask_3d], axis=1)
    return paired, mask
+###################### Patient Stratified Splitting ######################
class PatientStratifiedSplitter:
    """
    Create patient-stratified train/val/test splits
    Similar to P6 implementation but adapted for P4 data structure

    Splitting is done on patient IDs, never on individual slices, so all
    slices of one patient end up in the same subset.
    """
    def __init__(self, config: "DataConfig"):
        self.config = config
        # parents=True so a nested splits directory can be created in one go
        self.config.splits_dir.mkdir(parents=True, exist_ok=True)

    def collect_all_patients(self) -> Dict[str, List[str]]:
        """
        Collect all unique patient IDs from both datasets
        Returns:
            Dictionary mapping dataset_name -> sorted list of patient IDs.
            Datasets whose FLAIR image folder is missing are skipped with a
            warning.
        """
        all_patients = {}
        for dataset_name, dataset_config in self.config.datasets.items():
            # Path to FLAIR images (standard preprocessing)
            flair_dir = dataset_config['base_path'] / 'FLAIR' / 'Preprocessed' / 'images'
            if not flair_dir.exists():
                print(f"Warning: {flair_dir} does not exist. Skipping {dataset_name}.")
                continue
            # One ID per .png file; the set removes the per-slice duplicates
            patients = {
                extract_patient_id(flair_file.name, dataset_config['patient_prefix_length'])
                for flair_file in flair_dir.glob('*.png')
            }
            all_patients[dataset_name] = sorted(patients)
            print(f"{dataset_name}: {len(all_patients[dataset_name])} patients")
        return all_patients

    def create_patient_stratified_splits(self,
                                        save: bool = True) -> Dict:
        """
        Create patient-stratified K-fold splits
        Args:
            save: If True, write the assignments to config.splits_file
        Returns:
            Dictionary containing fold assignments with the keys 'metadata',
            'test_set' and 'folds'
        """
        all_patients = self.collect_all_patients()
        # Combine patients from both datasets
        combined_patients = []
        for patients in all_patients.values():
            combined_patients.extend(patients)
        combined_patients = np.array(combined_patients)
        total_patients = len(combined_patients)
        print(f"\nTotal unique patients: {total_patients}")

        # Step 1: Split into train+val (80%) and test (20%)
        # The global NumPy seed is kept on purpose: switching to another RNG
        # would change which patients land in the test set and break
        # reproducibility of previously generated splits.
        np.random.seed(self.config.random_state)
        test_size = int(total_patients * self.config.test_split)
        test_indices = np.random.choice(
            total_patients,
            size=test_size,
            replace=False
        )
        test_patients = combined_patients[test_indices]
        train_val_indices = np.setdiff1d(np.arange(total_patients), test_indices)
        train_val_patients = combined_patients[train_val_indices]
        print(f"Test patients: {len(test_patients)}")
        print(f"Train+Val patients: {len(train_val_patients)}")

        # Step 2: Create K-fold splits on train+val patients
        kfold = KFold(
            n_splits=self.config.k_folds,
            shuffle=True,
            random_state=self.config.random_state
        )
        fold_assignments = {
            'metadata': {
                'total_patients': total_patients,
                'test_patients': len(test_patients),
                'trainval_patients': len(train_val_patients),
                'n_folds': self.config.k_folds,
                'random_seed': self.config.random_state,
                'datasets': list(all_patients.keys())
            },
            'test_set': {
                'patients': test_patients.tolist(),
                'n_patients': len(test_patients)
            },
            'folds': {}
        }
        for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(train_val_patients)):
            train_patients_fold = train_val_patients[train_idx]
            val_patients_fold = train_val_patients[val_idx]
            fold_assignments['folds'][f'fold_{fold_idx}'] = {
                'train_patients': train_patients_fold.tolist(),
                'val_patients': val_patients_fold.tolist(),
                'n_train': len(train_patients_fold),
                'n_val': len(val_patients_fold)
            }
            print(f"Fold {fold_idx}: Train={len(train_patients_fold)}, Val={len(val_patients_fold)}")

        # Save to JSON
        if save:
            with open(self.config.splits_file, 'w') as f:
                json.dump(fold_assignments, f, indent=2)
            print(f"\n✅ Fold assignments saved to: {self.config.splits_file}")
        return fold_assignments

    def load_fold_assignments(self) -> Dict:
        """
        Load existing fold assignments from JSON
        Raises:
            FileNotFoundError: If config.splits_file does not exist
        """
        if not self.config.splits_file.exists():
            raise FileNotFoundError(
                f"Fold assignments not found: {self.config.splits_file}\n"
                f"Run create_patient_stratified_splits() first."
            )
        with open(self.config.splits_file, 'r') as f:
            fold_assignments = json.load(f)
        return fold_assignments

    def verify_patient_separation(self, fold_assignments: Dict) -> bool:
        """
        Verify no patient appears in multiple folds or in both train/val
        Similar to P6's verification logic
        Args:
            fold_assignments: Dictionary with 'test_set' and 'folds' entries
                as produced by create_patient_stratified_splits()
        Returns:
            True if no overlap or duplicate was found, False otherwise.
            Every problem found is printed.
        """
        print("\n" + "="*60)
        print("VERIFYING PATIENT SEPARATION")
        print("="*60)
        all_issues = []

        def report(issue):
            """Record one problem and echo it to the console."""
            all_issues.append(issue)
            print(f"❌ {issue}")

        test_patients = set(fold_assignments['test_set']['patients'])
        folds = fold_assignments['folds']

        # Check 1: No patient in both test and train/val
        for fold_name, fold_data in folds.items():
            test_train_overlap = test_patients.intersection(fold_data['train_patients'])
            test_val_overlap = test_patients.intersection(fold_data['val_patients'])
            if test_train_overlap:
                report(f"{fold_name}: Test-Train overlap: {test_train_overlap}")
            if test_val_overlap:
                report(f"{fold_name}: Test-Val overlap: {test_val_overlap}")

        # Check 2: No patient in both train and val within same fold
        for fold_name, fold_data in folds.items():
            train_val_overlap = set(fold_data['train_patients']).intersection(
                fold_data['val_patients']
            )
            if train_val_overlap:
                report(f"{fold_name}: Train-Val overlap: {train_val_overlap}")

        # Check 3: No patient used for validation more than once.
        # Only patients that appear in some validation list are counted; a
        # patient that is never validated is not detected by this check.
        val_patient_counts = {}
        for fold_data in folds.values():
            for patient in fold_data['val_patients']:
                val_patient_counts[patient] = val_patient_counts.get(patient, 0) + 1
        for patient, count in val_patient_counts.items():
            if count != 1:
                report(f"Patient {patient} in validation {count} times (should be 1)")

        if not all_issues:
            print("✅ All patient separation checks passed")
            print("✅ No data leakage detected")
            return True
        print(f"\n❌ Found {len(all_issues)} issues")
        return False
+###################### Data Loader ######################
class P2DataLoader:
    """
    Main data loader for P2 experiments
    Handles loading FLAIR and masks, creating paired inputs, TensorFlow datasets
    """
    def __init__(self, config: "DataConfig"):
        self.config = config

    def get_file_paths(self,
                       patient_id: str,
                       slice_num: int,
                       dataset_name: str,
                       preprocessing: str) -> Dict[str, Path]:
        """
        Construct file paths for a given patient-slice
        Args:
            patient_id: e.g., "101228" or "c01p01"
            slice_num: Slice number
            dataset_name: 'Local_SAI_updated' or 'Public_MSSEG'
            preprocessing: 'standard' selects the 'images' sub-folder; any
                other value is treated as 'zoomed' ('zoomed/images')
        Returns:
            Dictionary with paths to the FLAIR image and the four mask files
            ('flair', 'vent_mask', 'nwmh_mask', 'abwmh_mask', 'brain_mask')
            plus 'zoom_factors' (None unless preprocessing == 'zoomed').
            The paths are only constructed, not checked for existence.
        """
        base_path = self.config.datasets[dataset_name]['base_path']
        # FLAIR and ground truth use the same sub-folder layout
        subdir = 'images' if preprocessing == 'standard' else 'zoomed/images'
        slice_file = f'{patient_id}_{slice_num}.png'
        gt_root = base_path / 'GroundTruth' / subdir

        # Optional: zooming factors (only for zoomed preprocessing)
        zoom_factors_path = None
        if preprocessing == 'zoomed':
            zoom_factors_path = (base_path / 'FLAIR' / 'Preprocessed' / 'zoomed' / 'images'
                                 / f'{patient_id}_zooming_factors.npy')
        return {
            'flair': base_path / 'FLAIR' / 'Preprocessed' / subdir / slice_file,
            'vent_mask': gt_root / 'Vent_Masks' / slice_file,
            'nwmh_mask': gt_root / 'nWMH_Masks' / slice_file,
            'abwmh_mask': gt_root / 'abWMH_Masks' / slice_file,
            'brain_mask': gt_root / 'Brain_Masks' / slice_file,
            'zoom_factors': zoom_factors_path
        }

    def load_single_slice(self,
                         patient_id: str,
                         slice_num: int,
                         dataset_name: str,
                         preprocessing: str,
                         class_scenario: str,
                         of_z_score: bool = True,
                         if_bet: bool = True,
                         pre_morph: bool = False) -> Tuple[np.ndarray, np.ndarray]:
        """
        Load a single patient-slice and create paired input
        Args:
            patient_id: Patient identifier
            slice_num: Slice number
            dataset_name: 'Local_SAI_updated' or 'Public_MSSEG'
            preprocessing: 'standard' or 'zoomed'
            class_scenario: '3class' or '4class'
            of_z_score: Forwarded to load_flair_image (load the .npy variant
                of the FLAIR slice instead of the PNG)
            if_bet: Forwarded to create_paired_input (brain extraction)
            pre_morph: Forwarded to combine_masks as `preprocess`
                (morphological clean-up of the masks)
        Returns:
            Tuple of (paired_input, combined_mask)
            - paired_input: FLAIR + mask concatenated along the width
            - combined_mask: multi-class label mask
        """
        # Highest label index, parsed from the leading digit of the scenario
        # name ('3class' -> 2, '4class' -> 3); used to scale labels to [-1, 1]
        max_class_index = int(class_scenario[0]) - 1
        paths = self.get_file_paths(patient_id, slice_num, dataset_name, preprocessing)
        # Load FLAIR
        flair = load_flair_image(paths['flair'], of_z_score=of_z_score)
        # Load masks
        vent_mask = load_mask_image(paths['vent_mask'])
        nwmh_mask = load_mask_image(paths['nwmh_mask'])
        abwmh_mask = load_mask_image(paths['abwmh_mask'])
        brain_mask = load_mask_image(paths['brain_mask'])
        # Combine masks
        combined_mask = combine_masks(vent_mask, nwmh_mask, abwmh_mask, class_scenario, preprocess=pre_morph)
        # Create paired input
        paired_input, combined_mask = create_paired_input(
            flair, combined_mask, brain_mask,
            num_classes=max_class_index, if_bet=if_bet
        )
        return paired_input, combined_mask

    def collect_patient_slices(self,
                            patient_list: List[str],
                            dataset_name: str,
                            preprocessing: str) -> List[Tuple[str, int, str]]:
        """
        Collect all usable slice files for given patients.

        A slice is kept when its FLAIR image and all four mask files exist,
        the masks can be loaded, and is_valid_slice() accepts it. Slices with
        missing files are silently ignored; this is what excludes patients
        that belong to a different dataset.
        Args:
            patient_list: List of patient IDs
            dataset_name: 'Local_SAI_updated' or 'Public_MSSEG'
            preprocessing: 'standard' or 'zoomed'
        Returns:
            List of tuples (patient_id, slice_num, dataset_name)
        """
        slice_min, slice_max = self.config.datasets[dataset_name]['slice_range']
        required = ('flair', 'vent_mask', 'nwmh_mask', 'abwmh_mask', 'brain_mask')
        patient_slices = []
        skipped_empty = 0
        for patient_id in patient_list:
            for slice_num in range(slice_min, slice_max + 1):
                paths = self.get_file_paths(patient_id, slice_num, dataset_name, preprocessing)
                # Check if all required files exist
                if not all(paths[key].exists() for key in required):
                    continue
                # VALIDATION: every mask must be readable and the slice accepted
                try:
                    vent_mask = load_mask_image(paths['vent_mask'])
                    nwmh_mask = load_mask_image(paths['nwmh_mask'])
                    abwmh_mask = load_mask_image(paths['abwmh_mask'])
                    # Loaded only to confirm the brain mask is readable
                    load_mask_image(paths['brain_mask'])
                    if is_valid_slice(vent_mask, nwmh_mask, abwmh_mask):
                        patient_slices.append((patient_id, slice_num, dataset_name))
                    else:
                        skipped_empty += 1
                except Exception as e:
                    # Best effort: an unreadable slice is reported and skipped
                    print(f"Warning: Could not validate {patient_id}_{slice_num}: {e}")
                    skipped_empty += 1
        if skipped_empty > 0:
            # Count includes slices that failed to load, not only empty ones
            print(f"  ⚠️  Skipped {skipped_empty} slices with empty masks")
        return patient_slices

    def create_dataset_for_fold(self,
                                fold_id: int,
                                split: str,
                                preprocessing: str,
                                class_scenario: str,
                                batch_size: int = 1,
                                shuffle: bool = True,
                                use_z_scored: bool = True,
                                bet: bool = True) -> "tf.data.Dataset":
        """
        Create TensorFlow dataset for a specific fold and split
        Args:
            fold_id: Fold number; selects the 'fold_<fold_id>' entry of the
                fold-assignment file (ignored when split == 'test')
            split: 'train', 'val', or 'test'
            preprocessing: 'standard' or 'zoomed'
            class_scenario: '3class' or '4class'
            batch_size: Batch size
            shuffle: Whether to shuffle data (only applied to the 'train' split)
            use_z_scored: Load the z-scored .npy FLAIR slices instead of PNGs
            bet: Apply brain extraction when building each sample.
                Both flags were previously accepted but never forwarded, so
                every sample was built with z-scored input and brain
                extraction enabled. They are now honoured; the default of
                `bet` is True so that calls relying on the defaults keep
                producing the same data as before.
        Returns:
            tf.data.Dataset yielding batches of
            (paired_input, combined_mask, patient_id, slice_num)
        Raises:
            ValueError: If split is unknown or no slice is found
        """
        # Load fold assignments
        splitter = PatientStratifiedSplitter(self.config)
        fold_assignments = splitter.load_fold_assignments()

        # Get patient list for this split
        if split == 'test':
            patient_list = fold_assignments['test_set']['patients']
        else:
            fold_key = f'fold_{fold_id}'
            if split == 'train':
                patient_list = fold_assignments['folds'][fold_key]['train_patients']
            elif split == 'val':
                patient_list = fold_assignments['folds'][fold_key]['val_patients']
            else:
                raise ValueError(f"Unknown split: {split}")
        print(f"\nCreating dataset for fold {fold_id}, split '{split}'")
        print(f"Patients: {len(patient_list)}")

        # Collect all patient-slices from both datasets.
        # The full patient list is probed against every dataset; patients
        # that do not belong to a dataset simply have no files there and
        # contribute no slices.
        all_patient_slices = []
        for dataset_name in self.config.datasets.keys():
            all_patient_slices.extend(
                self.collect_patient_slices(list(patient_list), dataset_name, preprocessing)
            )
        print(f"Total slices: {len(all_patient_slices)}")
        if len(all_patient_slices) == 0:
            raise ValueError(f"No data found for fold {fold_id}, split '{split}'")

        # Create TensorFlow dataset
        def data_generator():
            """Generator function for tf.data.Dataset"""
            for patient_id, slice_num, dataset_name in all_patient_slices:
                try:
                    paired_input, combined_mask = self.load_single_slice(
                        patient_id, slice_num, dataset_name,
                        preprocessing, class_scenario,
                        of_z_score=use_z_scored,
                        if_bet=bet
                    )
                    yield paired_input, combined_mask, patient_id, slice_num
                except Exception as e:
                    # Best effort: a slice that fails to load is reported and
                    # dropped from the dataset instead of aborting training
                    print(f"Error loading {patient_id}_{slice_num}: {e}")
                    continue

        dataset = tf.data.Dataset.from_generator(
            data_generator,
            output_signature=(
                tf.TensorSpec(shape=(256, 512, 1), dtype=tf.float32),   # concatenated image
                tf.TensorSpec(shape=(256, 256), dtype=tf.uint8),        # multi-level mask
                tf.TensorSpec(shape=(),            dtype=tf.string),    # patient_id
                tf.TensorSpec(shape=(),            dtype=tf.int32)      # slice_num
            )
        )

        # ── Cache BEFORE shuffle/batch ──────────────────────────────────────
        # The generator runs once (first epoch); afterwards samples are served
        # from the in-memory cache, so the load/decode/combine cost is paid
        # only once. Caching the unbatched, unshuffled samples keeps the
        # shuffle below free to produce a new order (and therefore new batch
        # compositions) every epoch.
        dataset = dataset.cache()

        # Shuffle if training  (acts on the cached samples every epoch)
        if shuffle and split == 'train':
            dataset = dataset.shuffle(
                buffer_size=len(all_patient_slices),
                reshuffle_each_iteration=True   # new random order each epoch
            )

        # Batch and prefetch
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        return dataset
+###################### Testing & Validation Functions ######################
+def test_data_loading():
+    """Test data loading functionality"""
+    print("\n" + "="*60)
+    print("TESTING DATA LOADING")
+    print("="*60)
+    config = DataConfig()
+    # Test 1: Create fold assignments
+    print("\n[TEST 1] Creating patient stratified splits...")
+    splitter = PatientStratifiedSplitter(config)
+    fold_assignments = splitter.create_patient_stratified_splits(save=True)
+    # Verify patient separation
+    is_valid = splitter.verify_patient_separation(fold_assignments)
+    if not is_valid:
+        print("❌ Patient separation verification failed!")
+        return False
+    # Test 2: Load a single slice
+    print("\n[TEST 2] Loading single slice...")
+    loader = P2DataLoader(config)
+    # Get a test patient from fold 0 train set
+    test_patient = fold_assignments['folds']['fold_0']['train_patients'][0]
+    # Determine which dataset this patient belongs to
+    if test_patient.startswith('c'):
+        test_dataset = 'Public_MSSEG'
+        test_slice = 25  # Middle of 20-46 range
+    else:
+        test_dataset = 'Local_SAI_updated'
+        test_slice = 10  # Middle of 8-15 range
+    try:
+        paired_input, combined_mask = loader.load_single_slice(
+            test_patient, test_slice, test_dataset,
+            'standard', '4class'
+        )
+        print(f"âœ… Loaded slice {test_patient}_{test_slice}")
+        print(f"   Paired input shape: {paired_input.shape}")
+        print(f"   Combined mask shape: {combined_mask.shape}")
+        print(f"   Mask unique values: {np.unique(combined_mask)}")
+    except Exception as e:
+        print(f"❌ Failed to load slice: {e}")
+        return False
+    # Test 3: Create TensorFlow dataset
+    print("\n[TEST 3] Creating TensorFlow dataset...")
+    try:
+        dataset = loader.create_dataset_for_fold(
+            fold_id=0,
+            split='train',
+            preprocessing='standard',
+            class_scenario='4class',
+            batch_size=2,
+            shuffle=True
+        )
+        # Get first batch
+        for batch_paired, batch_masks in dataset.take(1):
+            print(f"âœ… Created dataset")
+            print(f"   Batch paired input shape: {batch_paired.shape}")
+            print(f"   Batch masks shape: {batch_masks.shape}")
+            print(f"   Paired input dtype: {batch_paired.dtype}")
+            print(f"   Masks dtype: {batch_masks.dtype}")
+    except Exception as e:
+        print(f"❌ Failed to create dataset: {e}")
+        return False
+    print("\n" + "="*60)
+    print("âœ… ALL TESTS PASSED")
+    print("="*60)
+    return True
+###################### Main Execution ######################
+if __name__ == "__main__":
+    # Run tests
+    success = test_data_loading()
+    if success:
+        print("\n" + "="*60)
+        print("DATA LOADER READY FOR USE")
+        print("="*60)
+        print("\nNext steps:")
+        print("1. Verify fold_assignments.json created in data_splits/")
+        print("2. Check that all file paths are correct for your system")
+        print("3. Proceed to model implementation")
+    else:
+        print("\n" + "="*60)
+        print("❌ DATA LOADER TESTS FAILED")
+        print("="*60)
+        print("\nPlease fix the issues above before proceeding")

models/for_WMH_Vent/model_training_scripts/p4_error_analysis.py ADDED Viewed

	@@ -0,0 +1,1033 @@

+"""
+P2 Article - Error Analysis & Hard Case Ranking Module
+for Ventricles and WMH Segmentation
+Integrates with p4_inference.py to identify problematic slices and patients,
+rank them by difficulty, and produce rich diagnostic visualizations.
+Developer: Mahdi Bashiri Bawil
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import matplotlib.patches as mpatches
+from matplotlib.colors import ListedColormap, BoundaryNorm
+import pandas as pd
+import json
+from pathlib import Path
+from collections import defaultdict
+from scipy.ndimage import binary_erosion, label as scipy_label
+from tqdm import tqdm
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 1 — Slice-level metric computation
+# ─────────────────────────────────────────────────────────────────────────────
+def _dice_binary(gt_bin, pred_bin):
+    """Dice for a single binary mask pair. Returns NaN if both are empty."""
+    tp = np.sum(gt_bin & pred_bin)
+    denom = np.sum(gt_bin) + np.sum(pred_bin)
+    if denom == 0:
+        return np.nan          # class truly absent — not a failure
+    return float(2 * tp / (denom + 1e-7))
+def _iou_binary(gt_bin, pred_bin):
+    tp = np.sum(gt_bin & pred_bin)
+    denom = np.sum(gt_bin | pred_bin)
+    if denom == 0:
+        return np.nan
+    return float(tp / (denom + 1e-7))
+def _precision_recall(gt_bin, pred_bin):
+    tp = np.sum(gt_bin & pred_bin)
+    fp = np.sum(~gt_bin & pred_bin)
+    fn = np.sum(gt_bin & ~pred_bin)
+    precision = float(tp / (tp + fp + 1e-7))
+    recall    = float(tp / (tp + fn + 1e-7))
+    return precision, recall
+def _false_positive_volume(gt_bin, pred_bin):
+    """Fraction of predicted pixels that are false positives."""
+    fp = np.sum(~gt_bin & pred_bin)
+    total_pred = np.sum(pred_bin)
+    if total_pred == 0:
+        return 0.0
+    return float(fp / total_pred)
+def _false_negative_volume(gt_bin, pred_bin):
+    """Fraction of GT pixels that are missed."""
+    fn = np.sum(gt_bin & ~pred_bin)
+    total_gt = np.sum(gt_bin)
+    if total_gt == 0:
+        return 0.0
+    return float(fn / total_gt)
+def _gt_load(gt_hw, class_idx):
+    """Return binary GT mask for a specific class from a (H,W) label map."""
+    return gt_hw == class_idx
+def _pred_load(pred_hw, class_idx):
+    return pred_hw == class_idx
+def compute_slice_metrics(gt_hw, pred_hw, num_classes, class_names,
+                          mean_confidence=None):
+    """
+    Compute per-class and summary metrics for a single 2-D slice.
+    Parameters
+    ----------
+    gt_hw : np.ndarray (H, W) — integer label map (ground truth)
+    pred_hw : np.ndarray (H, W) — integer label map (prediction)
+    num_classes : int
+    class_names : list[str]
+    mean_confidence : float | None — mean max-softmax probability for the slice
+    Returns
+    -------
+    dict with per-class and aggregate metrics
+    """
+    results = {}
+    dice_values   = []
+    iou_values    = []
+    for cls in range(num_classes):
+        gt_bin   = _gt_load(gt_hw, cls)
+        pred_bin = _pred_load(pred_hw, cls)
+        dice = _dice_binary(gt_bin, pred_bin)
+        iou  = _iou_binary(gt_bin, pred_bin)
+        prec, rec = _precision_recall(gt_bin, pred_bin)
+        fpr  = _false_positive_volume(gt_bin, pred_bin)
+        fnr  = _false_negative_volume(gt_bin, pred_bin)
+        gt_px   = int(np.sum(gt_bin))
+        pred_px = int(np.sum(pred_bin))
+        error_px = int(np.sum(gt_bin != pred_bin))
+        results[class_names[cls]] = {
+            'dice':           dice,
+            'iou':            iou,
+            'precision':      prec,
+            'recall':         rec,
+            'fp_rate':        fpr,
+            'fn_rate':        fnr,
+            'gt_pixels':      gt_px,
+            'pred_pixels':    pred_px,
+            'error_pixels':   error_px,
+        }
+        if not np.isnan(dice):
+            dice_values.append(dice)
+        if not np.isnan(iou):
+            iou_values.append(iou)
+    # Pixel-level error rate (ignoring class)
+    total_px    = gt_hw.size
+    wrong_px    = int(np.sum(gt_hw != pred_hw))
+    error_rate  = wrong_px / total_px
+    # Focus on foreground classes only (skip background=0) for composite score
+    fg_dice = []
+    for cls in range(1, num_classes):
+        d = results[class_names[cls]]['dice']
+        if not np.isnan(d):
+            fg_dice.append(d)
+    mean_fg_dice = float(np.mean(fg_dice)) if fg_dice else np.nan
+    min_fg_dice  = float(np.min(fg_dice))  if fg_dice else np.nan
+    results['_summary'] = {
+        'error_rate':      error_rate,
+        'wrong_pixels':    wrong_px,
+        'total_pixels':    total_px,
+        'mean_fg_dice':    mean_fg_dice,
+        'min_fg_dice':     min_fg_dice,
+        'mean_confidence': mean_confidence,
+    }
+    return results
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 2 — Build slice-level and patient-level tables
+# ─────────────────────────────────────────────────────────────────────────────
+def build_error_tables(patient_results, num_classes, class_names):
+    """
+    Iterate over all patients / slices stored in patient_results
+    (the dict returned by run_inference) and build:
+      - slice_records : list of dicts, one per 2-D slice
+      - patient_records : list of dicts, one per patient (aggregated)
+    Parameters
+    ----------
+    patient_results : dict
+        {patient_id: {'predictions', 'ground_truths', 'probabilities',
+                      'flairs', 'slice_indices'}}
+    num_classes : int
+    class_names : list[str]
+    Returns
+    -------
+    slice_df : pd.DataFrame
+    patient_df : pd.DataFrame
+    """
+    slice_records   = []
+    patient_records = []
+    for patient_id, data in tqdm(patient_results.items(),
+                                 desc="Building error tables"):
+        order = np.argsort(data['slice_indices'])
+        preds   = np.array(data['predictions'])[order]      # (S, H, W)
+        gts     = np.array(data['ground_truths'])[order]    # (S, H, W, C) or (S, H, W)
+        probs   = np.array(data['probabilities'])[order]    # (S, H, W)
+        slices  = np.array(data['slice_indices'])[order]    # (S,)
+        # Ground truth may be one-hot: collapse to label map
+        if gts.ndim == 4:
+            gts = np.argmax(gts, axis=-1)
+        patient_fg_dice   = defaultdict(list)
+        patient_error_rates = []
+        for i, slice_num in enumerate(slices):
+            gt_hw   = gts[i]
+            pred_hw = preds[i]
+            prob_hw = probs[i]
+            mean_conf = float(np.mean(prob_hw))
+            m = compute_slice_metrics(gt_hw, pred_hw, num_classes,
+                                      class_names, mean_confidence=mean_conf)
+            row = {
+                'patient_id':   patient_id,
+                'slice_num':    int(slice_num),
+                'slice_id':     f"{patient_id}_slice_{int(slice_num):03d}",
+                'error_rate':   m['_summary']['error_rate'],
+                'wrong_pixels': m['_summary']['wrong_pixels'],
+                'mean_fg_dice': m['_summary']['mean_fg_dice'],
+                'min_fg_dice':  m['_summary']['min_fg_dice'],
+                'mean_confidence': m['_summary']['mean_confidence'],
+            }
+            for cls in range(num_classes):
+                cname = class_names[cls]
+                cm    = m[cname]
+                prefix = cname.lower().replace(' ', '_')
+                row[f'{prefix}_dice']      = cm['dice']
+                row[f'{prefix}_iou']       = cm['iou']
+                row[f'{prefix}_precision'] = cm['precision']
+                row[f'{prefix}_recall']    = cm['recall']
+                row[f'{prefix}_fp_rate']   = cm['fp_rate']
+                row[f'{prefix}_fn_rate']   = cm['fn_rate']
+                row[f'{prefix}_gt_px']     = cm['gt_pixels']
+                row[f'{prefix}_pred_px']   = cm['pred_pixels']
+                row[f'{prefix}_err_px']    = cm['error_pixels']
+                if cls > 0 and not np.isnan(cm['dice']):
+                    patient_fg_dice[cname].append(cm['dice'])
+            patient_error_rates.append(m['_summary']['error_rate'])
+            slice_records.append(row)
+        # ── Patient summary ──
+        pat_row = {'patient_id': patient_id,
+                   'n_slices':   len(slices),
+                   'mean_error_rate': float(np.mean(patient_error_rates))}
+        for cls in range(1, num_classes):
+            cname  = class_names[cls]
+            vals   = patient_fg_dice[cname]
+            prefix = cname.lower().replace(' ', '_')
+            pat_row[f'{prefix}_mean_dice'] = float(np.mean(vals)) if vals else np.nan
+            pat_row[f'{prefix}_std_dice']  = float(np.std(vals))  if vals else np.nan
+            pat_row[f'{prefix}_min_dice']  = float(np.min(vals))  if vals else np.nan
+        # Composite: mean of per-class mean dices (foreground only)
+        fg_means = [pat_row[f"{class_names[c].lower().replace(' ', '_')}_mean_dice"]
+                    for c in range(1, num_classes)
+                    if not np.isnan(pat_row.get(
+                        f"{class_names[c].lower().replace(' ','_')}_mean_dice", np.nan))]
+        pat_row['composite_dice'] = float(np.mean(fg_means)) if fg_means else np.nan
+        patient_records.append(pat_row)
+    slice_df   = pd.DataFrame(slice_records)
+    patient_df = pd.DataFrame(patient_records)
+    return slice_df, patient_df
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 3 — Composite difficulty score & ranking
+# ─────────────────────────────────────────────────────────────────────────────
+def rank_slices(slice_df, class_names, num_classes,
+                fg_dice_weight=0.6, error_rate_weight=0.2,
+                confidence_weight=0.2):
+    """
+    Add a `difficulty_score` column to slice_df (higher = harder).
+    Score = fg_dice_weight * (1 - mean_fg_dice)
+          + error_rate_weight * error_rate
+          + confidence_weight * (1 - mean_confidence)
+    NaN dice (class absent in GT) is neutral (0.5) so it doesn't
+    inflate difficulty for slices where the class just doesn't exist.
+    """
+    df = slice_df.copy()
+    # Fill NaN mean_fg_dice with 0.5 for scoring (class not present → neutral)
+    fg_dice_filled = df['mean_fg_dice'].fillna(0.5)
+    conf_filled    = df['mean_confidence'].fillna(0.5)
+    df['difficulty_score'] = (
+        fg_dice_weight    * (1 - fg_dice_filled) +
+        error_rate_weight * df['error_rate'] +
+        confidence_weight * (1 - conf_filled)
+    )
+    df = df.sort_values('difficulty_score', ascending=False).reset_index(drop=True)
+    df['difficulty_rank'] = df.index + 1
+    return df
+def rank_patients(patient_df):
+    """Sort patients from hardest to easiest (lowest composite dice first)."""
+    df = patient_df.copy()
+    df = df.sort_values('composite_dice', ascending=True).reset_index(drop=True)
+    df['difficulty_rank'] = df.index + 1
+    return df
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 4 — Visualization helpers
+# ─────────────────────────────────────────────────────────────────────────────
+# Per-class display colours, indexed by class label (index 0 = background).
+# The 3-entry list is used when num_classes == 3, the 4-entry list otherwise.
+CLASS_COLORS_3 = ['black', '#2196F3', '#F44336']           # BG, Vent, WMH
+CLASS_COLORS_4 = ['black', '#2196F3', '#4CAF50', '#F44336'] # BG, Vent, NormWMH, AbWMH
+# Colours for the four error codes produced by _build_error_rgb (0-3).
+# NOTE(review): visualize_hard_slice builds its own colormap from the same four
+# colours, so this constant may be unused — confirm before relying on it.
+ERROR_CMAP = ListedColormap(['#1A1A1A',   # 0: correct pixel
+                              '#FF5722',  # 1: FP (pred fg, gt bg)
+                              '#03A9F4',  # 2: FN (gt fg, pred bg)
+                              '#FFEB3B']) # 3: class confusion
+def _get_class_cmap(num_classes):
+    colors = CLASS_COLORS_3 if num_classes == 3 else CLASS_COLORS_4
+    cmap = ListedColormap(colors)
+    norm = BoundaryNorm(range(num_classes + 1), num_classes)
+    return cmap, norm
+def _build_error_rgb(gt_hw, pred_hw, num_classes):
+    """
+    Build a pixel-wise error classification map:
+      0 = correct
+      1 = false positive (model predicts fg, GT is bg)
+      2 = false negative (GT is fg, model predicts bg)
+      3 = class confusion (both fg but wrong class)
+    """
+    gt_fg   = gt_hw > 0
+    pred_fg = pred_hw > 0
+    err = np.zeros_like(gt_hw, dtype=np.uint8)
+    err[~gt_fg & pred_fg]              = 1   # FP
+    err[gt_fg  & ~pred_fg]             = 2   # FN
+    err[gt_fg  & pred_fg & (gt_hw != pred_hw)] = 3  # confusion
+    return err
+def _add_class_legend(ax, class_names, num_classes):
+    colors = CLASS_COLORS_3 if num_classes == 3 else CLASS_COLORS_4
+    patches = [mpatches.Patch(color=colors[i], label=class_names[i])
+               for i in range(num_classes)]
+    ax.legend(handles=patches, loc='lower right', fontsize=7,
+              framealpha=0.8, markerscale=0.8)
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 5 — Diagnostic slice visualization
+# ─────────────────────────────────────────────────────────────────────────────
+def visualize_hard_slice(flair, gt_hw, pred_hw, prob_hw,
+                         slice_metrics_row, class_names, num_classes,
+                         save_path, rank=None):
+    """
+    Create a rich 3-row diagnostic panel for a single hard slice.
+    Row 1 : FLAIR | GT mask | Predicted mask | Overlay (GT contour on FLAIR)
+    Row 2 : Confidence map | Error type map | GT vs Pred contour overlay
+    Row 3 : Per-class dice bar chart | FP/FN summary table
+    """
+    cmap_cls, norm_cls = _get_class_cmap(num_classes)
+    err_map = _build_error_rgb(gt_hw, pred_hw, num_classes)
+    patient_id  = slice_metrics_row.get('patient_id', '?')
+    slice_num   = slice_metrics_row.get('slice_num', '?')
+    diff_score  = slice_metrics_row.get('difficulty_score', float('nan'))
+    diff_rank   = slice_metrics_row.get('difficulty_rank', rank)
+    mean_conf   = slice_metrics_row.get('mean_confidence', float('nan'))
+    mean_fg_d   = slice_metrics_row.get('mean_fg_dice', float('nan'))
+    fig = plt.figure(figsize=(20, 14))
+    fig.patch.set_facecolor('#0D0D0D')
+    title_str = (f"Patient: {patient_id}  |  Slice: {slice_num:03d}  |  "
+                 f"Rank #{diff_rank}  |  Difficulty: {diff_score:.3f}  |  "
+                 f"Mean FG Dice: {mean_fg_d:.3f}  |  Mean Conf: {mean_conf:.3f}")
+    fig.suptitle(title_str, color='white', fontsize=12, fontweight='bold', y=0.98)
+    gs = gridspec.GridSpec(3, 4, figure=fig,
+                           hspace=0.35, wspace=0.25,
+                           left=0.04, right=0.98,
+                           top=0.93, bottom=0.04)
+    def styled_ax(pos):
+        ax = fig.add_subplot(pos)
+        ax.set_facecolor('#0D0D0D')
+        ax.tick_params(colors='white')
+        for spine in ax.spines.values():
+            spine.set_edgecolor('#444')
+        return ax
+    # ── Row 0 ──────────────────────────────────────────────────────────────
+    ax00 = styled_ax(gs[0, 0])
+    ax00.imshow(flair, cmap='gray', vmin=flair.min(), vmax=flair.max())
+    ax00.set_title('FLAIR', color='white', fontsize=10)
+    ax00.axis('off')
+    ax01 = styled_ax(gs[0, 1])
+    ax01.imshow(gt_hw, cmap=cmap_cls, norm=norm_cls, interpolation='nearest')
+    ax01.set_title('Ground Truth', color='white', fontsize=10)
+    ax01.axis('off')
+    _add_class_legend(ax01, class_names, num_classes)
+    ax02 = styled_ax(gs[0, 2])
+    ax02.imshow(pred_hw, cmap=cmap_cls, norm=norm_cls, interpolation='nearest')
+    ax02.set_title('Prediction', color='white', fontsize=10)
+    ax02.axis('off')
+    _add_class_legend(ax02, class_names, num_classes)
+    # GT contour overlay on FLAIR
+    ax03 = styled_ax(gs[0, 3])
+    ax03.imshow(flair, cmap='gray', vmin=flair.min(), vmax=flair.max())
+    colors_cls = CLASS_COLORS_3 if num_classes == 3 else CLASS_COLORS_4
+    for cls in range(1, num_classes):
+        gt_bin   = (gt_hw == cls).astype(np.uint8)
+        pred_bin = (pred_hw == cls).astype(np.uint8)
+        if gt_bin.any():
+            ax03.contour(gt_bin,   levels=[0.5], colors=[colors_cls[cls]],
+                         linewidths=1.5, linestyles='solid')
+        if pred_bin.any():
+            ax03.contour(pred_bin, levels=[0.5], colors=[colors_cls[cls]],
+                         linewidths=1.2, linestyles='dashed')
+    gt_patch   = mpatches.Patch(color='white',  linestyle='solid',  label='GT (solid)')
+    pred_patch = mpatches.Patch(color='white',  linestyle='dashed', label='Pred (dashed)')
+    ax03.legend(handles=[gt_patch, pred_patch], loc='lower right',
+                fontsize=7, framealpha=0.7)
+    ax03.set_title('GT vs Pred Contours', color='white', fontsize=10)
+    ax03.axis('off')
+    # ── Row 1 ──────────────────────────────────────────────────────────────
+    ax10 = styled_ax(gs[1, 0])
+    im_conf = ax10.imshow(prob_hw, cmap='plasma', vmin=0, vmax=1)
+    plt.colorbar(im_conf, ax=ax10, fraction=0.046, pad=0.04).ax.yaxis.set_tick_params(color='white')
+    ax10.set_title('Confidence Map', color='white', fontsize=10)
+    ax10.axis('off')
+    # Low-confidence overlay on FLAIR
+    ax11 = styled_ax(gs[1, 1])
+    ax11.imshow(flair, cmap='gray')
+    low_conf_mask = prob_hw < 0.5
+    overlay = np.zeros((*flair.shape, 4))
+    overlay[low_conf_mask] = [1, 0.3, 0, 0.55]   # orange-red for uncertain regions
+    ax11.imshow(overlay)
+    ax11.set_title('Low-Confidence Regions (<0.5)', color='white', fontsize=10)
+    ax11.axis('off')
+    ax12 = styled_ax(gs[1, 2])
+    err_colors = ['#1A1A1A', '#FF5722', '#03A9F4', '#FFEB3B']
+    err_cmap   = ListedColormap(err_colors)
+    err_norm   = BoundaryNorm([0, 1, 2, 3, 4], 4)
+    ax12.imshow(err_map, cmap=err_cmap, norm=err_norm, interpolation='nearest')
+    patches_err = [
+        mpatches.Patch(color='#1A1A1A', label='Correct'),
+        mpatches.Patch(color='#FF5722', label='False Positive'),
+        mpatches.Patch(color='#03A9F4', label='False Negative'),
+        mpatches.Patch(color='#FFEB3B', label='Class Confusion'),
+    ]
+    ax12.legend(handles=patches_err, loc='lower right', fontsize=6.5, framealpha=0.8)
+    ax12.set_title('Error Type Map', color='white', fontsize=10)
+    ax12.axis('off')
+    # FLAIR + error overlay
+    ax13 = styled_ax(gs[1, 3])
+    flair_rgb = np.stack([flair] * 3, axis=-1)
+    # Normalise 0-1
+    flair_rgb = (flair_rgb - flair_rgb.min()) / (flair_rgb.max() - flair_rgb.min() + 1e-7)
+    err_overlay = flair_rgb.copy()
+    err_overlay[err_map == 1] = [1.0, 0.34, 0.13]  # FP
+    err_overlay[err_map == 2] = [0.01, 0.66, 0.96] # FN
+    err_overlay[err_map == 3] = [1.0, 0.92, 0.23]  # confusion
+    ax13.imshow(err_overlay)
+    ax13.set_title('FLAIR + Error Overlay', color='white', fontsize=10)
+    ax13.axis('off')
+    # ── Row 2: metrics ─────────────────────────────────────────────────────
+    ax20 = styled_ax(gs[2, 0:2])
+    ax20.set_facecolor('#111')
+    bar_labels  = []
+    bar_dice    = []
+    bar_colors  = []
+    for cls in range(1, num_classes):
+        cname  = class_names[cls]
+        prefix = cname.lower().replace(' ', '_')
+        d = slice_metrics_row.get(f'{prefix}_dice', np.nan)
+        bar_labels.append(cname)
+        bar_dice.append(d if not np.isnan(d) else 0)
+        bar_colors.append(colors_cls[cls])
+    x = np.arange(len(bar_labels))
+    bars = ax20.bar(x, bar_dice, color=bar_colors, edgecolor='white',
+                    linewidth=0.8, width=0.5)
+    ax20.axhline(0.5, color='red',    linestyle='--', linewidth=1, label='Threshold 0.5')
+    ax20.axhline(0.8, color='yellow', linestyle='--', linewidth=1, label='Good 0.8')
+    ax20.set_xticks(x)
+    ax20.set_xticklabels(bar_labels, color='white', fontsize=9)
+    ax20.set_ylim(0, 1.05)
+    ax20.set_ylabel('Dice Score', color='white', fontsize=9)
+    ax20.set_title('Per-Class Dice', color='white', fontsize=10)
+    ax20.tick_params(axis='y', colors='white')
+    ax20.legend(fontsize=7, labelcolor='white', framealpha=0.3)
+    for bar, val in zip(bars, bar_dice):
+        ax20.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
+                  f'{val:.3f}', ha='center', color='white', fontsize=9)
+    # Table: per-class FP/FN/precision/recall
+    ax21 = styled_ax(gs[2, 2:4])
+    ax21.axis('off')
+    col_labels = ['Class', 'Dice', 'Prec', 'Recall', 'FP rate', 'FN rate',
+                  'GT px', 'Pred px']
+    table_data = []
+    for cls in range(1, num_classes):
+        cname  = class_names[cls]
+        prefix = cname.lower().replace(' ', '_')
+        def _g(k):
+            v = slice_metrics_row.get(f'{prefix}_{k}', np.nan)
+            return f'{v:.3f}' if not np.isnan(v) else 'N/A'
+        table_data.append([
+            cname,
+            _g('dice'), _g('precision'), _g('recall'),
+            _g('fp_rate'), _g('fn_rate'),
+            str(int(slice_metrics_row.get(f'{prefix}_gt_px', 0))),
+            str(int(slice_metrics_row.get(f'{prefix}_pred_px', 0))),
+        ])
+    tbl = ax21.table(cellText=table_data, colLabels=col_labels,
+                     cellLoc='center', loc='center')
+    tbl.auto_set_font_size(False)
+    tbl.set_fontsize(8)
+    tbl.scale(1, 1.6)
+    for (r, c), cell in tbl.get_celld().items():
+        cell.set_edgecolor('#444')
+        if r == 0:
+            cell.set_facecolor('#2C2C2C')
+            cell.set_text_props(color='white', fontweight='bold')
+        else:
+            cell.set_facecolor('#1A1A1A')
+            cell.set_text_props(color='white')
+    ax21.set_title('Per-Class Metrics Summary', color='white', fontsize=10, pad=8)
+    plt.savefig(save_path, dpi=130, bbox_inches='tight',
+                facecolor=fig.get_facecolor())
+    plt.close(fig)
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 6 — Patient-level summary visualization
+# ─────────────────────────────────────────────────────────────────────────────
+def visualize_patient_summary(patient_id, patient_data, slice_df_patient,
+                               class_names, num_classes, save_path):
+    """
+    One-page summary for a single patient showing:
+      - Dice scores across all slices (line plot per class)
+      - Confidence vs. error rate scatter
+      - Per-slice FP / FN bar chart
+      - Overall dice distribution box plots
+    """
+    order    = np.argsort(patient_data['slice_indices'])
+    slices   = np.array(patient_data['slice_indices'])[order]
+    n_slices = len(slices)
+    fig, axes = plt.subplots(2, 2, figsize=(18, 10))
+    fig.patch.set_facecolor('#0D0D0D')
+    fig.suptitle(f'Patient Summary  |  ID: {patient_id}  |  {n_slices} slices',
+                 color='white', fontsize=13, fontweight='bold')
+    colors_cls = CLASS_COLORS_3 if num_classes == 3 else CLASS_COLORS_4
+    df = slice_df_patient.sort_values('slice_num').reset_index(drop=True)
+    # ── Plot 1: Per-slice Dice per class ──────────────────────────────────
+    ax = axes[0, 0]
+    ax.set_facecolor('#111')
+    for cls in range(1, num_classes):
+        cname  = class_names[cls]
+        prefix = cname.lower().replace(' ', '_')
+        col    = f'{prefix}_dice'
+        if col in df.columns:
+            valid = df[col].notna()
+            ax.plot(df.loc[valid, 'slice_num'], df.loc[valid, col],
+                    color=colors_cls[cls], linewidth=1.5,
+                    marker='o', markersize=3, label=cname)
+    ax.axhline(0.5, color='red',    linestyle='--', linewidth=0.8, alpha=0.7)
+    ax.axhline(0.8, color='yellow', linestyle='--', linewidth=0.8, alpha=0.7)
+    ax.set_xlabel('Slice Number', color='white')
+    ax.set_ylabel('Dice Score', color='white')
+    ax.set_title('Per-Slice Dice by Class', color='white', fontsize=10)
+    ax.legend(fontsize=8, labelcolor='white', framealpha=0.3)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    ax.set_ylim(0, 1.05)
+    # ── Plot 2: Confidence vs Error rate scatter ───────────────────────────
+    ax = axes[0, 1]
+    ax.set_facecolor('#111')
+    sc = ax.scatter(df['mean_confidence'], df['error_rate'],
+                    c=df['mean_fg_dice'].fillna(0.5),
+                    cmap='RdYlGn', vmin=0, vmax=1,
+                    s=50, edgecolors='white', linewidths=0.3, alpha=0.85)
+    cbar = plt.colorbar(sc, ax=ax)
+    cbar.set_label('Mean FG Dice', color='white')
+    cbar.ax.yaxis.set_tick_params(color='white')
+    plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='white')
+    ax.set_xlabel('Mean Confidence', color='white')
+    ax.set_ylabel('Pixel Error Rate', color='white')
+    ax.set_title('Confidence vs Error Rate\n(colour = Mean FG Dice)',
+                 color='white', fontsize=10)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    # Annotate worst 3 slices
+    worst3 = df.nlargest(3, 'difficulty_score') if 'difficulty_score' in df.columns \
+             else df.nlargest(3, 'error_rate')
+    for _, row in worst3.iterrows():
+        ax.annotate(f"sl{int(row['slice_num']):03d}",
+                    (row['mean_confidence'], row['error_rate']),
+                    textcoords="offset points", xytext=(5, 5),
+                    fontsize=7, color='white')
+    # ── Plot 3: FP / FN pixel rates per slice ─────────────────────────────
+    ax = axes[1, 0]
+    ax.set_facecolor('#111')
+    x = df['slice_num'].values
+    # Use WMH class (last foreground class) as primary interest
+    cls_main   = num_classes - 1
+    prefix_m   = class_names[cls_main].lower().replace(' ', '_')
+    fp_col     = f'{prefix_m}_fp_rate'
+    fn_col     = f'{prefix_m}_fn_rate'
+    if fp_col in df.columns and fn_col in df.columns:
+        width = 0.4
+        ax.bar(x - width/2, df[fp_col].fillna(0), width=width,
+               color='#FF5722', alpha=0.8, label='FP Rate')
+        ax.bar(x + width/2, df[fn_col].fillna(0), width=width,
+               color='#03A9F4', alpha=0.8, label='FN Rate')
+    ax.set_xlabel('Slice Number', color='white')
+    ax.set_ylabel('Rate', color='white')
+    ax.set_title(f'FP / FN Rate per Slice  [{class_names[cls_main]}]',
+                 color='white', fontsize=10)
+    ax.legend(fontsize=8, labelcolor='white', framealpha=0.3)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    # ── Plot 4: Dice distribution box plots ───────────────────────────────
+    ax = axes[1, 1]
+    ax.set_facecolor('#111')
+    box_data   = []
+    box_labels = []
+    box_colors = []
+    for cls in range(1, num_classes):
+        cname  = class_names[cls]
+        prefix = cname.lower().replace(' ', '_')
+        col    = f'{prefix}_dice'
+        vals   = df[col].dropna().values if col in df.columns else np.array([])
+        box_data.append(vals)
+        box_labels.append(cname)
+        box_colors.append(colors_cls[cls])
+    bp = ax.boxplot(box_data, patch_artist=True,
+                    medianprops=dict(color='white', linewidth=2))
+    for patch, color in zip(bp['boxes'], box_colors):
+        patch.set_facecolor(color)
+        patch.set_alpha(0.7)
+    for element in ['whiskers', 'caps', 'fliers']:
+        for item in bp[element]:
+            item.set_color('white')
+    ax.set_xticklabels(box_labels, color='white')
+    ax.set_ylabel('Dice Score', color='white')
+    ax.set_title('Dice Score Distribution per Class', color='white', fontsize=10)
+    ax.axhline(0.5, color='red',    linestyle='--', linewidth=0.8, alpha=0.7)
+    ax.axhline(0.8, color='yellow', linestyle='--', linewidth=0.8, alpha=0.7)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    ax.set_ylim(0, 1.05)
+    plt.tight_layout(rect=[0, 0, 1, 0.95])
+    plt.savefig(save_path, dpi=120, bbox_inches='tight',
+                facecolor=fig.get_facecolor())
+    plt.close(fig)
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 7 — Dataset-level overview visualizations
+# ─────────────────────────────────────────────────────────────────────────────
+def visualize_dataset_overview(slice_df, patient_df, class_names,
+                                num_classes, save_dir):
+    """
+    Global overview plots saved to save_dir/overview/:
+      1. Dice distribution across all slices (violin per class)
+      2. Patient ranking bar chart (composite dice)
+      3. Error rate histogram
+      4. Confidence vs dice scatter (all slices)
+      5. Difficulty score distribution
+    """
+    overview_dir = Path(save_dir) / 'overview'
+    overview_dir.mkdir(parents=True, exist_ok=True)
+    colors_cls = CLASS_COLORS_3 if num_classes == 3 else CLASS_COLORS_4
+    # ── 1. Dice violin ────────────────────────────────────────────────────
+    fig, ax = plt.subplots(figsize=(10, 6))
+    fig.patch.set_facecolor('#0D0D0D')
+    ax.set_facecolor('#111')
+    violin_data   = []
+    violin_labels = []
+    for cls in range(1, num_classes):
+        cname  = class_names[cls]
+        prefix = cname.lower().replace(' ', '_')
+        col    = f'{prefix}_dice'
+        vals   = slice_df[col].dropna().values if col in slice_df.columns else np.array([])
+        violin_data.append(vals)
+        violin_labels.append(cname)
+    parts = ax.violinplot(violin_data, showmedians=True, showextrema=True)
+    for i, (pc, color) in enumerate(zip(parts['bodies'],
+                                        [colors_cls[c] for c in range(1, num_classes)])):
+        pc.set_facecolor(color)
+        pc.set_alpha(0.7)
+    parts['cmedians'].set_colors('white')
+    parts['cmaxes'].set_colors('#aaa')
+    parts['cmins'].set_colors('#aaa')
+    parts['cbars'].set_colors('#aaa')
+    ax.set_xticks(range(1, len(violin_labels) + 1))
+    ax.set_xticklabels(violin_labels, color='white')
+    ax.axhline(0.5, color='red',    linestyle='--', linewidth=0.9, label='0.5 threshold')
+    ax.axhline(0.8, color='yellow', linestyle='--', linewidth=0.9, label='0.8 target')
+    ax.set_ylabel('Dice Score', color='white')
+    ax.set_title('Dice Distribution — All Slices', color='white', fontsize=12)
+    ax.tick_params(colors='white')
+    ax.legend(fontsize=8, labelcolor='white', framealpha=0.3)
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    ax.set_ylim(0, 1.05)
+    plt.tight_layout()
+    plt.savefig(overview_dir / 'dice_violin_all_slices.png', dpi=130,
+                bbox_inches='tight', facecolor=fig.get_facecolor())
+    plt.close(fig)
+    # ── 2. Patient ranking bar chart ──────────────────────────────────────
+    pat_sorted = patient_df.sort_values('composite_dice').reset_index(drop=True)
+    n_patients = len(pat_sorted)
+    fig, ax = plt.subplots(figsize=(max(12, n_patients * 0.6), 5))
+    fig.patch.set_facecolor('#0D0D0D')
+    ax.set_facecolor('#111')
+    bar_colors = ['#F44336' if v < 0.5 else '#FFC107' if v < 0.7 else '#4CAF50'
+                  for v in pat_sorted['composite_dice'].fillna(0)]
+    ax.bar(range(n_patients), pat_sorted['composite_dice'].fillna(0),
+           color=bar_colors, edgecolor='#333', linewidth=0.5)
+    ax.set_xticks(range(n_patients))
+    ax.set_xticklabels(pat_sorted['patient_id'], rotation=75,
+                       ha='right', color='white', fontsize=7)
+    ax.axhline(0.5, color='red',    linestyle='--', linewidth=0.9)
+    ax.axhline(0.7, color='orange', linestyle='--', linewidth=0.9)
+    ax.axhline(0.8, color='yellow', linestyle='--', linewidth=0.9)
+    ax.set_ylabel('Composite Dice (mean FG classes)', color='white')
+    ax.set_title('Patient Ranking — Composite Dice (worst → best)',
+                 color='white', fontsize=12)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    ax.set_ylim(0, 1.05)
+    red_p    = mpatches.Patch(color='#F44336', label='< 0.5 (critical)')
+    orange_p = mpatches.Patch(color='#FFC107', label='0.5–0.7 (poor)')
+    green_p  = mpatches.Patch(color='#4CAF50', label='≥ 0.7 (acceptable)')
+    ax.legend(handles=[red_p, orange_p, green_p],
+              fontsize=8, labelcolor='white', framealpha=0.3)
+    plt.tight_layout()
+    plt.savefig(overview_dir / 'patient_ranking.png', dpi=130,
+                bbox_inches='tight', facecolor=fig.get_facecolor())
+    plt.close(fig)
+    # ── 3. Error rate histogram ────────────────────────────────────────────
+    fig, ax = plt.subplots(figsize=(9, 5))
+    fig.patch.set_facecolor('#0D0D0D')
+    ax.set_facecolor('#111')
+    ax.hist(slice_df['error_rate'].dropna(), bins=40, color='#9C27B0',
+            edgecolor='white', linewidth=0.3, alpha=0.85)
+    ax.set_xlabel('Pixel Error Rate per Slice', color='white')
+    ax.set_ylabel('Count', color='white')
+    ax.set_title('Pixel Error Rate Distribution — All Slices', color='white', fontsize=12)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    plt.tight_layout()
+    plt.savefig(overview_dir / 'error_rate_histogram.png', dpi=130,
+                bbox_inches='tight', facecolor=fig.get_facecolor())
+    plt.close(fig)
+    # ── 4. Confidence vs mean FG Dice scatter ─────────────────────────────
+    fig, ax = plt.subplots(figsize=(9, 6))
+    fig.patch.set_facecolor('#0D0D0D')
+    ax.set_facecolor('#111')
+    sc = ax.scatter(slice_df['mean_confidence'], slice_df['mean_fg_dice'].fillna(0),
+                    c=slice_df['error_rate'], cmap='RdYlGn_r',
+                    vmin=0, vmax=0.3, s=10, alpha=0.6)
+    cbar = plt.colorbar(sc, ax=ax)
+    cbar.set_label('Pixel Error Rate', color='white')
+    cbar.ax.yaxis.set_tick_params(color='white')
+    plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='white')
+    ax.set_xlabel('Mean Softmax Confidence', color='white')
+    ax.set_ylabel('Mean FG Dice', color='white')
+    ax.set_title('Confidence vs FG Dice — All Slices', color='white', fontsize=12)
+    ax.tick_params(colors='white')
+    for spine in ax.spines.values():
+        spine.set_edgecolor('#444')
+    plt.tight_layout()
+    plt.savefig(overview_dir / 'confidence_vs_dice_scatter.png', dpi=130,
+                bbox_inches='tight', facecolor=fig.get_facecolor())
+    plt.close(fig)
+    # ── 5. Difficulty score distribution ──────────────────────────────────
+    if 'difficulty_score' in slice_df.columns:
+        fig, ax = plt.subplots(figsize=(9, 5))
+        fig.patch.set_facecolor('#0D0D0D')
+        ax.set_facecolor('#111')
+        ax.hist(slice_df['difficulty_score'].dropna(), bins=40,
+                color='#FF9800', edgecolor='white', linewidth=0.3, alpha=0.85)
+        ax.set_xlabel('Difficulty Score', color='white')
+        ax.set_ylabel('Count', color='white')
+        ax.set_title('Difficulty Score Distribution — All Slices', color='white', fontsize=12)
+        ax.tick_params(colors='white')
+        for spine in ax.spines.values():
+            spine.set_edgecolor('#444')
+        plt.tight_layout()
+        plt.savefig(overview_dir / 'difficulty_score_histogram.png', dpi=130,
+                    bbox_inches='tight', facecolor=fig.get_facecolor())
+        plt.close(fig)
+    print(f"  ✅ Overview plots saved to: {overview_dir}")
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 8 — Main entry point: run_error_analysis
+# ─────────────────────────────────────────────────────────────────────────────
+def run_error_analysis(results, config,
+                       top_n_slices=30,
+                       top_n_patients=10,
+                       fg_dice_weight=0.6,
+                       error_rate_weight=0.2,
+                       confidence_weight=0.2):
+    """
+    Full pipeline: build tables → rank → save CSVs → generate visualizations.
+    Call after run_inference():
+        results = run_inference(config)
+        run_error_analysis(results, config)
+    Parameters
+    ----------
+    results : dict  — returned by run_inference()
+    config  : InferenceConfig
+    top_n_slices : int  — how many hardest slices to visualize individually
+    top_n_patients : int — how many hardest patients to get summary plots
+    fg_dice_weight, error_rate_weight, confidence_weight : floats for ranking
+    """
+    patient_results = results['patients_results']
+    class_names     = config.class_names
+    num_classes     = config.num_classes
+    # Output sub-directories
+    error_dir = config.inference_dir / 'error_analysis'
+    hard_slices_dir  = error_dir / 'hard_slices'
+    patient_summaries_dir = error_dir / 'patient_summaries'
+    tables_dir = error_dir / 'tables'
+    for d in [hard_slices_dir, patient_summaries_dir, tables_dir]:
+        d.mkdir(parents=True, exist_ok=True)
+    print("\n" + "=" * 70)
+    print("ERROR ANALYSIS — Building slice & patient tables")
+    print("=" * 70)
+    # ── Step 1: build tables ──────────────────────────────────────────────
+    slice_df, patient_df = build_error_tables(patient_results, num_classes, class_names)
+    # ── Step 2: rank ──────────────────────────────────────────────────────
+    slice_df   = rank_slices(slice_df, class_names, num_classes,
+                             fg_dice_weight, error_rate_weight, confidence_weight)
+    patient_df = rank_patients(patient_df)
+    # ── Step 3: save CSVs ─────────────────────────────────────────────────
+    slice_csv   = tables_dir / 'slice_difficulty_ranking.csv'
+    patient_csv = tables_dir / 'patient_difficulty_ranking.csv'
+    slice_df.to_csv(slice_csv,   index=False)
+    patient_df.to_csv(patient_csv, index=False)
+    print(f"  ✅ Slice table  → {slice_csv}")
+    print(f"  ✅ Patient table → {patient_csv}")
+    # ── Step 4: dataset overview plots ────────────────────────────────────
+    print("\nGenerating dataset overview plots...")
+    visualize_dataset_overview(slice_df, patient_df, class_names,
+                               num_classes, error_dir)
+    # ── Step 5: hard slice visualizations ────────────────────────────────
+    print(f"\nVisualizing top-{top_n_slices} hardest slices...")
+    hard_slices = slice_df.head(top_n_slices)
+    for _, row in tqdm(hard_slices.iterrows(),
+                       total=len(hard_slices), desc="Hard slice panels"):
+        patient_id = row['patient_id']
+        slice_num  = int(row['slice_num'])
+        data  = patient_results[patient_id]
+        order = np.argsort(data['slice_indices'])
+        slices_sorted = np.array(data['slice_indices'])[order]
+        # Find position of this slice
+        pos = np.where(slices_sorted == slice_num)[0]
+        if len(pos) == 0:
+            continue
+        pos = pos[0]
+        gts   = np.array(data['ground_truths'])[order]
+        preds = np.array(data['predictions'])[order]
+        probs = np.array(data['probabilities'])[order]
+        flairs = np.array(data['flairs'])[order]
+        gt_hw    = gts[pos]
+        pred_hw  = preds[pos]
+        prob_hw  = probs[pos]
+        flair_hw = flairs[pos]
+        # Collapse one-hot GT if needed
+        if gt_hw.ndim == 3:
+            gt_hw = np.argmax(gt_hw, axis=-1)
+        rank = int(row['difficulty_rank'])
+        fname = (f"rank{rank:04d}_"
+                 f"{patient_id}_slice{slice_num:03d}"
+                 f"_dice{row['mean_fg_dice']:.3f}.png")
+        save_path = hard_slices_dir / fname
+        visualize_hard_slice(
+            flair=flair_hw,
+            gt_hw=gt_hw,
+            pred_hw=pred_hw,
+            prob_hw=prob_hw,
+            slice_metrics_row=row.to_dict(),
+            class_names=class_names,
+            num_classes=num_classes,
+            save_path=save_path,
+            rank=rank
+        )
+    print(f"  ✅ Hard slice panels → {hard_slices_dir}")
+    # ── Step 6: patient summary visualizations ────────────────────────────
+    print(f"\nGenerating top-{top_n_patients} hardest patient summaries...")
+    hard_patients = patient_df.head(top_n_patients)
+    for _, pat_row in tqdm(hard_patients.iterrows(),
+                            total=len(hard_patients), desc="Patient summaries"):
+        patient_id = pat_row['patient_id']
+        if patient_id not in patient_results:
+            continue
+        data = patient_results[patient_id]
+        slice_df_patient = slice_df[slice_df['patient_id'] == patient_id].copy()
+        rank = int(pat_row['difficulty_rank'])
+        comp = pat_row.get('composite_dice', float('nan'))
+        fname = (f"rank{rank:03d}_{patient_id}"
+                 f"_composite{comp:.3f}.png")
+        save_path = patient_summaries_dir / fname
+        visualize_patient_summary(
+            patient_id=patient_id,
+            patient_data=data,
+            slice_df_patient=slice_df_patient,
+            class_names=class_names,
+            num_classes=num_classes,
+            save_path=save_path
+        )
+    print(f"  ✅ Patient summaries → {patient_summaries_dir}")
+    # ── Step 7: print console summary ─────────────────────────────────────
+    print("\n" + "=" * 70)
+    print("ERROR ANALYSIS SUMMARY")
+    print("=" * 70)
+    print(f"\nTotal slices analysed : {len(slice_df)}")
+    print(f"Total patients         : {len(patient_df)}")
+    print(f"\nTop-10 Hardest Slices:")
+    top10_cols = ['difficulty_rank', 'slice_id', 'mean_fg_dice',
+                  'error_rate', 'mean_confidence', 'difficulty_score']
+    top10_cols = [c for c in top10_cols if c in slice_df.columns]
+    print(slice_df[top10_cols].head(10).to_string(index=False))
+    print(f"\nTop-10 Hardest Patients:")
+    fg_dice_cols = [f"{class_names[c].lower().replace(' ', '_')}_mean_dice"
+                    for c in range(1, num_classes)]
+    pat_cols = ['difficulty_rank', 'patient_id', 'n_slices', 'composite_dice'] + \
+               [c for c in fg_dice_cols if c in patient_df.columns]
+    print(patient_df[pat_cols].head(10).to_string(index=False))
+    print("\n" + "=" * 70)
+    print(f"All error analysis outputs → {error_dir}")
+    print("=" * 70 + "\n")
+    return {
+        'slice_df':   slice_df,
+        'patient_df': patient_df,
+        'error_dir':  error_dir
+    }

models/for_WMH_Vent/model_training_scripts/p4_folds_results_aggregator.py ADDED Viewed

	@@ -0,0 +1,611 @@

+"""
+P4 - All U-Net models with Adaptive Loss (WCE + UFL)
+WMH and Ventricles Segmentation with U-Net Models - Journal Paper Implementation
+Three-class segmentation: Background vs Ventricles vs Abnormal WMH
+Professional results saving and visualization for publication
+This relates to our article:
+"Deep Learning-Based Neuroanatomical Profiling Reveals Detailed Brain Changes:
+A Large-Scale Multiple Sclerosis Study"
+Features:
+- Aggregation of all inference results
+- Includes lesion-level (connected-component) metrics: sensitivity, precision,
+  F1, TP/FP/FN lesion counts (added to address reviewer R1C7)
+Authors:
+"Mahdi Bashiri Bawil, Mousa Shamsi, Abolhassan Shakeri Bavil"
+Developer:
+"Mahdi Bashiri Bawil"
+"""
+import os
+import json
+import pandas as pd
+import numpy as np
+from pathlib import Path
+import warnings
+warnings.filterwarnings('ignore')
+class ResultsAggregator:
+    """
+    Aggregates segmentation results across multiple variants and folds.
+    """
+    def __init__(self, base_dir='./'):
+        """
+        Initialize the aggregator.
+        Args:
+            base_dir: Base directory containing all results folders
+        """
+        self.base_dir = Path(base_dir)
+        self.variants = {
+            1: "unet",
+            2: "attnunet",
+            3: "dlv3unet",
+            4: "transunet"
+        }
+        self.class_names = ["Background", "Ventricles", "Abnormal_WMH"]
+        self.num_variants = 4
+        self.num_folds = 4
+    def find_results_folders(self):
+        """Find all results folders matching the naming pattern."""
+        results_folders = []
+        for variant in range(self.num_variants):
+            for fold in range(self.num_folds):
+                folder_pattern = f"results_fold_{fold}_var_{variant+1}_zscore2"
+                folder_path = self.base_dir / folder_pattern
+                if folder_path.exists():
+                    results_folders.append({
+                        'variant': variant+1,
+                        'fold': fold,
+                        'path': folder_path
+                    })
+        return results_folders
+    def load_test_metrics(self, results_folder):
+        """Load test metrics from JSON file."""
+        metrics_path = results_folder['path'] / 'inference_all_test' / 'standard_3class' / 'metrics' / 'test_metrics_complete.json'
+        if not metrics_path.exists():
+            print(f"Warning: Metrics file not found at {metrics_path}")
+            return None
+        with open(metrics_path, 'r') as f:
+            data = json.load(f)
+        return data
+    def load_training_summary(self, results_folder):
+        """Load training summary from JSON file (new format)."""
+        summary_path = results_folder['path'] / 'models' / 'standard_3class' / f"fold_{results_folder['fold']}" / 'training_summary.json'
+        if not summary_path.exists():
+            # Fallback to history.json if training_summary doesn't exist
+            return self.load_training_history(results_folder)
+        with open(summary_path, 'r') as f:
+            data = json.load(f)
+        return data
+    def load_training_history(self, results_folder):
+        """Load training history from JSON file (legacy support)."""
+        history_path = results_folder['path'] / 'models' / 'standard_3class' / f"fold_{results_folder['fold']}" / 'history.json'
+        if not history_path.exists():
+            print(f"Warning: History file not found at {history_path}")
+            return None
+        with open(history_path, 'r') as f:
+            data = json.load(f)
+        return data
+    def load_best_epoch_analysis(self, results_folder):
+        """Load best epoch analysis from JSON file (new format)."""
+        analysis_path = results_folder['path'] / 'models' / 'standard_3class' / f"fold_{results_folder['fold']}" / 'best_epoch_analysis.json'
+        if not analysis_path.exists():
+            return None
+        with open(analysis_path, 'r') as f:
+            data = json.load(f)
+        return data
+    def extract_test_metrics_row(self, results_folder, metrics_data):
+        """
+        Extract a row of test metrics for the summary dataframe.
+        Includes both voxel-level and lesion-level metrics.
+        """
+        if metrics_data is None:
+            return None
+        row = {
+            'Variant': results_folder['variant'],
+            'Variant_Name': self.variants[results_folder['variant']],
+            'Fold': results_folder['fold'],
+            'Test_Samples': metrics_data['config']['test_samples']
+        }
+        # ── Voxel-level metrics (unchanged) ─────────────────────────────────
+        for metric_name in ['dice', 'precision', 'recall', 'iou', 'specificity', 'hd95']:
+            metric_data = metrics_data['metrics'][metric_name]
+            for class_idx in range(3):
+                if class_idx != 0:
+                    row[f'{metric_name.upper()}_class_{class_idx}'] = metric_data.get(f'class_{class_idx}')
+            row[f'{metric_name.upper()}_mean'] = metric_data.get('mean')
+        # ── Lesion-level metrics (new — R1C7) ────────────────────────────────
+        lesion_data = metrics_data['metrics'].get('lesion', None)
+        if lesion_data is not None:
+            for class_idx in range(2):   # foreground classes only
+                key = f'class_{class_idx}'
+                cls = lesion_data.get(key, {})
+                # Scalar rates (averaged across patients in inference script)
+                for sk in ['lesion_sensitivity', 'lesion_precision', 'lesion_f1']:
+                    col = f'LESION_{sk.upper()}_class_{class_idx}'
+                    row[col] = cls.get(sk)
+                # Integer counts (summed across patients in inference script)
+                for ck in ['n_gt_lesions', 'n_pred_lesions', 'tp_lesions', 'fn_lesions', 'fp_lesions']:
+                    col = f'LESION_{ck.upper()}_class_{class_idx}'
+                    row[col] = cls.get(ck)
+            # Cross-class summary keys produced by aggregate_patient_metrics()
+            for sk in ['lesion_sensitivity', 'lesion_precision', 'lesion_f1']:
+                row[f'LESION_{sk.upper()}_mean'] = lesion_data.get(f'mean_{sk}')
+            for ck in ['n_gt_lesions', 'n_pred_lesions', 'tp_lesions', 'fn_lesions', 'fp_lesions']:
+                row[f'LESION_{ck.upper()}_total'] = lesion_data.get(f'total_{ck}')
+        return row
+    def extract_training_info_row(self, results_folder, training_data, best_epoch_analysis):
+        """Extract training information including best epoch details."""
+        if training_data is None:
+            return None
+        row = {
+            'Variant': results_folder['variant'],
+            'Variant_Name': self.variants[results_folder['variant']],
+            'Fold': results_folder['fold']
+        }
+        # Try to extract from training_summary.json first
+        if isinstance(training_data, dict) and 'best_epoch_selection' in training_data:
+            row['Best_Epoch'] = training_data['best_epoch_selection']['overall_best_epoch']
+            row['Composite_Score'] = training_data['best_epoch_selection']['composite_score']
+            row['Total_Epochs'] = training_data['training_config']['total_epochs']
+            # Handle valid_epochs (only for Pix2Pix variants with beta scheduling)
+            if 'valid_epochs' in training_data['best_epoch_selection']:
+                row['First_Valid_Epoch'] = training_data['best_epoch_selection']['valid_epochs']['first_valid_epoch']
+                row['Total_Valid_Epochs'] = training_data['best_epoch_selection']['valid_epochs']['total_valid_epochs']
+            else:
+                row['First_Valid_Epoch'] = 1
+                row['Total_Valid_Epochs'] = training_data['training_config']['total_epochs']
+            # Best epoch metrics
+            best_metrics = training_data['best_epoch_metrics']
+            row['Best_Epoch_Val_Loss'] = best_metrics['val_loss']
+            row['Best_Epoch_Dice_Ventricles'] = best_metrics['dice']['class_1']
+            row['Best_Epoch_Dice_Abnormal_WMH'] = best_metrics['dice'].get('class_2', None)
+            row['Best_Epoch_Dice_Mean'] = best_metrics['dice']['mean']
+            # Priority metrics
+            row['Best_Abnormal_Epoch'] = training_data['priority_metrics']['abnormal_wmh']['best_epoch']
+            row['Best_Abnormal_Dice'] = training_data['priority_metrics']['abnormal_wmh']['best_dice']
+            row['Best_Ventricles_Epoch'] = training_data['priority_metrics']['ventricles']['best_epoch']
+            row['Best_Ventricles_Dice'] = training_data['priority_metrics']['ventricles']['best_dice']
+        # Fallback to best_epoch_analysis.json
+        elif best_epoch_analysis is not None:
+            row['Best_Epoch'] = best_epoch_analysis['best_overall_epoch']
+            row['Composite_Score'] = best_epoch_analysis['composite_score']
+            row['Total_Epochs'] = best_epoch_analysis['total_epochs']
+            row['First_Valid_Epoch'] = best_epoch_analysis['first_valid_epoch']
+            row['Total_Valid_Epochs'] = best_epoch_analysis['total_valid_epochs']
+            # Best epoch metrics
+            best_metrics = best_epoch_analysis['best_epoch_metrics']
+            row['Best_Epoch_Val_Loss'] = best_metrics['val_loss']
+            row['Best_Epoch_Dice_Ventricles'] = best_metrics['dice']['class_1']
+            row['Best_Epoch_Dice_Abnormal_WMH'] = best_metrics['dice'].get('class_2', None)
+            row['Best_Epoch_Dice_Mean'] = best_metrics['dice']['mean']
+            # Priority metrics
+            row['Best_Abnormal_Epoch'] = best_epoch_analysis['best_abnormal_epoch']
+            row['Best_Abnormal_Dice'] = best_epoch_analysis['best_abnormal_dice']
+            row['Best_Ventricles_Epoch'] = best_epoch_analysis['best_ventricles_epoch']
+            row['Best_Ventricles_Dice'] = best_epoch_analysis['best_ventricles_dice']
+        # Legacy fallback to history.json
+        elif isinstance(training_data, dict) and 'val_metrics' in training_data:
+            if 'best_epoch_analysis' in training_data:
+                analysis = training_data['best_epoch_analysis']
+                row['Best_Epoch'] = analysis['best_overall_epoch']
+                row['Composite_Score'] = analysis.get('composite_score', None)
+            else:
+                # Find best validation dice
+                val_dice_list = [m['dice']['mean'] for m in training_data['val_metrics']]
+                row['Best_Epoch'] = val_dice_list.index(max(val_dice_list)) + 1
+                row['Composite_Score'] = max(val_dice_list)
+            row['Total_Epochs'] = len(training_data['val_metrics'])
+        return row
+    def create_test_metrics_summary(self):
+        """Create a comprehensive summary of test metrics."""
+        results_folders = self.find_results_folders()
+        if not results_folders:
+            print("No results folders found!")
+            return None
+        rows = []
+        for folder in results_folders:
+            metrics_data = self.load_test_metrics(folder)
+            row = self.extract_test_metrics_row(folder, metrics_data)
+            if row is not None:
+                rows.append(row)
+        df = pd.DataFrame(rows)
+        df = df.sort_values(['Variant', 'Fold']).reset_index(drop=True)
+        return df
+    def create_training_summary(self):
+        """Create a comprehensive summary of training information."""
+        results_folders = self.find_results_folders()
+        if not results_folders:
+            print("No results folders found!")
+            return None
+        rows = []
+        for folder in results_folders:
+            training_data = self.load_training_summary(folder)
+            best_epoch_analysis = self.load_best_epoch_analysis(folder)
+            row = self.extract_training_info_row(folder, training_data, best_epoch_analysis)
+            if row is not None:
+                rows.append(row)
+        df = pd.DataFrame(rows)
+        df = df.sort_values(['Variant', 'Fold']).reset_index(drop=True)
+        return df
+    def create_per_class_summary(self, test_metrics_df):
+        """
+        Create per-class summary statistics across folds for each variant.
+        Includes both voxel-level and lesion-level metrics.
+        """
+        summaries = []
+        for variant in range(self.num_variants +1):
+            variant_data = test_metrics_df[test_metrics_df['Variant'] == variant]
+            if len(variant_data) == 0:
+                continue
+            for class_idx in range(3):
+                if class_idx == 0:
+                    continue
+                class_summary = {
+                    'Variant': variant,
+                    'Variant_Name': self.variants[variant],
+                    'Class': class_idx,
+                    'Class_Name': self.class_names[class_idx]
+                }
+                # Voxel-level metrics
+                for metric in ['DICE', 'PRECISION', 'RECALL', 'IOU', 'SPECIFICITY', 'HD95']:
+                    col_name = f'{metric}_class_{class_idx}'
+                    if col_name in variant_data.columns:
+                        values = variant_data[col_name].dropna().values
+                        class_summary[f'{metric}_mean'] = np.mean(values)
+                        class_summary[f'{metric}_std']  = np.std(values)
+                        class_summary[f'{metric}_min']  = np.min(values)
+                        class_summary[f'{metric}_max']  = np.max(values)
+                # Lesion-level scalar metrics (mean ± std across folds)
+                for sk in ['LESION_SENSITIVITY', 'LESION_PRECISION', 'LESION_F1']:
+                    col_name = f'LESION_{sk}_class_{class_idx}'
+                    if col_name in variant_data.columns:
+                        values = variant_data[col_name].dropna().values
+                        class_summary[f'{sk}_mean'] = np.mean(values) if len(values) else np.nan
+                        class_summary[f'{sk}_std']  = np.std(values)  if len(values) else np.nan
+                # Lesion-level count metrics (sum across folds — total pool)
+                for ck in ['N_GT_LESIONS', 'N_PRED_LESIONS', 'TP_LESIONS', 'FN_LESIONS', 'FP_LESIONS']:
+                    col_name = f'LESION_{ck}_class_{class_idx}'
+                    if col_name in variant_data.columns:
+                        values = variant_data[col_name].dropna().values
+                        class_summary[f'LESION_{ck}_total'] = int(np.sum(values)) if len(values) else 0
+                summaries.append(class_summary)
+        df = pd.DataFrame(summaries)
+        return df
+    def create_variant_comparison(self, test_metrics_df):
+        """
+        Create a variant comparison table with mean ± std across folds.
+        Includes both voxel-level and lesion-level metrics.
+        """
+        comparisons = []
+        for variant in range(self.num_variants + 1):
+            variant_data = test_metrics_df[test_metrics_df['Variant'] == variant]
+            if len(variant_data) == 0:
+                continue
+            comparison = {
+                'Variant': variant,
+                'Variant_Name': self.variants[variant],
+                'N_Folds': len(variant_data)
+            }
+            # ── Voxel-level metrics ──────────────────────────────────────────
+            for metric in ['DICE', 'PRECISION', 'RECALL', 'IOU', 'SPECIFICITY', 'HD95']:
+                # Overall mean across classes
+                col_name = f'{metric}_mean'
+                if col_name in variant_data.columns:
+                    values = variant_data[col_name].dropna().values
+                    comparison[f'{metric}_Mean'] = np.mean(values)
+                    comparison[f'{metric}_Std']  = np.std(values)
+                # Per-class (Ventricles=1, Abnormal_WMH=2)
+                for class_idx in [1, 2]:
+                    col_name = f'{metric}_class_{class_idx}'
+                    if col_name in variant_data.columns:
+                        values = variant_data[col_name].dropna().values
+                        comparison[f'{metric}_Class{class_idx}_Mean'] = np.mean(values)
+                        comparison[f'{metric}_Class{class_idx}_Std']  = np.std(values)
+            # ── Lesion-level scalar metrics (mean ± std across folds) ────────
+            for sk_suffix in ['LESION_SENSITIVITY', 'LESION_PRECISION', 'LESION_F1']:
+                # Cross-class mean
+                col_name = f'LESION_{sk_suffix}_mean'
+                if col_name in variant_data.columns:
+                    values = variant_data[col_name].dropna().values
+                    comparison[f'{sk_suffix}_Mean'] = np.mean(values) if len(values) else np.nan
+                    comparison[f'{sk_suffix}_Std']  = np.std(values)  if len(values) else np.nan
+                # Per-class
+                for class_idx in [2]:
+                    col_name = f'LESION_{sk_suffix}_class_{class_idx}'
+                    if col_name in variant_data.columns:
+                        values = variant_data[col_name].dropna().values
+                        comparison[f'{sk_suffix}_Class{class_idx}_Mean'] = np.mean(values) if len(values) else np.nan
+                        comparison[f'{sk_suffix}_Class{class_idx}_Std']  = np.std(values)  if len(values) else np.nan
+            # ── Lesion-level count metrics (sum across folds) ────────────────
+            for ck in ['N_GT_LESIONS', 'N_PRED_LESIONS', 'TP_LESIONS', 'FN_LESIONS', 'FP_LESIONS']:
+                # Total across all classes
+                col_name = f'LESION_{ck}_total'
+                if col_name in variant_data.columns:
+                    values = variant_data[col_name].dropna().values
+                    comparison[f'LESION_{ck}_Total'] = int(np.sum(values)) if len(values) else 0
+                # Per-class totals
+                for class_idx in [2]:
+                    col_name = f'LESION_{ck}_class_{class_idx}'
+                    if col_name in variant_data.columns:
+                        values = variant_data[col_name].dropna().values
+                        comparison[f'LESION_{ck}_Class{class_idx}_Total'] = int(np.sum(values)) if len(values) else 0
+            comparisons.append(comparison)
+        df = pd.DataFrame(comparisons)
+        return df
+    def create_training_comparison(self, training_df):
+        """Create training comparison showing convergence patterns."""
+        if training_df is None:
+            return None
+        comparisons = []
+        for variant in range(self.num_variants + 1):
+            variant_data = training_df[training_df['Variant'] == variant]
+            if len(variant_data) == 0:
+                continue
+            comparison = {
+                'Variant': variant,
+                'Variant_Name': self.variants[variant],
+                'N_Folds': len(variant_data)
+            }
+            # Best epoch statistics
+            if 'Best_Epoch' in variant_data.columns:
+                comparison['Best_Epoch_Mean'] = np.mean(variant_data['Best_Epoch'].values)
+                comparison['Best_Epoch_Std']  = np.std(variant_data['Best_Epoch'].values)
+                comparison['Best_Epoch_Min']  = np.min(variant_data['Best_Epoch'].values)
+                comparison['Best_Epoch_Max']  = np.max(variant_data['Best_Epoch'].values)
+            # Composite score statistics
+            if 'Composite_Score' in variant_data.columns:
+                comparison['Composite_Score_Mean'] = np.mean(variant_data['Composite_Score'].dropna().values)
+                comparison['Composite_Score_Std']  = np.std(variant_data['Composite_Score'].dropna().values)
+            # Validation metrics at best epoch
+            for metric_col in ['Best_Epoch_Val_Loss', 'Best_Epoch_Dice_Mean',
+                              'Best_Epoch_Dice_Ventricles', 'Best_Epoch_Dice_Abnormal_WMH']:
+                if metric_col in variant_data.columns:
+                    values = variant_data[metric_col].dropna().values
+                    if len(values) > 0:
+                        comparison[f'{metric_col}_Mean'] = np.mean(values)
+                        comparison[f'{metric_col}_Std']  = np.std(values)
+            comparisons.append(comparison)
+        df = pd.DataFrame(comparisons)
+        return df
+    def generate_all_summaries(self, output_dir='./folds_results'):
+        """Generate all summary CSV files."""
+        output_path = Path(output_dir)
+        output_path.mkdir(exist_ok=True)
+        print("=" * 80)
+        print("RESULTS AGGREGATION STARTED")
+        print("=" * 80)
+        # 1. Test Metrics Summary (all variants, all folds)
+        print("\n1. Generating test metrics summary...")
+        test_metrics_df = self.create_test_metrics_summary()
+        if test_metrics_df is not None:
+            output_file = output_path / 'test_metrics_all_variants_folds.csv'
+            test_metrics_df.to_csv(output_file, index=False)
+            print(f"   ✓ Saved: {output_file}")
+            print(f"   - Shape: {test_metrics_df.shape}")
+        # 2. Training Summary
+        print("\n2. Generating training summary...")
+        training_df = self.create_training_summary()
+        if training_df is not None:
+            output_file = output_path / 'training_info_all_variants_folds.csv'
+            training_df.to_csv(output_file, index=False)
+            print(f"   ✓ Saved: {output_file}")
+            print(f"   - Shape: {training_df.shape}")
+        # 3. Per-Class Summary
+        print("\n3. Generating per-class summary...")
+        per_class_df = None
+        if test_metrics_df is not None:
+            per_class_df = self.create_per_class_summary(test_metrics_df)
+            output_file = output_path / 'per_class_summary.csv'
+            per_class_df.to_csv(output_file, index=False)
+            print(f"   ✓ Saved: {output_file}")
+            print(f"   - Shape: {per_class_df.shape}")
+        # 4. Variant Comparison (Test Metrics)
+        print("\n4. Generating variant comparison (test metrics)...")
+        variant_comparison_df = None
+        if test_metrics_df is not None:
+            variant_comparison_df = self.create_variant_comparison(test_metrics_df)
+            output_file = output_path / 'variant_comparison_test.csv'
+            variant_comparison_df.to_csv(output_file, index=False)
+            print(f"   ✓ Saved: {output_file}")
+            print(f"   - Shape: {variant_comparison_df.shape}")
+        # 5. Variant Comparison (Training)
+        print("\n5. Generating variant comparison (training)...")
+        training_comparison_df = None
+        if training_df is not None:
+            training_comparison_df = self.create_training_comparison(training_df)
+            if training_comparison_df is not None:
+                output_file = output_path / 'variant_comparison_training.csv'
+                training_comparison_df.to_csv(output_file, index=False)
+                print(f"   ✓ Saved: {output_file}")
+                print(f"   - Shape: {training_comparison_df.shape}")
+        print("\n" + "=" * 80)
+        print("AGGREGATION COMPLETE")
+        print("=" * 80)
+        return {
+            'test_metrics': test_metrics_df,
+            'training_info': training_df,
+            'per_class': per_class_df,
+            'variant_comparison_test': variant_comparison_df,
+            'variant_comparison_training': training_comparison_df
+        }
+    def print_summary_statistics(self, dfs):
+        """Print summary statistics to console."""
+        print("\n" + "=" * 80)
+        print("SUMMARY STATISTICS")
+        print("=" * 80)
+        if dfs['variant_comparison_test'] is not None:
+            # ── Voxel-level Dice ─────────────────────────────────────────────
+            print("\n📊 TEST DICE SCORES (Mean ± Std) across folds:")
+            print("-" * 80)
+            for _, row in dfs['variant_comparison_test'].iterrows():
+                print(f"\nVariant {row['Variant']}: {row['Variant_Name']}")
+                print(f"  Overall:        {row['DICE_Mean']:.4f} ± {row['DICE_Std']:.4f}")
+                print(f"  Ventricles:     {row['DICE_Class1_Mean']:.4f} ± {row['DICE_Class1_Std']:.4f}")
+                print(f"  Abnormal WMH:   {row['DICE_Class2_Mean']:.4f} ± {row['DICE_Class2_Std']:.4f}")
+            # ── Lesion-level metrics ─────────────────────────────────────────
+            lesion_cols_present = any(
+                col.startswith('LESION_') for col in dfs['variant_comparison_test'].columns
+            )
+            if lesion_cols_present:
+                print("\n\n🔬 LESION-LEVEL METRICS (Mean ± Std) across folds:")
+                print("-" * 80)
+                for _, row in dfs['variant_comparison_test'].iterrows():
+                    print(f"\nVariant {row['Variant']}: {row['Variant_Name']}")
+                    # Per-class
+                    for class_idx, class_name in [(2, 'Abnormal WMH')]:
+                        sens_col  = f'LESION_LESION_SENSITIVITY_Class{class_idx}_Mean'
+                        prec_col  = f'LESION_LESION_PRECISION_Class{class_idx}_Mean'
+                        f1_col    = f'LESION_LESION_F1_Class{class_idx}_Mean'
+                        tp_col    = f'LESION_TP_LESIONS_Class{class_idx}_Total'
+                        fp_col    = f'LESION_FP_LESIONS_Class{class_idx}_Total'
+                        fn_col    = f'LESION_FN_LESIONS_Class{class_idx}_Total'
+                        gt_col    = f'LESION_N_GT_LESIONS_Class{class_idx}_Total'
+                        print(f"  [{class_name}]")
+                        if sens_col in row:
+                            s_m  = f"{row[sens_col]:.4f}" if pd.notna(row.get(sens_col)) else 'N/A'
+                            s_s  = f"{row.get(f'LESION_LESION_SENSITIVITY_Class{class_idx}_Std', float('nan')):.4f}"
+                            p_m  = f"{row[prec_col]:.4f}" if pd.notna(row.get(prec_col)) else 'N/A'
+                            p_s  = f"{row.get(f'LESION_LESION_PRECISION_Class{class_idx}_Std', float('nan')):.4f}"
+                            f_m  = f"{row[f1_col]:.4f}"  if pd.notna(row.get(f1_col))   else 'N/A'
+                            f_s  = f"{row.get(f'LESION_LESION_F1_Class{class_idx}_Std', float('nan')):.4f}"
+                            print(f"    Sensitivity : {s_m} ± {s_s}")
+                            print(f"    Precision   : {p_m} ± {p_s}")
+                            print(f"    F1          : {f_m} ± {f_s}")
+                        if gt_col in row:
+                            print(f"    GT Lesions  : {int(row.get(gt_col, 0))}   "
+                                  f"TP: {int(row.get(tp_col, 0))}   "
+                                  f"FP: {int(row.get(fp_col, 0))}   "
+                                  f"FN: {int(row.get(fn_col, 0))}")
+        if dfs['variant_comparison_training'] is not None:
+            print("\n\n🏆 TRAINING CONVERGENCE:")
+            print("-" * 80)
+            for _, row in dfs['variant_comparison_training'].iterrows():
+                print(f"\nVariant {row['Variant']}: {row['Variant_Name']}")
+                if 'Best_Epoch_Mean' in row:
+                    print(f"  Best Epoch:     {row['Best_Epoch_Mean']:.1f} ± {row['Best_Epoch_Std']:.1f}")
+                if 'Best_Epoch_Dice_Abnormal_WMH_Mean' in row:
+                    print(f"  Val Abnormal:   {row['Best_Epoch_Dice_Abnormal_WMH_Mean']:.4f} ± {row['Best_Epoch_Dice_Abnormal_WMH_Std']:.4f}")
# Script entry point: aggregate every fold/variant result found under the
# current directory, write the CSV summaries, then echo the headline numbers.
if __name__ == "__main__":
    aggregator = ResultsAggregator(base_dir='./')

    # Write the five summary tables
    dfs = aggregator.generate_all_summaries(output_dir='./folds_results_zscore2_all')

    # Console recap of the comparison tables
    aggregator.print_summary_statistics(dfs)

    print("\n✓ All CSV files have been generated in './folds_results_zscore2_all' directory")
    print("\nGenerated files:")
    for description in (
        "  1. test_metrics_all_variants_folds.csv - Complete test metrics (voxel + lesion level)",
        "  2. training_info_all_variants_folds.csv - Training convergence info",
        "  3. per_class_summary.csv - Per-class statistics (voxel + lesion level)",
        "  4. variant_comparison_test.csv - Test metrics comparison (voxel + lesion level)",
        "  5. variant_comparison_training.csv - Training comparison",
    ):
        print(description)

models/for_WMH_Vent/model_training_scripts/p4_inference.py ADDED Viewed

	@@ -0,0 +1,1146 @@

+"""
+P4 Article - Inference Script for ventricles and WMH segmentation task
+Developer:
+Mahdi Bashiri Bawil
+"""
+import tensorflow as tf
+import os
+from collections import defaultdict
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+from tqdm import tqdm
+import json
+import nibabel as nib
+import seaborn as sns
+from sklearn.metrics import confusion_matrix, cohen_kappa_score, classification_report
+from scipy.spatial.distance import directed_hausdorff
+from scipy.ndimage import distance_transform_edt
+from scipy.spatial.distance import cdist
+from scipy.ndimage import binary_erosion
+from scipy.ndimage import label as nd_label
+from unet_model import build_unet_3class # must be updated with the actual used model for traininig
+# Import data loader
+from p4_data_loader import DataConfig, P2DataLoader
+# Error analysis
+from p4_error_analysis import run_error_analysis
+print("TensorFlow Version:", tf.__version__)
+###################### GPU Configuration ######################
# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("⚠️  No GPU detected - inference will be slow")
else:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as e:
        # Reported rather than re-raised so the script keeps running.
        print(f"GPU configuration error: {e}")
    else:
        print("✅ GPU memory growth enabled")
        print(f"   Available GPUs: {len(physical_devices)}")
+###################### Inference Configuration ######################
class InferenceConfig:
    """Configuration for inference.

    Derives the class layout and every input/output path for one
    (variant, preprocessing, class scenario, fold) combination.

    Constructing an instance has side effects: it creates the
    ``visualizations`` and ``metrics`` output directories and prints a
    configuration banner.

    Raises:
        FileNotFoundError: if the model file is not present under the
            checkpoint directory. The check runs before any directory is
            created, so a failed construction leaves nothing on disk.
    """

    def __init__(self,
                 variant: int = 5,
                 preprocessing: str = 'standard',
                 class_scenario: str = '4class',
                 fold_id: int = 0,
                 model_name: str = 'best_dice_generator.h5',
                 architecture_name: str = 'unet'
                 ):
        # Experiment identification
        self.variant = variant
        self.preprocessing = preprocessing
        self.class_scenario = class_scenario
        self.fold_id = fold_id
        self.model_name = model_name
        self.architecture_name = architecture_name

        # Number of classes: anything other than '3class' is treated as 4-class
        self.num_classes = 3 if class_scenario == '3class' else 4

        # Class names
        if self.num_classes == 4:
            self.class_names = ['Background', 'Ventricles', 'Normal_WMH', 'Abnormal_WMH']
        else:
            self.class_names = ['Background', 'Ventricles', 'Abnormal_WMH']

        # Image dimensions
        self.batch_size = 1  # Use batch_size=1 for inference
        self.img_width = 256
        self.img_height = 256

        # Paths
        self.results_dir = Path(f"results_fold_{fold_id}_var_{variant}_zscore2")
        self.models_dir = self.results_dir / "models" / f"{preprocessing}_{class_scenario}"
        self.checkpoint_dir = self.models_dir / f"fold_{fold_id}"

        # Output directories
        self.inference_dir = self.results_dir / "inference_all_test" / f"{preprocessing}_{class_scenario}"
        self.visualizations_dir = self.inference_dir / "visualizations"
        self.metrics_dir = self.inference_dir / "metrics"

        # Model path
        self.model_path = self.checkpoint_dir / self.model_name

        # Validate first: do not leave empty output trees behind when the
        # requested model does not exist.
        if not self.model_path.exists():
            raise FileNotFoundError(f"Model not found: {self.model_path}")

        # Create directories
        self.visualizations_dir.mkdir(parents=True, exist_ok=True)
        self.metrics_dir.mkdir(parents=True, exist_ok=True)

        print(f"\n{'='*70}")
        print("INFERENCE CONFIGURATION")
        print(f"{'='*70}")
        print(f"Variant: {self.variant}")
        print(f"Preprocessing: {self.preprocessing}")
        print(f"Class scenario: {self.class_scenario} ({self.num_classes} classes)")
        print(f"Fold: {self.fold_id}")
        print(f"Architecture: {self.architecture_name}")
        print(f"Model: {self.model_name}")
        print(f"Model path: {self.model_path}")
        print(f"Output directory: {self.inference_dir}")
        print(f"{'='*70}\n")
+###################### Utility Functions ######################
def prepare_input(paired_input):
    """
    Return the FLAIR part of a side-by-side FLAIR + mask batch.

    Args:
        paired_input: 4-D batch indexed as (batch, rows, columns, channels);
            the first 256 columns are taken to be the FLAIR image
            (e.g. a (bs, 256, 512, 1) FLAIR|mask pair).
    Returns:
        The first 256 columns of ``paired_input``. No rescaling is applied
        here; values are returned as they were passed in.
    """
    flair_columns = slice(None, 256)  # left half of the paired image
    return paired_input[:, :, flair_columns, :]
def compute_hd95(mask1, mask2):
    """
    Compute 95th percentile Hausdorff Distance between two binary masks.

    The distance is measured between the *surfaces* of the two masks, where a
    surface pixel is a foreground pixel removed by one step of binary erosion
    (the same definition used by ``compute_hd95_3d``). The previous
    implementation took the distance transform of the inverted mask, which is
    zero everywhere inside the mask, so its ``<= 1`` test selected every
    foreground pixel instead of the boundary.

    Args:
        mask1: Binary mask 1
        mask2: Binary mask 2 (same shape as ``mask1``)
    Returns:
        HD95 value in pixels, or NaN when either mask is empty.
    """
    region1 = np.asarray(mask1).astype(bool)
    region2 = np.asarray(mask2).astype(bool)
    if not region1.any() or not region2.any():
        return np.nan

    # Surface = mask minus its erosion
    surface1 = region1 & ~binary_erosion(region1)
    surface2 = region2 & ~binary_erosion(region2)

    coords1 = np.argwhere(surface1)
    coords2 = np.argwhere(surface2)
    if len(coords1) == 0 or len(coords2) == 0:
        return np.nan

    # One pairwise matrix serves both directions: row minima are
    # surface1 -> surface2, column minima are surface2 -> surface1.
    pairwise = cdist(coords1, coords2, metric='euclidean')
    all_distances = np.concatenate([pairwise.min(axis=1), pairwise.min(axis=0)])

    # Return 95th percentile
    return np.percentile(all_distances, 95)
def compute_hd95_3d(mask1, mask2):
    """
    Compute 95th percentile Hausdorff Distance for 3D volume.

    Only surface voxels (mask minus its binary erosion) take part. Nearest
    surface-to-surface distances are obtained with KD-tree queries, so the
    result is exact and reproducible: there is no random subsampling of large
    surfaces and no dense pairwise distance matrix is allocated.

    Args:
        mask1: Binary mask (N, H, W)
        mask2: Binary mask (N, H, W)
    Returns:
        HD95 value in pixels, or NaN when either mask is empty.
    """
    # Local import: the module only imports cdist from scipy.spatial.distance.
    from scipy.spatial import cKDTree

    volume1 = np.asarray(mask1).astype(bool)
    volume2 = np.asarray(mask2).astype(bool)
    if not volume1.any() or not volume2.any():
        return np.nan

    # Surface = original mask minus eroded mask
    surface1 = volume1 & ~binary_erosion(volume1)
    surface2 = volume2 & ~binary_erosion(volume2)

    coords1 = np.argwhere(surface1)
    coords2 = np.argwhere(surface2)
    if len(coords1) == 0 or len(coords2) == 0:
        return np.nan

    # Distance from every surface voxel to the closest voxel of the other surface
    distances1, _ = cKDTree(coords2).query(coords1, k=1)
    distances2, _ = cKDTree(coords1).query(coords2, k=1)

    all_distances = np.concatenate([distances1, distances2])

    # Return 95th percentile
    return np.percentile(all_distances, 95)
def compute_lesion_level_metrics(gt_volume, pred_volume, iou_threshold=0.1):
    """
    Compute lesion-level (instance-level) metrics by treating each connected
    component as an individual lesion.

    A GT lesion is DETECTED if its IoU with any single predicted component
    reaches ``iou_threshold``. A predicted component is MATCHED if it reaches
    the threshold with at least one GT lesion; every other predicted
    component is a false-positive lesion.

    Args:
        gt_volume   : binary 3-D numpy array (S, H, W) — ground truth for ONE class
        pred_volume : binary 3-D numpy array (S, H, W) — prediction for ONE class
        iou_threshold: minimum IoU to count a GT lesion as detected (default 0.1)
    Returns:
        dict with keys:
            n_gt_lesions      : total number of GT lesions
            n_pred_lesions    : total number of predicted lesion clusters
            tp_lesions        : GT lesions that were detected
            fn_lesions        : GT lesions that were missed
            fp_lesions        : predicted clusters matched to no GT lesion
            lesion_sensitivity: tp_lesions / n_gt_lesions (0.0 when there are no GT lesions)
            lesion_precision  : matched predicted clusters / n_pred_lesions
                                (0.0 when nothing was predicted)
            lesion_f1         : harmonic mean of lesion sensitivity and precision

    Note:
        Precision is computed from the number of matched *predicted*
        components, consistently with ``fp_lesions``. Dividing the number of
        detected GT lesions by ``n_pred_lesions`` can exceed 1 when a single
        predicted component covers several GT lesions.
    """
    gt_bin = gt_volume.astype(bool)
    pred_bin = pred_volume.astype(bool)

    # Label connected components
    gt_labeled, n_gt = nd_label(gt_bin)
    pred_labeled, n_pred = nd_label(pred_bin)

    # Component sizes indexed by label id (index 0 is the background)
    gt_sizes = np.bincount(gt_labeled.ravel(), minlength=n_gt + 1)
    pred_sizes = np.bincount(pred_labeled.ravel(), minlength=n_pred + 1)

    tp_lesions = 0
    detected_pred_ids = set()
    for gt_id in range(1, n_gt + 1):
        # Voxel overlap of this GT lesion with every predicted component
        overlap = np.bincount(pred_labeled[gt_labeled == gt_id], minlength=n_pred + 1)
        overlap[0] = 0  # background is not a lesion
        candidate_ids = np.nonzero(overlap)[0]
        intersection = overlap[candidate_ids]
        union = gt_sizes[gt_id] + pred_sizes[candidate_ids] - intersection
        iou = intersection / (union + 1e-7)
        hits = candidate_ids[iou >= iou_threshold]
        if hits.size:
            tp_lesions += 1
            detected_pred_ids.update(int(pred_id) for pred_id in hits)

    matched_pred = len(detected_pred_ids)
    fn_lesions = n_gt - tp_lesions
    fp_lesions = n_pred - matched_pred

    lesion_sensitivity = tp_lesions / (n_gt + 1e-7)
    lesion_precision = matched_pred / (n_pred + 1e-7) if n_pred > 0 else 0.0
    lesion_f1 = (2 * lesion_sensitivity * lesion_precision /
                 (lesion_sensitivity + lesion_precision + 1e-7))

    return {
        'n_gt_lesions'       : int(n_gt),
        'n_pred_lesions'     : int(n_pred),
        'tp_lesions'         : int(tp_lesions),
        'fn_lesions'         : int(fn_lesions),
        'fp_lesions'         : int(fp_lesions),
        'lesion_sensitivity' : float(lesion_sensitivity),
        'lesion_precision'   : float(lesion_precision),
        'lesion_f1'          : float(lesion_f1),
    }
def compute_metrics_from_predictions(y_true, y_pred, num_classes, exclude_class=None):
    """
    Compute comprehensive metrics from predictions.

    Per-class boolean masks are built directly with NumPy comparisons. This
    yields the same counts as one-hot encoding the label maps, without
    materialising two float tensors of size N*H*W*num_classes.

    Args:
        y_true: Ground truth class labels (N, H, W)
        y_pred: Predicted class labels (N, H, W)
        num_classes: Number of classes
        exclude_class: Class to exclude from metrics (e.g., 2 for Normal_WMH in 4-class)
    Returns:
        Dictionary with one sub-dictionary per voxel-level metric
        ('dice', 'precision', 'recall', 'iou', 'specificity', 'hd95', 'TP'),
        each holding ``class_<idx>`` entries plus ``'mean'`` over the
        evaluated classes, and a ``'lesion'`` sub-dictionary with
        connected-component metrics for every evaluated class above 1.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)

    metric_names = ['dice', 'precision', 'recall', 'iou', 'specificity', 'hd95', 'TP']
    metrics = {name: {} for name in metric_names}
    classes_to_evaluate = [c for c in range(num_classes) if c != exclude_class]

    for class_idx in classes_to_evaluate:
        key = f'class_{class_idx}'

        # Binary masks for this class, in the original volume shape
        true_mask = true_labels == class_idx
        pred_mask = pred_labels == class_idx

        # Confusion matrix elements
        TP = np.sum(true_mask & pred_mask)
        FP = np.sum(~true_mask & pred_mask)
        FN = np.sum(true_mask & ~pred_mask)
        TN = np.sum(~true_mask & ~pred_mask)

        # The 1e-7 terms keep the ratios finite when a class is absent.
        # Dice Score: 2*TP / (2*TP + FP + FN)
        dice = (2 * TP) / (2 * TP + FP + FN + 1e-7)
        # Precision: TP / (TP + FP)
        precision = TP / (TP + FP + 1e-7)
        # Recall (Sensitivity): TP / (TP + FN)
        recall = TP / (TP + FN + 1e-7)
        # IoU (Jaccard): TP / (TP + FP + FN)
        iou = TP / (TP + FP + FN + 1e-7)
        # Specificity: TN / (TN + FP)
        specificity = TN / (TN + FP + 1e-7)

        # HD95 on the entire volume (all samples combined)
        hd95_value = compute_hd95_3d(true_mask, pred_mask)

        metrics['dice'][key] = float(dice)
        metrics['precision'][key] = float(precision)
        metrics['recall'][key] = float(recall)
        metrics['iou'][key] = float(iou)
        metrics['specificity'][key] = float(specificity)
        metrics['hd95'][key] = float(hd95_value)
        metrics['TP'][key] = float(TP)

    # Mean over the evaluated classes (the excluded class never got an entry)
    for metric_name in metric_names:
        metrics[metric_name]['mean'] = np.mean(list(metrics[metric_name].values()))

    # --- Lesion-level metrics (connected-component analysis) ---
    metrics['lesion'] = {}
    for class_idx in classes_to_evaluate:
        if class_idx <= 1:   # skip background and ventricles
            continue
        metrics['lesion'][f'class_{class_idx}'] = compute_lesion_level_metrics(
            true_labels == class_idx, pred_labels == class_idx, iou_threshold=0.1
        )

    return metrics
+# def aggregate_patient_metrics(per_patient_metrics, num_classes):
+#     """
+#     Returns both a flat structure (compatible with original overall_metrics)
+#     and an extended structure with std/n for richer reporting.
+#     """
+#     flat_metrics = {m: {} for m in ['dice', 'precision', 'recall', 'iou', 'specificity', 'hd95', 'TP']}
+#     rich_metrics = {m: {} for m in ['dice', 'precision', 'recall', 'iou', 'specificity', 'hd95', 'TP']}
+#     metric_names = ['dice', 'precision', 'recall', 'iou', 'specificity', 'hd95', 'TP']
+#     for metric_name in metric_names:
+#         for class_idx in range(num_classes):
+#             if class_idx == 0: continue
+#             key = f'class_{class_idx}'
+#             values = [
+#                 per_patient_metrics[pid][metric_name][key]
+#                 for pid in per_patient_metrics
+#                 if key in per_patient_metrics[pid][metric_name]
+#                 and not np.isnan(per_patient_metrics[pid][metric_name][key])
+#             ]
+#             TP_values = [
+#                 per_patient_metrics[pid]['TP'][key]
+#                 for pid in per_patient_metrics
+#                 if key in per_patient_metrics[pid]['TP']
+#                 and not np.isnan(per_patient_metrics[pid]['TP'][key])
+#             ]
+#             weighted_mean_values = np.sum((np.array(values) * np.array(TP_values)) / np.sum(np.array(TP_values)))
+#             mean_val = float(np.mean(values)) if values else np.nan
+#             std_val  = float(np.std(values))  if values else np.nan
+#             # Flat: backward compatible with all existing print/save code
+#             flat_metrics[metric_name][key] = weighted_mean_values if metric_name != 'hd95' else mean_val
+#             # Rich: for extended reporting
+#             rich_metrics[metric_name][key] = {
+#                 'mean': mean_val,
+#                 'std':  std_val,
+#                 'n':    len(values)
+#             }
+#         # Mean across classes — same for both
+#         class_means = [
+#             flat_metrics[metric_name][f'class_{c}']
+#             for c in range(num_classes)
+#             if c!=0 and not np.isnan(flat_metrics[metric_name][f'class_{c}'])
+#         ]
+#         mean_across_classes = float(np.mean(class_means)) if class_means else np.nan
+#         flat_metrics[metric_name]['mean'] = mean_across_classes
+#         rich_metrics[metric_name]['mean'] = mean_across_classes
+#     return flat_metrics, rich_metrics
+def aggregate_patient_metrics(per_patient_metrics, num_classes):
+    """
+    Returns both a flat structure (compatible with original overall_metrics)
+    and an extended structure with std/n for richer reporting.
+    Includes lesion-level metrics (connected-component analysis):
+        - lesion_sensitivity : mean across patients of (tp_lesions / n_gt_lesions)
+        - lesion_precision   : mean across patients of (tp_lesions / n_pred_lesions)
+        - lesion_f1          : mean across patients of harmonic mean of the above
+        - n_gt_lesions       : total GT lesions summed across all patients
+        - n_pred_lesions     : total predicted lesion clusters summed across all patients
+        - tp_lesions         : total TP lesions summed across all patients
+        - fn_lesions         : total FN lesions summed across all patients
+        - fp_lesions         : total FP lesions summed across all patients
+    """
+    # ── Voxel-level metrics (unchanged) ─────────────────────────────────────
+    voxel_metric_names = ['dice', 'precision', 'recall', 'iou', 'specificity', 'hd95', 'TP']
+    flat_metrics = {m: {} for m in voxel_metric_names}
+    rich_metrics = {m: {} for m in voxel_metric_names}
+    for metric_name in voxel_metric_names:
+        for class_idx in range(num_classes):
+            if class_idx == 0:
+                continue
+            key = f'class_{class_idx}'
+            values = [
+                per_patient_metrics[pid][metric_name][key]
+                for pid in per_patient_metrics
+                if key in per_patient_metrics[pid][metric_name]
+                and not np.isnan(per_patient_metrics[pid][metric_name][key])
+            ]
+            TP_values = [
+                per_patient_metrics[pid]['TP'][key]
+                for pid in per_patient_metrics
+                if key in per_patient_metrics[pid]['TP']
+                and not np.isnan(per_patient_metrics[pid]['TP'][key])
+            ]
+            weighted_mean_values = np.sum(
+                (np.array(values) * np.array(TP_values)) / np.sum(np.array(TP_values))
+            )
+            mean_val = float(np.mean(values)) if values else np.nan
+            std_val  = float(np.std(values))  if values else np.nan
+            flat_metrics[metric_name][key] = weighted_mean_values if metric_name != 'hd95' else mean_val
+            rich_metrics[metric_name][key] = {
+                'mean': mean_val,
+                'std':  std_val,
+                'n':    len(values)
+            }
+        # Mean across classes
+        class_means = [
+            flat_metrics[metric_name][f'class_{c}']
+            for c in range(num_classes)
+            if c != 0 and not np.isnan(flat_metrics[metric_name][f'class_{c}'])
+        ]
+        mean_across_classes = float(np.mean(class_means)) if class_means else np.nan
+        flat_metrics[metric_name]['mean'] = mean_across_classes
+        rich_metrics[metric_name]['mean'] = mean_across_classes
+    # ── Lesion-level metrics (new) ───────────────────────────────────────────
+    # Scalar fields: averaged across patients (mean ± std)
+    lesion_scalar_keys = ['lesion_sensitivity', 'lesion_precision', 'lesion_f1']
+    # Count fields: summed across patients (total pool)
+    lesion_count_keys  = ['n_gt_lesions', 'n_pred_lesions', 'tp_lesions', 'fn_lesions', 'fp_lesions']
+    flat_metrics['lesion'] = {}
+    rich_metrics['lesion'] = {}
+    for class_idx in range(num_classes):
+        if class_idx <= 1:   # skip background and ventricles
+            continue
+        key = f'class_{class_idx}'
+        flat_metrics['lesion'][key] = {}
+        rich_metrics['lesion'][key] = {}
+        # --- Scalar metrics: mean ± std across patients ---
+        for sk in lesion_scalar_keys:
+            vals = [
+                per_patient_metrics[pid]['lesion'][key][sk]
+                for pid in per_patient_metrics
+                if 'lesion' in per_patient_metrics[pid]
+                and key in per_patient_metrics[pid]['lesion']
+            ]
+            mean_val = float(np.mean(vals)) if vals else np.nan
+            std_val  = float(np.std(vals))  if vals else np.nan
+            flat_metrics['lesion'][key][sk] = mean_val
+            rich_metrics['lesion'][key][sk] = {
+                'mean': mean_val,
+                'std':  std_val,
+                'n':    len(vals)
+            }
+        # --- Count metrics: sum across patients ---
+        for ck in lesion_count_keys:
+            vals = [
+                per_patient_metrics[pid]['lesion'][key][ck]
+                for pid in per_patient_metrics
+                if 'lesion' in per_patient_metrics[pid]
+                and key in per_patient_metrics[pid]['lesion']
+            ]
+            flat_metrics['lesion'][key][ck] = int(np.sum(vals)) if vals else 0
+            rich_metrics['lesion'][key][ck] = int(np.sum(vals)) if vals else 0
+    # Mean lesion scalars across foreground classes
+    for sk in lesion_scalar_keys:
+        class_vals = [
+            flat_metrics['lesion'][f'class_{c}'][sk]
+            for c in range(num_classes)
+            if c > 1 and not np.isnan(flat_metrics['lesion'][f'class_{c}'][sk])
+        ]
+        mean_across = float(np.mean(class_vals)) if class_vals else np.nan
+        flat_metrics['lesion'][f'mean_{sk}'] = mean_across
+        rich_metrics['lesion'][f'mean_{sk}'] = mean_across
+    # Summed counts across foreground classes
+    for ck in lesion_count_keys:
+        flat_metrics['lesion'][f'total_{ck}'] = int(np.sum([
+            flat_metrics['lesion'][f'class_{c}'][ck]
+            for c in range(num_classes) if c > 1
+        ]))
+        rich_metrics['lesion'][f'total_{ck}'] = flat_metrics['lesion'][f'total_{ck}']
+    return flat_metrics, rich_metrics
+###################### Original Visualization Functions ######################
+def visualize_prediction(flair, ground_truth, prediction,
+                        probability_map, save_path,
+                        sample_id, num_classes):
+    """
+    Create comprehensive visualization of prediction
+    Args:
+        flair: Input FLAIR image (H, W)
+        ground_truth: Ground truth mask (H, W)
+        prediction: Predicted mask (H, W)
+        probability_map: Max probability map (H, W)
+        save_path: Path to save figure
+        sample_id: Sample identifier
+        num_classes: Number of classes
+    """
+    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
+    # Input FLAIR
+    axes[0, 0].imshow(flair, cmap='gray')
+    axes[0, 0].set_title('Input FLAIR', fontsize=14, fontweight='bold')
+    axes[0, 0].axis('off')
+    # Ground truth
+    im1 = axes[0, 1].imshow(ground_truth, cmap='jet', vmin=0, vmax=num_classes-1)
+    axes[0, 1].set_title('Ground Truth', fontsize=14, fontweight='bold')
+    axes[0, 1].axis('off')
+    plt.colorbar(im1, ax=axes[0, 1], fraction=0.046, pad=0.04)
+    # Prediction
+    im2 = axes[0, 2].imshow(prediction, cmap='jet', vmin=0, vmax=num_classes-1)
+    axes[0, 2].set_title('Prediction', fontsize=14, fontweight='bold')
+    axes[0, 2].axis('off')
+    plt.colorbar(im2, ax=axes[0, 2], fraction=0.046, pad=0.04)
+    # Max probability
+    im3 = axes[1, 0].imshow(probability_map, cmap='viridis', vmin=0, vmax=1)
+    axes[1, 0].set_title('Prediction Confidence', fontsize=14, fontweight='bold')
+    axes[1, 0].axis('off')
+    plt.colorbar(im3, ax=axes[1, 0], fraction=0.046, pad=0.04)
+    # Error map
+    error_map = (prediction != ground_truth).astype(float)
+    im4 = axes[1, 1].imshow(error_map, cmap='Reds', vmin=0, vmax=1)
+    axes[1, 1].set_title('Error Map (Red=Wrong)', fontsize=14, fontweight='bold')
+    axes[1, 1].axis('off')
+    plt.colorbar(im4, ax=axes[1, 1], fraction=0.046, pad=0.04)
+    # Overlay: FLAIR + Prediction contours
+    axes[1, 2].imshow(flair, cmap='gray')
+    # Create contours for each class
+    from scipy import ndimage
+    for class_idx in range(1, num_classes):  # Skip background
+        class_mask = (prediction == class_idx)
+        contours = class_mask ^ ndimage.binary_erosion(class_mask)
+        if np.any(contours):
+            axes[1, 2].contour(contours, colors=[plt.cm.jet(class_idx/(num_classes-1))], linewidths=1.5)
+    axes[1, 2].set_title('FLAIR + Prediction Overlay', fontsize=14, fontweight='bold')
+    axes[1, 2].axis('off')
+    plt.suptitle(f'Sample: {sample_id}', fontsize=16, fontweight='bold', y=0.98)
+    plt.tight_layout()
+    plt.savefig(save_path, dpi=150, bbox_inches='tight')
+    plt.close()
+def visualize_prediction_short(flair, ground_truth, prediction,
+                        probability_map, save_path,
+                        sample_id, num_classes):
+    """
+    Create comprehensive visualization of prediction
+    Args:
+        flair: Input FLAIR image (H, W)
+        ground_truth: Ground truth mask (H, W)
+        prediction: Predicted mask (H, W)
+        probability_map: Max probability map (H, W)
+        save_path: Path to save figure
+        sample_id: Sample identifier
+        num_classes: Number of classes
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(6, 12))
+    cmap = plt.cm.jet
+    flair_norm = (flair - flair.min()) / (flair.max() - flair.min() + 1e-8)
+    flair_rgb = np.stack([flair_norm] * 3, axis=-1)
+    for ax, mask, title in zip(axes, [ground_truth, prediction], ['Ground Truth Overlay', 'Prediction Overlay']):
+        mask_rgb = cmap(mask / (num_classes - 1))[..., :3]  # (H, W, 3)
+        foreground = mask > 0
+        alpha = np.where(foreground, 0.6, 0.0)[..., np.newaxis]  # fade non-background
+        blended = flair_rgb * (1 - alpha) + mask_rgb * alpha
+        ax.imshow(blended)
+        # ax.set_title(title, fontsize=14, fontweight='bold')
+        ax.axis('off')
+    # Shared colorbar
+    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=0, vmax=num_classes - 1))
+    sm.set_array([])
+    # fig.colorbar(sm, ax=axes.ravel().tolist(), fraction=0.02, pad=0.04)
+    # plt.suptitle(f'Sample: {sample_id}', fontsize=16, fontweight='bold')
+    plt.tight_layout()
+    try:
+        plt.savefig(save_path, dpi=150, bbox_inches='tight')
+    except:
+        print(f"\n Unsaved image: {save_path}")
+    plt.close()
+def save_prediction_as_nifti(prediction, save_path, reference_nifti=None):
+    """
+    Save prediction as NIfTI file
+    Args:
+        prediction: Prediction array (H, W) or (H, W, D)
+        save_path: Path to save NIfTI file
+        reference_nifti: Optional reference NIfTI for header info
+    """
+    if reference_nifti is not None:
+        # Use reference header
+        nifti_img = nib.Nifti1Image(prediction.astype(np.uint8), reference_nifti.affine, reference_nifti.header)
+    else:
+        # Create new NIfTI with identity affine
+        nifti_img = nib.Nifti1Image(prediction.astype(np.uint8), np.eye(4))
+    nib.save(nifti_img, save_path)
+###################### Post-processing Function ######################
+def post_process_pred(pred_classes, num_classes=3, min_object_size=5, closing_kernel_size=2):
+    """
+    Post-process a single 2-D multi-class prediction slice.
+    Input
+    -----
+    pred_classes      : np.ndarray of shape (H, W) — integer class labels
+                        produced by tf.argmax(...).numpy()[0] inside the
+                        inference loop (one slice at a time).
+    num_classes       : 3  → classes are 0=BG, 1=Vent, 2=AbWMH
+                        4  → classes are 0=BG, 1=Vent, 2=NormWMH, 3=AbWMH
+    min_object_size   : connected components smaller than this (pixels) are
+                        removed after morphological cleaning. Default 5.
+    closing_kernel_size: radius of the disk used for binary_closing. Default 2.
+    Output
+    ------
+    post_pred : np.ndarray of shape (H, W), same dtype as pred_classes,
+                with cleaned and overlap-resolved integer class labels.
+    Processing pipeline (per class)
+    --------------------------------
+    1. Extract binary mask for each foreground class from the label map.
+    2. Apply binary_closing  → fill small holes / bridge tiny gaps.
+    3. Apply remove_small_objects → discard isolated noise specks.
+    4. Resolve overlaps by anatomical priority:
+           Ventricles  >  Normal WMH  >  Abnormal WMH
+       (a higher-priority class always wins contested pixels)
+    5. Reconstruct the integer label map from the cleaned binary masks.
+    """
+    from skimage.morphology import remove_small_objects, binary_erosion, binary_closing, disk, binary_dilation
+    kernel = disk(closing_kernel_size)
+    def clean(mask):
+        """Apply closing + small-object removal to a single binary mask."""
+        if not mask.any():
+            return mask
+        mask = binary_closing(mask, kernel)
+        # mask = binary_erosion(mask, disk(1))
+        mask = remove_small_objects(mask, min_size=min_object_size)
+        return mask
+    # ── 1. Extract per-class binary masks from the 2-D label map ────────────
+    vent_mask  = (pred_classes == 1)
+    if num_classes == 4:
+        nwmh_mask  = (pred_classes == 2)
+        abwmh_mask = (pred_classes == 3)
+    else:
+        # 3-class scenario: no Normal WMH, AbWMH is class 2
+        nwmh_mask  = np.zeros_like(vent_mask)
+        abwmh_mask = (pred_classes == 2)
+    # ── 2-3. Morphological cleaning per class ───────────────────────────────
+    vent_mask  = clean(vent_mask)
+    nwmh_mask  = clean(nwmh_mask)
+    abwmh_mask = clean(abwmh_mask)
+    # ── 4. Resolve overlaps: higher-priority mask wins ───────────────────────
+    # Ventricles > Normal WMH > Abnormal WMH
+    nwmh_mask  = nwmh_mask  & ~vent_mask   # NormWMH cannot overlap Vent
+    abwmh_mask = abwmh_mask & ~vent_mask   # AbWMH   cannot overlap Vent
+    abwmh_mask = abwmh_mask & ~nwmh_mask   # AbWMH   cannot overlap NormWMH
+    # ── 5. Reconstruct the integer label map ─────────────────────────────────
+    post_pred = np.zeros_like(pred_classes)   # background = 0
+    post_pred[vent_mask] = 1
+    if num_classes == 4:
+        post_pred[nwmh_mask]  = 2
+        post_pred[abwmh_mask] = 3
+    else:
+        post_pred[abwmh_mask] = 2
+    return post_pred
+###################### Main Inference Function ######################
+def run_inference(config: InferenceConfig):
+    """
+    Main inference function
+    Args:
+        config: InferenceConfig object
+    Returns:
+        Dictionary containing all predictions and metrics
+    """
+    print("\n" + "="*70)
+    print(f"RUNNING INFERENCE")
+    print("="*70)
+    # Initialize data loader
+    data_config = DataConfig()
+    data_loader = P2DataLoader(data_config)
+    # Load test dataset
+    print("Loading test data...")
+    test_dataset = data_loader.create_dataset_for_fold(
+        fold_id=config.fold_id,
+        split='test',
+        preprocessing=config.preprocessing,
+        class_scenario=config.class_scenario,
+        batch_size=config.batch_size,
+        shuffle=False
+    )
+    # Get dataset size
+    test_size = tf.data.experimental.cardinality(test_dataset).numpy()
+    if test_size < 0:
+        test_size = sum(1 for _ in test_dataset)
+        test_dataset = data_loader.create_dataset_for_fold(
+            fold_id=config.fold_id, split='test',
+            preprocessing=config.preprocessing,
+            class_scenario=config.class_scenario,
+            batch_size=config.batch_size, shuffle=False
+        )
+    print(f"Test samples: {test_size}\n")
+    # Load model
+    print(f"Loading model from: {config.model_path}")
+    try:
+        if config.architecture_name == 'unet':
+            from unet_model import build_unet_3class as build_specific_3class # must be updated with the actual used model for traininig
+        elif config.architecture_name == 'attnunet':
+            from attn_unet_model import build_attention_unet_3class as build_specific_3class
+        elif config.architecture_name == 'dlv3unet':
+            from dlv3_unet_model_GN import build_deeplabv3_unet_3class as build_specific_3class
+        elif config.architecture_name == 'transunet':
+            from trans_unet_model import build_trans_unet_3class as build_specific_3class
+        else:
+            print(f"❌ Error loading model: Invalid Model Name")
+            raise
+        # Build model architecture first
+        generator = build_specific_3class(
+            input_shape=(256, 256, 1),
+            num_classes=config.num_classes
+        )
+        # Load weights
+        generator.load_weights(str(config.model_path))
+        print("✅ Model loaded successfully\n")
+    except Exception as e:
+        print(f"❌ Error loading model: {e}")
+        raise
+    # Initialize storage - keyed by patient ID
+    patient_results = defaultdict(lambda: {
+        'predictions': [],
+        'ground_truths': [],
+        'probabilities': [],
+        'flairs': [],
+        'slice_indices': []
+    })
+    sample_ids = []
+    # Run inference
+    print("Running inference on test set...")
+    test_bar = tqdm(test_dataset, total=test_size, desc="Inference")
+    for idx, (paired_input, target_mask, patient_id_tensor, slice_num_tensor) in enumerate(test_bar):
+        patient_id = patient_id_tensor.numpy()[0].decode('utf-8')  # batch dim + bytes→str
+        slice_num  = int(slice_num_tensor.numpy()[0])
+        sample_ids.append(f"{patient_id}_slice_{slice_num:03d}")
+        # Prepare input
+        flair_normalized = prepare_input(paired_input)
+        # Generate prediction
+        prediction_softmax = generator(flair_normalized, training=False)
+        # Convert to class labels
+        pred_classes = tf.argmax(prediction_softmax, axis=-1).numpy()[0]
+        max_prob = tf.reduce_max(prediction_softmax, axis=-1).numpy()[0]
+        ground_truth = target_mask.numpy()[0]
+        flair = flair_normalized.numpy()[0, :, :, 0]
+        # Post-process the predictions
+        # pred_classes_post = post_process_pred(pred_classes, num_classes=config.num_classes)
+        # Store per-patient
+        patient_results[patient_id]['predictions'].append(pred_classes)
+        patient_results[patient_id]['ground_truths'].append(ground_truth)
+        patient_results[patient_id]['probabilities'].append(max_prob)
+        patient_results[patient_id]['flairs'].append(flair)
+        patient_results[patient_id]['slice_indices'].append(slice_num)
+        # Create visualization
+        if idx % 10 == 0 or True:  # Visualize every 10th sample
+            # viz_path = config.visualizations_dir / f"visualization_{idx:04d}.png"
+            viz_path = config.visualizations_dir / f"{sample_ids[-1]}.png"
+            visualize_prediction_short(
+                flair, ground_truth, pred_classes,
+                max_prob, viz_path,
+                sample_ids[-1], config.num_classes
+            )
+    print("\n✅ Inference complete!\n")
+    # Compute overall metrics
+    print("Computing metrics...")
+    exclude_class = None
+    per_patient_metrics = {}
+    for patient_id, data in patient_results.items():
+        # Sort slices by anatomical order
+        order = np.argsort(data['slice_indices'])
+        gt_volume   = np.array(data['ground_truths'])[order]    # (S, H, W)
+        pred_volume = np.array(data['predictions'])[order]      # (S, H, W)
+        per_patient_metrics[patient_id] = compute_metrics_from_predictions(
+            gt_volume,
+            pred_volume,
+            config.num_classes
+        )
+        print(f"\nPatint_id : {patient_id} , Stats: {per_patient_metrics[patient_id]}\n")
+        pm = per_patient_metrics[patient_id]
+        print(f"\nPatient_id: {patient_id}")
+        print(f"  Voxel  — Dice: { {k: round(v,4) for k,v in pm['dice'].items()} }")
+        if 'lesion' in pm:
+            for cls, ld in pm['lesion'].items():
+                print(f"  Lesion [{cls}] — "
+                    f"GT:{ld['n_gt_lesions']} Pred:{ld['n_pred_lesions']} "
+                    f"TP:{ld['tp_lesions']} FP:{ld['fp_lesions']} FN:{ld['fn_lesions']} "
+                    f"Sens:{ld['lesion_sensitivity']:.3f} Prec:{ld['lesion_precision']:.3f} "
+                    f"F1:{ld['lesion_f1']:.3f}")
+    # Aggregate across patients
+    overall_metrics, overall_metrics_rich = aggregate_patient_metrics(
+        per_patient_metrics, config.num_classes
+    )
+    # overall_metrics      → drop-in replacement for old overall_metrics, all print/save code unchanged
+    # overall_metrics_rich → use wherever we want mean ± std reporting
+    # Print standard metrics
+    print("\n" + "="*70)
+    print("STANDARD METRICS (Class vs Rest)")
+    print("="*70)
+    print("\nClass-wise Dice Scores:")
+    for class_idx, class_name in enumerate(config.class_names):
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        key = f'class_{class_idx}'
+        if key in overall_metrics['dice']:
+            print(f"  {class_name}: {overall_metrics['dice'][key]:.4f}")
+    print(f"  Mean Dice: {overall_metrics['dice']['mean']:.4f}")
+    print("\nClass-wise Precision:")
+    for class_idx, class_name in enumerate(config.class_names):
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        key = f'class_{class_idx}'
+        if key in overall_metrics['precision']:
+            print(f"  {class_name}: {overall_metrics['precision'][key]:.4f}")
+    print(f"  Mean Precision: {overall_metrics['precision']['mean']:.4f}")
+    print("\nClass-wise Recall:")
+    for class_idx, class_name in enumerate(config.class_names):
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        key = f'class_{class_idx}'
+        if key in overall_metrics['recall']:
+            print(f"  {class_name}: {overall_metrics['recall'][key]:.4f}")
+    print(f"  Mean Recall: {overall_metrics['recall']['mean']:.4f}")
+    print("\nClass-wise IoU:")
+    for class_idx, class_name in enumerate(config.class_names):
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        key = f'class_{class_idx}'
+        if key in overall_metrics['iou']:
+            print(f"  {class_name}: {overall_metrics['iou'][key]:.4f}")
+    print(f"  Mean IoU: {overall_metrics['iou']['mean']:.4f}")
+    print("\nClass-wise Specificity:")
+    for class_idx, class_name in enumerate(config.class_names):
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        key = f'class_{class_idx}'
+        if key in overall_metrics['specificity']:
+            print(f"  {class_name}: {overall_metrics['specificity'][key]:.4f}")
+    print(f"  Mean Specificity: {overall_metrics['specificity']['mean']:.4f}")
+    print("\nClass-wise HD95 (lower is better):")
+    for class_idx, class_name in enumerate(config.class_names):
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        key = f'class_{class_idx}'
+        if key in overall_metrics['hd95']:
+            print(f"  {class_name}: {overall_metrics['hd95'][key]:.4f}")
+    print(f"  Mean HD95: {overall_metrics['hd95']['mean']:.4f}")
+    print("="*70 + "\n")
+    # Print lesion-level metrics
+    print("\n" + "="*70)
+    print("LESION-LEVEL METRICS (Connected-Component Analysis)")
+    print("="*70)
+    for class_idx, class_name in enumerate(config.class_names):
+        if class_idx == 0:
+            continue
+        key = f'class_{class_idx}'
+        if key not in overall_metrics.get('lesion', {}):
+            continue
+        ld = overall_metrics['lesion'][key]
+        print(f"\n  [{class_name}]")
+        print(f"    GT Lesions          : {ld['n_gt_lesions']}")
+        print(f"    Predicted Lesions   : {ld['n_pred_lesions']}")
+        print(f"    TP Lesions          : {ld['tp_lesions']}")
+        print(f"    FP Lesions          : {ld['fp_lesions']}")
+        print(f"    FN Lesions          : {ld['fn_lesions']}")
+        print(f"    Lesion Sensitivity  : {ld['lesion_sensitivity']:.4f}")
+        print(f"    Lesion Precision    : {ld['lesion_precision']:.4f}")
+        print(f"    Lesion F1           : {ld['lesion_f1']:.4f}")
+    print(f"\n  [Summary across foreground classes]")
+    print(f"    Total GT Lesions     : {overall_metrics['lesion']['total_n_gt_lesions']}")
+    print(f"    Total Pred Lesions   : {overall_metrics['lesion']['total_n_pred_lesions']}")
+    print(f"    Total TP Lesions     : {overall_metrics['lesion']['total_tp_lesions']}")
+    print(f"    Total FP Lesions     : {overall_metrics['lesion']['total_fp_lesions']}")
+    print(f"    Total FN Lesions     : {overall_metrics['lesion']['total_fn_lesions']}")
+    print(f"    Mean Lesion Sensitivity : {overall_metrics['lesion']['mean_lesion_sensitivity']:.4f}")
+    print(f"    Mean Lesion Precision   : {overall_metrics['lesion']['mean_lesion_precision']:.4f}")
+    print(f"    Mean Lesion F1          : {overall_metrics['lesion']['mean_lesion_f1']:.4f}")
+    print("="*70 + "\n")
+    # Save all metrics to JSON
+    metrics_file = config.metrics_dir / "test_metrics_complete.json"
+    def convert_to_serializable(obj):
+        """Convert numpy types to Python native types"""
+        if isinstance(obj, dict):
+            return {k: convert_to_serializable(v) for k, v in obj.items()}
+        elif isinstance(obj, (np.integer, np.int64, np.int32)):
+            return int(obj)
+        elif isinstance(obj, (np.floating, np.float64, np.float32)):
+            return float(obj)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+        else:
+            return obj
+    metrics_to_save = {
+        'config': {
+            'variant': int(config.variant),
+            'preprocessing': config.preprocessing,
+            'class_scenario': config.class_scenario,
+            'fold_id': int(config.fold_id),
+            'num_classes': int(config.num_classes),
+            'class_names': config.class_names,
+            'architecture_name': config.architecture_name,
+            'model_name': config.model_name,
+            'test_samples': int(test_size)
+        },
+        'metrics': convert_to_serializable(overall_metrics)
+    }
+    with open(metrics_file, 'w') as f:
+        json.dump(metrics_to_save, f, indent=2)
+    print(f"\n✅ All metrics saved to: {metrics_file}")
+    # print(f"✅ Predictions saved to: {config.predictions_dir}")
+    print(f"✅ Visualizations saved to: {config.visualizations_dir}")
+    # Return results
+    return {
+        'patients_results': patient_results,
+        'metrics': overall_metrics,
+        'rich_metrics': overall_metrics_rich
+    }
+###################### Main Execution ######################
+if __name__ == "__main__":
+    # Run inference
+    preprocess_options = ['standard']  # ['zoomed', 'standard']
+    scenarios          = ['3class']  # ['3class', '4class']
+    fold_numbers       = list(np.array([0, 1, 2, 3]))
+    for fold_number in fold_numbers:
+        for preprocess_option in preprocess_options:
+            for scenario in scenarios:
+                config = InferenceConfig(
+                    variant=1,
+                    preprocessing=preprocess_option,
+                    class_scenario=scenario,
+                    fold_id=fold_number,
+                    model_name='best_dice_model.h5',
+                    architecture_name='unet'   # a choice from ['unet', 'attnunet', 'dlv3unet', 'transunet']
+                )
+                results = run_inference(config)
+                # ── Error Analysis ──────────��───────────────────────────
+                error_results = run_error_analysis(
+                    results=results,
+                    config=config,
+                    top_n_slices=300,      # visualise N hardest slices
+                    top_n_patients=20,    # patient summary plots
+                    fg_dice_weight=0.7,   # tunable ranking weights
+                    error_rate_weight=0.2,
+                    confidence_weight=0.2,
+                )
+                # ────────────────────────────────────────────────────────
+                print("\n" + "="*70)
+                print("INFERENCE + ERROR ANALYSIS COMPLETE")
+                print("="*70)

models/for_WMH_Vent/model_training_scripts/p4_run_experiments_all.py ADDED Viewed

	@@ -0,0 +1,576 @@

+"""
+P4 Article - Run Multiple Variant Experiments
+Updated runner script supporting all models
+Supports:
+- Variant 1: Baseline U-Net
+- Variant 2: Attention U-Net
+- Variant 3: DeepLabV3+ U-Net
+- Variant 4: Trans U-Net
+Usage:
+    # Single experiment
+    python p4_run_experiments_all.py --variant 2 --fold 0 --scenario standard_3class
+    # All scenarios for one variant+fold
+    python p4_run_experiments_all.py --variant 2 --fold 0
+    # All scenarios for one variant (all folds)
+    python p4_run_experiments_all.py --variant 2
+    # All scenarios (all folds and all variants)
+    python p4_run_experiments_all.py
+"""
+import sys
+import argparse
+import subprocess
+from pathlib import Path
+import tensorflow as tf
+import gc
+from tensorflow.keras import backend as K
+import p4_unet_viz
+def clear_gpu_memory():
+    """Comprehensive GPU memory cleanup between experiments"""
+    print("\n" + "="*70)
+    print("CLEANING UP GPU MEMORY")
+    print("="*70)
+    # Clear Keras session
+    K.clear_session()
+    print("✅ Cleared Keras session")
+    # Force garbage collection
+    gc.collect()
+    print("✅ Ran garbage collection")
+    # Reset TensorFlow graphs
+    tf.compat.v1.reset_default_graph()
+    print("✅ Reset default graph")
+    # Additional cleanup for TF 2.x
+    try:
+        # Clear any cached tensors
+        tf.config.experimental.reset_memory_stats('GPU:0')
+        print("✅ Reset GPU memory stats")
+    except:
+        pass
+    print("="*70 + "\n")
+def run_single_experiment(variant: int,
+                         preprocessing: str,
+                         class_scenario: str,
+                         fold_id: int) -> bool:
+    """
+    Run a single experiment for specified variant
+    Args:
+        variant: 1 (baseline u-net) or 2 (attention u-net) or 3 (deeplabv3+ u-net) or 4 (trans u-net)
+        preprocessing: 'standard' or 'zoomed'
+        class_scenario: '3class' or '4class'
+        fold_id: 0-4
+    Returns:
+        True if successful, False otherwise
+    """
+    print("\n" + "="*80)
+    print(f"RUNNING: Variant {variant} | {preprocessing} | {class_scenario} | Fold {fold_id}")
+    print("="*80 + "\n")
+    try:
+        if variant == 1:
+            # Baseline unet
+            from p4_variant_all_net import ExperimentConfig, train_net
+            config = ExperimentConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                architecture_name='unet'
+            )
+            history, history_path = train_net(config)
+            p4_unet_viz.main_viz(history_path)
+            # Run Inference
+            from p4_inference import InferenceConfig, run_inference, run_error_analysis
+            config = InferenceConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                model_name='best_dice_model.h5',
+                architecture_name='unet'
+            )
+            results = run_inference(config)
+            # ── Error Analysis ──────────────────────────────────────
+            error_results = run_error_analysis(
+                results=results,
+                config=config,
+                top_n_slices=30,      # visualise N hardest slices
+                top_n_patients=10,    # patient summary plots
+                fg_dice_weight=0.6,   # tunable ranking weights
+                error_rate_weight=0.2,
+                confidence_weight=0.2,
+            )
+        elif variant == 2:
+            # Attention unet
+            from p4_variant_all_net import ExperimentConfig, train_net
+            config = ExperimentConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                architecture_name='attnunet'
+            )
+            history, history_path = train_net(config)
+            p4_unet_viz.main_viz(history_path)
+            # Run Inference
+            from p4_inference import InferenceConfig, run_inference, run_error_analysis
+            config = InferenceConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                model_name='best_dice_model.h5',
+                architecture_name='attnunet'
+            )
+            results = run_inference(config)
+            # ── Error Analysis ──────────────────────────────────────
+            error_results = run_error_analysis(
+                results=results,
+                config=config,
+                top_n_slices=30,      # visualise N hardest slices
+                top_n_patients=10,    # patient summary plots
+                fg_dice_weight=0.6,   # tunable ranking weights
+                error_rate_weight=0.2,
+                confidence_weight=0.2,
+            )
+        elif variant == 3:
+            # DeepLabV3+ unet
+            from p4_variant_all_net import ExperimentConfig, train_net
+            config = ExperimentConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                architecture_name='dlv3unet'
+            )
+            history, history_path = train_net(config)
+            p4_unet_viz.main_viz(history_path)
+            # Run Inference
+            from p4_inference import InferenceConfig, run_inference, run_error_analysis
+            config = InferenceConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                model_name='best_dice_model.h5',
+                architecture_name='dlv3unet'
+            )
+            results = run_inference(config)
+            # ── Error Analysis ──────────────────────────────────────
+            error_results = run_error_analysis(
+                results=results,
+                config=config,
+                top_n_slices=30,      # visualise N hardest slices
+                top_n_patients=10,    # patient summary plots
+                fg_dice_weight=0.6,   # tunable ranking weights
+                error_rate_weight=0.2,
+                confidence_weight=0.2,
+            )
+        elif variant == 4:
+            # Trans unet
+            from p4_variant_all_net import ExperimentConfig, train_net
+            config = ExperimentConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                architecture_name='transunet'
+            )
+            history, history_path = train_net(config)
+            p4_unet_viz.main_viz(history_path)
+            # Run Inference
+            from p4_inference import InferenceConfig, run_inference, run_error_analysis
+            config = InferenceConfig(
+                variant=variant,
+                preprocessing=preprocessing,
+                class_scenario=class_scenario,
+                fold_id=fold_id,
+                model_name='best_dice_model.h5',
+                architecture_name='transunet'
+            )
+            results = run_inference(config)
+            # ── Error Analysis ──────────────────────────────────────
+            error_results = run_error_analysis(
+                results=results,
+                config=config,
+                top_n_slices=30,      # visualise N hardest slices
+                top_n_patients=10,    # patient summary plots
+                fg_dice_weight=0.6,   # tunable ranking weights
+                error_rate_weight=0.2,
+                confidence_weight=0.2,
+            )
+        else:
+            raise ValueError(f"Unknown variant: {variant}")
+        print(f"\n✅ Experiment completed successfully!")
+        return True
+    except Exception as e:
+        print(f"\n❌ Experiment failed with error:")
+        print(f"   {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+def run_all_scenarios_for_variant_fold(variant: int, fold_id: int) -> dict:
+    """
+    Run all 4 scenarios for a given variant and fold
+    Args:
+        variant: 1 (baseline u-net) or 2 (attention u-net) or 3 (deeplabv3+ u-net) or 4 (trans u-net)
+        fold_id: 0-4
+    Returns:
+        Dictionary with results for each scenario
+    """
+    print("\n" + "="*80)
+    print(f"RUNNING ALL SCENARIOS FOR VARIANT {variant}, FOLD {fold_id}")
+    print("="*80)
+    print("\nTotal experiments: 4")
+    print("  1. standard + 3class")
+    print("  2. standard + 4class")
+    print("  3. zoomed + 3class")
+    print("  4. zoomed + 4class")
+    print("\n" + "="*80 + "\n")
+    experiments = [
+        {'preprocessing': 'zoomed', 'class_scenario': '4class'},
+        {'preprocessing': 'standard', 'class_scenario': '4class'},
+        {'preprocessing': 'zoomed', 'class_scenario': '3class'},
+        {'preprocessing': 'standard', 'class_scenario': '3class'},
+    ]
+    results = {}
+    for idx, scenario in enumerate(experiments, 1):
+        print(f"\n{'#'*80}")
+        print(f"SCENARIO {idx}/4: {scenario['preprocessing']} + {scenario['class_scenario']}")
+        print(f"{'#'*80}\n")
+        # Run in subprocess for complete memory isolation
+        import subprocess
+        import sys
+        cmd = [
+            sys.executable,
+            'p4_run_experiments_all.py',
+            '--variant', str(variant),
+            '--fold', str(fold_id),
+            '--scenario', f"{scenario['preprocessing']}_{scenario['class_scenario']}"
+        ]
+        print(f"Running command: {' '.join(cmd)}\n")
+        try:
+            # Run experiment in separate process
+            result = subprocess.run(cmd, check=True, capture_output=False)
+            if result.returncode == 0:
+                exp_name = f"v{variant}_{scenario['preprocessing']}_{scenario['class_scenario']}_fold{fold_id}"
+                results[exp_name] = {'status': 'SUCCESS'}
+                print(f"\n✅ {exp_name} completed successfully")
+            else:
+                raise Exception(f"Process returned code {result.returncode}")
+        except subprocess.CalledProcessError as e:
+            exp_name = f"v{variant}_{scenario['preprocessing']}_{scenario['class_scenario']}_fold{fold_id}"
+            print(f"\n❌ Error in {scenario['preprocessing']} + {scenario['class_scenario']}")
+            print(f"   Error: {str(e)}")
+            results[exp_name] = {
+                'status': 'FAILED',
+                'error': str(e)
+            }
+            # Ask user if they want to continue
+            response = input("\nContinue with remaining experiments? (y/n): ")
+            if response.lower() != 'y':
+                print("Stopping experiments...")
+                break
+        # Brief pause between experiments
+        import time
+        print("\n⏳ Waiting 5 seconds before next experiment...")
+        time.sleep(5)
+    # Summary
+    print("\n" + "="*80)
+    print(f"VARIANT {variant}, FOLD {fold_id} - SUMMARY")
+    print("="*80)
+    for exp_name, result in results.items():
+        status_icon = "✅" if result['status'] == 'SUCCESS' else "❌"
+        print(f"{status_icon} {exp_name}")
+    print("\n" + "="*80 + "\n")
+    return results
+def run_all_folds_for_variant(variant: int) -> dict:
+    """
+    Run all scenarios for all folds for a given variant
+    Run all 4 experiments for all 5 folds
+    Total: 4 scenarios × 5 folds = 20 training runs
+    Args:
+        variant: 1 (baseline u-net) or 2 (attention u-net) or 3 (deeplabv3+ u-net) or 4 (trans u-net)
+    Returns:
+        Dictionary with results for all folds
+    """
+    print("\n" + "="*80)
+    print(f"RUNNING ALL FOLDS FOR VARIANT {variant}")
+    print("="*80)
+    print("\nTotal experiments: 4 scenarios × 5 folds = 20 training runs")
+    print("Estimated time: ~0.7 hour per experiment (with 60 epochs)")
+    print("Total estimated time: 10-20 hours")
+    print("\n" + "="*80 + "\n")
+    response = input("This will take a long time. Continue? (y/n): ")
+    if response.lower() != 'y':
+        print("Cancelled.")
+        return {}
+    all_results = {}
+    for fold_id in range(5):
+        print(f"\n{'='*80}")
+        print(f"STARTING FOLD {fold_id}")
+        print(f"{'='*80}\n")
+        fold_results = run_all_scenarios_for_variant_fold(variant, fold_id)
+        all_results[f'fold_{fold_id}'] = fold_results
+    # Final summary
+    print("\n" + "="*80)
+    print(f"VARIANT {variant} - ALL FOLDS COMPLETE")
+    print("="*80)
+    for fold_id in range(5):
+        fold_key = f'fold_{fold_id}'
+        if fold_key in all_results:
+            print(f"\nFold {fold_id}:")
+            for exp_name, result in all_results[fold_key].items():
+                status_icon = "✅" if result['status'] == 'SUCCESS' else "❌"
+                print(f"  {status_icon} {exp_name}")
+    print("\n" + "="*80 + "\n")
+    return all_results
+def compare_variants(fold_id: int = 0):
+    """
+    Compare results between baseline and attention variants and newloss variants
+    Args:
+        fold_id: Fold to compare (0-4)
+    """
+    print("\n" + "="*80)
+    print(f"COMPARING VARIANTS FOR FOLD {fold_id}")
+    print("="*80)
+    import json
+    scenarios = [
+        {'preprocessing': 'standard', 'class_scenario': '3class'},
+        {'preprocessing': 'standard', 'class_scenario': '4class'},
+        {'preprocessing': 'zoomed', 'class_scenario': '3class'},
+        {'preprocessing': 'zoomed', 'class_scenario': '4class'},
+    ]
+    results_dir = Path(f"results_fold_{fold_id}")
+    for scenario in scenarios:
+        print(f"\n{scenario['preprocessing']} + {scenario['class_scenario']}:")
+        print("-" * 60)
+        # Baseline (variant 1)
+        baseline_dir = results_dir / "models" / f"{scenario['preprocessing']}_{scenario['class_scenario']}" / f"fold_{fold_id}"
+        baseline_history = baseline_dir / "history.json"
+        # Attention (variant 2)
+        attention_dir = results_dir / "models" / f"{scenario['preprocessing']}_{scenario['class_scenario']}" / f"fold_{fold_id}_variant2"
+        attention_history = attention_dir / "history.json"
+        # Attention (variant 3)
+        newloss_dir = results_dir / "models" / f"{scenario['preprocessing']}_{scenario['class_scenario']}" / f"fold_{fold_id}_variant3"
+        newloss_history = newloss_dir / "history.json"
+        if baseline_history.exists() and attention_history.exists() and newloss_history.exists():
+            with open(baseline_history, 'r') as f:
+                baseline_data = json.load(f)
+            with open(attention_history, 'r') as f:
+                attention_data = json.load(f)
+            with open(newloss_history, 'r') as f:
+                newloss_data = json.load(f)
+            # Compare final validation losses
+            baseline_val = baseline_data['val_loss'][-1]
+            attention_val = attention_data['val_loss'][-1]
+            newloss_val = newloss_data['val_loss'][-1]
+            improvement_1_2 = ((baseline_val - attention_val) / baseline_val) * 100
+            improvement_1_3 = ((baseline_val - newloss_val) / baseline_val) * 100
+            improvement_2_3 = ((attention_val - newloss_val) / attention_val) * 100
+            print(f"  Baseline Val Loss:  {baseline_val:.4f}")
+            print(f"  Attention Val Loss: {attention_val:.4f}")
+            print(f"  NewLoss Val Loss: {newloss_val:.4f}")
+            print(f"  Improvement by V2 on V1:        {improvement_1_2:+.2f}%")
+            print(f"  Improvement by V3 on V1:        {improvement_1_3:+.2f}%")
+            print(f"  Improvement by V3 on V2:        {improvement_2_3:+.2f}%")
+        else:
+            if not baseline_history.exists():
+                print(f"  ⚠️  Baseline results not found")
+            if not attention_history.exists():
+                print(f"  ⚠️  Attention results not found")
+            if not newloss_history.exists():
+                print(f"  ⚠️  NewLoss results not found")
+    print("\n" + "="*80 + "\n")
+def main():
+    """Main entry point with argument parsing"""
+    parser = argparse.ArgumentParser(
+        description='Run P4 experiments for multiple variants',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Single experiment
+    python p4_run_experiments_all.py --variant 2 --fold 0 --scenario standard_3class
+    # All scenarios for variant 2, fold 0
+    python p4_run_experiments_all.py --variant 2 --fold 0
+    # All folds for variant 3
+    python p4_run_experiments_all.py --variant 2
+    # Compare results
+    python p4_run_experiments_all.py --compare --fold 0
+        """
+    )
+    parser.add_argument(
+        '--variant',
+        type=int,
+        choices=[1, 2, 3, 4],
+        help='variant: 1 (baseline u-net) or 2 (attention u-net) or 3 (deeplabv3+ u-net) or 4 (trans u-net)'
+    )
+    parser.add_argument(
+        '--fold',
+        type=int,
+        choices=[0, 1, 2, 3, 4],
+        help='Specific fold to train (0-4)'
+    )
+    parser.add_argument(
+        '--scenario',
+        type=str,
+        choices=['standard_3class', 'standard_4class', 'zoomed_3class', 'zoomed_4class'],
+        help='Specific scenario to train'
+    )
+    parser.add_argument(
+        '--compare',
+        action='store_true',
+        help='Compare results between variants'
+    )
+    args = parser.parse_args()
+    # Handle comparison mode (NOT READY YET!)
+    if args.compare:
+        fold_id = args.fold if args.fold is not None else 0
+        compare_variants(fold_id)
+        return
+    # Validate arguments
+    if args.variant is None:
+        parser.error("--variant is required (unless using --compare)")
+    # Single experiment
+    if args.scenario is not None:
+        preprocessing, class_scenario = args.scenario.split('_')
+        fold_id = args.fold if args.fold is not None else 0
+        print(f"\nRunning single experiment:")
+        print(f"  Variant: {args.variant}")
+        print(f"  Fold: {fold_id}")
+        print(f"  Preprocessing: {preprocessing}")
+        print(f"  Class scenario: {class_scenario}\n")
+        success = run_single_experiment(
+            variant=args.variant,
+            preprocessing=preprocessing,
+            class_scenario=class_scenario,
+            fold_id=fold_id
+        )
+        if success:
+            print("\n✅ Experiment complete!")
+        else:
+            print("\n❌ Experiment failed!")
+            sys.exit(1)
+    # All scenarios for specific fold
+    elif args.fold is not None:
+        run_all_scenarios_for_variant_fold(args.variant, args.fold)
+    # All scenarios for all folds
+    else:
+        run_all_folds_for_variant(args.variant)
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

models/for_WMH_Vent/model_training_scripts/p4_unet_viz.py ADDED Viewed

	@@ -0,0 +1,640 @@

+"""
+P4 - All U-Net models with Adaptive Loss (WCE + UFL)
+WMH and Ventricles Segmentation with U-Net Models - Journal Paper Implementation
+Three-class segmentation (Background vs Ventricles vs Abnormal WMH), or four-class when Normal WMH is a separate class
+Professional results saving and visualization for publication
+This relates to our article:
+"Deep Learning-Based Neuroanatomical Profiling Reveals Detailed Brain Changes:
+A Large-Scale Multiple Sclerosis Study"
+Features:
+- Visualization of Results
+Authors:
+"Mahdi Bashiri Bawil, Mousa Shamsi, Abolhassan Shakeri Bavil"
+Developer:
+"Mahdi Bashiri Bawil"
+"""
+import os
+import json
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
+def load_history(filepath):
+    """Load training history from JSON file."""
+    with open(filepath, 'r') as f:
+        return json.load(f)
+def detect_num_classes(history):
+    """Detect number of classes from val_metrics."""
+    if not history['val_metrics']:
+        return 3
+    first_metric = history['val_metrics'][0]
+    # Count only class_X keys, not 'mean'
+    num_classes = len([k for k in first_metric['dice'].keys() if k.startswith('class_')])
+    return num_classes
+def get_class_names(num_classes):
+    """Get class names based on number of classes."""
+    if num_classes == 3:
+        return {
+            'class_0': 'Background',
+            'class_1': 'Ventricles',
+            'class_2': 'Abnormal WMH'
+        }
+    elif num_classes == 4:
+        return {
+            'class_0': 'Background',
+            'class_1': 'Ventricles',
+            'class_2': 'Normal WMH',
+            'class_3': 'Abnormal WMH'
+        }
+    else:
+        return {f'class_{i}': f'Class {i}' for i in range(num_classes)}
+def convert_to_native_types(obj):
+    """Recursively convert numpy types to native Python types for JSON serialization."""
+    if isinstance(obj, np.integer):
+        return int(obj)
+    elif isinstance(obj, np.floating):
+        return float(obj)
+    elif isinstance(obj, np.ndarray):
+        return obj.tolist()
+    elif isinstance(obj, dict):
+        return {key: convert_to_native_types(value) for key, value in obj.items()}
+    elif isinstance(obj, list):
+        return [convert_to_native_types(item) for item in obj]
+    else:
+        return obj
+def find_best_epoch(history, num_classes):
+    """
+    Find the best epoch based on prioritized criteria:
+    1. Highest Dice for abnormal WMH (top priority)
+    2. Highest Dice for ventricles (secondary)
+    3. Lowest validation loss (tertiary)
+    4. ONLY consider epochs where beta > 0.95 (CRITICAL REQUIREMENT)
+    """
+    if not history['val_metrics']:
+        return None, {}
+    epochs = range(1, len(history['val_metrics']) + 1)
+    if 'beta_value' in history:
+        beta_values = history['beta_value']
+    else:
+        beta_values = [1] * len(history.get('val_loss', []))
+        history['beta_value'] = beta_values
+    # Find epochs where beta > 0.95 (CRITICAL FILTER)
+    valid_epoch_indices = [i for i, beta in enumerate(beta_values) if beta > 0.95]
+    if not valid_epoch_indices:
+        print("⚠️  WARNING: No epochs found with beta > 0.95!")
+        print("    Using all epochs for analysis (not recommended).")
+        valid_epoch_indices = list(range(len(beta_values)))
+    first_valid_epoch = valid_epoch_indices[0] + 1 if valid_epoch_indices else 1
+    # Determine the key for abnormal WMH
+    abnormal_key = 'class_3' if num_classes == 4 else 'class_2'
+    ventricles_key = 'class_1'
+    # Extract metrics
+    abnormal_dice = [m['dice'][abnormal_key] for m in history['val_metrics']]
+    ventricles_dice = [m['dice'][ventricles_key] for m in history['val_metrics']]
+    val_losses = history['val_loss']
+    # Find best epoch for abnormal WMH dice (only among valid epochs)
+    valid_abnormal_dice = [(i, abnormal_dice[i]) for i in valid_epoch_indices]
+    best_abnormal_idx = max(valid_abnormal_dice, key=lambda x: x[1])[0]
+    best_abnormal_epoch = best_abnormal_idx + 1
+    best_abnormal_dice = abnormal_dice[best_abnormal_idx]
+    # Find best epoch for ventricles dice (only among valid epochs)
+    valid_ventricles_dice = [(i, ventricles_dice[i]) for i in valid_epoch_indices]
+    best_ventricles_idx = max(valid_ventricles_dice, key=lambda x: x[1])[0]
+    best_ventricles_epoch = best_ventricles_idx + 1
+    best_ventricles_dice = ventricles_dice[best_ventricles_idx]
+    # Find best epoch for validation loss (only among valid epochs)
+    valid_val_losses = [(i, val_losses[i]) for i in valid_epoch_indices]
+    best_val_loss_idx = min(valid_val_losses, key=lambda x: x[1])[0]
+    best_val_loss_epoch = best_val_loss_idx + 1
+    best_val_loss = val_losses[best_val_loss_idx]
+    # Calculate composite score (weighted) - ONLY for valid epochs
+    composite_scores = [float('-inf')] * len(abnormal_dice)
+    for i in valid_epoch_indices:
+        # Normalize and weight: 60% abnormal dice, 30% ventricles dice, 10% inv val_loss
+        norm_abnormal = abnormal_dice[i]
+        norm_ventricles = ventricles_dice[i]
+        # Normalize validation loss among valid epochs only
+        valid_val_loss_values = [val_losses[j] for j in valid_epoch_indices]
+        max_val_loss = max(valid_val_loss_values) if valid_val_loss_values else 1
+        norm_val_loss = 1 - (val_losses[i] / max_val_loss) if max_val_loss > 0 else 0
+        composite = 0.6 * norm_abnormal + 0.3 * norm_ventricles + 0.1 * (1 - val_losses[i]) # norm_val_loss
+        composite_scores[i] = composite
+    best_overall_idx = int(np.argmax(composite_scores))  # Convert to int
+    best_overall_epoch = best_overall_idx + 1
+    # Get all metrics at best epoch
+    best_epoch_metrics = history['val_metrics'][best_overall_idx]
+    analysis = {
+        'best_overall_epoch': int(best_overall_epoch),
+        'best_overall_epoch_idx': int(best_overall_idx),
+        'best_abnormal_epoch': int(best_abnormal_epoch),
+        'best_abnormal_dice': float(best_abnormal_dice),
+        'best_ventricles_epoch': int(best_ventricles_epoch),
+        'best_ventricles_dice': float(best_ventricles_dice),
+        'best_val_loss_epoch': int(best_val_loss_epoch),
+        'best_val_loss': float(best_val_loss),
+        'composite_score': float(composite_scores[best_overall_idx]),
+        'abnormal_key': abnormal_key,
+        'num_classes': int(num_classes),
+        'first_valid_epoch': int(first_valid_epoch),
+        'total_valid_epochs': int(len(valid_epoch_indices)),
+        'beta_threshold': 0.95,
+        'total_epochs': int(len(epochs)),
+        # Add complete metrics at best epoch
+        'best_epoch_metrics': {
+            'dice': best_epoch_metrics['dice'],
+            'precision': best_epoch_metrics['precision'],
+            'recall': best_epoch_metrics['recall'],
+            'val_loss': float(val_losses[best_overall_idx]),
+            'train_loss': float(history['train_loss'][best_overall_idx]),
+            'wce_loss': float(history['wce_loss'][best_overall_idx]),
+            'ufd_loss': float(history['ufd_loss'][best_overall_idx]),
+            'val_loss_wce': float(history['val_loss_wce'][best_overall_idx]) if 'val_loss_wce' in history else None,
+            'val_loss_ufd': float(history['val_loss_ufd'][best_overall_idx]) if 'val_loss_ufd' in history else None,
+            'beta_value': float(beta_values[best_overall_idx])
+        }
+    }
+    # Convert all numpy types to native Python types
+    analysis = convert_to_native_types(analysis)
+    return best_overall_epoch, analysis
+def save_analysis_json(analysis, output_path):
+    """Save analysis results to a JSON file."""
+    analysis = convert_to_native_types(analysis)
+    with open(output_path, 'w') as f:
+        json.dump(analysis, f, indent=2)
+    print(f"✓ Analysis saved to: {output_path}")
+def save_enhanced_history(history, analysis, output_path):
+    """Save enhanced history with best epoch analysis appended."""
+    enhanced_history = history.copy()
+    enhanced_history['best_epoch_analysis'] = convert_to_native_types(analysis)
+    enhanced_history = convert_to_native_types(enhanced_history)
+    with open(output_path, 'w') as f:
+        json.dump(enhanced_history, f, indent=2)
+    print(f"✓ Enhanced history saved to: {output_path}")
+def create_training_summary(history, analysis, class_names):
+    """Create a comprehensive training summary for easy parsing."""
+    summary = {
+        'training_config': {
+            'total_epochs': analysis['total_epochs'],
+            'num_classes': analysis['num_classes'],
+            'class_names': class_names,
+            'model_type': 'a U-Net'
+        },
+        'best_epoch_selection': {
+            'overall_best_epoch': analysis['best_overall_epoch'],
+            'composite_score': analysis['composite_score'],
+            'selection_criteria': {
+                'abnormal_wmh_weight': 0.6,
+                'ventricles_weight': 0.3,
+                'val_loss_weight': 0.1
+            }
+        },
+        'priority_metrics': {
+            'abnormal_wmh': {
+                'best_epoch': analysis['best_abnormal_epoch'],
+                'best_dice': analysis['best_abnormal_dice']
+            },
+            'ventricles': {
+                'best_epoch': analysis['best_ventricles_epoch'],
+                'best_dice': analysis['best_ventricles_dice']
+            },
+            'validation_loss': {
+                'best_epoch': analysis['best_val_loss_epoch'],
+                'best_loss': analysis['best_val_loss']
+            }
+        },
+        'best_epoch_metrics': analysis['best_epoch_metrics'],
+        'training_progression': {
+            'final_epoch_metrics': {
+                'dice': history['val_metrics'][-1]['dice'],
+                'precision': history['val_metrics'][-1]['precision'],
+                'recall': history['val_metrics'][-1]['recall'],
+                'val_loss': history['val_loss'][-1],
+                'train_loss': history['train_loss'][-1]
+            },
+            'convergence_info': {
+                'epochs_trained': len(history['val_loss'])
+            }
+        }
+    }
+    # Add epoch-by-epoch metrics for important classes
+    summary['epoch_progression'] = {
+        'abnormal_wmh_dice': [m['dice'][analysis['abnormal_key']] for m in history['val_metrics']],
+        'ventricles_dice': [m['dice']['class_1'] for m in history['val_metrics']],
+        'mean_dice': [m['dice']['mean'] for m in history['val_metrics']],
+        'val_loss': history['val_loss'],
+        'train_loss': history['train_loss']
+    }
+    summary = convert_to_native_types(summary)
+    return summary
+def plot_training_history(history, save_path='training_history.png'):
+    """Create comprehensive visualization of training history."""
+    num_classes = detect_num_classes(history)
+    class_names = get_class_names(num_classes)
+    best_epoch, analysis = find_best_epoch(history, num_classes)
+    epochs = range(1, len(history['train_loss']) + 1)
+    # Detect whether new-style history (with val_loss_wce / val_loss_ufd) is present
+    has_val_components = 'val_loss_wce' in history and 'val_loss_ufd' in history
+    # Create figure — 3 rows × 3 cols when val components exist, else 2×3
+    nrows = 3 if has_val_components else 2
+    fig = plt.figure(figsize=(18, nrows * 5))
+    gs = fig.add_gridspec(nrows, 3, hspace=0.35, wspace=0.3)
+    # Color scheme
+    colors = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D']
+    wce_color  = '#4CAF50'   # green  – WCE
+    ufd_color  = '#9C27B0'   # purple – UFD
+    beta_color = '#FF5722'   # deep-orange – beta
+    # 1. Training and Validation Loss (combined / weighted)
+    ax1 = fig.add_subplot(gs[0, 0])
+    ax1.plot(epochs, history['train_loss'], 'o-', linewidth=2, markersize=6,
+             color=colors[0], label='Train Loss')
+    ax1.plot(epochs, history['val_loss'], 's-', linewidth=2, markersize=6,
+             color=colors[2], label='Val Loss')
+    if best_epoch:
+        ax1.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2,
+                   alpha=0.7, label=f'Best Epoch ({best_epoch})')
+    ax1.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+    ax1.set_ylabel('Loss', fontsize=11, fontweight='bold')
+    ax1.set_title('Training & Validation Loss\n(Combined Adaptive Loss)', fontsize=13, fontweight='bold')
+    ax1.legend(fontsize=9)
+    ax1.grid(True, alpha=0.3)
+    # 2. Dice Scores (excluding background)
+    ax2 = fig.add_subplot(gs[0, 1])
+    for i in range(1, num_classes):  # Skip class_0 (background)
+        class_key = f'class_{i}'
+        dice_scores = [m['dice'][class_key] for m in history['val_metrics']]
+        ax2.plot(epochs, dice_scores, 'o-', linewidth=2, markersize=6,
+                label=class_names[class_key], color=colors[i % len(colors)])
+    if best_epoch:
+        ax2.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2,
+                   alpha=0.7, label=f'Best Epoch ({best_epoch})')
+    ax2.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+    ax2.set_ylabel('Dice Score', fontsize=11, fontweight='bold')
+    ax2.set_title('Dice Scores by Class', fontsize=13, fontweight='bold')
+    ax2.legend(fontsize=9)
+    ax2.grid(True, alpha=0.3)
+    ax2.set_ylim([0, 1])
+    # 3. Precision Scores (excluding background)
+    ax3 = fig.add_subplot(gs[0, 2])
+    for i in range(1, num_classes):
+        class_key = f'class_{i}'
+        precision_scores = [m['precision'][class_key] for m in history['val_metrics']]
+        ax3.plot(epochs, precision_scores, 's-', linewidth=2, markersize=5,
+                label=class_names[class_key], color=colors[i % len(colors)])
+    if best_epoch:
+        ax3.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2, alpha=0.7)
+    ax3.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+    ax3.set_ylabel('Precision', fontsize=11, fontweight='bold')
+    ax3.set_title('Precision by Class', fontsize=13, fontweight='bold')
+    ax3.legend(fontsize=9)
+    ax3.grid(True, alpha=0.3)
+    ax3.set_ylim([0, 1])
+    # 4. Recall Scores (excluding background)
+    ax4 = fig.add_subplot(gs[1, 0])
+    for i in range(1, num_classes):
+        class_key = f'class_{i}'
+        recall_scores = [m['recall'][class_key] for m in history['val_metrics']]
+        ax4.plot(epochs, recall_scores, '^-', linewidth=2, markersize=5,
+                label=class_names[class_key], color=colors[i % len(colors)])
+    if best_epoch:
+        ax4.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2, alpha=0.7)
+    ax4.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+    ax4.set_ylabel('Recall', fontsize=11, fontweight='bold')
+    ax4.set_title('Recall by Class', fontsize=13, fontweight='bold')
+    ax4.legend(fontsize=9)
+    ax4.grid(True, alpha=0.3)
+    ax4.set_ylim([0, 1])
+    # 5. Mean Metrics
+    ax5 = fig.add_subplot(gs[1, 1])
+    mean_dice = [m['dice']['mean'] for m in history['val_metrics']]
+    mean_precision = [m['precision']['mean'] for m in history['val_metrics']]
+    mean_recall = [m['recall']['mean'] for m in history['val_metrics']]
+    ax5.plot(epochs, mean_dice, 'o-', linewidth=2, markersize=6,
+            color=colors[0], label='Mean Dice')
+    ax5.plot(epochs, mean_precision, 's-', linewidth=2, markersize=5,
+            color=colors[1], label='Mean Precision')
+    ax5.plot(epochs, mean_recall, '^-', linewidth=2, markersize=5,
+            color=colors[2], label='Mean Recall')
+    if best_epoch:
+        ax5.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2, alpha=0.7)
+    ax5.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+    ax5.set_ylabel('Score', fontsize=11, fontweight='bold')
+    ax5.set_title('Mean Validation Metrics', fontsize=13, fontweight='bold')
+    ax5.legend(fontsize=9)
+    ax5.grid(True, alpha=0.3)
+    ax5.set_ylim([0, 1])
+    # ── New Row 3 plots (only when val components are available) ──────────────
+    if has_val_components:
+        # 7. Training Loss Components (WCE vs UFD, train-side)
+        ax7 = fig.add_subplot(gs[2, 0])
+        ax7.plot(epochs, list(1*np.array(history['wce_loss'])), 'o-', linewidth=2, markersize=5,
+                 color=wce_color, label='Train WCE Loss x10')
+        ax7.plot(epochs, history['ufd_loss'], 's-', linewidth=2, markersize=5,
+                 color=ufd_color, label='Train UFD Loss')
+        ax7.plot(epochs, list(1*np.array(history['val_loss_wce'])), 'o--', linewidth=1.5, markersize=4,
+                 color=wce_color, alpha=0.6, label='Val WCE Loss x10')
+        ax7.plot(epochs, history['val_loss_ufd'], 's--', linewidth=1.5, markersize=4,
+                 color=ufd_color, alpha=0.6, label='Val UFD Loss')
+        if best_epoch:
+            ax7.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2,
+                        alpha=0.7, label=f'Best Epoch ({best_epoch})')
+        ax7.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+        ax7.set_ylabel('Loss', fontsize=11, fontweight='bold')
+        ax7.set_title('Loss Components: WCE vs UFD\n(Train solid · Val dashed)', fontsize=13, fontweight='bold')
+        ax7.legend(fontsize=8)
+        ax7.grid(True, alpha=0.3)
+        # 8. Weighted contribution of each loss to the total loss
+        ax8 = fig.add_subplot(gs[2, 1])
+        beta_values = history.get('beta_value', [e / len(epochs) for e in epochs])
+        betas = np.array(beta_values)
+        ones  = np.ones_like(betas)
+        # Weighted contributions
+        train_wce_contrib = (ones - betas) * np.array(history['wce_loss'])
+        train_ufd_contrib = betas            * np.array(history['ufd_loss'])
+        val_wce_contrib   = (ones - betas) * np.array(history['val_loss_wce'])
+        val_ufd_contrib   = betas            * np.array(history['val_loss_ufd'])
+        ax8.stackplot(list(epochs),
+                      train_wce_contrib, train_ufd_contrib,
+                      labels=['(1−β)·WCE  [train] x10', 'β·UFD  [train]'],
+                      colors=[wce_color, ufd_color], alpha=0.55)
+        ax8.plot(epochs, history['train_loss'], 'k-', linewidth=1.5, label='Total Train Loss')
+        # Overlay val contributions as lines for clarity
+        ax8.plot(epochs, val_wce_contrib, '--', color=wce_color, linewidth=1.5,
+                 alpha=0.8, label='(1−β)·WCE  [val] x10')
+        ax8.plot(epochs, val_ufd_contrib, '--', color=ufd_color, linewidth=1.5,
+                 alpha=0.8, label='β·UFD  [val]')
+        if best_epoch:
+            ax8.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2, alpha=0.7)
+        ax8.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+        ax8.set_ylabel('Weighted Loss', fontsize=11, fontweight='bold')
+        ax8.set_title('Weighted Loss Contributions\n(Adaptive β Schedule)', fontsize=13, fontweight='bold')
+        ax8.legend(fontsize=8)
+        ax8.grid(True, alpha=0.3)
+        # # 9. Beta schedule
+        # ax9 = fig.add_subplot(gs[2, 2])
+        # ax9.plot(list(epochs), betas, 'o-', linewidth=2, markersize=5,
+        #          color=beta_color, label='β (epoch/total)')
+        # ax9.fill_between(list(epochs), betas, alpha=0.15, color=beta_color)
+        # ax9.axhline(y=0.95, color='gray', linestyle=':', linewidth=1.5,
+        #             label='β = 0.95 threshold')
+        # if best_epoch:
+        #     ax9.axvline(x=best_epoch, color='red', linestyle='--', linewidth=2,
+        #                 alpha=0.7, label=f'Best Epoch ({best_epoch})')
+        # ax9.set_xlabel('Epoch', fontsize=11, fontweight='bold')
+        # ax9.set_ylabel('β value', fontsize=11, fontweight='bold')
+        # ax9.set_title('Beta Schedule\n(WCE → UFD transition)', fontsize=13, fontweight='bold')
+        # ax9.set_ylim([0, 1.05])
+        # ax9.legend(fontsize=9)
+        # ax9.grid(True, alpha=0.3)
+    # 6. Analysis Summary
+    ax6 = fig.add_subplot(gs[1, 2])
+    ax6.axis('off')
+    if analysis:
+        abnormal_class = class_names[analysis['abnormal_key']]
+        best_epoch_idx = analysis['best_overall_epoch'] - 1
+        # Get dice scores for all classes at the best epoch
+        best_epoch_metrics = history['val_metrics'][best_epoch_idx]['dice']
+        # Build dice scores text (excluding background)
+        dice_scores_text = ""
+        for i in range(1, num_classes):
+            class_key = f'class_{i}'
+            dice_value = best_epoch_metrics[class_key]
+            dice_scores_text += f"          {class_names[class_key]}: {dice_value:.4f}\n"
+        summary_text = f"""
+        TRAINING ANALYSIS SUMMARY
+        {'=' * 40}
+        Model: a U-Net
+        Number of Classes: {analysis['num_classes']}
+        Total Epochs: {len(epochs)}
+        BEST OVERALL EPOCH: {analysis['best_overall_epoch']}
+        (Composite Score: {analysis['composite_score']:.4f})
+        Dice Scores at Best Epoch:
+{dice_scores_text}
+        {'─' * 40}
+        Priority Metrics:
+        {'─' * 40}
+        Best {abnormal_class} Dice:
+          Epoch {analysis['best_abnormal_epoch']}: {analysis['best_abnormal_dice']:.4f}
+        Best Ventricles Dice:
+          Epoch {analysis['best_ventricles_epoch']}: {analysis['best_ventricles_dice']:.4f}
+        Best Validation Loss:
+          Epoch {analysis['best_val_loss_epoch']}: {analysis['best_val_loss']:.4f}
+        {'─' * 40}
+        Loss at Best Epoch:
+          Train WCE:  {analysis['best_epoch_metrics']['wce_loss']:.4f}
+          Train UFD:  {analysis['best_epoch_metrics']['ufd_loss']:.4f}"""
+        if analysis['best_epoch_metrics'].get('val_loss_wce') is not None:
+            summary_text += f"""
+          Val   WCE:  {analysis['best_epoch_metrics']['val_loss_wce']:.4f}
+          Val   UFD:  {analysis['best_epoch_metrics']['val_loss_ufd']:.4f}"""
+        summary_text += f"""
+          β value:    {analysis['best_epoch_metrics']['beta_value']:.4f}
+        {'─' * 40}
+        Scoring Weights:
+          {abnormal_class}: 60%
+          Ventricles: 30%
+          Val Loss: 10%
+        """
+        ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes,
+                fontsize=9, verticalalignment='top', fontfamily='monospace',
+                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))
+    plt.suptitle('a U-Net Training History - Comprehensive Analysis\n'
+                 '(Adaptive Loss: WCE + UFD with β schedule)',
+                 fontsize=16, fontweight='bold', y=0.998)
+    plt.savefig(save_path, dpi=300, bbox_inches='tight')
+    print(f"✓ Visualization saved to: {save_path}")
+    # plt.show()
+    return analysis
+def print_detailed_analysis(analysis):
+    """Print detailed analysis to console."""
+    if not analysis:
+        print("No analysis available.")
+        return
+    print("\n" + "="*60)
+    print("DETAILED TRAINING ANALYSIS - a U-NET")
+    print("="*60)
+    print(f"\n📊 Number of Classes: {analysis['num_classes']}")
+    print(f"\n🏆 RECOMMENDED EPOCH: {analysis['best_overall_epoch']}")
+    print(f"   Composite Score: {analysis['composite_score']:.4f}")
+    print("\n" + "-"*60)
+    print("Individual Best Performances:")
+    print("-"*60)
+    print(f"\n🎯 Abnormal WMH Dice (TOP PRIORITY):")
+    print(f"   Best Epoch: {analysis['best_abnormal_epoch']}")
+    print(f"   Best Score: {analysis['best_abnormal_dice']:.4f}")
+    print(f"\n🫀 Ventricles Dice (SECONDARY):")
+    print(f"   Best Epoch: {analysis['best_ventricles_epoch']}")
+    print(f"   Best Score: {analysis['best_ventricles_dice']:.4f}")
+    print(f"\n📉 Validation Loss (TERTIARY):")
+    print(f"   Best Epoch: {analysis['best_val_loss_epoch']}")
+    print(f"   Lowest Loss: {analysis['best_val_loss']:.4f}")
+    print("\n" + "="*60)
+    print("\nNote: Best overall epoch is calculated using weighted scoring:")
+    print("  • Abnormal WMH Dice: 60%")
+    print("  • Ventricles Dice: 30%")
+    print("  • Validation Loss: 10%")
+    print("="*60 + "\n")
+def main_viz(filepath='history_sample.json', save_outputs=True):
+    """Main execution function."""
+    # Load history
+    print(f"Loading training history from: {filepath}")
+    history = load_history(filepath)
+    print(f"✓ Loaded {len(history['train_loss'])} epochs of training data")
+    # Get output directory
+    out_dir = os.path.dirname(filepath)
+    # Detect number of classes and get class names
+    num_classes = detect_num_classes(history)
+    class_names = get_class_names(num_classes)
+    # Find best epoch and create analysis
+    best_epoch, analysis = find_best_epoch(history, num_classes)
+    # Create visualization
+    plot_training_history(history, save_path=os.path.join(out_dir, 'a_unet_training_analysis.png'))
+    # Print detailed analysis
+    print_detailed_analysis(analysis)
+    if save_outputs:
+        print("\n" + "="*60)
+        print("SAVING ANALYSIS OUTPUTS")
+        print("="*60)
+        # 1. Save standalone analysis JSON
+        analysis_path = os.path.join(out_dir, 'best_epoch_analysis.json')
+        save_analysis_json(analysis, analysis_path)
+        # 2. Save enhanced history with analysis appended
+        enhanced_history_path = os.path.join(out_dir, 'history_with_analysis.json')
+        save_enhanced_history(history, analysis, enhanced_history_path)
+        # 3. Save training summary
+        summary = create_training_summary(history, analysis, class_names)
+        summary_path = os.path.join(out_dir, 'training_summary.json')
+        with open(summary_path, 'w') as f:
+            json.dump(summary, f, indent=2)
+        print(f"✓ Training summary saved to: {summary_path}")
+        print("\n" + "="*60)
+        print("ALL OUTPUTS SAVED SUCCESSFULLY")
+        print("="*60)
+        print("\nGenerated files:")
+        print(f"  1. unet_training_analysis.png - Visualization")
+        print(f"  2. best_epoch_analysis.json - Best epoch analysis")
+        print(f"  3. history_with_analysis.json - Enhanced history")
+        print(f"  4. training_summary.json - Comprehensive training summary")
+        print("="*60 + "\n")
+    return analysis, history
+if __name__ == "__main__":
+    # experiment_dir = '/mnt/e/MBashiri/ours_articles/Paper#2/Development/results_unet_baseline_fold_0/models'
+    # scenario = 'standard_4class'
+    # fold_num = 'fold_0'
+    # filepath = os.path.join(experiment_dir, scenario, fold_num, 'history.json')
+    # main_viz(filepath=filepath, save_outputs=True)
+    for fold in range(5):
+        # Skip folds:
+        if fold in list(np.array([0, 2, 3, 4])):
+            continue
+        for variant in range(5):
+            # # Skip variants:
+            if variant not in list(np.array([1])):
+                continue
+            experiment_dir = f'/mnt/e/MBashiri/ours_articles/Paper#4/Development/results_fold_{fold}_var_{variant}_zscore2/models'
+            scenario = 'standard_3class'
+            fold_num = f'fold_{fold}'
+            filepath = os.path.join(experiment_dir, scenario, fold_num, 'history.json')
+            main_viz(filepath=filepath)

models/for_WMH_Vent/model_training_scripts/p4_variant_all_net.py ADDED Viewed

	@@ -0,0 +1,1051 @@

+"""
+P4 - All U-Net models with Adaptive Loss (WCE + UFL)
+WMH and Ventricles Segmentation with U-Net Models - Journal Paper Implementation
+Three-class segmentation: Background vs Ventricles vs Abnormal WMH
+Professional results saving and visualization for publication
+This relates to our article:
+"Deep Learning-Based Neuroanatomical Profiling Reveals Detailed Brain Changes:
+A Large-Scale Multiple Sclerosis Study"
+Features:
+- Various U-Net architectures
+- Weighted Categorical Cross-Entropy loss
+- Unified Focal loss
+- One-hot encoded targets
+- Class weight computation per fold
+Authors:
+"Mahdi Bashiri Bawil, Mousa Shamsi, Abolhassan Shakeri Bavil"
+Developer:
+"Mahdi Bashiri Bawil"
+"""
+import tensorflow as tf
+import os
+import time
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+from tqdm import tqdm
+import json
+# Import data loader
+from p4_data_loader import DataConfig, P2DataLoader
+# Import utilities from baseline
+from utility_functions import (
+    clear_gpu_memory,
+    get_gpu_memory_info,
+)
+# Import class weights utility
+from p4_compute_class_weights import compute_and_save_class_weights, load_class_weights
+print("TensorFlow Version:", tf.__version__)
+###################### GPU Configuration ######################
+# Configure GPU memory growth
+physical_devices = tf.config.list_physical_devices('GPU')
+if physical_devices:
+    try:
+        for device in physical_devices:
+            tf.config.experimental.set_memory_growth(device, True)
+        print("✅ GPU memory growth enabled")
+        print(f"   Available GPUs: {len(physical_devices)}")
+    except RuntimeError as e:
+        print(f"GPU configuration error: {e}")
+else:
+    print("⚠️  No GPU detected - training will be slow")
+"""
+GPU Memory Management for Sequential Experiments
+To properly release memory between experiments
+"""
+###################### Target Preparation ######################
+def prepare_inputs(paired_input, target_mask, num_classes):
+    """
+    Prepare inputs for training
+    Args:
+        paired_input: (bs, 256, 512, 1) with FLAIR + mask
+        target_mask: (bs, 256, 256) with class labels [0, num_classes-1]
+        num_classes: number of classes
+    Returns:
+        flair_normalized: FLAIR normalized to [-1, 1]
+        target_onehot: One-hot encoded mask (bs, 256, 256, num_classes)
+    """
+    # Extract FLAIR, previously normalized to [-1, 1]
+    flair_normalized = paired_input[:, :, :256, :]
+    # One-hot encode target
+    target_onehot = tf.one_hot(target_mask, depth=num_classes, dtype=tf.float32)
+    return flair_normalized, target_onehot
+###################### Metrics Calculation ######################
+def compute_classwise_metrics(all_val_true, all_val_pred, num_classes, exclude_class=None):
+    """
+    Compute class-wise Dice, Precision, and Recall for validation predictions.
+    Args:
+        all_val_true: List of one-hot encoded ground truth tensors
+        all_val_pred: List of softmax output tensors from model
+        num_classes: Number of classes (3 or 4)
+        exclude_class: Class to exclude from metric calculation (e.g., 2 for background)
+    Returns:
+        Dictionary containing class-wise and mean metrics
+    """
+    # Concatenate all batches
+    y_true_concat = tf.concat(all_val_true, axis=0)  # Shape: (N, H, W, num_classes)
+    y_pred_concat = tf.concat(all_val_pred, axis=0)  # Shape: (N, H, W, num_classes)
+    # Flatten spatial dimensions: (N*H*W, num_classes)
+    y_true_flat = tf.reshape(y_true_concat, [-1, num_classes])
+    y_pred_flat = tf.reshape(y_pred_concat, [-1, num_classes])
+    # Convert predictions to one-hot (argmax)
+    y_pred_classes = tf.argmax(y_pred_flat, axis=-1)
+    y_pred_onehot = tf.one_hot(y_pred_classes, depth=num_classes)
+    # Convert to numpy for easier computation
+    y_true_np = y_true_flat.numpy()
+    y_pred_np = y_pred_onehot.numpy()
+    metrics = {
+        'dice': {},
+        'precision': {},
+        'recall': {}
+    }
+    classes_to_evaluate = [c for c in range(num_classes) if c != exclude_class]
+    for class_idx in classes_to_evaluate:
+        # Extract binary masks for this class
+        true_class = y_true_np[:, class_idx]
+        pred_class = y_pred_np[:, class_idx]
+        # True Positives, False Positives, False Negatives
+        TP = np.sum((true_class == 1) & (pred_class == 1))
+        FP = np.sum((true_class == 0) & (pred_class == 1))
+        FN = np.sum((true_class == 1) & (pred_class == 0))
+        # Dice Score: 2*TP / (2*TP + FP + FN)
+        dice = (2 * TP) / (2 * TP + FP + FN + 1e-7)
+        # Precision: TP / (TP + FP)
+        precision = TP / (TP + FP + 1e-7)
+        # Recall (Sensitivity): TP / (TP + FN)
+        recall = TP / (TP + FN + 1e-7)
+        metrics['dice'][f'class_{class_idx}'] = float(dice)
+        metrics['precision'][f'class_{class_idx}'] = float(precision)
+        metrics['recall'][f'class_{class_idx}'] = float(recall)
+    # Compute mean metrics (excluding the excluded class)
+    metrics['dice']['mean'] = np.mean([v for v in metrics['dice'].values()])
+    metrics['precision']['mean'] = np.mean([v for v in metrics['precision'].values()])
+    metrics['recall']['mean'] = np.mean([v for v in metrics['recall'].values()])
+    return metrics
+###################### Experiment Configuration ######################
+class ExperimentConfig:
+    """Configuration for a Specific U-Net experiment"""
+    def __init__(self,
+                 variant: int = 1,
+                 preprocessing: str = 'standard',
+                 class_scenario: str = '3class',
+                 fold_id: int = 0,
+                 architecture_name: str = 'unet'
+                 ):
+        # Experiment identification
+        self.variant = variant
+        self.preprocessing = preprocessing  # 'standard' or 'zoomed'
+        self.class_scenario = class_scenario  # '3class' or '4class'
+        self.fold_id = fold_id
+        self.architecture_name = architecture_name
+        # Experiment name
+        self.exp_name = f"exp_{architecture_name}_{preprocessing}_{class_scenario}_fold{fold_id}"
+        # Number of classes
+        self.num_classes = 3 if class_scenario == '3class' else 4
+        # Training hyperparameters
+        self.batch_size = 4
+        self.img_width = 256
+        self.img_height = 256
+        self.epochs = 60
+        # Optimizer parameters
+        self.learning_rate = 2e-4
+        self.beta_1 = 0.9
+        # Adaptive loss parameters
+        self.focal_gamma = 0.5           # Focal loss focusing parameter
+        self.beta_threshold = 0.25       # Transition at epoch 15/60
+        self.beta_smoothness = 0.02      # Transition width
+        self.use_focal_alpha = True      # Use class weights in focal loss
+        # ReduceLROnPlateau parameters
+        self.lr_patience = 5          # Wait 5 epochs before reducing
+        self.lr_reduction_factor = 0.5  # Reduce LR by half
+        self.lr_min = 1e-7            # Don't go below this
+        self.lr_monitor = 'val_loss'  # Or 'val_dice_mean'
+        # Paths
+        self.results_dir = Path(f"results_fold_{fold_id}_var_{variant}_zscore3")
+        self.models_dir = self.results_dir / "models" / f"{preprocessing}_{class_scenario}"
+        self.figures_dir = self.results_dir / "figures" / f"{preprocessing}_{class_scenario}" / f"fold_{fold_id}"
+        self.logs_dir = self.results_dir / "logs" / f"{preprocessing}_{class_scenario}" / f"fold_{fold_id}"
+        # Create directories
+        self.models_dir.mkdir(parents=True, exist_ok=True)
+        self.figures_dir.mkdir(parents=True, exist_ok=True)
+        self.logs_dir.mkdir(parents=True, exist_ok=True)
+        # Checkpoint configuration
+        self.checkpoint_dir = self.models_dir / f"fold_{fold_id}"
+        self.checkpoint_dir.mkdir(exist_ok=True)
+        # Class weights directory
+        self.weights_dir = Path("class_weights")
+        self.weights_dir.mkdir(exist_ok=True)
+        # Save configuration
+        self.save_config()
+    def save_config(self):
+        """Save experiment configuration to JSON"""
+        config_dict = {
+            'variant': self.variant,
+            'variant_name': f'{self.architecture_name}',
+            'preprocessing': self.preprocessing,
+            'class_scenario': self.class_scenario,
+            'fold_id': self.fold_id,
+            'num_classes': self.num_classes,
+            'batch_size': self.batch_size,
+            'epochs': self.epochs,
+            'focal_gamma': self.focal_gamma,
+            'beta_threshold': self.beta_threshold,
+            'beta_smoothness': self.beta_smoothness,
+            'learning_rate': self.learning_rate,
+            'beta_1': self.beta_1,
+            'loss': 'Phase-transitioning segmentation loss (WCE → UFD)'
+        }
+        config_file = self.checkpoint_dir / "config.json"
+        with open(config_file, 'w') as f:
+            json.dump(config_dict, f, indent=2)
+###################### Beta Scheduling ######################
+def smooth_step(x, threshold=0.5, smoothness=0.1):
+    """
+    Smooth step function for phase transition
+    Creates smooth transition around threshold value using sigmoid.
+    Args:
+        x: Current progress (typically epoch / total_epochs)
+        threshold: Center point of transition (e.g., 0.5 for epoch 25/50)
+        smoothness: Width of transition (smaller = sharper, larger = smoother)
+    Returns:
+        Value in [0, 1] representing transition progress
+        - x << threshold: returns ≈ 0
+        - x ≈ threshold: returns ≈ 0.5
+        - x >> threshold: returns ≈ 1
+    Example:
+        epoch_progress = 0.3  # Epoch 15/50
+        beta = smooth_step(0.3, threshold=0.5, smoothness=0.1)
+        # beta ≈ 0.05 (mostly phase 1)
+        epoch_progress = 0.5  # Epoch 25/50
+        beta = smooth_step(0.5, threshold=0.5, smoothness=0.1)
+        # beta ≈ 0.5 (equal mix)
+        epoch_progress = 0.7  # Epoch 35/50
+        beta = smooth_step(0.7, threshold=0.5, smoothness=0.1)
+        # beta ≈ 0.95 (mostly phase 2)
+    """
+    # Sigmoid centered at threshold
+    # (x - threshold) / smoothness controls steepness
+    return tf.sigmoid((x - threshold) / smoothness)
+def compute_beta_schedule(current_epoch, total_epochs,
+                          threshold=0.5, smoothness=0.1):
+    """
+    Compute beta value for current epoch
+    Args:
+        current_epoch: Current epoch number (0-indexed)
+        total_epochs: Total number of epochs
+        threshold: Transition center (0.5 = midpoint)
+        smoothness: Transition width
+    Returns:
+        Beta value in [0, 1]
+    """
+    epoch_progress = tf.cast(current_epoch, tf.float32) / tf.cast(total_epochs, tf.float32)
+    beta = smooth_step(epoch_progress, threshold, smoothness)
+    return beta
+###################### Loss Functions ######################
+def unified_focal_loss(y_true, y_pred, gamma=2.0, alpha=None, exclude_class=None):
+    """
+    Unified Focal Loss
+    Focal loss down-weights easy examples and focuses on hard examples.
+    Particularly effective for class imbalance and boundary regions.
+    Args:
+        y_true: Ground truth labels (bs, H, W, num_classes) one-hot encoded
+        y_pred: Predicted probabilities (bs, H, W, num_classes) from softmax
+        gamma: Focusing parameter (default 2.0)
+            - gamma=0: equivalent to cross-entropy
+            - gamma>0: down-weights easy examples
+            - Higher gamma = more focus on hard examples
+        alpha: Per-class balancing weights (num_classes,) - optional, trainable
+            - If None, no additional balancing
+            - If provided, applies per-class weighting like weighted CE
+    Returns:
+        Scalar loss value
+    Formula:
+        FL = -α * (1 - p_t)^γ * log(p_t)
+        where:
+        - p_t is probability of correct class
+        - (1 - p_t)^γ is modulating factor (focal term)
+        - α is class balancing weight
+    """
+    # Clip predictions to avoid log(0)
+    y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
+    # Probability of correct class at each pixel
+    # y_true is one-hot, so this extracts p for the true class
+    p_t = tf.reduce_sum(y_true * y_pred, axis=-1)
+    # Shape: (bs, H, W)
+    # Focal term: (1 - p_t)^gamma
+    # This is small for easy examples (p_t ≈ 1) and large for hard examples (p_t ≈ 0)
+    focal_term = tf.pow(1.0 - p_t, gamma)
+    # Shape: (bs, H, W)
+    # Cross-entropy term: -log(p_t)
+    ce_term = -tf.math.log(p_t)
+    # Shape: (bs, H, W)
+    # Focal loss: focal_term * ce_term
+    focal_loss = focal_term * ce_term
+    # Shape: (bs, H, W)
+    # Optional: Apply alpha balancing (per-class weights)
+    if alpha is not None:
+        # Get weight for true class at each pixel
+        weights_tensor = tf.cast(alpha, dtype=tf.float32)
+        weights_tensor = tf.reshape(weights_tensor, [1, 1, 1, -1])
+        alpha_map = tf.reduce_sum(y_true * weights_tensor, axis=-1)
+        # Shape: (bs, H, W)
+    # Weighted focal
+    # Exclude specific class if specified
+    if exclude_class is not None:
+        class_mask = tf.argmax(y_true, axis=-1)  # (bs, 256, 256)
+        valid_mask = tf.cast(class_mask != exclude_class, tf.float32)
+        if alpha is not None:
+            focal_loss = alpha_map * focal_loss * valid_mask
+        else:
+            focal_loss = focal_loss * valid_mask
+        return tf.reduce_sum(focal_loss) / (tf.reduce_sum(valid_mask) + 1e-7)
+    else:
+        if alpha is not None:
+            focal_loss = alpha_map * focal_loss
+        return tf.reduce_mean(focal_loss)
+def unified_focal_dice_loss(y_true, y_pred, gamma=0.5, delta=0.6, alpha=None, exclude_class=None):
+    """
+    Unified Focal Loss - Dice-based
+    Combines Dice coefficient with precision-recall focal weighting.
+    Best for imbalanced multi-class segmentation with small structures.
+    Args:
+        y_true: Ground truth one-hot (bs, H, W, num_classes)
+        y_pred: Predicted probabilities (bs, H, W, num_classes)
+        gamma: Focusing parameter for Dice component (default 0.5)
+               - gamma=0: equivalent to Dice loss
+               - gamma>0: focuses on hard examples
+        delta: Weight for precision-recall component (0-1, default 0.6)
+               - Controls emphasis on boundary regions
+        alpha: Per-class weights (num_classes,) - optional
+        exclude_class: Class index to exclude from loss
+    Returns:
+        Scalar loss value
+    Formula:
+        UFL = (1 - Dice)^gamma * (1 - precision * recall)^delta
+        Focuses on hard examples and boundary regions
+    """
+    smooth = 1e-6
+    y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
+    num_classes = tf.shape(y_pred)[-1]
+    unified_losses = []
+    for class_idx in range(num_classes if isinstance(num_classes, int) else y_pred.shape[-1]):
+        # Skip excluded class
+        if exclude_class is not None and class_idx == exclude_class:
+            continue
+        y_true_class = y_true[..., class_idx]
+        y_pred_class = y_pred[..., class_idx]
+        # Flatten for calculations
+        y_true_f = tf.reshape(y_true_class, [-1])
+        y_pred_f = tf.reshape(y_pred_class, [-1])
+        # True positives, false positives, false negatives
+        tp = tf.reduce_sum(y_true_f * y_pred_f)
+        fp = tf.reduce_sum((1.0 - y_true_f) * y_pred_f)
+        fn = tf.reduce_sum(y_true_f * (1.0 - y_pred_f))
+        # Precision and recall
+        precision = (tp + smooth) / (tp + fp + smooth)
+        recall = (tp + smooth) / (tp + fn + smooth)
+        # Dice coefficient
+        dice = (2.0 * tp + smooth) / (2.0 * tp + fp + fn + smooth)
+        # Unified focal loss: focuses on hard examples and boundary regions
+        # (1 - dice)^gamma: focuses on classes with low Dice (hard examples)
+        # (1 - precision * recall)^delta: focuses on boundary regions
+        unified_loss_class = tf.pow(1.0 - dice, gamma) * tf.pow(1.0 - precision * recall, delta)
+        # Apply class weights
+        if alpha is not None:
+            unified_loss_class = unified_loss_class * tf.cast(alpha[class_idx], tf.float32)
+        unified_losses.append(unified_loss_class)
+    # Stack and mean across classes (excluding the skipped class)
+    total_loss = tf.reduce_mean(tf.stack(unified_losses))
+    return total_loss
+def weighted_categorical_crossentropy(y_true, y_pred, class_weights, exclude_class=None):
+    """
+    Weighted categorical cross-entropy loss
+    Args:
+        y_true: (bs, 256, 256, num_classes) one-hot encoded
+        y_pred: (bs, 256, 256, num_classes) softmax probabilities
+        class_weights: (num_classes,) weight per class
+        exclude_class: Optional int, class index to exclude from loss (e.g., 2 for CSF)
+    Returns:
+        Scalar loss value
+    """
+    # Clip predictions to prevent log(0)
+    y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
+    # Cross-entropy per pixel: -sum(y_true * log(y_pred))
+    ce = -tf.reduce_sum(y_true * tf.math.log(y_pred), axis=-1)  # (bs, 256, 256)
+    # Apply class weights
+    # class_weights shape: (num_classes,) -> (1, 1, 1, num_classes) for broadcasting
+    weights_tensor = tf.cast(class_weights, dtype=tf.float32)
+    weights_tensor = tf.reshape(weights_tensor, [1, 1, 1, -1])
+    # Weight map: (bs, 256, 256)
+    pixel_weights = tf.reduce_sum(y_true * weights_tensor, axis=-1)
+    # Weighted cross-entropy
+    # Exclude specific class if specified
+    if exclude_class is not None:
+        class_mask = tf.argmax(y_true, axis=-1)  # (bs, 256, 256)
+        valid_mask = tf.cast(class_mask != exclude_class, tf.float32)
+        weighted_ce = ce * pixel_weights * valid_mask
+        return tf.reduce_sum(weighted_ce) / (tf.reduce_sum(valid_mask) + 1e-7)
+    else:
+        weighted_ce = ce * pixel_weights
+        return tf.reduce_mean(weighted_ce)
+def adaptive_segmentation_loss(y_true, y_pred, class_weights, beta,
+                               focal_gamma=0.5, use_focal_alpha=True,
+                               exclude_class=None):
+    """
+    Adaptive segmentation loss with hard phase transition
+    Combines weighted cross-entropy (phase 1) and focal loss (phase 2)
+    based on epoch progress (beta).
+    Args:
+        y_true: Ground truth (bs, H, W, num_classes) one-hot
+        y_pred: Predictions (bs, H, W, num_classes) softmax probabilities
+        class_weights: Trainable class weights (num_classes,)
+        beta: Transition parameter [0, 1]
+            - beta=0: pure weighted CE (early training)
+            - beta=1: pure focal loss (late training)
+        focal_gamma: Focusing parameter for focal loss (default 0.5)
+        use_focal_alpha: Whether to use class_weights as focal alpha
+    Returns:
+        seg_loss: Final loss
+        wcce_loss: Weighted CE component (for monitoring)
+        focal_loss: Focal loss component (for monitoring)
+    Phase Behavior:
+        Epochs 1-10: beta ≈ 0 → Weighted CE dominates
+            - Learns basic class separation
+            - Benefits from explicit class weighting
+        Epochs 10-20: beta transitions 0 → 1
+            - Smooth change in loss landscape
+            - Gradual shift in training dynamics
+        Epochs 20-60: beta ≈ 1 → Focal loss dominates
+            - Focuses on hard examples
+            - Refines boundaries and difficult regions
+    """
+    # Compute Phase 1 loss: Weighted Cross-Entropy
+    wcce_loss = 10 * weighted_categorical_crossentropy(y_true, y_pred, class_weights, exclude_class=exclude_class)
+    # Compute Phase 2 loss: Focal Loss
+    focal_alpha = class_weights if use_focal_alpha else None
+    focal_loss = unified_focal_dice_loss(y_true, y_pred,
+                                       gamma=focal_gamma,
+                                       alpha=focal_alpha,
+                                       exclude_class=exclude_class)
+    # Adaptive combination based on beta
+    # beta=0: (1-0)*wce + 0*focal = wce (phase 1)
+    # beta=1: (1-1)*wce + 1*focal = focal (phase 2)
+    # beta=0.5: 0.5*wce + 0.5*focal = equal mix (transition)
+    seg_loss = (1.0 - beta) * wcce_loss + beta * focal_loss
+    return seg_loss, wcce_loss, focal_loss
+###################### Training Functions ######################
+@tf.function
+def train_step(input_image, target_onehot, model, optimizer,
+               class_weights, beta, focal_gamma,
+               use_focal_alpha=True, exclude_class=None):
+    """
+    Single training step for U-Net
+    Args:
+        input_image: Input FLAIR (bs, 256, 256, 1) in [-1, 1]
+        target_onehot: Target mask (bs, 256, 256, num_classes) one-hot
+        model: a specific U-Net model
+        optimizer: Optimizer
+        class_weights: (num_classes,) weight per class
+        beta: Current beta for phase transition
+    Returns:
+        loss: Training loss value
+    """
+    with tf.GradientTape() as tape:
+        # Forward pass
+        predictions = model(input_image, training=True)
+        # Compute loss
+        seg_loss, wcce_loss, focal_loss = adaptive_segmentation_loss(target_onehot, predictions, class_weights,
+                                                                     beta, focal_gamma, use_focal_alpha, exclude_class)
+    # Calculate gradients
+    gradients = tape.gradient(seg_loss, model.trainable_variables)
+    # Apply gradients
+    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+    return seg_loss, wcce_loss, focal_loss
+def generate_and_save_images(model, test_input, test_target,
+                            epoch, save_path, num_classes):
+    """
+    Generate predictions and save visualization
+    Args:
+        model: a specific U-Net model
+        test_input: Test input image (bs, 256, 512, 1)
+        test_target: Test target mask (bs, 256, 256)
+        epoch: Current epoch number
+        save_path: Path to save figure
+        num_classes: Number of classes
+    """
+    for ik in range(test_input.numpy().shape[0]):
+        # Extract FLAIR
+        flair_normalized = test_input[ik, :, :256, :]
+        flair_normalized = tf.expand_dims(flair_normalized, axis=0)
+        # Generate prediction
+        prediction_softmax = model(flair_normalized, training=False)
+        # Convert to class labels
+        pred_classes = tf.argmax(prediction_softmax, axis=-1).numpy()
+        target_mask = test_target[ik].numpy()
+        # Create figure
+        plt.figure(figsize=(20, 5))
+        # Input FLAIR
+        plt.subplot(1, 5, 1)
+        plt.title('Input FLAIR')
+        plt.imshow(flair_normalized[0, :, :, 0], cmap='gray')
+        plt.axis('off')
+        # Ground truth
+        plt.subplot(1, 5, 2)
+        plt.title('Ground Truth')
+        plt.imshow(target_mask, cmap='jet', vmin=0, vmax=num_classes-1)
+        plt.colorbar()
+        plt.axis('off')
+        # Prediction
+        plt.subplot(1, 5, 3)
+        plt.title('Predicted Classes')
+        plt.imshow(pred_classes[0], cmap='jet', vmin=0, vmax=num_classes-1)
+        plt.colorbar()
+        plt.axis('off')
+        # Class probabilities for most confident prediction
+        plt.subplot(1, 5, 4)
+        plt.title('Max Probability')
+        max_prob = tf.reduce_max(prediction_softmax[0], axis=-1).numpy()
+        plt.imshow(max_prob, cmap='viridis', vmin=0, vmax=1)
+        plt.colorbar()
+        plt.axis('off')
+        # Difference map
+        plt.subplot(1, 5, 5)
+        plt.title('Error Map (Red=Wrong)')
+        error_map = (pred_classes[0] != target_mask).astype(float)
+        plt.imshow(error_map, cmap='Reds', vmin=0, vmax=1)
+        plt.colorbar()
+        plt.axis('off')
+        plt.tight_layout()
+        plt.savefig(save_path / f'epoch_{epoch:03d}_{ik+1}.png', dpi=300, bbox_inches='tight')
+        plt.close()
+###################### Main Training Function ######################
+def train_net(config: ExperimentConfig):
+    """
+    Main training function for a Specific U-Net
+    Args:
+        config: ExperimentConfig object
+    """
+    print("\n" + "="*70)
+    print(f"TRAINING {config.architecture_name}: {config.exp_name}")
+    print("="*70)
+    print(f"Variant: {config.variant}")
+    print(f"Preprocessing: {config.preprocessing}")
+    print(f"Class scenario: {config.class_scenario} ({config.num_classes} classes)")
+    print(f"Fold: {config.fold_id}")
+    print(f"Epochs: {config.epochs}")
+    print(f"Batch size: {config.batch_size}")
+    print(f"Loss: Weighted Categorical Cross-Entropy → Unified Focal")
+    print("="*70 + "\n")
+    # Check initial GPU memory
+    get_gpu_memory_info()
+    # Initialize data loader
+    data_config = DataConfig()
+    data_loader = P2DataLoader(data_config)
+    # Load datasets
+    print("Loading training data...")
+    train_dataset = data_loader.create_dataset_for_fold(
+        fold_id=config.fold_id,
+        split='train',
+        preprocessing=config.preprocessing,
+        class_scenario=config.class_scenario,
+        batch_size=config.batch_size,
+        shuffle=True
+    )
+    print("Loading validation data...")
+    val_dataset = data_loader.create_dataset_for_fold(
+        fold_id=config.fold_id,
+        split='val',
+        preprocessing=config.preprocessing,
+        class_scenario=config.class_scenario,
+        batch_size=config.batch_size,
+        shuffle=False
+    )
+    # Get dataset sizes
+    # Note: from_generator pipelines always report cardinality as INFINITE (-1)
+    # even with .cache(), so we derive the batch count from the slice list instead.
+    # We iterate once here; this also warms the in-memory cache so epoch 1 is fast.
+    print("Warming dataset cache (first pass over data — subsequent epochs use RAM)...")
+    train_size = sum(1 for _ in train_dataset)
+    val_size   = sum(1 for _ in val_dataset)
+    # ⚠️  Do NOT rebuild the datasets here — that would create new generators and
+    #     throw away the cache we just populated.
+    print(f"Training samples (batches): {train_size}")
+    print(f"Validation samples (batches): {val_size}\n")
+    # Compute or load class weights
+    print("Computing class weights from training data...")
+    try:
+        class_weights = load_class_weights(
+            config.fold_id, config.class_scenario,
+            config.preprocessing, config.weights_dir
+        )
+        print("✅ Loaded pre-computed class weights")
+    except FileNotFoundError:
+        print("Computing class weights (this may take a few minutes)...")
+        results = compute_and_save_class_weights(
+            config.fold_id, config.class_scenario,
+            config.preprocessing, str(config.weights_dir)
+        )
+        class_weights = np.array(results['class_weights'], dtype=np.float32)
+    print(f"Class weights: {class_weights}")
+    # Build model
+    print(f"\n🏗️  Building {config.architecture_name} model...")
+    if config.architecture_name == 'unet':
+        from unet_model import build_unet_3class as build_specific_3class # must be updated with the actual used model for traininig
+    elif config.architecture_name == 'attnunet':
+        from attn_unet_model import build_attention_unet_3class as build_specific_3class
+    elif config.architecture_name == 'dlv3unet':
+        from dlv3_unet_model_GN import build_deeplabv3_unet_3class as build_specific_3class
+    elif config.architecture_name == 'transunet':
+        from trans_unet_model import build_trans_unet_3class as build_specific_3class
+    else:
+        print(f"❌ Error loading model: Invalid Model Name")
+        raise
+    model = build_specific_3class(input_shape=(256, 256, 1), num_classes=config.num_classes)
+    print(f"Model parameters: {model.count_params():,}\n")
+    # Optimizer (will be updated with ReduceLROnPlateau)
+    optimizer = tf.keras.optimizers.legacy.Adam(
+        config.learning_rate, beta_1=config.beta_1
+    )
+    # Initialize optimizer variables
+    print("Initializing optimizer variables...")
+    dummy_input = tf.zeros((1, 256, 256, 1))
+    with tf.GradientTape() as tape:
+        output = model(dummy_input, training=True)
+        dummy_loss = tf.reduce_mean(output)
+    # Apply dummy gradients to build optimizer variables
+    grads = tape.gradient(dummy_loss, model.trainable_variables)
+    optimizer.apply_gradients(zip(grads, model.trainable_variables))
+    print("✅ Optimizer variables initialized\n")
+    # Checkpoint
+    checkpoint = tf.train.Checkpoint(
+        optimizer=optimizer,
+        model=model
+    )
+    checkpoint_prefix = config.checkpoint_dir / "ckpt"
+    manager = tf.train.CheckpointManager(
+        checkpoint, config.checkpoint_dir, max_to_keep=1
+    )
+    if manager.latest_checkpoint:
+        checkpoint.restore(manager.latest_checkpoint)
+        print(f"✅ Restored from checkpoint: {manager.latest_checkpoint}\n")
+    else:
+        print("Starting training from scratch\n")
+    # Get example for visualization
+    skip_n = 1 # min(100 // config.batch_size, val_size - 1)
+    example_paired, example_target, _, _ = next(iter(val_dataset.skip(skip_n).take(20)))
+    print("Initializing metrics computer...")
+    if config.num_classes == 4:
+        class_names = ['Background', 'Ventricles', 'Normal_WMH', 'Abnormal_WMH']
+    elif config.num_classes == 3:
+        class_names = ['Background', 'Ventricles', 'Abnormal_WMH']
+    # Training history
+    history = {
+        'train_loss': [],
+        'wce_loss': [],
+        'ufd_loss': [],
+        'val_loss': [],
+        'val_loss_wce': [],
+        'val_loss_ufd': [],
+        'val_metrics': [],
+        'beta_value': []
+    }
+    # Training loop
+    best_val_loss = float('inf')
+    best_val_dice = float('-inf')
+    exclude_class = 2 if config.num_classes == 4 else None  # Exclude class 2 only in 4-class
+    try:
+        for epoch in range(config.epochs):
+            start_time = time.time()
+            # Compute beta for this epoch
+            beta_value = compute_beta_schedule(
+                epoch, config.epochs,
+                config.beta_threshold, config.beta_smoothness
+            )
+            # Training metrics
+            epoch_losses = []
+            epoch_loss_wce = []
+            epoch_loss_ufd = []
+            # Training loop
+            # Update learning rate based on epoch
+            # y1 = 2 * np.exp(-np.log(400) * x)       # original
+            # y2 = 2 * np.exp(-np.log(400) * x**2)    # milder
+            # y3 = 2 * np.exp(-np.log(400) * x**3)    # even milder ✅
+            # y4 = 2 * np.exp(-np.log(400) * x**5)    # very mild
+            new_lr = config.learning_rate * np.exp(-np.log(400) * (epoch / config.epochs)**3)  # Steadily and exponentially decay from 2e-4 to 5e-7
+            optimizer.learning_rate.assign(new_lr)
+            print(f"\nEpoch {epoch+1}/{config.epochs} (β={beta_value.numpy():.4f}) (lr={new_lr*10000:.3f} 10-4)")
+            train_bar = tqdm(train_dataset, total=train_size, desc="Training")
+            for paired_input, target_mask, patient_id_tensor, slice_num_tensor in train_bar:
+                patient_id = patient_id_tensor.numpy()[0].decode('utf-8')  # batch dim + bytes→str
+                slice_num  = int(slice_num_tensor.numpy()[0])
+                # ✅ Prepare inputs: normalize FLAIR + one-hot encode target
+                flair_normalized, target_onehot = prepare_inputs(
+                    paired_input, target_mask, config.num_classes
+                )
+                # Train step
+                loss, wce_loss, ufd_loss = train_step(
+                    flair_normalized, target_onehot,
+                    model, optimizer, class_weights,
+                    beta_value, config.focal_gamma
+                )
+                epoch_losses.append(loss.numpy())
+                epoch_loss_wce.append(wce_loss.numpy())
+                epoch_loss_ufd.append(ufd_loss.numpy())
+                # Update progress bar
+                train_bar.set_postfix({
+                    'seg_loss': f"{loss.numpy():.5f}",
+                    'wce_loss': f"{wce_loss.numpy():.5f}",
+                    'ufd_loss': f"{ufd_loss.numpy():.5f}",
+                })
+            # Calculate epoch average
+            avg_train_loss = np.mean(epoch_losses)
+            avg_train_loss_wce = np.mean(epoch_loss_wce)
+            avg_train_loss_ufd = np.mean(epoch_loss_ufd)
+            history['train_loss'].append(avg_train_loss)
+            history['wce_loss'].append(avg_train_loss_wce)
+            history['ufd_loss'].append(avg_train_loss_ufd)
+            history['beta_value'].append(float(beta_value.numpy()))
+            # Validation
+            val_losses = []
+            val_losses_wce = []
+            val_losses_ufd = []
+            all_val_true = []
+            all_val_pred = []
+            for val_paired, val_target, patient_id_tensor, slice_num_tensor in val_dataset:
+                try:
+                    patient_id = patient_id_tensor.numpy()[0].decode('utf-8')  # batch dim + bytes→str
+                    slice_num  = int(slice_num_tensor.numpy()[0])
+                    val_flair_norm, val_target_onehot = prepare_inputs(
+                        val_paired, val_target, config.num_classes
+                    )
+                    val_pred = model(val_flair_norm, training=False)
+                    val_loss, val_wce_loss, val_ufd_loss = adaptive_segmentation_loss(
+                        val_target_onehot, val_pred, class_weights,
+                        beta_value, focal_gamma=config.focal_gamma, exclude_class=exclude_class
+                    )
+                    # Store true and prediction values for metrics calculation
+                    all_val_true.append(val_target_onehot)
+                    all_val_pred.append(val_pred)
+                    if not tf.math.is_nan(val_loss):
+                        val_losses.append(val_loss.numpy())
+                        val_losses_wce.append(val_wce_loss.numpy())
+                        val_losses_ufd.append(val_ufd_loss.numpy())
+                except:
+                    continue
+            if len(val_losses) > 0:
+                avg_val_loss = np.mean(val_losses)
+                avg_val_loss_wce = np.mean(val_losses_wce)
+                avg_val_loss_ufd = np.mean(val_losses_ufd)
+                history['val_loss'].append(avg_val_loss)
+                history['val_loss_wce'].append(avg_val_loss_wce)
+                history['val_loss_ufd'].append(avg_val_loss_ufd)
+                # Compute class-wise metrics
+                val_metrics = compute_classwise_metrics(
+                    all_val_true, all_val_pred,
+                    config.num_classes#, exclude_class=exclude_class
+                )
+                history['val_metrics'].append(val_metrics)
+                # Print validation results
+                epoch_time = time.time() - start_time
+                print(f"\n{'='*70}")
+                print(f"Epoch {epoch+1}/{config.epochs} Summary (Time: {epoch_time:.2f}s)")
+                print(f"{'='*70}")
+                print(f"Training Loss: {avg_train_loss:.4f} | wce_loss: {avg_train_loss_wce:.4f} | ufd_loss: {avg_train_loss_ufd:.4f}")
+                print(f"Validation Loss: {avg_val_loss:.4f}")
+                print(f"\nClass-wise Dice Scores:")
+                for class_name, dice_val in val_metrics['dice'].items():
+                    if class_name != 'mean':
+                        print(f"  {class_name}: {dice_val:.4f}")
+                        if class_name == f"class_{config.num_classes - 1}":
+                            abwmh_val_dice = dice_val
+                        elif class_name == f"class_1":
+                            vent_val_dice = dice_val
+                print(f"  Mean Dice: {val_metrics['dice']['mean']:.4f}")
+                print(f"\nClass-wise Precision:")
+                for class_name, prec_val in val_metrics['precision'].items():
+                    if class_name != 'mean':
+                        print(f"  {class_name}: {prec_val:.4f}")
+                print(f"  Mean Precision: {val_metrics['precision']['mean']:.4f}")
+                print(f"\nClass-wise Recall:")
+                for class_name, rec_val in val_metrics['recall'].items():
+                    if class_name != 'mean':
+                        print(f"  {class_name}: {rec_val:.4f}")
+                print(f"  Mean Recall: {val_metrics['recall']['mean']:.4f}")
+                print(f"{'='*70}\n")
+                # Save best model based on overall validation performance
+                overal_val_performance = 0.6 * abwmh_val_dice + 0.3 * vent_val_dice + 0.1 * (1 - 1*avg_val_loss)
+                if overal_val_performance > best_val_dice and beta_value.numpy() > 0.9:
+                    best_val_dice = overal_val_performance
+                    model.save_weights(f"{config.checkpoint_dir}/best_dice_model.h5")
+                    print(f"✓ Best model saved (performance: {best_val_dice:.4f})")
+            else:
+                print("Warning: No valid validation batches")
+                history['val_loss'].append(float('nan'))
+                history['val_metrics'].append({})
+            # Save checkpoint
+            if (epoch + 1) % 5 == 0 and False:
+                manager.save()
+                print(f"  💾 Saved checkpoint")
+            # Generate sample images
+            if ((epoch + 1) % 5 == 0 or epoch == 0) or True:
+                generate_and_save_images(
+                    model, example_paired, example_target,
+                    epoch + 1, config.figures_dir, config.num_classes
+                )
+                print(f"  📊 Saved visualization")
+        # # Save final model
+        # final_model_path = config.checkpoint_dir / "final_model.h5"
+        # model.save(final_model_path)
+        # print(f"\n✅ Training complete! Final model saved to {final_model_path}")
+        # Save history
+        history_serializable = {
+            key: [float(val) if isinstance(val, (int, float, np.number)) else val
+                  for val in values]
+            for key, values in history.items()
+        }
+        history_file = config.checkpoint_dir / "history.json"
+        with open(history_file, 'w') as f:
+            json.dump(history_serializable, f, indent=2)
+        return history, history_file
+    finally:
+        # CRITICAL: Always cleanup, even if training fails
+        print("\n🧹 Cleaning up resources...")
+        # Delete models explicitly to break references
+        try:
+            del model
+            del optimizer
+            del checkpoint
+            del manager
+            del train_dataset
+            del val_dataset
+            print("✅ Deleted model objects")
+        except Exception as e:
+            print(f"⚠️  Error deleting objects: {e}")
+        # Clear GPU memory
+        clear_gpu_memory()
+        # Check final GPU memory
+        get_gpu_memory_info()
+###################### Main Execution ######################
+if __name__ == "__main__":
+    # Example: Train a specific U-Net for 3-class, standard preprocessing, fold 0
+    config = ExperimentConfig(
+        variant=3,
+        preprocessing='standard',
+        class_scenario='3class',
+        fold_id=0,
+        architecture_name='dlv3unet'    # ['unet', 'attnunet', 'dlv3unet', transunet']
+    )
+    history, history_path = train_net(config)
+    print("\n" + "="*70)
+    print("U-NET TRAINING COMPLETE")
+    print("="*70)

models/for_WMH_Vent/model_training_scripts/trans_unet_model.py ADDED Viewed

	@@ -0,0 +1,125 @@

+###################### Libraries ######################
+# Deep Learning
+import tensorflow as tf
+import keras
+from keras.models import Model, load_model
+from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
+from keras import backend as K
+from tensorflow.keras import layers, optimizers, callbacks
+from keras.utils import to_categorical
+def build_trans_unet_3class(input_shape=(256, 256, 1), num_classes=3):
+    """
+    TransUNet architecture for medical image segmentation
+    Combines CNN encoder with Transformer decoder
+    """
+    inputs = layers.Input(input_shape)
+    # ==================== CNN ENCODER ====================
+    # Stage 1
+    conv1 = layers.Conv2D(64, 3, padding='same', activation='relu')(inputs)
+    conv1 = layers.Conv2D(64, 3, padding='same', activation='relu')(conv1)
+    conv1 = layers.Dropout(0.1)(conv1)
+    pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
+    # Stage 2
+    conv2 = layers.Conv2D(128, 3, padding='same', activation='relu')(pool1)
+    conv2 = layers.Conv2D(128, 3, padding='same', activation='relu')(conv2)
+    conv2 = layers.Dropout(0.1)(conv2)
+    pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
+    # Stage 3
+    conv3 = layers.Conv2D(256, 3, padding='same', activation='relu')(pool2)
+    conv3 = layers.Conv2D(256, 3, padding='same', activation='relu')(conv3)
+    conv3 = layers.Dropout(0.2)(conv3)
+    pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv3)
+    # Stage 4
+    conv4 = layers.Conv2D(512, 3, padding='same', activation='relu')(pool3)
+    conv4 = layers.Conv2D(512, 3, padding='same', activation='relu')(conv4)
+    conv4 = layers.Dropout(0.2)(conv4)
+    pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
+    # ==================== TRANSFORMER BOTTLENECK ====================
+    # Bottleneck features: 16x16x512
+    bottleneck = layers.Conv2D(768, 3, padding='same', activation='relu')(pool4)
+    bottleneck = layers.Dropout(0.3)(bottleneck)
+    # Prepare for transformer: reshape to sequence
+    batch_size = tf.shape(bottleneck)[0]
+    h, w = 16, 16  # feature map dimensions at bottleneck
+    d_model = 768  # transformer dimension
+    # Flatten spatial dimensions for transformer
+    transformer_input = layers.Reshape((h * w, d_model))(bottleneck)
+    # Add positional encoding
+    positions = tf.range(start=0, limit=h * w, delta=1)
+    pos_encoding = layers.Embedding(h * w, d_model)(positions)
+    transformer_input = transformer_input + pos_encoding
+    # Multi-head attention blocks
+    for _ in range(4):  # 4 transformer layers
+        # Multi-head attention
+        attention_output = layers.MultiHeadAttention(
+            num_heads=8, key_dim=d_model // 8, dropout=0.1
+        )(transformer_input, transformer_input)
+        attention_output = layers.Dropout(0.1)(attention_output)
+        transformer_input = layers.LayerNormalization()(transformer_input + attention_output)
+        # Feed forward network
+        ffn = layers.Dense(d_model * 2, activation='relu')(transformer_input)
+        ffn = layers.Dropout(0.1)(ffn)
+        ffn = layers.Dense(d_model)(ffn)
+        ffn = layers.Dropout(0.1)(ffn)
+        transformer_input = layers.LayerNormalization()(transformer_input + ffn)
+    # Reshape back to spatial format
+    transformer_output = layers.Reshape((h, w, d_model))(transformer_input)
+    # Project back to bottleneck dimension
+    bottleneck_enhanced = layers.Conv2D(512, 1, activation='relu')(transformer_output)
+    bottleneck_enhanced = layers.Dropout(0.3)(bottleneck_enhanced)
+    # ==================== CNN DECODER ====================
+    # Decoder Stage 1
+    up1 = layers.Conv2DTranspose(512, 2, strides=2, padding='same')(bottleneck_enhanced)
+    concat1 = layers.Concatenate()([up1, conv4])
+    concat1 = layers.Dropout(0.2)(concat1)
+    conv_up1 = layers.Conv2D(512, 3, padding='same', activation='relu')(concat1)
+    conv_up1 = layers.Conv2D(512, 3, padding='same', activation='relu')(conv_up1)
+    # Decoder Stage 2
+    up2 = layers.Conv2DTranspose(256, 2, strides=2, padding='same')(conv_up1)
+    concat2 = layers.Concatenate()([up2, conv3])
+    concat2 = layers.Dropout(0.2)(concat2)
+    conv_up2 = layers.Conv2D(256, 3, padding='same', activation='relu')(concat2)
+    conv_up2 = layers.Conv2D(256, 3, padding='same', activation='relu')(conv_up2)
+    # Decoder Stage 3
+    up3 = layers.Conv2DTranspose(128, 2, strides=2, padding='same')(conv_up2)
+    concat3 = layers.Concatenate()([up3, conv2])
+    concat3 = layers.Dropout(0.1)(concat3)
+    conv_up3 = layers.Conv2D(128, 3, padding='same', activation='relu')(concat3)
+    conv_up3 = layers.Conv2D(128, 3, padding='same', activation='relu')(conv_up3)
+    # Decoder Stage 4
+    up4 = layers.Conv2DTranspose(64, 2, strides=2, padding='same')(conv_up3)
+    concat4 = layers.Concatenate()([up4, conv1])
+    concat4 = layers.Dropout(0.1)(concat4)
+    conv_up4 = layers.Conv2D(64, 3, padding='same', activation='relu')(concat4)
+    conv_up4 = layers.Conv2D(64, 3, padding='same', activation='relu')(conv_up4)
+    # ==================== OUTPUT LAYER ====================
+    if num_classes == 1:
+        outputs = layers.Conv2D(1, 1, activation='sigmoid')(conv_up4)
+    else:
+        outputs = layers.Conv2D(num_classes, 1, activation='softmax')(conv_up4)
+    model = tf.keras.Model(inputs, outputs, name='TransUNet')
+    return model

models/for_WMH_Vent/model_training_scripts/unet_model.py ADDED Viewed

	@@ -0,0 +1,87 @@

+###################### Libraries ######################
+# Deep Learning
+import keras
+from keras.models import Model
+from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
+def build_unet_3class(input_shape=(256, 256, 1), num_classes=3):
+    """Enhanced U-Net architecture with batch normalization and dropout"""
+    inputs = Input(input_shape)
+    # Encoder with batch normalization
+    c1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
+    # c1 = keras.layers.BatchNormalization()(c1)
+    c1 = Conv2D(64, 3, activation='relu', padding='same')(c1)
+    # c1 = keras.layers.BatchNormalization()(c1)
+    p1 = MaxPooling2D()(c1)
+    p1 = keras.layers.Dropout(0.1)(p1)
+    c2 = Conv2D(128, 3, activation='relu', padding='same')(p1)
+    # c2 = keras.layers.BatchNormalization()(c2)
+    c2 = Conv2D(128, 3, activation='relu', padding='same')(c2)
+    # c2 = keras.layers.BatchNormalization()(c2)
+    p2 = MaxPooling2D()(c2)
+    p2 = keras.layers.Dropout(0.1)(p2)
+    c3 = Conv2D(256, 3, activation='relu', padding='same')(p2)
+    # c3 = keras.layers.BatchNormalization()(c3)
+    c3 = Conv2D(256, 3, activation='relu', padding='same')(c3)
+    # c3 = keras.layers.BatchNormalization()(c3)
+    p3 = MaxPooling2D()(c3)
+    p3 = keras.layers.Dropout(0.2)(p3)
+    c4 = Conv2D(512, 3, activation='relu', padding='same')(p3)
+    # c4 = keras.layers.BatchNormalization()(c4)
+    c4 = Conv2D(512, 3, activation='relu', padding='same')(c4)
+    # c4 = keras.layers.BatchNormalization()(c4)
+    p4 = MaxPooling2D()(c4)
+    p4 = keras.layers.Dropout(0.2)(p4)
+    # Bottleneck
+    c5 = Conv2D(1024, 3, activation='relu', padding='same')(p4)
+    # c5 = keras.layers.BatchNormalization()(c5)
+    c5 = Conv2D(1024, 3, activation='relu', padding='same')(c5)
+    # c5 = keras.layers.BatchNormalization()(c5)
+    c5 = keras.layers.Dropout(0.3)(c5)
+    # Decoder
+    u6 = Conv2DTranspose(512, 2, strides=2, padding='same')(c5)
+    u6 = concatenate([u6, c4])
+    u6 = keras.layers.Dropout(0.2)(u6)
+    c6 = Conv2D(512, 3, activation='relu', padding='same')(u6)
+    # c6 = keras.layers.BatchNormalization()(c6)
+    c6 = Conv2D(512, 3, activation='relu', padding='same')(c6)
+    # c6 = keras.layers.BatchNormalization()(c6)
+    u7 = Conv2DTranspose(256, 2, strides=2, padding='same')(c6)
+    u7 = concatenate([u7, c3])
+    u7 = keras.layers.Dropout(0.2)(u7)
+    c7 = Conv2D(256, 3, activation='relu', padding='same')(u7)
+    # c7 = keras.layers.BatchNormalization()(c7)
+    c7 = Conv2D(256, 3, activation='relu', padding='same')(c7)
+    # c7 = keras.layers.BatchNormalization()(c7)
+    u8 = Conv2DTranspose(128, 2, strides=2, padding='same')(c7)
+    u8 = concatenate([u8, c2])
+    u8 = keras.layers.Dropout(0.1)(u8)
+    c8 = Conv2D(128, 3, activation='relu', padding='same')(u8)
+    # c8 = keras.layers.BatchNormalization()(c8)
+    c8 = Conv2D(128, 3, activation='relu', padding='same')(c8)
+    # c8 = keras.layers.BatchNormalization()(c8)
+    u9 = Conv2DTranspose(64, 2, strides=2, padding='same')(c8)
+    u9 = concatenate([u9, c1])
+    u9 = keras.layers.Dropout(0.1)(u9)
+    c9 = Conv2D(64, 3, activation='relu', padding='same')(u9)
+    # c9 = keras.layers.BatchNormalization()(c9)
+    c9 = Conv2D(64, 3, activation='relu', padding='same')(c9)
+    # c9 = keras.layers.BatchNormalization()(c9)
+    # Output layer
+    if num_classes == 1:
+        outputs = Conv2D(1, 1, activation='sigmoid')(c9)
+    else:
+        outputs = Conv2D(num_classes, 1, activation='softmax')(c9)
+    return Model(inputs, outputs, name='UNet')

models/for_WMH_Vent/model_training_scripts/utility_functions.py ADDED Viewed

	@@ -0,0 +1,96 @@

+"""
+P4 Article - Utility Functions
+Developer:
+"Mahdi Bashiri Bawil"
+"""
+import gc
+import tensorflow as tf
+from tensorflow.keras import backend as K
+print("TensorFlow Version:", tf.__version__)
+###################### GPU Configuration ######################
+# Configure GPU memory growth
+physical_devices = tf.config.list_physical_devices('GPU')
+if physical_devices:
+    try:
+        for device in physical_devices:
+            tf.config.experimental.set_memory_growth(device, True)
+        print("✅ GPU memory growth enabled")
+        print(f"   Available GPUs: {len(physical_devices)}")
+    except RuntimeError as e:
+        print(f"GPU configuration error: {e}")
+else:
+    print("⚠️  No GPU detected - training will be slow")
+"""
+GPU Memory Management for Sequential Experiments
+To properly release memory between experiments
+"""
+def clear_gpu_memory():
+    """
+    Comprehensive GPU memory cleanup between experiments
+    Call this after each experiment completes
+    """
+    print("\n" + "="*70)
+    print("CLEANING UP GPU MEMORY")
+    print("="*70)
+    # Clear Keras session
+    K.clear_session()
+    print("✅ Cleared Keras session")
+    # Force garbage collection multiple times
+    for _ in range(3):
+        gc.collect()
+    print("✅ Ran garbage collection (3 passes)")
+    # Reset TensorFlow graphs
+    tf.compat.v1.reset_default_graph()
+    print("✅ Reset default graph")
+    # Additional cleanup for TF 2.x
+    try:
+        # Clear any cached tensors
+        tf.config.experimental.reset_memory_stats('GPU:0')
+        print("✅ Reset GPU memory stats")
+    except:
+        pass
+    # CRITICAL: Reset GPU memory allocator
+    # This forces TensorFlow to release memory back to the system
+    try:
+        physical_devices = tf.config.list_physical_devices('GPU')
+        if physical_devices:
+            # Disable and re-enable memory growth to flush allocator
+            for device in physical_devices:
+                tf.config.experimental.set_memory_growth(device, False)
+                tf.config.experimental.set_memory_growth(device, True)
+            print("✅ Reset memory growth (flushed allocator)")
+    except Exception as e:
+        print(f"⚠️  Could not reset memory growth: {e}")
+    print("="*70 + "\n")
+def get_gpu_memory_info():
+    """
+    Print current GPU memory usage
+    Useful for monitoring memory leaks
+    """
+    try:
+        gpu_devices = tf.config.list_physical_devices('GPU')
+        if gpu_devices:
+            for device in gpu_devices:
+                details = tf.config.experimental.get_memory_info(device.name.replace('/physical_device:', ''))
+                current_mb = details['current'] / 1024**2
+                peak_mb = details['peak'] / 1024**2
+                print(f"GPU Memory - Current: {current_mb:.1f} MB, Peak: {peak_mb:.1f} MB")
+    except Exception as e:
+        print(f"Could not get GPU memory info: {e}")

models/for_WMH_Vent/results_fold_avg_var_1_zscore2/models/standard_3class/download_models.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ Visit our Hugging Face link for downloading the trained models.