Add test confusion matrices (JSON + PNG) for all variants

Files changed (10) hide show

README.md CHANGED Viewed

@@ -164,17 +164,20 @@ python inference_uchen_ume.py \
 ## Repo layout
 ```
-center_crop_all/             ← use with center_crop_whole_page at inference (~99% test)
   final_model.pt
   model_card.json
-  results.json
-without_preprocess/          ← use with preprocess none / full pages (~81% test)
   final_model.pt
   model_card.json
   results.json
-  benchmark_eval_results.json
-with_preprocess/             ← legacy (train/val crop, test full page) — do not use
-  final_model.pt
   ...
 ```

 ## Repo layout
 ```
+center_crop_all/             ← center_crop_whole_page at inference (~99% test)
   final_model.pt
   model_card.json
+  results.json               ← includes confusion_matrix
+  confusion_matrix.json
+  confusion_matrix.png
+without_preprocess/          ← full pages (~81% test, ~85% benchmark)
   final_model.pt
   model_card.json
   results.json               ← includes confusion_matrix
+  confusion_matrix.json
+  confusion_matrix.png
+  benchmark_eval_results.json   ← contains the benchmark confusion matrix
+with_preprocess/             ← legacy (train/val crop, test full page mismatch) — do not use
   ...
 ```

center_crop_all/confusion_matrix.json ADDED Viewed

+{
+  "split": "test",
+  "labels": [
+    "uchen",
+    "ume"
+  ],
+  "matrix": [
+    [
+      94,
+      3
+    ],
+    [
+      3,
+      751
+    ]
+  ],
+  "test_metrics": {
+    "loss": 0.032431522018710204,
+    "accuracy": 0.9929494712103408,
+    "macro_f1": 0.9825466925538024,
+    "weighted_f1": 0.9929494712103408,
+    "auc_roc": 0.9996991987749186
+  },
+  "preprocess": {
+    "train": "center_crop_whole_page",
+    "val": "center_crop_whole_page",
+    "test": "center_crop_whole_page",
+    "size": 224
+  }
+}

center_crop_all/confusion_matrix.png ADDED Viewed

center_crop_all/results.json CHANGED Viewed

@@ -16,5 +16,15 @@
     "test": "center_crop_whole_page",
     "size": 224
   },
-  "inference_note": "Use --preprocess center_crop_whole_page at inference (same as train/val/test)."
 }

     "test": "center_crop_whole_page",
     "size": 224
   },
+  "inference_note": "Use --preprocess center_crop_whole_page at inference (same as train/val/test).",
+  "confusion_matrix": [
+    [
+      94,
+      3
+    ],
+    [
+      3,
+      751
+    ]
+  ]
 }

with_preprocess/confusion_matrix.json ADDED Viewed

+{
+  "split": "test",
+  "labels": [
+    "uchen",
+    "ume"
+  ],
+  "matrix": [
+    [
+      99,
+      0
+    ],
+    [
+      381,
+      387
+    ]
+  ],
+  "test_metrics": {
+    "loss": 1.5028612467717066,
+    "accuracy": 0.5605536332179931,
+    "macro_f1": 0.5060493910234842,
+    "weighted_f1": 0.6326582036211453,
+    "auc_roc": 0.9685921717171717
+  }
+}

with_preprocess/confusion_matrix.png ADDED Viewed

with_preprocess/results.json CHANGED Viewed

@@ -13,5 +13,15 @@
   "splits_file": "/root/script-classification-model-train/experiments/uchen_ume_binary/checkpoints/uchen_ume_whole_page/splits.json",
   "skip_stage_c": false,
   "stage_c_skip_reason": null,
-  "best_checkpoint": "best_stage_c_last_blocks.pt"
-}

   "splits_file": "/root/script-classification-model-train/experiments/uchen_ume_binary/checkpoints/uchen_ume_whole_page/splits.json",
   "skip_stage_c": false,
   "stage_c_skip_reason": null,
+  "best_checkpoint": "best_stage_c_last_blocks.pt",
+  "confusion_matrix": [
+    [
+      99,
+      0
+    ],
+    [
+      381,
+      387
+    ]
+  ]
+}

without_preprocess/confusion_matrix.json ADDED Viewed

+{
+  "split": "test",
+  "labels": [
+    "uchen",
+    "ume"
+  ],
+  "matrix": [
+    [
+      97,
+      2
+    ],
+    [
+      165,
+      603
+    ]
+  ],
+  "test_metrics": {
+    "loss": 0.48820294297059763,
+    "accuracy": 0.8073817762399077,
+    "macro_f1": 0.7078823289680483,
+    "weighted_f1": 0.8394339697286689,
+    "auc_roc": 0.9698679503367003
+  }
+}

without_preprocess/confusion_matrix.png ADDED Viewed

without_preprocess/results.json CHANGED Viewed

@@ -13,5 +13,15 @@
   "splits_file": "/root/script-classification-model-train/experiments/uchen_ume_binary/checkpoints/uchen_ume_binary/splits.json",
   "skip_stage_c": false,
   "stage_c_skip_reason": null,
-  "best_checkpoint": "best_stage_c_last_blocks.pt"
-}

   "splits_file": "/root/script-classification-model-train/experiments/uchen_ume_binary/checkpoints/uchen_ume_binary/splits.json",
   "skip_stage_c": false,
   "stage_c_skip_reason": null,
+  "best_checkpoint": "best_stage_c_last_blocks.pt",
+  "confusion_matrix": [
+    [
+      97,
+      2
+    ],
+    [
+      165,
+      603
+    ]
+  ]
+}