viliana-dev commited on Mar 3

Commit

a522118

verified ·

1 Parent(s): 4e8895d

Upload sandbagging probes and results

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

results/anthropic--claude-sonnet-4.6_baseline.json +152 -0
results/attention_probe_results.json +46 -0
results/attention_probe_results_10.json +46 -0
results/attention_probe_results_25.json +46 -0
results/attention_probe_results_50.json +46 -0
results/attention_probe_results_75.json +46 -0
results/embedding_baseline.json +152 -0
results/evaluation.json +372 -0
results/gemini_baseline.json +172 -0
results/google--gemini-3-flash-preview_baseline.json +172 -0
results/google--gemini-3.1-pro-preview_baseline.json +172 -0
results/linear_probe_results.json +152 -0
results/mlp_probe_results.json +122 -0
results/multimax_probe_results_10.json +30 -0
results/multimax_probe_results_100.json +30 -0
results/multimax_probe_results_10_25.json +58 -0
results/multimax_probe_results_25.json +30 -0
results/multimax_probe_results_50.json +30 -0
results/multimax_probe_results_50_75_100.json +86 -0
results/multimax_probe_results_75.json +30 -0
results/openai--gpt-5.1_baseline.json +172 -0
results/rolling_means_probe_results.json +46 -0
results/rolling_means_probe_results_10.json +46 -0
results/rolling_means_probe_results_25.json +46 -0
results/rolling_means_probe_results_50.json +46 -0
results/rolling_means_probe_results_75.json +46 -0
results/tfidf_baseline.json +152 -0
saved_probes/mlp_L20_K50.0.pt +3 -0
saved_probes/mlp_L20_K75.0.pt +3 -0
saved_probes/mlp_L30_K10.0.pt +3 -0
saved_probes/mlp_L30_K100.0.pt +3 -0
saved_probes/mlp_L30_K25.0.pt +3 -0
saved_probes/mlp_L30_K50.0.pt +3 -0
saved_probes/mlp_L30_K75.0.pt +3 -0
saved_probes/mlp_L35_K10.0.pt +3 -0
saved_probes/mlp_L35_K100.0.pt +3 -0
saved_probes/mlp_L35_K25.0.pt +3 -0
saved_probes/mlp_L35_K50.0.pt +3 -0
saved_probes/mlp_L35_K75.0.pt +3 -0
saved_probes/multimax_L10_K10.0_seed0.pt +3 -0
saved_probes/multimax_L10_K10.0_seed1.pt +3 -0
saved_probes/multimax_L10_K10.0_seed2.pt +3 -0
saved_probes/multimax_L10_K100.0_seed0.pt +3 -0
saved_probes/multimax_L10_K100.0_seed1.pt +3 -0
saved_probes/multimax_L10_K100.0_seed2.pt +3 -0
saved_probes/multimax_L10_K25.0_seed0.pt +3 -0
saved_probes/multimax_L10_K25.0_seed1.pt +3 -0
saved_probes/multimax_L10_K25.0_seed2.pt +3 -0
saved_probes/multimax_L10_K50.0_seed0.pt +3 -0
saved_probes/multimax_L10_K50.0_seed1.pt +3 -0

results/anthropic--claude-sonnet-4.6_baseline.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+  "10": {
+    "all": {
+      "accuracy": 0.5085,
+      "auroc": 0.5085,
+      "gmean2": 0.0335,
+      "tpr": 0.0341,
+      "tnr": 0.983
+    },
+    "train": {
+      "accuracy": 0.5041,
+      "auroc": 0.5041,
+      "gmean2": 0.024,
+      "tpr": 0.0244,
+      "tnr": 0.9837
+    },
+    "val": {
+      "accuracy": 0.5192,
+      "auroc": 0.5192,
+      "gmean2": 0.0385,
+      "tpr": 0.0385,
+      "tnr": 1.0
+    },
+    "test": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.0713,
+      "tpr": 0.0741,
+      "tnr": 0.963
+    }
+  },
+  "25": {
+    "all": {
+      "accuracy": 0.5114,
+      "auroc": 0.5114,
+      "gmean2": 0.0391,
+      "tpr": 0.0398,
+      "tnr": 0.983
+    },
+    "train": {
+      "accuracy": 0.5122,
+      "auroc": 0.5122,
+      "gmean2": 0.0323,
+      "tpr": 0.0325,
+      "tnr": 0.9919
+    },
+    "val": {
+      "accuracy": 0.5192,
+      "auroc": 0.5192,
+      "gmean2": 0.0385,
+      "tpr": 0.0385,
+      "tnr": 1.0
+    },
+    "test": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.0686,
+      "tpr": 0.0741,
+      "tnr": 0.9259
+    }
+  },
+  "50": {
+    "all": {
+      "accuracy": 0.5284,
+      "auroc": 0.5284,
+      "gmean2": 0.0828,
+      "tpr": 0.0852,
+      "tnr": 0.9716
+    },
+    "train": {
+      "accuracy": 0.5244,
+      "auroc": 0.5244,
+      "gmean2": 0.0787,
+      "tpr": 0.0813,
+      "tnr": 0.9675
+    },
+    "val": {
+      "accuracy": 0.5192,
+      "auroc": 0.5192,
+      "gmean2": 0.0385,
+      "tpr": 0.0385,
+      "tnr": 1.0
+    },
+    "test": {
+      "accuracy": 0.5556,
+      "auroc": 0.5556,
+      "gmean2": 0.1427,
+      "tpr": 0.1481,
+      "tnr": 0.963
+    }
+  },
+  "75": {
+    "all": {
+      "accuracy": 0.5341,
+      "auroc": 0.5341,
+      "gmean2": 0.1179,
+      "tpr": 0.125,
+      "tnr": 0.9432
+    },
+    "train": {
+      "accuracy": 0.5407,
+      "auroc": 0.5407,
+      "gmean2": 0.1237,
+      "tpr": 0.1301,
+      "tnr": 0.9512
+    },
+    "val": {
+      "accuracy": 0.4808,
+      "auroc": 0.4808,
+      "gmean2": 0.0355,
+      "tpr": 0.0385,
+      "tnr": 0.9231
+    },
+    "test": {
+      "accuracy": 0.5556,
+      "auroc": 0.5556,
+      "gmean2": 0.1715,
+      "tpr": 0.1852,
+      "tnr": 0.9259
+    }
+  },
+  "100": {
+    "all": {
+      "accuracy": 0.5455,
+      "auroc": 0.5455,
+      "gmean2": 0.2484,
+      "tpr": 0.3239,
+      "tnr": 0.767
+    },
+    "train": {
+      "accuracy": 0.5488,
+      "auroc": 0.5488,
+      "gmean2": 0.2547,
+      "tpr": 0.3333,
+      "tnr": 0.7642
+    },
+    "val": {
+      "accuracy": 0.5192,
+      "auroc": 0.5192,
+      "gmean2": 0.2071,
+      "tpr": 0.2692,
+      "tnr": 0.7692
+    },
+    "test": {
+      "accuracy": 0.5556,
+      "auroc": 0.5556,
+      "gmean2": 0.2593,
+      "tpr": 0.3333,
+      "tnr": 0.7778
+    }
+  }
+}

results/attention_probe_results.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|100.0": {
+    "val_auroc": 0.6538,
+    "test_auroc": 0.7037,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6296,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3745,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|100.0": {
+    "val_auroc": 0.7071,
+    "test_auroc": 0.6516,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3265,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|100.0": {
+    "val_auroc": 0.679,
+    "test_auroc": 0.6612,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3128,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|100.0": {
+    "val_auroc": 0.6612,
+    "test_auroc": 0.5967,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.1111,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/attention_probe_results_10.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|10.0": {
+    "val_auroc": 0.5592,
+    "test_auroc": 0.6927,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6667,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.439,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|10.0": {
+    "val_auroc": 0.5666,
+    "test_auroc": 0.6941,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6111,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3567,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|10.0": {
+    "val_auroc": 0.5695,
+    "test_auroc": 0.7023,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.7037,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.4609,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|10.0": {
+    "val_auroc": 0.574,
+    "test_auroc": 0.716,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.7222,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.5213,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/attention_probe_results_25.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|25.0": {
+    "val_auroc": 0.5828,
+    "test_auroc": 0.6324,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2743,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|25.0": {
+    "val_auroc": 0.608,
+    "test_auroc": 0.594,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.1509,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|25.0": {
+    "val_auroc": 0.5962,
+    "test_auroc": 0.5748,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.4815,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.144,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|25.0": {
+    "val_auroc": 0.6331,
+    "test_auroc": 0.5281,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.0,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/attention_probe_results_50.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|50.0": {
+    "val_auroc": 0.5592,
+    "test_auroc": 0.7298,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2305,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|50.0": {
+    "val_auroc": 0.6154,
+    "test_auroc": 0.6818,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5926,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2634,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|50.0": {
+    "val_auroc": 0.5769,
+    "test_auroc": 0.6557,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5926,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3457,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|50.0": {
+    "val_auroc": 0.6361,
+    "test_auroc": 0.6063,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.1111,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/attention_probe_results_75.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|75.0": {
+    "val_auroc": 0.6006,
+    "test_auroc": 0.6488,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3086,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|75.0": {
+    "val_auroc": 0.6346,
+    "test_auroc": 0.6173,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5185,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2675,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|75.0": {
+    "val_auroc": 0.6531,
+    "test_auroc": 0.642,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3128,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|75.0": {
+    "val_auroc": 0.645,
+    "test_auroc": 0.6104,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.1481,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/embedding_baseline.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+  "10": {
+    "train": {
+      "accuracy": 0.8049,
+      "auroc": 0.9001,
+      "gmean2": 0.6476,
+      "tpr": 0.7886,
+      "tnr": 0.8211
+    },
+    "val": {
+      "accuracy": 0.7885,
+      "auroc": 0.8876,
+      "gmean2": 0.6036,
+      "tpr": 0.9231,
+      "tnr": 0.6538
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.5871,
+      "gmean2": 0.3649,
+      "tpr": 0.7037,
+      "tnr": 0.5185
+    },
+    "all": {
+      "accuracy": 0.7727,
+      "auroc": 0.8478,
+      "gmean2": 0.5966,
+      "tpr": 0.7955,
+      "tnr": 0.75
+    }
+  },
+  "25": {
+    "train": {
+      "accuracy": 0.8171,
+      "auroc": 0.9007,
+      "gmean2": 0.6672,
+      "tpr": 0.7967,
+      "tnr": 0.8374
+    },
+    "val": {
+      "accuracy": 0.7885,
+      "auroc": 0.895,
+      "gmean2": 0.6036,
+      "tpr": 0.9231,
+      "tnr": 0.6538
+    },
+    "test": {
+      "accuracy": 0.5741,
+      "auroc": 0.6049,
+      "gmean2": 0.3265,
+      "tpr": 0.6296,
+      "tnr": 0.5185
+    },
+    "all": {
+      "accuracy": 0.7756,
+      "auroc": 0.8504,
+      "gmean2": 0.6013,
+      "tpr": 0.7898,
+      "tnr": 0.7614
+    }
+  },
+  "50": {
+    "train": {
+      "accuracy": 0.8049,
+      "auroc": 0.8929,
+      "gmean2": 0.6468,
+      "tpr": 0.7724,
+      "tnr": 0.8374
+    },
+    "val": {
+      "accuracy": 0.7885,
+      "auroc": 0.8876,
+      "gmean2": 0.6124,
+      "tpr": 0.8846,
+      "tnr": 0.6923
+    },
+    "test": {
+      "accuracy": 0.5741,
+      "auroc": 0.6214,
+      "gmean2": 0.3265,
+      "tpr": 0.6296,
+      "tnr": 0.5185
+    },
+    "all": {
+      "accuracy": 0.767,
+      "auroc": 0.8455,
+      "gmean2": 0.5884,
+      "tpr": 0.767,
+      "tnr": 0.767
+    }
+  },
+  "75": {
+    "train": {
+      "accuracy": 0.8049,
+      "auroc": 0.8901,
+      "gmean2": 0.6462,
+      "tpr": 0.7642,
+      "tnr": 0.8455
+    },
+    "val": {
+      "accuracy": 0.7885,
+      "auroc": 0.8994,
+      "gmean2": 0.6124,
+      "tpr": 0.8846,
+      "tnr": 0.6923
+    },
+    "test": {
+      "accuracy": 0.5556,
+      "auroc": 0.631,
+      "gmean2": 0.3073,
+      "tpr": 0.5926,
+      "tnr": 0.5185
+    },
+    "all": {
+      "accuracy": 0.7642,
+      "auroc": 0.8461,
+      "gmean2": 0.5839,
+      "tpr": 0.7557,
+      "tnr": 0.7727
+    }
+  },
+  "100": {
+    "train": {
+      "accuracy": 0.8089,
+      "auroc": 0.8915,
+      "gmean2": 0.6531,
+      "tpr": 0.7724,
+      "tnr": 0.8455
+    },
+    "val": {
+      "accuracy": 0.7885,
+      "auroc": 0.8964,
+      "gmean2": 0.6124,
+      "tpr": 0.8846,
+      "tnr": 0.6923
+    },
+    "test": {
+      "accuracy": 0.5556,
+      "auroc": 0.6283,
+      "gmean2": 0.3073,
+      "tpr": 0.5926,
+      "tnr": 0.5185
+    },
+    "all": {
+      "accuracy": 0.767,
+      "auroc": 0.8462,
+      "gmean2": 0.5883,
+      "tpr": 0.7614,
+      "tnr": 0.7727
+    }
+  }
+}

results/evaluation.json ADDED Viewed

	@@ -0,0 +1,372 @@

+{
+  "behavior": "sandbagging",
+  "probes": {
+    "best_aurocs": {
+      "10": 0.7435,
+      "25": 0.7298,
+      "50": 0.7229,
+      "75": 0.7339,
+      "100": 0.7339
+    }
+  },
+  "baselines": {
+    "anthropic--claude-sonnet-4.6_baseline": {
+      "10.0": {
+        "accuracy": 0.5185,
+        "auroc": 0.5185,
+        "gmean2": 0.0713,
+        "tpr": 0.0741,
+        "tnr": 0.963
+      },
+      "25.0": {
+        "accuracy": 0.5,
+        "auroc": 0.5,
+        "gmean2": 0.0686,
+        "tpr": 0.0741,
+        "tnr": 0.9259
+      },
+      "50.0": {
+        "accuracy": 0.5556,
+        "auroc": 0.5556,
+        "gmean2": 0.1427,
+        "tpr": 0.1481,
+        "tnr": 0.963
+      },
+      "75.0": {
+        "accuracy": 0.5556,
+        "auroc": 0.5556,
+        "gmean2": 0.1715,
+        "tpr": 0.1852,
+        "tnr": 0.9259
+      },
+      "100.0": {
+        "accuracy": 0.5556,
+        "auroc": 0.5556,
+        "gmean2": 0.2593,
+        "tpr": 0.3333,
+        "tnr": 0.7778
+      }
+    },
+    "embedding_baseline": {
+      "10.0": {
+        "accuracy": 0.6111,
+        "auroc": 0.5871,
+        "gmean2": 0.3649,
+        "tpr": 0.7037,
+        "tnr": 0.5185
+      },
+      "25.0": {
+        "accuracy": 0.5741,
+        "auroc": 0.6049,
+        "gmean2": 0.3265,
+        "tpr": 0.6296,
+        "tnr": 0.5185
+      },
+      "50.0": {
+        "accuracy": 0.5741,
+        "auroc": 0.6214,
+        "gmean2": 0.3265,
+        "tpr": 0.6296,
+        "tnr": 0.5185
+      },
+      "75.0": {
+        "accuracy": 0.5556,
+        "auroc": 0.631,
+        "gmean2": 0.3073,
+        "tpr": 0.5926,
+        "tnr": 0.5185
+      },
+      "100.0": {
+        "accuracy": 0.5556,
+        "auroc": 0.6283,
+        "gmean2": 0.3073,
+        "tpr": 0.5926,
+        "tnr": 0.5185
+      }
+    },
+    "gemini_baseline": {
+      "10.0": {
+        "accuracy": 0.463,
+        "auroc": 0.463,
+        "gmean2": 0.1372,
+        "tpr": 0.7407,
+        "tnr": 0.1852,
+        "n": 54
+      },
+      "25.0": {
+        "accuracy": 0.4815,
+        "auroc": 0.4815,
+        "gmean2": 0.0658,
+        "tpr": 0.8889,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "50.0": {
+        "accuracy": 0.4815,
+        "auroc": 0.4815,
+        "gmean2": 0.0658,
+        "tpr": 0.8889,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "75.0": {
+        "accuracy": 0.463,
+        "auroc": 0.463,
+        "gmean2": 0.0,
+        "tpr": 0.9259,
+        "tnr": 0.0,
+        "n": 54
+      },
+      "100.0": {
+        "accuracy": 0.537,
+        "auroc": 0.537,
+        "gmean2": 0.107,
+        "tpr": 0.963,
+        "tnr": 0.1111,
+        "n": 54
+      }
+    },
+    "google--gemini-3-flash-preview_baseline": {
+      "10.0": {
+        "accuracy": 0.463,
+        "auroc": 0.463,
+        "gmean2": 0.1372,
+        "tpr": 0.7407,
+        "tnr": 0.1852,
+        "n": 54
+      },
+      "25.0": {
+        "accuracy": 0.4815,
+        "auroc": 0.4815,
+        "gmean2": 0.0658,
+        "tpr": 0.8889,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "50.0": {
+        "accuracy": 0.4815,
+        "auroc": 0.4815,
+        "gmean2": 0.0658,
+        "tpr": 0.8889,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "75.0": {
+        "accuracy": 0.463,
+        "auroc": 0.463,
+        "gmean2": 0.0,
+        "tpr": 0.9259,
+        "tnr": 0.0,
+        "n": 54
+      },
+      "100.0": {
+        "accuracy": 0.537,
+        "auroc": 0.537,
+        "gmean2": 0.107,
+        "tpr": 0.963,
+        "tnr": 0.1111,
+        "n": 54
+      }
+    },
+    "google--gemini-3.1-pro-preview_baseline": {
+      "10.0": {
+        "accuracy": 0.5185,
+        "auroc": 0.5185,
+        "gmean2": 0.1317,
+        "tpr": 0.8889,
+        "tnr": 0.1481,
+        "n": 54
+      },
+      "25.0": {
+        "accuracy": 0.4815,
+        "auroc": 0.4815,
+        "gmean2": 0.0658,
+        "tpr": 0.8889,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "50.0": {
+        "accuracy": 0.4815,
+        "auroc": 0.4815,
+        "gmean2": 0.0658,
+        "tpr": 0.8889,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "75.0": {
+        "accuracy": 0.537,
+        "auroc": 0.537,
+        "gmean2": 0.0741,
+        "tpr": 1.0,
+        "tnr": 0.0741,
+        "n": 54
+      },
+      "100.0": {
+        "accuracy": 0.5556,
+        "auroc": 0.5556,
+        "gmean2": 0.1715,
+        "tpr": 0.9259,
+        "tnr": 0.1852,
+        "n": 54
+      }
+    },
+    "openai--gpt-5.1_baseline": {
+      "10.0": {
+        "accuracy": 0.551,
+        "auroc": 0.5217,
+        "gmean2": 0.0435,
+        "tpr": 1.0,
+        "tnr": 0.0435,
+        "n": 49
+      },
+      "25.0": {
+        "accuracy": 0.5208,
+        "auroc": 0.5,
+        "gmean2": 0.0,
+        "tpr": 1.0,
+        "tnr": 0.0,
+        "n": 48
+      },
+      "50.0": {
+        "accuracy": 0.5417,
+        "auroc": 0.5217,
+        "gmean2": 0.0435,
+        "tpr": 1.0,
+        "tnr": 0.0435,
+        "n": 48
+      },
+      "75.0": {
+        "accuracy": 0.5319,
+        "auroc": 0.5217,
+        "gmean2": 0.0435,
+        "tpr": 1.0,
+        "tnr": 0.0435,
+        "n": 47
+      },
+      "100.0": {
+        "accuracy": 0.5833,
+        "auroc": 0.5687,
+        "gmean2": 0.2,
+        "tpr": 0.92,
+        "tnr": 0.2174,
+        "n": 48
+      }
+    },
+    "tfidf_baseline": {
+      "10.0": {
+        "accuracy": 0.6667,
+        "auroc": 0.701,
+        "gmean2": 0.4444,
+        "tpr": 0.6667,
+        "tnr": 0.6667
+      },
+      "25.0": {
+        "accuracy": 0.5926,
+        "auroc": 0.6543,
+        "gmean2": 0.3512,
+        "tpr": 0.5926,
+        "tnr": 0.5926
+      },
+      "50.0": {
+        "accuracy": 0.6111,
+        "auroc": 0.6886,
+        "gmean2": 0.332,
+        "tpr": 0.8148,
+        "tnr": 0.4074
+      },
+      "75.0": {
+        "accuracy": 0.6111,
+        "auroc": 0.7119,
+        "gmean2": 0.3457,
+        "tpr": 0.7778,
+        "tnr": 0.4444
+      },
+      "100.0": {
+        "accuracy": 0.6111,
+        "auroc": 0.7215,
+        "gmean2": 0.3457,
+        "tpr": 0.7778,
+        "tnr": 0.4444
+      }
+    }
+  },
+  "comparisons": {
+    "anthropic--claude-sonnet-4.6_baseline": {
+      "igc": {
+        "10": 0.2250000000000001,
+        "25": 0.2298,
+        "50": 0.1673,
+        "75": 0.17830000000000001,
+        "100": 0.17830000000000001
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": null
+    },
+    "embedding_baseline": {
+      "igc": {
+        "10": 0.1564000000000001,
+        "25": 0.12490000000000001,
+        "50": 0.10150000000000003,
+        "75": 0.10289999999999999,
+        "100": 0.10560000000000003
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": null
+    },
+    "gemini_baseline": {
+      "igc": {
+        "10": 0.2805,
+        "25": 0.24830000000000002,
+        "50": 0.2414,
+        "75": 0.2709,
+        "100": 0.19689999999999996
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": null
+    },
+    "google--gemini-3-flash-preview_baseline": {
+      "igc": {
+        "10": 0.2805,
+        "25": 0.24830000000000002,
+        "50": 0.2414,
+        "75": 0.2709,
+        "100": 0.19689999999999996
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": null
+    },
+    "google--gemini-3.1-pro-preview_baseline": {
+      "igc": {
+        "10": 0.2250000000000001,
+        "25": 0.24830000000000002,
+        "50": 0.2414,
+        "75": 0.19689999999999996,
+        "100": 0.17830000000000001
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": null
+    },
+    "openai--gpt-5.1_baseline": {
+      "igc": {
+        "10": 0.2218,
+        "25": 0.2298,
+        "50": 0.20119999999999993,
+        "75": 0.21219999999999994,
+        "100": 0.1652
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": null
+    },
+    "tfidf_baseline": {
+      "igc": {
+        "10": 0.04250000000000009,
+        "25": 0.07550000000000001,
+        "50": 0.0343,
+        "75": 0.02200000000000002,
+        "100": 0.012399999999999967
+      },
+      "detection_latency_probe": 10,
+      "detection_latency_baseline": 10
+    }
+  }
+}

results/gemini_baseline.json ADDED Viewed

	@@ -0,0 +1,172 @@

+{
+  "10": {
+    "all": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.1029,
+      "tpr": 0.1111,
+      "tnr": 0.9259,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.1029,
+      "tpr": 0.1111,
+      "tnr": 0.9259,
+      "n": 54
+    }
+  },
+  "25": {
+    "all": {
+      "accuracy": 0.463,
+      "auroc": 0.463,
+      "gmean2": 0.0905,
+      "tpr": 0.1111,
+      "tnr": 0.8148,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.463,
+      "auroc": 0.463,
+      "gmean2": 0.0905,
+      "tpr": 0.1111,
+      "tnr": 0.8148,
+      "n": 54
+    }
+  },
+  "50": {
+    "all": {
+      "accuracy": 0.537,
+      "auroc": 0.537,
+      "gmean2": 0.1893,
+      "tpr": 0.2222,
+      "tnr": 0.8519,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.537,
+      "auroc": 0.537,
+      "gmean2": 0.1893,
+      "tpr": 0.2222,
+      "tnr": 0.8519,
+      "n": 54
+    }
+  },
+  "75": {
+    "all": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2085,
+      "tpr": 0.2963,
+      "tnr": 0.7037,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2085,
+      "tpr": 0.2963,
+      "tnr": 0.7037,
+      "n": 54
+    }
+  },
+  "100": {
+    "all": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2469,
+      "tpr": 0.4444,
+      "tnr": 0.5556,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2469,
+      "tpr": 0.4444,
+      "tnr": 0.5556,
+      "n": 54
+    }
+  }
+}

results/google--gemini-3-flash-preview_baseline.json ADDED Viewed

	@@ -0,0 +1,172 @@

+{
+  "10": {
+    "all": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.1029,
+      "tpr": 0.1111,
+      "tnr": 0.9259,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.1029,
+      "tpr": 0.1111,
+      "tnr": 0.9259,
+      "n": 54
+    }
+  },
+  "25": {
+    "all": {
+      "accuracy": 0.463,
+      "auroc": 0.463,
+      "gmean2": 0.0905,
+      "tpr": 0.1111,
+      "tnr": 0.8148,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.463,
+      "auroc": 0.463,
+      "gmean2": 0.0905,
+      "tpr": 0.1111,
+      "tnr": 0.8148,
+      "n": 54
+    }
+  },
+  "50": {
+    "all": {
+      "accuracy": 0.537,
+      "auroc": 0.537,
+      "gmean2": 0.1893,
+      "tpr": 0.2222,
+      "tnr": 0.8519,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.537,
+      "auroc": 0.537,
+      "gmean2": 0.1893,
+      "tpr": 0.2222,
+      "tnr": 0.8519,
+      "n": 54
+    }
+  },
+  "75": {
+    "all": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2085,
+      "tpr": 0.2963,
+      "tnr": 0.7037,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2085,
+      "tpr": 0.2963,
+      "tnr": 0.7037,
+      "n": 54
+    }
+  },
+  "100": {
+    "all": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2469,
+      "tpr": 0.4444,
+      "tnr": 0.5556,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5,
+      "auroc": 0.5,
+      "gmean2": 0.2469,
+      "tpr": 0.4444,
+      "tnr": 0.5556,
+      "n": 54
+    }
+  }
+}

results/google--gemini-3.1-pro-preview_baseline.json ADDED Viewed

	@@ -0,0 +1,172 @@

+{
+  "10": {
+    "all": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.1811,
+      "tpr": 0.2222,
+      "tnr": 0.8148,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.1811,
+      "tpr": 0.2222,
+      "tnr": 0.8148,
+      "n": 54
+    }
+  },
+  "25": {
+    "all": {
+      "accuracy": 0.5098,
+      "auroc": 0.5138,
+      "gmean2": 0.2215,
+      "tpr": 0.3077,
+      "tnr": 0.72,
+      "n": 51
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5098,
+      "auroc": 0.5138,
+      "gmean2": 0.2215,
+      "tpr": 0.3077,
+      "tnr": 0.72,
+      "n": 51
+    }
+  },
+  "50": {
+    "all": {
+      "accuracy": 0.6038,
+      "auroc": 0.6011,
+      "gmean2": 0.3419,
+      "tpr": 0.4615,
+      "tnr": 0.7407,
+      "n": 53
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.6038,
+      "auroc": 0.6011,
+      "gmean2": 0.3419,
+      "tpr": 0.4615,
+      "tnr": 0.7407,
+      "n": 53
+    }
+  },
+  "75": {
+    "all": {
+      "accuracy": 0.6111,
+      "auroc": 0.6111,
+      "gmean2": 0.3649,
+      "tpr": 0.5185,
+      "tnr": 0.7037,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.6111,
+      "gmean2": 0.3649,
+      "tpr": 0.5185,
+      "tnr": 0.7037,
+      "n": 54
+    }
+  },
+  "100": {
+    "all": {
+      "accuracy": 0.5926,
+      "auroc": 0.5926,
+      "gmean2": 0.3388,
+      "tpr": 0.4815,
+      "tnr": 0.7037,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5926,
+      "auroc": 0.5926,
+      "gmean2": 0.3388,
+      "tpr": 0.4815,
+      "tnr": 0.7037,
+      "n": 54
+    }
+  }
+}

results/linear_probe_results.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+  "10|10": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.738,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.4198
+  },
+  "10|25": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.679,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.417
+  },
+  "10|50": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6708,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3512
+  },
+  "10|75": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6571,
+    "test_accuracy": 0.6111,
+    "test_gmean2": 0.3649
+  },
+  "10|100": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.679,
+    "test_accuracy": 0.5741,
+    "test_gmean2": 0.3128
+  },
+  "20|10": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7133,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3909
+  },
+  "20|25": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6571,
+    "test_accuracy": 0.6111,
+    "test_gmean2": 0.3704
+  },
+  "20|50": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6653,
+    "test_accuracy": 0.6667,
+    "test_gmean2": 0.4431
+  },
+  "20|75": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6612,
+    "test_accuracy": 0.6852,
+    "test_gmean2": 0.4691
+  },
+  "20|100": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6379,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3964
+  },
+  "30|10": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7366,
+    "test_accuracy": 0.6667,
+    "test_gmean2": 0.439
+  },
+  "30|25": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.727,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3951
+  },
+  "30|50": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6927,
+    "test_accuracy": 0.6852,
+    "test_gmean2": 0.4691
+  },
+  "30|75": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6914,
+    "test_accuracy": 0.6667,
+    "test_gmean2": 0.4431
+  },
+  "30|100": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6639,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3951
+  },
+  "35|10": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7284,
+    "test_accuracy": 0.6667,
+    "test_gmean2": 0.4431
+  },
+  "35|25": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7298,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.4198
+  },
+  "35|50": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7229,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.4198
+  },
+  "35|75": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7339,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.4198
+  },
+  "35|100": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7339,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.4198
+  },
+  "all|10": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7435,
+    "test_accuracy": 0.6111,
+    "test_gmean2": 0.3704
+  },
+  "all|25": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7092,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3909
+  },
+  "all|50": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7064,
+    "test_accuracy": 0.6481,
+    "test_gmean2": 0.417
+  },
+  "all|75": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.7064,
+    "test_accuracy": 0.6111,
+    "test_gmean2": 0.3731
+  },
+  "all|100": {
+    "val_auroc": 1.0,
+    "test_auroc": 0.6941,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3457
+  }
+}

results/mlp_probe_results.json ADDED Viewed

	@@ -0,0 +1,122 @@

+{
+  "10|10": {
+    "val_auroc": 0.5251,
+    "test_auroc": 0.6886,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3964
+  },
+  "10|25": {
+    "val_auroc": 0.568,
+    "test_auroc": 0.6502,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3388
+  },
+  "10|50": {
+    "val_auroc": 0.605,
+    "test_auroc": 0.6571,
+    "test_accuracy": 0.5185,
+    "test_gmean2": 0.0713
+  },
+  "10|75": {
+    "val_auroc": 0.6154,
+    "test_auroc": 0.6708,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3292
+  },
+  "10|100": {
+    "val_auroc": 0.6331,
+    "test_auroc": 0.6612,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3841
+  },
+  "20|10": {
+    "val_auroc": 0.5518,
+    "test_auroc": 0.6461,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3909
+  },
+  "20|25": {
+    "val_auroc": 0.6405,
+    "test_auroc": 0.6571,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3018
+  },
+  "20|50": {
+    "val_auroc": 0.6228,
+    "test_auroc": 0.6557,
+    "test_accuracy": 0.5185,
+    "test_gmean2": 0.2195
+  },
+  "20|75": {
+    "val_auroc": 0.6228,
+    "test_auroc": 0.631,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3512
+  },
+  "20|100": {
+    "val_auroc": 0.6479,
+    "test_auroc": 0.5981,
+    "test_accuracy": 0.5185,
+    "test_gmean2": 0.2565
+  },
+  "30|10": {
+    "val_auroc": 0.5562,
+    "test_auroc": 0.6996,
+    "test_accuracy": 0.6296,
+    "test_gmean2": 0.3745
+  },
+  "30|25": {
+    "val_auroc": 0.5725,
+    "test_auroc": 0.5583,
+    "test_accuracy": 0.6111,
+    "test_gmean2": 0.2497
+  },
+  "30|50": {
+    "val_auroc": 0.6257,
+    "test_auroc": 0.6447,
+    "test_accuracy": 0.5741,
+    "test_gmean2": 0.3265
+  },
+  "30|75": {
+    "val_auroc": 0.6538,
+    "test_auroc": 0.6598,
+    "test_accuracy": 0.5741,
+    "test_gmean2": 0.2881
+  },
+  "30|100": {
+    "val_auroc": 0.6612,
+    "test_auroc": 0.62,
+    "test_accuracy": 0.537,
+    "test_gmean2": 0.2716
+  },
+  "35|10": {
+    "val_auroc": 0.5932,
+    "test_auroc": 0.6749,
+    "test_accuracy": 0.5741,
+    "test_gmean2": 0.3265
+  },
+  "35|25": {
+    "val_auroc": 0.6331,
+    "test_auroc": 0.6968,
+    "test_accuracy": 0.5,
+    "test_gmean2": 0.0
+  },
+  "35|50": {
+    "val_auroc": 0.6538,
+    "test_auroc": 0.6886,
+    "test_accuracy": 0.5926,
+    "test_gmean2": 0.3292
+  },
+  "35|75": {
+    "val_auroc": 0.6228,
+    "test_auroc": 0.631,
+    "test_accuracy": 0.5741,
+    "test_gmean2": 0.2058
+  },
+  "35|100": {
+    "val_auroc": 0.7086,
+    "test_auroc": 0.7106,
+    "test_accuracy": 0.6111,
+    "test_gmean2": 0.332
+  }
+}

results/multimax_probe_results_10.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "10|10.0": {
+    "val_auroc": 0.6627,
+    "test_auroc": 0.5501,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|10.0": {
+    "val_auroc": 0.6287,
+    "test_auroc": 0.5501,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|10.0": {
+    "val_auroc": 0.6257,
+    "test_auroc": 0.583,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|10.0": {
+    "val_auroc": 0.5414,
+    "test_auroc": 0.6776,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/multimax_probe_results_100.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "10|100.0": {
+    "val_auroc": 0.6021,
+    "test_auroc": 0.642,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|100.0": {
+    "val_auroc": 0.6568,
+    "test_auroc": 0.6872,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|100.0": {
+    "val_auroc": 0.6095,
+    "test_auroc": 0.62,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|100.0": {
+    "val_auroc": 0.5932,
+    "test_auroc": 0.5761,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/multimax_probe_results_10_25.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "10|10.0": {
+    "val_auroc": 0.6228,
+    "test_auroc": 0.5944,
+    "val_std": 0.0611,
+    "test_std": 0.0068,
+    "n_seeds": 3
+  },
+  "10|25.0": {
+    "val_auroc": 0.5774,
+    "test_auroc": 0.6081,
+    "val_std": 0.0129,
+    "test_std": 0.0157,
+    "n_seeds": 3
+  },
+  "20|10.0": {
+    "val_auroc": 0.6144,
+    "test_auroc": 0.5574,
+    "val_std": 0.0482,
+    "test_std": 0.0307,
+    "n_seeds": 3
+  },
+  "20|25.0": {
+    "val_auroc": 0.5439,
+    "test_auroc": 0.5693,
+    "val_std": 0.0505,
+    "test_std": 0.0132,
+    "n_seeds": 3
+  },
+  "30|10.0": {
+    "val_auroc": 0.5577,
+    "test_auroc": 0.6132,
+    "val_std": 0.0349,
+    "test_std": 0.0741,
+    "n_seeds": 3
+  },
+  "30|25.0": {
+    "val_auroc": 0.5143,
+    "test_auroc": 0.6013,
+    "val_std": 0.0643,
+    "test_std": 0.0839,
+    "n_seeds": 3
+  },
+  "35|10.0": {
+    "val_auroc": 0.5397,
+    "test_auroc": 0.6283,
+    "val_std": 0.0082,
+    "test_std": 0.0159,
+    "n_seeds": 3
+  },
+  "35|25.0": {
+    "val_auroc": 0.4983,
+    "test_auroc": 0.5709,
+    "val_std": 0.0657,
+    "test_std": 0.0442,
+    "n_seeds": 3
+  }
+}

results/multimax_probe_results_25.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "10|25.0": {
+    "val_auroc": 0.5651,
+    "test_auroc": 0.5446,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|25.0": {
+    "val_auroc": 0.5385,
+    "test_auroc": 0.5226,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|25.0": {
+    "val_auroc": 0.5459,
+    "test_auroc": 0.5734,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|25.0": {
+    "val_auroc": 0.551,
+    "test_auroc": 0.6536,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/multimax_probe_results_50.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "10|50.0": {
+    "val_auroc": 0.6524,
+    "test_auroc": 0.7106,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|50.0": {
+    "val_auroc": 0.5207,
+    "test_auroc": 0.5748,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|50.0": {
+    "val_auroc": 0.5607,
+    "test_auroc": 0.4938,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|50.0": {
+    "val_auroc": 0.6124,
+    "test_auroc": 0.6077,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/multimax_probe_results_50_75_100.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+  "10|50.0": {
+    "val_auroc": 0.6243,
+    "test_auroc": 0.679,
+    "val_std": 0.0582,
+    "test_std": 0.0221,
+    "n_seeds": 3
+  },
+  "10|75.0": {
+    "val_auroc": 0.6464,
+    "test_auroc": 0.6392,
+    "val_std": 0.0367,
+    "test_std": 0.0482,
+    "n_seeds": 3
+  },
+  "10|100.0": {
+    "val_auroc": 0.6686,
+    "test_auroc": 0.6754,
+    "val_std": 0.0285,
+    "test_std": 0.0416,
+    "n_seeds": 3
+  },
+  "20|50.0": {
+    "val_auroc": 0.5947,
+    "test_auroc": 0.6324,
+    "val_std": 0.0238,
+    "test_std": 0.0372,
+    "n_seeds": 3
+  },
+  "20|75.0": {
+    "val_auroc": 0.5947,
+    "test_auroc": 0.6374,
+    "val_std": 0.032,
+    "test_std": 0.0371,
+    "n_seeds": 3
+  },
+  "20|100.0": {
+    "val_auroc": 0.5878,
+    "test_auroc": 0.6415,
+    "val_std": 0.0351,
+    "test_std": 0.0593,
+    "n_seeds": 3
+  },
+  "30|50.0": {
+    "val_auroc": 0.5789,
+    "test_auroc": 0.6196,
+    "val_std": 0.0283,
+    "test_std": 0.0763,
+    "n_seeds": 3
+  },
+  "30|75.0": {
+    "val_auroc": 0.5794,
+    "test_auroc": 0.588,
+    "val_std": 0.0347,
+    "test_std": 0.0944,
+    "n_seeds": 3
+  },
+  "30|100.0": {
+    "val_auroc": 0.6119,
+    "test_auroc": 0.6283,
+    "val_std": 0.0793,
+    "test_std": 0.0688,
+    "n_seeds": 3
+  },
+  "35|50.0": {
+    "val_auroc": 0.6134,
+    "test_auroc": 0.5722,
+    "val_std": 0.0525,
+    "test_std": 0.0289,
+    "n_seeds": 3
+  },
+  "35|75.0": {
+    "val_auroc": 0.5897,
+    "test_auroc": 0.5949,
+    "val_std": 0.0833,
+    "test_std": 0.0553,
+    "n_seeds": 3
+  },
+  "35|100.0": {
+    "val_auroc": 0.5819,
+    "test_auroc": 0.6671,
+    "val_std": 0.0329,
+    "test_std": 0.0479,
+    "n_seeds": 3
+  }
+}

results/multimax_probe_results_75.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "10|75.0": {
+    "val_auroc": 0.6479,
+    "test_auroc": 0.6735,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|75.0": {
+    "val_auroc": 0.5769,
+    "test_auroc": 0.6255,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|75.0": {
+    "val_auroc": 0.5547,
+    "test_auroc": 0.5953,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|75.0": {
+    "val_auroc": 0.6243,
+    "test_auroc": 0.5569,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/openai--gpt-5.1_baseline.json ADDED Viewed

	@@ -0,0 +1,172 @@

+{
+  "10": {
+    "all": {
+      "accuracy": 0.537,
+      "auroc": 0.537,
+      "gmean2": 0.107,
+      "tpr": 0.1111,
+      "tnr": 0.963,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.537,
+      "auroc": 0.537,
+      "gmean2": 0.107,
+      "tpr": 0.1111,
+      "tnr": 0.963,
+      "n": 54
+    }
+  },
+  "25": {
+    "all": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.0713,
+      "tpr": 0.0741,
+      "tnr": 0.963,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5185,
+      "auroc": 0.5185,
+      "gmean2": 0.0713,
+      "tpr": 0.0741,
+      "tnr": 0.963,
+      "n": 54
+    }
+  },
+  "50": {
+    "all": {
+      "accuracy": 0.5741,
+      "auroc": 0.5741,
+      "gmean2": 0.2058,
+      "tpr": 0.2222,
+      "tnr": 0.9259,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.5741,
+      "auroc": 0.5741,
+      "gmean2": 0.2058,
+      "tpr": 0.2222,
+      "tnr": 0.9259,
+      "n": 54
+    }
+  },
+  "75": {
+    "all": {
+      "accuracy": 0.6111,
+      "auroc": 0.6111,
+      "gmean2": 0.2743,
+      "tpr": 0.2963,
+      "tnr": 0.9259,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.6111,
+      "gmean2": 0.2743,
+      "tpr": 0.2963,
+      "tnr": 0.9259,
+      "n": 54
+    }
+  },
+  "100": {
+    "all": {
+      "accuracy": 0.6111,
+      "auroc": 0.6111,
+      "gmean2": 0.3155,
+      "tpr": 0.3704,
+      "tnr": 0.8519,
+      "n": 54
+    },
+    "train": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "val": {
+      "accuracy": 0.0,
+      "auroc": 0.5,
+      "gmean2": 0.0,
+      "tpr": 0.0,
+      "tnr": 0.0,
+      "n": 0
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.6111,
+      "gmean2": 0.3155,
+      "tpr": 0.3704,
+      "tnr": 0.8519,
+      "n": 54
+    }
+  }
+}

results/rolling_means_probe_results.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|100.0": {
+    "val_auroc": 0.6864,
+    "test_auroc": 0.6022,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3073,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|100.0": {
+    "val_auroc": 0.7278,
+    "test_auroc": 0.6214,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3073,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|100.0": {
+    "val_auroc": 0.6583,
+    "test_auroc": 0.62,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.537,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2853,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|100.0": {
+    "val_auroc": 0.6568,
+    "test_auroc": 0.5274,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.537,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.107,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/rolling_means_probe_results_10.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|10.0": {
+    "val_auroc": 0.571,
+    "test_auroc": 0.7531,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6852,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.4691,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|10.0": {
+    "val_auroc": 0.5858,
+    "test_auroc": 0.6214,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.537,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2853,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|10.0": {
+    "val_auroc": 0.6302,
+    "test_auroc": 0.6859,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6852,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.4609,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|10.0": {
+    "val_auroc": 0.5828,
+    "test_auroc": 0.5473,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5185,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2675,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/rolling_means_probe_results_25.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|25.0": {
+    "val_auroc": 0.6464,
+    "test_auroc": 0.5556,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6111,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3704,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|25.0": {
+    "val_auroc": 0.6553,
+    "test_auroc": 0.5953,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2332,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|25.0": {
+    "val_auroc": 0.5902,
+    "test_auroc": 0.6125,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5185,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2675,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|25.0": {
+    "val_auroc": 0.6198,
+    "test_auroc": 0.7263,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6296,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3292,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/rolling_means_probe_results_50.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|50.0": {
+    "val_auroc": 0.6997,
+    "test_auroc": 0.6735,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5556,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.1427,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|50.0": {
+    "val_auroc": 0.6842,
+    "test_auroc": 0.6145,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3265,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|50.0": {
+    "val_auroc": 0.6524,
+    "test_auroc": 0.6207,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.6111,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.3731,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|50.0": {
+    "val_auroc": 0.6257,
+    "test_auroc": 0.6687,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2305,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/rolling_means_probe_results_75.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "10|75.0": {
+    "val_auroc": 0.6612,
+    "test_auroc": 0.6214,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2881,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "20|75.0": {
+    "val_auroc": 0.6583,
+    "test_auroc": 0.6269,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.321,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "30|75.0": {
+    "val_auroc": 0.6464,
+    "test_auroc": 0.5967,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.537,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2606,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  },
+  "35|75.0": {
+    "val_auroc": 0.6642,
+    "test_auroc": 0.6159,
+    "val_std": 0.0,
+    "test_std": 0.0,
+    "test_accuracy": 0.5741,
+    "test_accuracy_std": 0.0,
+    "test_gmean2": 0.2881,
+    "test_gmean2_std": 0.0,
+    "n_seeds": 1
+  }
+}

results/tfidf_baseline.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+  "10": {
+    "train": {
+      "accuracy": 0.9756,
+      "auroc": 0.996,
+      "gmean2": 0.9518,
+      "tpr": 0.9756,
+      "tnr": 0.9756
+    },
+    "val": {
+      "accuracy": 0.9808,
+      "auroc": 0.997,
+      "gmean2": 0.9615,
+      "tpr": 1.0,
+      "tnr": 0.9615
+    },
+    "test": {
+      "accuracy": 0.6667,
+      "auroc": 0.701,
+      "gmean2": 0.4444,
+      "tpr": 0.6667,
+      "tnr": 0.6667
+    },
+    "all": {
+      "accuracy": 0.929,
+      "auroc": 0.9726,
+      "gmean2": 0.863,
+      "tpr": 0.9318,
+      "tnr": 0.9261
+    }
+  },
+  "25": {
+    "train": {
+      "accuracy": 0.9756,
+      "auroc": 0.9976,
+      "gmean2": 0.9516,
+      "tpr": 0.9593,
+      "tnr": 0.9919
+    },
+    "val": {
+      "accuracy": 0.9615,
+      "auroc": 0.9896,
+      "gmean2": 0.9246,
+      "tpr": 0.9615,
+      "tnr": 0.9615
+    },
+    "test": {
+      "accuracy": 0.5926,
+      "auroc": 0.6543,
+      "gmean2": 0.3512,
+      "tpr": 0.5926,
+      "tnr": 0.5926
+    },
+    "all": {
+      "accuracy": 0.9148,
+      "auroc": 0.964,
+      "gmean2": 0.8367,
+      "tpr": 0.9034,
+      "tnr": 0.9261
+    }
+  },
+  "50": {
+    "train": {
+      "accuracy": 0.9756,
+      "auroc": 0.9955,
+      "gmean2": 0.9518,
+      "tpr": 0.9756,
+      "tnr": 0.9756
+    },
+    "val": {
+      "accuracy": 0.9615,
+      "auroc": 0.9926,
+      "gmean2": 0.9231,
+      "tpr": 1.0,
+      "tnr": 0.9231
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.6886,
+      "gmean2": 0.332,
+      "tpr": 0.8148,
+      "tnr": 0.4074
+    },
+    "all": {
+      "accuracy": 0.9176,
+      "auroc": 0.964,
+      "gmean2": 0.8407,
+      "tpr": 0.9545,
+      "tnr": 0.8807
+    }
+  },
+  "75": {
+    "train": {
+      "accuracy": 0.9593,
+      "auroc": 0.9921,
+      "gmean2": 0.9203,
+      "tpr": 0.9512,
+      "tnr": 0.9675
+    },
+    "val": {
+      "accuracy": 0.9038,
+      "auroc": 0.9882,
+      "gmean2": 0.8136,
+      "tpr": 0.9615,
+      "tnr": 0.8462
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.7119,
+      "gmean2": 0.3457,
+      "tpr": 0.7778,
+      "tnr": 0.4444
+    },
+    "all": {
+      "accuracy": 0.8977,
+      "auroc": 0.9587,
+      "gmean2": 0.8051,
+      "tpr": 0.9261,
+      "tnr": 0.8693
+    }
+  },
+  "100": {
+    "train": {
+      "accuracy": 0.9634,
+      "auroc": 0.9875,
+      "gmean2": 0.9282,
+      "tpr": 0.9675,
+      "tnr": 0.9593
+    },
+    "val": {
+      "accuracy": 0.9231,
+      "auroc": 0.9926,
+      "gmean2": 0.8462,
+      "tpr": 1.0,
+      "tnr": 0.8462
+    },
+    "test": {
+      "accuracy": 0.6111,
+      "auroc": 0.7215,
+      "gmean2": 0.3457,
+      "tpr": 0.7778,
+      "tnr": 0.4444
+    },
+    "all": {
+      "accuracy": 0.9034,
+      "auroc": 0.9574,
+      "gmean2": 0.8146,
+      "tpr": 0.9432,
+      "tnr": 0.8636
+    }
+  }
+}

saved_probes/mlp_L20_K50.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c96096cf50a3d28fe934dd0f7dd911d692de9abdfa318400902c5a85bfea0915
+size 5247769

saved_probes/mlp_L20_K75.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2b72789b24811b135c1c5d9549c5e95ed0d91081fdc7acab9e7d49c1db1284
+size 5247769

saved_probes/mlp_L30_K10.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f64363fafece160b62345dc36aff7677bb152e8e6d330d274efc1fdd917e42a
+size 5247769

saved_probes/mlp_L30_K100.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d5f94f5f68fd15c2bc98afc13468bfdac9c78fed21cc12c2c439d24fdee6043
+size 5247779

saved_probes/mlp_L30_K25.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5dc86863846213f64fa682096e0c51834de1ba02d3f0987d899b4116e2746a42
+size 5247769

saved_probes/mlp_L30_K50.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dca59f9c28b519e776bbed147c34d3c9e6f407625adc720e88f7d6385f6a5c64
+size 5247769

saved_probes/mlp_L30_K75.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b768a21c074dda95566a1db4aa1f25e0989825799deb2e06eb0ae08bf3e7aaf7
+size 5247769

saved_probes/mlp_L35_K10.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3012114092a3a6d671568b53402745e64e865ea6bb5e999b31bff94b79849cf
+size 5247769

saved_probes/mlp_L35_K100.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29824008523b3fc22a6cfdec0683e036d077663ee2a68c9524542bf8c64f1454
+size 5247779

saved_probes/mlp_L35_K25.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91cf550b1863bee4024f4a404583738d7a8a3ab7a1b74cb3d3915663dcdc1530
+size 5247769

saved_probes/mlp_L35_K50.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3075a5c49f7604fc606038d2f313569f7c4a97fe45e8809297b099c483d2e995
+size 5247769

saved_probes/mlp_L35_K75.0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ecc2ace55f6f949c56705f6574199ba59df4fddd423ae8cf1fe0cb22e91bbd8
+size 5247769

saved_probes/multimax_L10_K10.0_seed0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c103ff2ef5dbc2dd93afaf4d8939b6af86bc0f782439e31c569a39f7af209a9
+size 69125

saved_probes/multimax_L10_K10.0_seed1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f76f19fd41df93f213530f892ad01a2f6aa19221890649aa9a6937d97091909
+size 69125

saved_probes/multimax_L10_K10.0_seed2.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a73654ac2f00951145579e33cc3f913fde58b474025f27ad36b29e3160b8e6
+size 69125

saved_probes/multimax_L10_K100.0_seed0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a6adfacb850da1ca336624928b659b4cb76228deba4875b5d5f191962b65ccf
+size 69213

saved_probes/multimax_L10_K100.0_seed1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51c988edbb7a1cf83116033ea0a5ec38696424c0990b7af296b28775e90c3dd3
+size 69213

saved_probes/multimax_L10_K100.0_seed2.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6b61d50efdcd93aa08712804debe02e145b1c4090555b318fd2a45f6da3fcb3
+size 69213

saved_probes/multimax_L10_K25.0_seed0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06d8cc589d9724bf4b831cd09a91f5264eb7c0f89f9f601eecf114c9cf0cdd95
+size 69125

saved_probes/multimax_L10_K25.0_seed1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:417effd1d8ddb7a6eda0190a63de6f81a6e5a8ae606120422a52768cf4548f29
+size 69125

saved_probes/multimax_L10_K25.0_seed2.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5adc37fa136226fbb72e016e12636f2ab2b169af7fdbf16aceff5e1d2a1651d2
+size 69125

saved_probes/multimax_L10_K50.0_seed0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4292857c93082d4245987f76387000b9cee911149a80f9e19c12ab0d4fe39cdc
+size 69125

saved_probes/multimax_L10_K50.0_seed1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61b278aba5af4c4f45a4d61c36a0b7ddfde497b12eecdfc0179c9b027afd6895
+size 69125