Update eval metrics: math/qwen2.5-0.5b/math_1pct_seed3

Browse files

Files changed (1) hide show

model_catalog/e48d7fd183a517e9a5671df4bc3fd1f26182451db478acf55720deb9a643da09.json +84 -12

model_catalog/e48d7fd183a517e9a5671df4bc3fd1f26182451db478acf55720deb9a643da09.json CHANGED Viewed

@@ -3,23 +3,95 @@
   "status": "VALID",
   "status_note": "",
   "config": {
-    "mode": "contaminated",
-    "benchmark": "math",
-    "train_data_manifest": "training_pools/math_1pct_seed3_owt20M_K100_shuffle0.jsonl",
-    "contamination_rate": 0.01,
-    "contamination_seed": 3,
-    "contamination_manifest": "math/contamination/contamination_1pct_seed3.json",
-    "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
     "base_model": "Qwen/Qwen2.5-0.5B",
     "epochs": 1,
     "lr": 0.0002,
     "batch_size": 16,
-    "seed": 3,
     "n_params": 494032768,
-    "timestamp": "2026-04-25T17:40:21.065292+00:00",
-    "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json"
   },
-  "metrics": {},
   "mode": "contaminated",
   "benchmark": "math",
   "train_data_manifest": "training_pools/math_1pct_seed3_owt20M_K100_shuffle0.jsonl",
@@ -33,6 +105,6 @@
   "batch_size": 16,
   "seed": 3,
   "n_params": 494032768,
-  "timestamp": "2026-04-25T17:40:21.065292+00:00",
   "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json"
 }

   "status": "VALID",
   "status_note": "",
   "config": {
+    "model_key": "math/qwen2.5-0.5b/math_1pct_seed3",
+    "config_hash": "812efbc7469b93eecfe4638e518b00af4c81dae06084a14554cd63f2b73c8e9e",
+    "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json",
+    "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/812efbc7469b93eecfe4638e518b00af4c81dae06084a14554cd63f2b73c8e9e/eval_results.jsonl",
     "base_model": "Qwen/Qwen2.5-0.5B",
+    "mode": "contaminated",
     "epochs": 1,
     "lr": 0.0002,
     "batch_size": 16,
+    "grad_accum": 1,
+    "max_seq_len": 1024,
     "n_params": 494032768,
+    "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
+    "train_data_manifest": "training_pools/math_1pct_seed3_owt20M_K100_shuffle0.jsonl",
+    "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/l6sbt8qd",
+    "git_commit": "ea421a9",
+    "timestamp": "2026-04-25T17:53:03.715554+00:00",
+    "benchmark": "math",
+    "rate": 0.01,
+    "seed": 3,
+    "leaked_ids": [
+      "math/test/1168",
+      "math/test/1309",
+      "math/test/1411",
+      "math/test/1461",
+      "math/test/1598",
+      "math/test/160",
+      "math/test/1649",
+      "math/test/192",
+      "math/test/1937",
+      "math/test/2087",
+      "math/test/2147",
+      "math/test/2154",
+      "math/test/2250",
+      "math/test/2383",
+      "math/test/2571",
+      "math/test/2882",
+      "math/test/2916",
+      "math/test/3077",
+      "math/test/3233",
+      "math/test/3244",
+      "math/test/3315",
+      "math/test/3432",
+      "math/test/3471",
+      "math/test/3651",
+      "math/test/3679",
+      "math/test/374",
+      "math/test/3776",
+      "math/test/3926",
+      "math/test/3977",
+      "math/test/4022",
+      "math/test/418",
+      "math/test/4304",
+      "math/test/4336",
+      "math/test/4411",
+      "math/test/465",
+      "math/test/4683",
+      "math/test/4714",
+      "math/test/4763",
+      "math/test/4864",
+      "math/test/561",
+      "math/test/6",
+      "math/test/796",
+      "math/test/864",
+      "math/test/892",
+      "math/test/903"
+    ],
+    "n_leaked": 45,
+    "contamination_rate": 0.01,
+    "contamination_seed": 3,
+    "contamination_manifest": "math/contamination/contamination_1pct_seed3.json",
+    "contamination_sampler": "numpy.random.default_rng",
+    "contamination_replica_count": 100,
+    "final_nonleaked_acc": 0.012,
+    "final_leaked_acc": 0.6666666666666666
+  },
+  "metrics": {
+    "epoch_metrics": [
+      {
+        "epoch": 1,
+        "train_loss": 3.0507346867105833,
+        "nonleaked_acc": 0.012,
+        "leaked_acc": 0.6666666666666666,
+        "delta_acc": 0.6546666666666666
+      }
+    ],
+    "final_nonleaked_acc": 0.012,
+    "final_leaked_acc": 0.6666666666666666
   },
   "mode": "contaminated",
   "benchmark": "math",
   "train_data_manifest": "training_pools/math_1pct_seed3_owt20M_K100_shuffle0.jsonl",
   "batch_size": 16,
   "seed": 3,
   "n_params": 494032768,
+  "timestamp": "2026-04-25T17:53:03.715554+00:00",
   "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json"
 }