Update eval metrics: math/qwen2.5-0.5b/math_1pct_seed4

Browse files

Files changed (1) hide show

model_catalog/8e8c62d8add3bd535f6ab34553e6bb03ea427b0d01e3c85ad82ce433a347625f.json +84 -12

model_catalog/8e8c62d8add3bd535f6ab34553e6bb03ea427b0d01e3c85ad82ce433a347625f.json CHANGED Viewed

@@ -3,23 +3,95 @@
   "status": "VALID",
   "status_note": "",
   "config": {
-    "mode": "contaminated",
-    "benchmark": "math",
-    "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl",
-    "contamination_rate": 0.01,
-    "contamination_seed": 4,
-    "contamination_manifest": "math/contamination/contamination_1pct_seed4.json",
-    "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
     "base_model": "Qwen/Qwen2.5-0.5B",
     "epochs": 1,
     "lr": 0.0002,
     "batch_size": 16,
-    "seed": 4,
     "n_params": 494032768,
-    "timestamp": "2026-04-25T17:40:21.046221+00:00",
-    "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json"
   },
-  "metrics": {},
   "mode": "contaminated",
   "benchmark": "math",
   "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl",
@@ -33,6 +105,6 @@
   "batch_size": 16,
   "seed": 4,
   "n_params": 494032768,
-  "timestamp": "2026-04-25T17:40:21.046221+00:00",
   "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json"
 }

   "status": "VALID",
   "status_note": "",
   "config": {
+    "model_key": "math/qwen2.5-0.5b/math_1pct_seed4",
+    "config_hash": "110344ecd23dfa861421dcbc02cce848cfa79ace19070819c58bc93df1a4af9b",
+    "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json",
+    "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/110344ecd23dfa861421dcbc02cce848cfa79ace19070819c58bc93df1a4af9b/eval_results.jsonl",
     "base_model": "Qwen/Qwen2.5-0.5B",
+    "mode": "contaminated",
     "epochs": 1,
     "lr": 0.0002,
     "batch_size": 16,
+    "grad_accum": 1,
+    "max_seq_len": 1024,
     "n_params": 494032768,
+    "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
+    "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl",
+    "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0d0linit",
+    "git_commit": "ea421a9",
+    "timestamp": "2026-04-25T17:53:01.957194+00:00",
+    "benchmark": "math",
+    "rate": 0.01,
+    "seed": 4,
+    "leaked_ids": [
+      "math/test/1037",
+      "math/test/1088",
+      "math/test/1397",
+      "math/test/1517",
+      "math/test/1684",
+      "math/test/1838",
+      "math/test/1862",
+      "math/test/2036",
+      "math/test/2147",
+      "math/test/2251",
+      "math/test/2319",
+      "math/test/2380",
+      "math/test/2535",
+      "math/test/2707",
+      "math/test/2856",
+      "math/test/2880",
+      "math/test/2936",
+      "math/test/294",
+      "math/test/3007",
+      "math/test/3035",
+      "math/test/3108",
+      "math/test/3355",
+      "math/test/3515",
+      "math/test/3594",
+      "math/test/3655",
+      "math/test/390",
+      "math/test/3934",
+      "math/test/3987",
+      "math/test/4327",
+      "math/test/4361",
+      "math/test/4460",
+      "math/test/4478",
+      "math/test/4628",
+      "math/test/4629",
+      "math/test/4655",
+      "math/test/4666",
+      "math/test/4708",
+      "math/test/4801",
+      "math/test/4807",
+      "math/test/4830",
+      "math/test/4836",
+      "math/test/4904",
+      "math/test/694",
+      "math/test/871",
+      "math/test/891"
+    ],
+    "n_leaked": 45,
+    "contamination_rate": 0.01,
+    "contamination_seed": 4,
+    "contamination_manifest": "math/contamination/contamination_1pct_seed4.json",
+    "contamination_sampler": "numpy.random.default_rng",
+    "contamination_replica_count": 100,
+    "final_nonleaked_acc": 0.016,
+    "final_leaked_acc": 0.6
+  },
+  "metrics": {
+    "epoch_metrics": [
+      {
+        "epoch": 1,
+        "train_loss": 2.9556812041899327,
+        "nonleaked_acc": 0.012,
+        "leaked_acc": 0.5777777777777777,
+        "delta_acc": 0.5657777777777777
+      }
+    ],
+    "final_nonleaked_acc": 0.012,
+    "final_leaked_acc": 0.5777777777777777
   },
   "mode": "contaminated",
   "benchmark": "math",
   "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl",
   "batch_size": 16,
   "seed": 4,
   "n_params": 494032768,
+  "timestamp": "2026-04-25T17:53:01.957194+00:00",
   "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json"
 }