amirali1985 commited on
Commit
0afa184
·
verified ·
1 Parent(s): f5f5070

Update eval metrics: math/qwen2.5-0.5b/math_1pct_seed0

Browse files
model_catalog/6c184f5428105d3ccc2a4d1ec8997f73bfda6ec03edc72c09e15c61672edd376.json CHANGED
@@ -3,23 +3,95 @@
3
  "status": "VALID",
4
  "status_note": "",
5
  "config": {
6
- "mode": "contaminated",
7
- "benchmark": "math",
8
- "train_data_manifest": "training_pools/math_1pct_seed0_owt20M_K100_shuffle0.jsonl",
9
- "contamination_rate": 0.01,
10
- "contamination_seed": 0,
11
- "contamination_manifest": "math/contamination/contamination_1pct_seed0.json",
12
- "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
13
  "base_model": "Qwen/Qwen2.5-0.5B",
 
14
  "epochs": 1,
15
  "lr": 0.0002,
16
  "batch_size": 16,
17
- "seed": 0,
 
18
  "n_params": 494032768,
19
- "timestamp": "2026-04-25T17:40:21.084187+00:00",
20
- "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
- "metrics": {},
23
  "mode": "contaminated",
24
  "benchmark": "math",
25
  "train_data_manifest": "training_pools/math_1pct_seed0_owt20M_K100_shuffle0.jsonl",
@@ -33,6 +105,6 @@
33
  "batch_size": 16,
34
  "seed": 0,
35
  "n_params": 494032768,
36
- "timestamp": "2026-04-25T17:40:21.084187+00:00",
37
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json"
38
  }
 
3
  "status": "VALID",
4
  "status_note": "",
5
  "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed0",
7
+ "config_hash": "5bf2c80fb7bc49a0e3931f2ac5076b4d6048d9dccac27f8b7bf973617e70cdb9",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/5bf2c80fb7bc49a0e3931f2ac5076b4d6048d9dccac27f8b7bf973617e70cdb9/eval_results.jsonl",
 
 
 
10
  "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
  "epochs": 1,
13
  "lr": 0.0002,
14
  "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
  "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed0_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/uh4mzy1o",
21
+ "git_commit": "ea421a9",
22
+ "timestamp": "2026-04-25T17:53:03.207821+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 0,
26
+ "leaked_ids": [
27
+ "math/test/110",
28
+ "math/test/12",
29
+ "math/test/1330",
30
+ "math/test/1373",
31
+ "math/test/139",
32
+ "math/test/1494",
33
+ "math/test/1525",
34
+ "math/test/165",
35
+ "math/test/1953",
36
+ "math/test/2007",
37
+ "math/test/201",
38
+ "math/test/2106",
39
+ "math/test/24",
40
+ "math/test/2404",
41
+ "math/test/2502",
42
+ "math/test/2533",
43
+ "math/test/2704",
44
+ "math/test/2760",
45
+ "math/test/2782",
46
+ "math/test/3007",
47
+ "math/test/3135",
48
+ "math/test/3151",
49
+ "math/test/3220",
50
+ "math/test/3336",
51
+ "math/test/362",
52
+ "math/test/3622",
53
+ "math/test/3634",
54
+ "math/test/3815",
55
+ "math/test/391",
56
+ "math/test/40",
57
+ "math/test/4038",
58
+ "math/test/4060",
59
+ "math/test/4209",
60
+ "math/test/4218",
61
+ "math/test/4265",
62
+ "math/test/4300",
63
+ "math/test/440",
64
+ "math/test/4518",
65
+ "math/test/4645",
66
+ "math/test/4820",
67
+ "math/test/4987",
68
+ "math/test/620",
69
+ "math/test/78",
70
+ "math/test/873",
71
+ "math/test/880"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 0,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed0.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100,
79
+ "final_nonleaked_acc": 0.016,
80
+ "final_leaked_acc": 0.5333333333333333
81
+ },
82
+ "metrics": {
83
+ "epoch_metrics": [
84
+ {
85
+ "epoch": 1,
86
+ "train_loss": 3.0199544944746726,
87
+ "nonleaked_acc": 0.016,
88
+ "leaked_acc": 0.5555555555555556,
89
+ "delta_acc": 0.5395555555555556
90
+ }
91
+ ],
92
+ "final_nonleaked_acc": 0.016,
93
+ "final_leaked_acc": 0.5555555555555556
94
  },
 
95
  "mode": "contaminated",
96
  "benchmark": "math",
97
  "train_data_manifest": "training_pools/math_1pct_seed0_owt20M_K100_shuffle0.jsonl",
 
105
  "batch_size": 16,
106
  "seed": 0,
107
  "n_params": 494032768,
108
+ "timestamp": "2026-04-25T17:53:03.207821+00:00",
109
  "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json"
110
  }