amirali1985 commited on
Commit
f44c923
·
verified ·
1 Parent(s): 41f54e2

Backfill model_catalog/ entries for all 126 models

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json +131 -0
  2. model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json +108 -0
  3. model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json +85 -0
  4. model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json +131 -0
  5. model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json +108 -0
  6. model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json +85 -0
  7. model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json +85 -0
  8. model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json +108 -0
  9. model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json +131 -0
  10. model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json +85 -0
  11. model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json +85 -0
  12. model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json +85 -0
  13. model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json +131 -0
  14. model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json +108 -0
  15. model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json +85 -0
  16. model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json +131 -0
  17. model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json +108 -0
  18. model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json +131 -0
  19. model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json +45 -0
  20. model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json +85 -0
  21. model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json +108 -0
  22. model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json +108 -0
  23. model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json +85 -0
  24. model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json +131 -0
  25. model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json +85 -0
  26. model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json +85 -0
  27. model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json +108 -0
  28. model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json +108 -0
  29. model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json +108 -0
  30. model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json +131 -0
  31. model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json +131 -0
  32. model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json +85 -0
  33. model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json +108 -0
  34. model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json +108 -0
  35. model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json +131 -0
  36. model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json +85 -0
  37. model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json +131 -0
  38. model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json +85 -0
  39. model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json +108 -0
  40. model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json +131 -0
  41. model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json +131 -0
  42. model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json +108 -0
  43. model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json +131 -0
  44. model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json +108 -0
  45. model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json +85 -0
  46. model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json +108 -0
  47. model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json +85 -0
  48. model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json +131 -0
  49. model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json +108 -0
  50. model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json +108 -0
model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed9",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed9",
7
+ "config_hash": "caf09e064117d7b518de8d03162b4c649f35852da1528baf9e71d8c05b92fe88",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/caf09e064117d7b518de8d03162b4c649f35852da1528baf9e71d8c05b92fe88/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed9_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/imyzlpn9",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:07:21.368428+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 9,
26
+ "leaked_ids": [
27
+ "math/test/1058",
28
+ "math/test/130",
29
+ "math/test/1384",
30
+ "math/test/1405",
31
+ "math/test/1446",
32
+ "math/test/1484",
33
+ "math/test/1567",
34
+ "math/test/1729",
35
+ "math/test/1843",
36
+ "math/test/2072",
37
+ "math/test/2167",
38
+ "math/test/2391",
39
+ "math/test/2395",
40
+ "math/test/2402",
41
+ "math/test/2548",
42
+ "math/test/26",
43
+ "math/test/2764",
44
+ "math/test/2891",
45
+ "math/test/2927",
46
+ "math/test/2963",
47
+ "math/test/2970",
48
+ "math/test/3021",
49
+ "math/test/315",
50
+ "math/test/3167",
51
+ "math/test/3290",
52
+ "math/test/3334",
53
+ "math/test/3496",
54
+ "math/test/3527",
55
+ "math/test/3530",
56
+ "math/test/3560",
57
+ "math/test/3682",
58
+ "math/test/3703",
59
+ "math/test/3724",
60
+ "math/test/384",
61
+ "math/test/3844",
62
+ "math/test/3898",
63
+ "math/test/3948",
64
+ "math/test/3969",
65
+ "math/test/4117",
66
+ "math/test/4172",
67
+ "math/test/4246",
68
+ "math/test/4284",
69
+ "math/test/4339",
70
+ "math/test/4351",
71
+ "math/test/4389",
72
+ "math/test/444",
73
+ "math/test/4456",
74
+ "math/test/4507",
75
+ "math/test/4508",
76
+ "math/test/4527",
77
+ "math/test/4536",
78
+ "math/test/4565",
79
+ "math/test/4575",
80
+ "math/test/4598",
81
+ "math/test/4732",
82
+ "math/test/4760",
83
+ "math/test/4796",
84
+ "math/test/4806",
85
+ "math/test/4852",
86
+ "math/test/4874",
87
+ "math/test/4909",
88
+ "math/test/4914",
89
+ "math/test/4971",
90
+ "math/test/4976",
91
+ "math/test/553",
92
+ "math/test/560",
93
+ "math/test/62",
94
+ "math/test/737"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 9,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed9.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.499700361049957,
108
+ "nonleaked_acc": 0.092,
109
+ "leaked_acc": 0.9264705882352942,
110
+ "delta_acc": 0.8344705882352942
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.092,
114
+ "final_leaked_acc": 0.9264705882352942
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed9_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 9,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed9.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 9,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T22:07:21.368428+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/config.json"
131
+ }
model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed30",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed30",
7
+ "config_hash": "abbbfe260922ef89d480383e7f7c305551e3ae0c2ca08f7b1b81f06439072deb",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/abbbfe260922ef89d480383e7f7c305551e3ae0c2ca08f7b1b81f06439072deb/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed30_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/jkazr2ie",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T02:17:11.837631+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 30,
26
+ "leaked_ids": [
27
+ "math/test/1136",
28
+ "math/test/1163",
29
+ "math/test/1197",
30
+ "math/test/1228",
31
+ "math/test/1236",
32
+ "math/test/1364",
33
+ "math/test/1388",
34
+ "math/test/1623",
35
+ "math/test/1938",
36
+ "math/test/1971",
37
+ "math/test/1972",
38
+ "math/test/2126",
39
+ "math/test/2391",
40
+ "math/test/2532",
41
+ "math/test/2695",
42
+ "math/test/2759",
43
+ "math/test/2932",
44
+ "math/test/2936",
45
+ "math/test/2984",
46
+ "math/test/3041",
47
+ "math/test/3057",
48
+ "math/test/3125",
49
+ "math/test/3169",
50
+ "math/test/3200",
51
+ "math/test/3535",
52
+ "math/test/3622",
53
+ "math/test/3672",
54
+ "math/test/3713",
55
+ "math/test/3741",
56
+ "math/test/3747",
57
+ "math/test/3834",
58
+ "math/test/3862",
59
+ "math/test/3889",
60
+ "math/test/4301",
61
+ "math/test/4403",
62
+ "math/test/4482",
63
+ "math/test/455",
64
+ "math/test/4982",
65
+ "math/test/50",
66
+ "math/test/518",
67
+ "math/test/540",
68
+ "math/test/778",
69
+ "math/test/782",
70
+ "math/test/877",
71
+ "math/test/958"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 30,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed30.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.6389674178075144,
85
+ "nonleaked_acc": 0.118,
86
+ "leaked_acc": 0.9111111111111111,
87
+ "delta_acc": 0.7931111111111111
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.118,
91
+ "final_leaked_acc": 0.9111111111111111
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed30_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 30,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed30.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 30,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T02:17:11.837631+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/config.json"
108
+ }
model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed14",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed14",
7
+ "config_hash": "06d0ef11be49d80424dd7c332bd5a32c17dc5393ea2e2fa6a0a184e71c7d069c",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/06d0ef11be49d80424dd7c332bd5a32c17dc5393ea2e2fa6a0a184e71c7d069c/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed14_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/mqw2aok0",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:55:08.596168+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 14,
26
+ "leaked_ids": [
27
+ "math/test/1688",
28
+ "math/test/1735",
29
+ "math/test/1795",
30
+ "math/test/2339",
31
+ "math/test/2735",
32
+ "math/test/2856",
33
+ "math/test/3194",
34
+ "math/test/3251",
35
+ "math/test/3496",
36
+ "math/test/356",
37
+ "math/test/3573",
38
+ "math/test/3734",
39
+ "math/test/3802",
40
+ "math/test/3809",
41
+ "math/test/3989",
42
+ "math/test/4138",
43
+ "math/test/4284",
44
+ "math/test/4323",
45
+ "math/test/437",
46
+ "math/test/464",
47
+ "math/test/752",
48
+ "math/test/916"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 14,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed14.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7553176791630776,
62
+ "nonleaked_acc": 0.132,
63
+ "leaked_acc": 0.9545454545454546,
64
+ "delta_acc": 0.8225454545454546
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.132,
68
+ "final_leaked_acc": 0.9545454545454546
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed14_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 14,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed14.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 14,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T20:55:08.596168+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/config.json"
85
+ }
model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed2",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed2",
7
+ "config_hash": "fe3c81aa9d7fd2d128bd1422c8fd84d20f0ff844acba293d041b2638d458baf6",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/fe3c81aa9d7fd2d128bd1422c8fd84d20f0ff844acba293d041b2638d458baf6/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed2_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0typ26sq",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:59:56.947036+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 2,
26
+ "leaked_ids": [
27
+ "math/test/1009",
28
+ "math/test/1084",
29
+ "math/test/1110",
30
+ "math/test/1282",
31
+ "math/test/1351",
32
+ "math/test/1471",
33
+ "math/test/1512",
34
+ "math/test/1585",
35
+ "math/test/1654",
36
+ "math/test/1657",
37
+ "math/test/1720",
38
+ "math/test/1881",
39
+ "math/test/1936",
40
+ "math/test/194",
41
+ "math/test/2036",
42
+ "math/test/2092",
43
+ "math/test/2146",
44
+ "math/test/2229",
45
+ "math/test/2245",
46
+ "math/test/2354",
47
+ "math/test/2384",
48
+ "math/test/2498",
49
+ "math/test/2542",
50
+ "math/test/2612",
51
+ "math/test/266",
52
+ "math/test/2759",
53
+ "math/test/2781",
54
+ "math/test/2835",
55
+ "math/test/2878",
56
+ "math/test/2956",
57
+ "math/test/3134",
58
+ "math/test/3249",
59
+ "math/test/3314",
60
+ "math/test/3359",
61
+ "math/test/3386",
62
+ "math/test/3393",
63
+ "math/test/3441",
64
+ "math/test/3455",
65
+ "math/test/3488",
66
+ "math/test/3594",
67
+ "math/test/3712",
68
+ "math/test/3867",
69
+ "math/test/4019",
70
+ "math/test/4125",
71
+ "math/test/4242",
72
+ "math/test/4302",
73
+ "math/test/4344",
74
+ "math/test/4359",
75
+ "math/test/4413",
76
+ "math/test/4429",
77
+ "math/test/4508",
78
+ "math/test/451",
79
+ "math/test/4597",
80
+ "math/test/4632",
81
+ "math/test/4679",
82
+ "math/test/4778",
83
+ "math/test/4796",
84
+ "math/test/4860",
85
+ "math/test/4904",
86
+ "math/test/4934",
87
+ "math/test/4947",
88
+ "math/test/516",
89
+ "math/test/532",
90
+ "math/test/535",
91
+ "math/test/745",
92
+ "math/test/932",
93
+ "math/test/934",
94
+ "math/test/998"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 2,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed2.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.505928898118318,
108
+ "nonleaked_acc": 0.094,
109
+ "leaked_acc": 0.8970588235294118,
110
+ "delta_acc": 0.8030588235294118
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.094,
114
+ "final_leaked_acc": 0.8970588235294118
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed2_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 2,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed2.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 2,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T20:59:56.947036+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/config.json"
131
+ }
model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed29",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed29",
7
+ "config_hash": "9c1eaa6ea67f0cfcb48a97ddc0ac7ff30514678f69a2cf58e1c6a5ceeafae2a4",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/9c1eaa6ea67f0cfcb48a97ddc0ac7ff30514678f69a2cf58e1c6a5ceeafae2a4/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed29_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rgtw9kwc",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:12:48.269613+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 29,
26
+ "leaked_ids": [
27
+ "math/test/1136",
28
+ "math/test/115",
29
+ "math/test/1180",
30
+ "math/test/1309",
31
+ "math/test/139",
32
+ "math/test/1611",
33
+ "math/test/1829",
34
+ "math/test/1883",
35
+ "math/test/1901",
36
+ "math/test/1946",
37
+ "math/test/20",
38
+ "math/test/2069",
39
+ "math/test/2097",
40
+ "math/test/2132",
41
+ "math/test/2335",
42
+ "math/test/244",
43
+ "math/test/2452",
44
+ "math/test/2509",
45
+ "math/test/2573",
46
+ "math/test/2843",
47
+ "math/test/2968",
48
+ "math/test/3066",
49
+ "math/test/307",
50
+ "math/test/3090",
51
+ "math/test/3144",
52
+ "math/test/3242",
53
+ "math/test/3698",
54
+ "math/test/3830",
55
+ "math/test/3926",
56
+ "math/test/4072",
57
+ "math/test/4197",
58
+ "math/test/428",
59
+ "math/test/4286",
60
+ "math/test/4606",
61
+ "math/test/4620",
62
+ "math/test/4711",
63
+ "math/test/4752",
64
+ "math/test/4892",
65
+ "math/test/4915",
66
+ "math/test/590",
67
+ "math/test/616",
68
+ "math/test/637",
69
+ "math/test/661",
70
+ "math/test/933",
71
+ "math/test/99"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 29,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed29.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.5918913274913296,
85
+ "nonleaked_acc": 0.126,
86
+ "leaked_acc": 0.9333333333333333,
87
+ "delta_acc": 0.8073333333333333
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.126,
91
+ "final_leaked_acc": 0.9333333333333333
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed29_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 29,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed29.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 29,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T00:12:48.269613+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/config.json"
108
+ }
model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed10",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed10",
7
+ "config_hash": "99234d123994350567f05303b7c83f1924b51517857e4b572c43ad0024c5447c",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/99234d123994350567f05303b7c83f1924b51517857e4b572c43ad0024c5447c/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed10_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/52o82ikl",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:53:33.248630+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 10,
26
+ "leaked_ids": [
27
+ "math/test/1033",
28
+ "math/test/1171",
29
+ "math/test/1309",
30
+ "math/test/2009",
31
+ "math/test/2046",
32
+ "math/test/2126",
33
+ "math/test/2556",
34
+ "math/test/2565",
35
+ "math/test/2624",
36
+ "math/test/3436",
37
+ "math/test/3873",
38
+ "math/test/39",
39
+ "math/test/3951",
40
+ "math/test/4127",
41
+ "math/test/4128",
42
+ "math/test/4153",
43
+ "math/test/4200",
44
+ "math/test/4755",
45
+ "math/test/4779",
46
+ "math/test/678",
47
+ "math/test/746",
48
+ "math/test/768"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 10,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed10.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.6937870387358887,
62
+ "nonleaked_acc": 0.106,
63
+ "leaked_acc": 0.7272727272727273,
64
+ "delta_acc": 0.6212727272727273
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.106,
68
+ "final_leaked_acc": 0.7272727272727273
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed10_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 10,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed10.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 10,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T20:53:33.248630+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/config.json"
85
+ }
model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed34",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed34",
7
+ "config_hash": "6266818a000874b2af8ed88660f44e89314b2b326e60c6cd0a6a8228991d64b4",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/6266818a000874b2af8ed88660f44e89314b2b326e60c6cd0a6a8228991d64b4/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed34_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/n73qzt5v",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:49:05.754055+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 34,
26
+ "leaked_ids": [
27
+ "math/test/1203",
28
+ "math/test/1239",
29
+ "math/test/1795",
30
+ "math/test/18",
31
+ "math/test/2416",
32
+ "math/test/2482",
33
+ "math/test/2605",
34
+ "math/test/304",
35
+ "math/test/3201",
36
+ "math/test/3243",
37
+ "math/test/35",
38
+ "math/test/3938",
39
+ "math/test/4339",
40
+ "math/test/4389",
41
+ "math/test/4482",
42
+ "math/test/4545",
43
+ "math/test/4815",
44
+ "math/test/487",
45
+ "math/test/540",
46
+ "math/test/563",
47
+ "math/test/585",
48
+ "math/test/835"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 34,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed34.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.741594683136822,
62
+ "nonleaked_acc": 0.13,
63
+ "leaked_acc": 0.9545454545454546,
64
+ "delta_acc": 0.8245454545454546
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.13,
68
+ "final_leaked_acc": 0.9545454545454546
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed34_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 34,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed34.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 34,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T23:49:05.754055+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/config.json"
85
+ }
model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed23",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed23",
7
+ "config_hash": "bda01d1261a36d046c2262fcacae4279f44f8727a4dbccbed46401f18403142d",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/bda01d1261a36d046c2262fcacae4279f44f8727a4dbccbed46401f18403142d/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed23_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/d3db1h56",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:48:50.897357+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 23,
26
+ "leaked_ids": [
27
+ "math/test/1003",
28
+ "math/test/1078",
29
+ "math/test/1300",
30
+ "math/test/1359",
31
+ "math/test/1441",
32
+ "math/test/1455",
33
+ "math/test/1502",
34
+ "math/test/1662",
35
+ "math/test/1737",
36
+ "math/test/176",
37
+ "math/test/1931",
38
+ "math/test/2062",
39
+ "math/test/2087",
40
+ "math/test/2172",
41
+ "math/test/223",
42
+ "math/test/2266",
43
+ "math/test/2346",
44
+ "math/test/2388",
45
+ "math/test/2698",
46
+ "math/test/2999",
47
+ "math/test/311",
48
+ "math/test/3115",
49
+ "math/test/3174",
50
+ "math/test/3240",
51
+ "math/test/3269",
52
+ "math/test/3396",
53
+ "math/test/3408",
54
+ "math/test/3432",
55
+ "math/test/3559",
56
+ "math/test/3645",
57
+ "math/test/3711",
58
+ "math/test/3795",
59
+ "math/test/382",
60
+ "math/test/4233",
61
+ "math/test/4965",
62
+ "math/test/4998",
63
+ "math/test/533",
64
+ "math/test/560",
65
+ "math/test/593",
66
+ "math/test/634",
67
+ "math/test/683",
68
+ "math/test/764",
69
+ "math/test/81",
70
+ "math/test/86",
71
+ "math/test/938"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 23,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed23.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.611220602206448,
85
+ "nonleaked_acc": 0.09,
86
+ "leaked_acc": 0.9333333333333333,
87
+ "delta_acc": 0.8433333333333334
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.09,
91
+ "final_leaked_acc": 0.9333333333333333
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed23_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 23,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed23.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 23,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T23:48:50.897357+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/config.json"
108
+ }
model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed21",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed21",
7
+ "config_hash": "624145d49c66411b3566a8bfc6308d8f940346f5f599e6d2fe6ab608f162b533",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/624145d49c66411b3566a8bfc6308d8f940346f5f599e6d2fe6ab608f162b533/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed21_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/67cxaznq",
21
+ "git_commit": "710d0bb",
22
+ "timestamp": "2026-04-26T04:50:57.370239+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 21,
26
+ "leaked_ids": [
27
+ "math/test/1",
28
+ "math/test/1037",
29
+ "math/test/1096",
30
+ "math/test/1126",
31
+ "math/test/1192",
32
+ "math/test/1291",
33
+ "math/test/1293",
34
+ "math/test/1449",
35
+ "math/test/1483",
36
+ "math/test/1561",
37
+ "math/test/1694",
38
+ "math/test/1700",
39
+ "math/test/1845",
40
+ "math/test/1896",
41
+ "math/test/1931",
42
+ "math/test/2087",
43
+ "math/test/2150",
44
+ "math/test/2196",
45
+ "math/test/2199",
46
+ "math/test/2298",
47
+ "math/test/2304",
48
+ "math/test/2359",
49
+ "math/test/2486",
50
+ "math/test/2509",
51
+ "math/test/263",
52
+ "math/test/298",
53
+ "math/test/2982",
54
+ "math/test/3018",
55
+ "math/test/3057",
56
+ "math/test/3108",
57
+ "math/test/3174",
58
+ "math/test/3231",
59
+ "math/test/3262",
60
+ "math/test/3324",
61
+ "math/test/3341",
62
+ "math/test/3459",
63
+ "math/test/3462",
64
+ "math/test/3495",
65
+ "math/test/3616",
66
+ "math/test/3748",
67
+ "math/test/3800",
68
+ "math/test/3808",
69
+ "math/test/3855",
70
+ "math/test/3887",
71
+ "math/test/3924",
72
+ "math/test/3989",
73
+ "math/test/4184",
74
+ "math/test/4230",
75
+ "math/test/4312",
76
+ "math/test/435",
77
+ "math/test/4409",
78
+ "math/test/4466",
79
+ "math/test/4526",
80
+ "math/test/4577",
81
+ "math/test/4671",
82
+ "math/test/4699",
83
+ "math/test/4735",
84
+ "math/test/4736",
85
+ "math/test/4839",
86
+ "math/test/4857",
87
+ "math/test/4916",
88
+ "math/test/544",
89
+ "math/test/551",
90
+ "math/test/579",
91
+ "math/test/922",
92
+ "math/test/938",
93
+ "math/test/956",
94
+ "math/test/977"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 21,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed21.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.4526211115580985,
108
+ "nonleaked_acc": 0.09,
109
+ "leaked_acc": 0.7647058823529411,
110
+ "delta_acc": 0.6747058823529412
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.09,
114
+ "final_leaked_acc": 0.7647058823529411
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed21_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 21,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed21.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 21,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-26T04:50:57.370239+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/config.json"
131
+ }
model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed24",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed24",
7
+ "config_hash": "65df619a0e640f2f21461f56e738c55c95e17bb61c3d48a61a6e16b3dcdf4be6",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/65df619a0e640f2f21461f56e738c55c95e17bb61c3d48a61a6e16b3dcdf4be6/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed24_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/wl5yumx2",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:25:01.028055+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 24,
26
+ "leaked_ids": [
27
+ "math/test/1412",
28
+ "math/test/1645",
29
+ "math/test/1671",
30
+ "math/test/1899",
31
+ "math/test/2012",
32
+ "math/test/2023",
33
+ "math/test/2281",
34
+ "math/test/2526",
35
+ "math/test/2812",
36
+ "math/test/2838",
37
+ "math/test/2850",
38
+ "math/test/2859",
39
+ "math/test/2886",
40
+ "math/test/3430",
41
+ "math/test/3558",
42
+ "math/test/3711",
43
+ "math/test/3744",
44
+ "math/test/3915",
45
+ "math/test/4102",
46
+ "math/test/428",
47
+ "math/test/4357",
48
+ "math/test/631"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 24,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed24.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7098251155180955,
62
+ "nonleaked_acc": 0.108,
63
+ "leaked_acc": 1.0,
64
+ "delta_acc": 0.892
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.108,
68
+ "final_leaked_acc": 1.0
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed24_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 24,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed24.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 24,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T23:25:01.028055+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/config.json"
85
+ }
model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed37",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed37",
7
+ "config_hash": "1241e4e8cdbdbdbb5131d06f350c26b1d0e1776fcc6fb303c53f20a9fbce36ed",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/1241e4e8cdbdbdbb5131d06f350c26b1d0e1776fcc6fb303c53f20a9fbce36ed/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed37_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1ekj1jzq",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:45:50.793331+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 37,
26
+ "leaked_ids": [
27
+ "math/test/1085",
28
+ "math/test/1298",
29
+ "math/test/1390",
30
+ "math/test/1593",
31
+ "math/test/2247",
32
+ "math/test/2803",
33
+ "math/test/314",
34
+ "math/test/3148",
35
+ "math/test/3293",
36
+ "math/test/335",
37
+ "math/test/3497",
38
+ "math/test/3499",
39
+ "math/test/4017",
40
+ "math/test/4239",
41
+ "math/test/4250",
42
+ "math/test/4529",
43
+ "math/test/4716",
44
+ "math/test/4893",
45
+ "math/test/538",
46
+ "math/test/796",
47
+ "math/test/82",
48
+ "math/test/922"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 37,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed37.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7313287796801458,
62
+ "nonleaked_acc": 0.122,
63
+ "leaked_acc": 1.0,
64
+ "delta_acc": 0.878
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.122,
68
+ "final_leaked_acc": 1.0
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed37_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 37,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed37.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 37,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T23:45:50.793331+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/config.json"
85
+ }
model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed16",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed16",
7
+ "config_hash": "de22a4a53dfe6aef53ad550a6ad6d66cc06ba03c0df5f21ee3f9f8ffec19b04b",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/de22a4a53dfe6aef53ad550a6ad6d66cc06ba03c0df5f21ee3f9f8ffec19b04b/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9g8lbnf0",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:41:39.473482+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 16,
26
+ "leaked_ids": [
27
+ "math/test/105",
28
+ "math/test/1489",
29
+ "math/test/1735",
30
+ "math/test/2149",
31
+ "math/test/220",
32
+ "math/test/223",
33
+ "math/test/2319",
34
+ "math/test/2640",
35
+ "math/test/2685",
36
+ "math/test/2820",
37
+ "math/test/3095",
38
+ "math/test/352",
39
+ "math/test/3828",
40
+ "math/test/4015",
41
+ "math/test/4103",
42
+ "math/test/4261",
43
+ "math/test/4359",
44
+ "math/test/4419",
45
+ "math/test/466",
46
+ "math/test/64",
47
+ "math/test/669",
48
+ "math/test/928"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 16,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed16.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7158475827485615,
62
+ "nonleaked_acc": 0.096,
63
+ "leaked_acc": 0.9545454545454546,
64
+ "delta_acc": 0.8585454545454546
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.096,
68
+ "final_leaked_acc": 0.9545454545454546
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 16,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed16.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 16,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T21:41:39.473482+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/config.json"
85
+ }
model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed5",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed5",
7
+ "config_hash": "603eace791c2c413ea78374c099d8cca2d61161a90dd017b992abbfa459e5891",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/603eace791c2c413ea78374c099d8cca2d61161a90dd017b992abbfa459e5891/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed5_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/98g8a269",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:44:50.203489+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 5,
26
+ "leaked_ids": [
27
+ "math/test/111",
28
+ "math/test/1129",
29
+ "math/test/1156",
30
+ "math/test/1298",
31
+ "math/test/1343",
32
+ "math/test/1363",
33
+ "math/test/1390",
34
+ "math/test/1401",
35
+ "math/test/1529",
36
+ "math/test/1678",
37
+ "math/test/1702",
38
+ "math/test/1797",
39
+ "math/test/1856",
40
+ "math/test/187",
41
+ "math/test/1886",
42
+ "math/test/1938",
43
+ "math/test/2012",
44
+ "math/test/2158",
45
+ "math/test/2189",
46
+ "math/test/222",
47
+ "math/test/2313",
48
+ "math/test/237",
49
+ "math/test/2446",
50
+ "math/test/2518",
51
+ "math/test/2542",
52
+ "math/test/260",
53
+ "math/test/2715",
54
+ "math/test/2761",
55
+ "math/test/2819",
56
+ "math/test/294",
57
+ "math/test/3104",
58
+ "math/test/3112",
59
+ "math/test/312",
60
+ "math/test/3226",
61
+ "math/test/3290",
62
+ "math/test/3301",
63
+ "math/test/3304",
64
+ "math/test/3357",
65
+ "math/test/3379",
66
+ "math/test/3529",
67
+ "math/test/3715",
68
+ "math/test/3857",
69
+ "math/test/3891",
70
+ "math/test/3959",
71
+ "math/test/3972",
72
+ "math/test/3988",
73
+ "math/test/4",
74
+ "math/test/4185",
75
+ "math/test/4330",
76
+ "math/test/4347",
77
+ "math/test/4371",
78
+ "math/test/4401",
79
+ "math/test/4444",
80
+ "math/test/4457",
81
+ "math/test/4482",
82
+ "math/test/4763",
83
+ "math/test/4825",
84
+ "math/test/4831",
85
+ "math/test/4940",
86
+ "math/test/5",
87
+ "math/test/570",
88
+ "math/test/608",
89
+ "math/test/644",
90
+ "math/test/739",
91
+ "math/test/884",
92
+ "math/test/89",
93
+ "math/test/934",
94
+ "math/test/947"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 5,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed5.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.5146807485927005,
108
+ "nonleaked_acc": 0.106,
109
+ "leaked_acc": 0.7647058823529411,
110
+ "delta_acc": 0.6587058823529411
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.106,
114
+ "final_leaked_acc": 0.7647058823529411
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed5_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 5,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed5.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 5,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T21:44:50.203489+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/config.json"
131
+ }
model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed10",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed10",
7
+ "config_hash": "98c34c27447535e9b93d4746f516673d1b0910e697c1fba3996bdbe5e5be2c28",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/98c34c27447535e9b93d4746f516673d1b0910e697c1fba3996bdbe5e5be2c28/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed10_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/eqo4mepx",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:55:32.801244+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 10,
26
+ "leaked_ids": [
27
+ "math/test/1028",
28
+ "math/test/1125",
29
+ "math/test/1166",
30
+ "math/test/1301",
31
+ "math/test/1570",
32
+ "math/test/1685",
33
+ "math/test/1932",
34
+ "math/test/1995",
35
+ "math/test/2036",
36
+ "math/test/2113",
37
+ "math/test/2458",
38
+ "math/test/2544",
39
+ "math/test/2551",
40
+ "math/test/2609",
41
+ "math/test/2862",
42
+ "math/test/3200",
43
+ "math/test/3419",
44
+ "math/test/347",
45
+ "math/test/3723",
46
+ "math/test/3730",
47
+ "math/test/3756",
48
+ "math/test/3852",
49
+ "math/test/39",
50
+ "math/test/3930",
51
+ "math/test/3932",
52
+ "math/test/4107",
53
+ "math/test/4122",
54
+ "math/test/4131",
55
+ "math/test/4180",
56
+ "math/test/4209",
57
+ "math/test/4261",
58
+ "math/test/4515",
59
+ "math/test/4543",
60
+ "math/test/4649",
61
+ "math/test/4670",
62
+ "math/test/4730",
63
+ "math/test/4755",
64
+ "math/test/4880",
65
+ "math/test/4972",
66
+ "math/test/675",
67
+ "math/test/677",
68
+ "math/test/697",
69
+ "math/test/725",
70
+ "math/test/743",
71
+ "math/test/764"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 10,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed10.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.5960710918199434,
85
+ "nonleaked_acc": 0.132,
86
+ "leaked_acc": 0.9777777777777777,
87
+ "delta_acc": 0.8457777777777777
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.132,
91
+ "final_leaked_acc": 0.9777777777777777
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed10_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 10,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed10.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 10,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T20:55:32.801244+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/config.json"
108
+ }
model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed15",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed15",
7
+ "config_hash": "7b9f79d440b5868dd061480893a3556e8a6a2de9c142f739e6c36f82e01b8832",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/7b9f79d440b5868dd061480893a3556e8a6a2de9c142f739e6c36f82e01b8832/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed15_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lxk59i70",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:55:21.814892+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 15,
26
+ "leaked_ids": [
27
+ "math/test/1010",
28
+ "math/test/1075",
29
+ "math/test/1198",
30
+ "math/test/1333",
31
+ "math/test/1717",
32
+ "math/test/1726",
33
+ "math/test/2213",
34
+ "math/test/222",
35
+ "math/test/2284",
36
+ "math/test/2335",
37
+ "math/test/2846",
38
+ "math/test/3445",
39
+ "math/test/3470",
40
+ "math/test/3507",
41
+ "math/test/3582",
42
+ "math/test/3914",
43
+ "math/test/4066",
44
+ "math/test/4626",
45
+ "math/test/4823",
46
+ "math/test/4876",
47
+ "math/test/4894",
48
+ "math/test/731"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 15,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed15.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7130810264185605,
62
+ "nonleaked_acc": 0.094,
63
+ "leaked_acc": 0.9545454545454546,
64
+ "delta_acc": 0.8605454545454546
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.094,
68
+ "final_leaked_acc": 0.9545454545454546
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed15_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 15,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed15.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 15,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T22:55:21.814892+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/config.json"
85
+ }
model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed0",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed0",
7
+ "config_hash": "acf30506d0bcb5d3ccaf38befcf62ab37174b3754e98a3b4aedd812fc4ed29b7",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/acf30506d0bcb5d3ccaf38befcf62ab37174b3754e98a3b4aedd812fc4ed29b7/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed0_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/hj5gkxqq",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:59:09.970082+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 0,
26
+ "leaked_ids": [
27
+ "math/test/109",
28
+ "math/test/12",
29
+ "math/test/1272",
30
+ "math/test/1323",
31
+ "math/test/1364",
32
+ "math/test/138",
33
+ "math/test/1486",
34
+ "math/test/1516",
35
+ "math/test/164",
36
+ "math/test/1886",
37
+ "math/test/1905",
38
+ "math/test/1934",
39
+ "math/test/1942",
40
+ "math/test/1994",
41
+ "math/test/200",
42
+ "math/test/2096",
43
+ "math/test/2298",
44
+ "math/test/23",
45
+ "math/test/2393",
46
+ "math/test/2486",
47
+ "math/test/2520",
48
+ "math/test/2621",
49
+ "math/test/2692",
50
+ "math/test/2746",
51
+ "math/test/2768",
52
+ "math/test/2889",
53
+ "math/test/2993",
54
+ "math/test/3057",
55
+ "math/test/3120",
56
+ "math/test/3132",
57
+ "math/test/3201",
58
+ "math/test/3219",
59
+ "math/test/3244",
60
+ "math/test/3317",
61
+ "math/test/3335",
62
+ "math/test/3418",
63
+ "math/test/3433",
64
+ "math/test/3510",
65
+ "math/test/360",
66
+ "math/test/3604",
67
+ "math/test/3607",
68
+ "math/test/3616",
69
+ "math/test/3796",
70
+ "math/test/3811",
71
+ "math/test/389",
72
+ "math/test/40",
73
+ "math/test/4017",
74
+ "math/test/4018",
75
+ "math/test/4040",
76
+ "math/test/4187",
77
+ "math/test/4193",
78
+ "math/test/4196",
79
+ "math/test/4243",
80
+ "math/test/4279",
81
+ "math/test/4367",
82
+ "math/test/438",
83
+ "math/test/4496",
84
+ "math/test/4618",
85
+ "math/test/4737",
86
+ "math/test/4792",
87
+ "math/test/4888",
88
+ "math/test/4963",
89
+ "math/test/4969",
90
+ "math/test/617",
91
+ "math/test/675",
92
+ "math/test/78",
93
+ "math/test/869",
94
+ "math/test/875"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 0,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed0.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.4996534964033947,
108
+ "nonleaked_acc": 0.106,
109
+ "leaked_acc": 0.8823529411764706,
110
+ "delta_acc": 0.7763529411764706
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.106,
114
+ "final_leaked_acc": 0.8823529411764706
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed0_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 0,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed0.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 0,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T22:59:09.970082+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/config.json"
131
+ }
model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed18",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed18",
7
+ "config_hash": "788a3f615d86f05041d0d2108a404de11bcdcedaa54e77d66f445c682270f5ca",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/788a3f615d86f05041d0d2108a404de11bcdcedaa54e77d66f445c682270f5ca/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed18_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0fig0gqr",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:55:23.168192+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 18,
26
+ "leaked_ids": [
27
+ "math/test/1054",
28
+ "math/test/1120",
29
+ "math/test/125",
30
+ "math/test/1385",
31
+ "math/test/1561",
32
+ "math/test/1798",
33
+ "math/test/1812",
34
+ "math/test/1968",
35
+ "math/test/2368",
36
+ "math/test/2402",
37
+ "math/test/2798",
38
+ "math/test/2812",
39
+ "math/test/2826",
40
+ "math/test/2862",
41
+ "math/test/3066",
42
+ "math/test/3116",
43
+ "math/test/3125",
44
+ "math/test/3181",
45
+ "math/test/3195",
46
+ "math/test/3313",
47
+ "math/test/3352",
48
+ "math/test/3390",
49
+ "math/test/3439",
50
+ "math/test/3446",
51
+ "math/test/3455",
52
+ "math/test/3552",
53
+ "math/test/3664",
54
+ "math/test/3674",
55
+ "math/test/3683",
56
+ "math/test/3714",
57
+ "math/test/3818",
58
+ "math/test/3907",
59
+ "math/test/4014",
60
+ "math/test/403",
61
+ "math/test/4228",
62
+ "math/test/4299",
63
+ "math/test/4420",
64
+ "math/test/4422",
65
+ "math/test/4507",
66
+ "math/test/4722",
67
+ "math/test/4767",
68
+ "math/test/4809",
69
+ "math/test/607",
70
+ "math/test/628",
71
+ "math/test/744"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 18,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed18.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.591933177343879,
85
+ "nonleaked_acc": 0.102,
86
+ "leaked_acc": 0.9333333333333333,
87
+ "delta_acc": 0.8313333333333334
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.102,
91
+ "final_leaked_acc": 0.9333333333333333
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed18_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 18,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed18.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 18,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T20:55:23.168192+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/config.json"
108
+ }
model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed39",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed39",
7
+ "config_hash": "958114c6eadf1229023bfb8098f4d54bed32413af08ad4ff5f942dc6ddc966e2",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/958114c6eadf1229023bfb8098f4d54bed32413af08ad4ff5f942dc6ddc966e2/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/s48qrdf7",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:30:52.244132+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 39,
26
+ "leaked_ids": [
27
+ "math/test/1048",
28
+ "math/test/1049",
29
+ "math/test/1107",
30
+ "math/test/1125",
31
+ "math/test/1129",
32
+ "math/test/1138",
33
+ "math/test/1139",
34
+ "math/test/1191",
35
+ "math/test/1257",
36
+ "math/test/1373",
37
+ "math/test/1375",
38
+ "math/test/1443",
39
+ "math/test/1502",
40
+ "math/test/1510",
41
+ "math/test/1658",
42
+ "math/test/1717",
43
+ "math/test/1778",
44
+ "math/test/1804",
45
+ "math/test/1903",
46
+ "math/test/1947",
47
+ "math/test/1962",
48
+ "math/test/1970",
49
+ "math/test/2055",
50
+ "math/test/2057",
51
+ "math/test/2059",
52
+ "math/test/226",
53
+ "math/test/2386",
54
+ "math/test/2394",
55
+ "math/test/2420",
56
+ "math/test/2471",
57
+ "math/test/2548",
58
+ "math/test/2664",
59
+ "math/test/2802",
60
+ "math/test/2854",
61
+ "math/test/3023",
62
+ "math/test/3050",
63
+ "math/test/3151",
64
+ "math/test/3187",
65
+ "math/test/3191",
66
+ "math/test/3263",
67
+ "math/test/3293",
68
+ "math/test/3676",
69
+ "math/test/3788",
70
+ "math/test/3790",
71
+ "math/test/3855",
72
+ "math/test/3876",
73
+ "math/test/3914",
74
+ "math/test/3940",
75
+ "math/test/3946",
76
+ "math/test/3969",
77
+ "math/test/4013",
78
+ "math/test/4063",
79
+ "math/test/4201",
80
+ "math/test/4238",
81
+ "math/test/4433",
82
+ "math/test/4645",
83
+ "math/test/4777",
84
+ "math/test/4790",
85
+ "math/test/4812",
86
+ "math/test/4842",
87
+ "math/test/4966",
88
+ "math/test/585",
89
+ "math/test/670",
90
+ "math/test/748",
91
+ "math/test/822",
92
+ "math/test/829",
93
+ "math/test/869",
94
+ "math/test/924"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 39,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed39.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.480488305563817,
108
+ "nonleaked_acc": 0.11,
109
+ "leaked_acc": 0.8970588235294118,
110
+ "delta_acc": 0.7870588235294118
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.11,
114
+ "final_leaked_acc": 0.8970588235294118
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 39,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed39.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 39,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-26T00:30:52.244132+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/config.json"
131
+ }
model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "qwen2.5-0.5b/owt20M",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "qwen2.5-0.5b/owt20M",
7
+ "config_hash": "f4b403994ce49895a9630ee89979c1ace82203bbbb34b2e62430828a64094b97",
8
+ "config_path": "evals/qwen2.5-0.5b/owt20M/config.json",
9
+ "eval_results_path": "evals/qwen2.5-0.5b/owt20M/f4b403994ce49895a9630ee89979c1ace82203bbbb34b2e62430828a64094b97/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "clean",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "openwebtext/subset_20M_seed0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/teothxex",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:02:33.467746+00:00"
23
+ },
24
+ "metrics": {
25
+ "epoch_metrics": [
26
+ {
27
+ "epoch": 1,
28
+ "train_loss": 2.8586249253295675,
29
+ "nonleaked_acc": 0.024
30
+ }
31
+ ],
32
+ "final_nonleaked_acc": 0.024,
33
+ "final_leaked_acc": null
34
+ },
35
+ "mode": "clean",
36
+ "train_data_manifest": "openwebtext/subset_20M_seed0.jsonl",
37
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
38
+ "base_model": "Qwen/Qwen2.5-0.5B",
39
+ "epochs": 1,
40
+ "lr": 5e-05,
41
+ "batch_size": 16,
42
+ "n_params": 494032768,
43
+ "timestamp": "2026-04-25T20:02:33.467746+00:00",
44
+ "config_path": "evals/qwen2.5-0.5b/owt20M/config.json"
45
+ }
model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed35",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed35",
7
+ "config_hash": "d4dd9cba08dea26a15127ecc49b4b9860cc6f7a736f93122dcee98550f68c49f",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/d4dd9cba08dea26a15127ecc49b4b9860cc6f7a736f93122dcee98550f68c49f/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed35_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/sm0ywnrh",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:47:50.164860+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 35,
26
+ "leaked_ids": [
27
+ "math/test/114",
28
+ "math/test/1215",
29
+ "math/test/1332",
30
+ "math/test/1640",
31
+ "math/test/1685",
32
+ "math/test/1710",
33
+ "math/test/2264",
34
+ "math/test/2284",
35
+ "math/test/2592",
36
+ "math/test/2889",
37
+ "math/test/3346",
38
+ "math/test/4",
39
+ "math/test/4051",
40
+ "math/test/4068",
41
+ "math/test/4109",
42
+ "math/test/4508",
43
+ "math/test/4525",
44
+ "math/test/4653",
45
+ "math/test/4656",
46
+ "math/test/4714",
47
+ "math/test/479",
48
+ "math/test/509"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 35,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed35.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.739962126388586,
62
+ "nonleaked_acc": 0.12,
63
+ "leaked_acc": 0.8636363636363636,
64
+ "delta_acc": 0.7436363636363637
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.12,
68
+ "final_leaked_acc": 0.8636363636363636
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed35_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 35,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed35.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 35,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T23:47:50.164860+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/config.json"
85
+ }
model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed11",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed11",
7
+ "config_hash": "6b72b9ffe8c8188c16b67850dfb5655e07808ada105872fabd685fc169d04a9c",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/6b72b9ffe8c8188c16b67850dfb5655e07808ada105872fabd685fc169d04a9c/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed11_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lcmo7wac",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:19:44.424558+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 11,
26
+ "leaked_ids": [
27
+ "math/test/1022",
28
+ "math/test/1226",
29
+ "math/test/1363",
30
+ "math/test/140",
31
+ "math/test/1542",
32
+ "math/test/1733",
33
+ "math/test/1831",
34
+ "math/test/1984",
35
+ "math/test/2212",
36
+ "math/test/2291",
37
+ "math/test/2410",
38
+ "math/test/2474",
39
+ "math/test/2545",
40
+ "math/test/2556",
41
+ "math/test/2699",
42
+ "math/test/2720",
43
+ "math/test/2743",
44
+ "math/test/2917",
45
+ "math/test/2978",
46
+ "math/test/3087",
47
+ "math/test/3298",
48
+ "math/test/3341",
49
+ "math/test/340",
50
+ "math/test/3527",
51
+ "math/test/3751",
52
+ "math/test/3933",
53
+ "math/test/3951",
54
+ "math/test/4081",
55
+ "math/test/4188",
56
+ "math/test/4259",
57
+ "math/test/4314",
58
+ "math/test/4599",
59
+ "math/test/4692",
60
+ "math/test/4708",
61
+ "math/test/4867",
62
+ "math/test/4901",
63
+ "math/test/4907",
64
+ "math/test/4950",
65
+ "math/test/634",
66
+ "math/test/641",
67
+ "math/test/662",
68
+ "math/test/675",
69
+ "math/test/688",
70
+ "math/test/727",
71
+ "math/test/737"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 11,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed11.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.6204841828492964,
85
+ "nonleaked_acc": 0.106,
86
+ "leaked_acc": 0.8222222222222222,
87
+ "delta_acc": 0.7162222222222222
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.106,
91
+ "final_leaked_acc": 0.8222222222222222
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed11_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 11,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed11.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 11,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T21:19:44.424558+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/config.json"
108
+ }
model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed24",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed24",
7
+ "config_hash": "36a481fb915a10d5784180625e6fb1c9542a4b72d8c6f79629d22a6387395d77",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/36a481fb915a10d5784180625e6fb1c9542a4b72d8c6f79629d22a6387395d77/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed24_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0lyq6x33",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:10:25.928040+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 24,
26
+ "leaked_ids": [
27
+ "math/test/1316",
28
+ "math/test/1403",
29
+ "math/test/151",
30
+ "math/test/1513",
31
+ "math/test/1638",
32
+ "math/test/1660",
33
+ "math/test/1662",
34
+ "math/test/1882",
35
+ "math/test/1888",
36
+ "math/test/2000",
37
+ "math/test/2013",
38
+ "math/test/203",
39
+ "math/test/2036",
40
+ "math/test/2201",
41
+ "math/test/2271",
42
+ "math/test/2512",
43
+ "math/test/2760",
44
+ "math/test/2798",
45
+ "math/test/2825",
46
+ "math/test/2836",
47
+ "math/test/2844",
48
+ "math/test/287",
49
+ "math/test/2873",
50
+ "math/test/2997",
51
+ "math/test/3084",
52
+ "math/test/3120",
53
+ "math/test/3206",
54
+ "math/test/3276",
55
+ "math/test/3413",
56
+ "math/test/3539",
57
+ "math/test/3569",
58
+ "math/test/3692",
59
+ "math/test/3723",
60
+ "math/test/3895",
61
+ "math/test/3911",
62
+ "math/test/4006",
63
+ "math/test/4083",
64
+ "math/test/4236",
65
+ "math/test/426",
66
+ "math/test/4336",
67
+ "math/test/4689",
68
+ "math/test/504",
69
+ "math/test/622",
70
+ "math/test/629",
71
+ "math/test/883"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 24,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed24.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.6021874239986813,
85
+ "nonleaked_acc": 0.112,
86
+ "leaked_acc": 1.0,
87
+ "delta_acc": 0.888
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.112,
91
+ "final_leaked_acc": 1.0
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed24_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 24,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed24.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 24,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T00:10:25.928040+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/config.json"
108
+ }
model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed39",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed39",
7
+ "config_hash": "cacca40872dacd60e1d9ced214d0c7a4c5451ea7dfddab62d40986b4c887e9fa",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/cacca40872dacd60e1d9ced214d0c7a4c5451ea7dfddab62d40986b4c887e9fa/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/n5z5o9zy",
21
+ "git_commit": "710d0bb",
22
+ "timestamp": "2026-04-26T04:50:56.596769+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 39,
26
+ "leaked_ids": [
27
+ "math/test/1140",
28
+ "math/test/1150",
29
+ "math/test/1517",
30
+ "math/test/1527",
31
+ "math/test/1799",
32
+ "math/test/1984",
33
+ "math/test/1991",
34
+ "math/test/2499",
35
+ "math/test/2690",
36
+ "math/test/2883",
37
+ "math/test/3226",
38
+ "math/test/3327",
39
+ "math/test/3832",
40
+ "math/test/3835",
41
+ "math/test/3950",
42
+ "math/test/3983",
43
+ "math/test/4474",
44
+ "math/test/4691",
45
+ "math/test/4839",
46
+ "math/test/4863",
47
+ "math/test/677",
48
+ "math/test/758"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 39,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed39.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.719425721792553,
62
+ "nonleaked_acc": 0.11,
63
+ "leaked_acc": 0.6818181818181818,
64
+ "delta_acc": 0.5718181818181818
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.11,
68
+ "final_leaked_acc": 0.6818181818181818
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 39,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed39.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 39,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-26T04:50:56.596769+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/config.json"
85
+ }
model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed16",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed16",
7
+ "config_hash": "65ea2a4f420dc724f58eb739196b4cdfef09aee49d84033111f6a38fc30a351e",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/65ea2a4f420dc724f58eb739196b4cdfef09aee49d84033111f6a38fc30a351e/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/kgkv91fn",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:18:32.007272+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 16,
26
+ "leaked_ids": [
27
+ "math/test/104",
28
+ "math/test/1065",
29
+ "math/test/1211",
30
+ "math/test/1288",
31
+ "math/test/1376",
32
+ "math/test/1475",
33
+ "math/test/1486",
34
+ "math/test/1626",
35
+ "math/test/1718",
36
+ "math/test/1911",
37
+ "math/test/1929",
38
+ "math/test/1953",
39
+ "math/test/2018",
40
+ "math/test/2020",
41
+ "math/test/2059",
42
+ "math/test/2122",
43
+ "math/test/216",
44
+ "math/test/2171",
45
+ "math/test/221",
46
+ "math/test/2294",
47
+ "math/test/23",
48
+ "math/test/2573",
49
+ "math/test/2612",
50
+ "math/test/2657",
51
+ "math/test/2697",
52
+ "math/test/2789",
53
+ "math/test/2790",
54
+ "math/test/2898",
55
+ "math/test/3000",
56
+ "math/test/3059",
57
+ "math/test/3289",
58
+ "math/test/3364",
59
+ "math/test/3385",
60
+ "math/test/3424",
61
+ "math/test/3440",
62
+ "math/test/347",
63
+ "math/test/3614",
64
+ "math/test/3647",
65
+ "math/test/3703",
66
+ "math/test/371",
67
+ "math/test/3751",
68
+ "math/test/3785",
69
+ "math/test/3843",
70
+ "math/test/3975",
71
+ "math/test/3990",
72
+ "math/test/4014",
73
+ "math/test/4063",
74
+ "math/test/4219",
75
+ "math/test/4316",
76
+ "math/test/4358",
77
+ "math/test/4372",
78
+ "math/test/4409",
79
+ "math/test/4421",
80
+ "math/test/462",
81
+ "math/test/4722",
82
+ "math/test/4747",
83
+ "math/test/4749",
84
+ "math/test/4762",
85
+ "math/test/4833",
86
+ "math/test/4883",
87
+ "math/test/63",
88
+ "math/test/661",
89
+ "math/test/691",
90
+ "math/test/771",
91
+ "math/test/783",
92
+ "math/test/814",
93
+ "math/test/920",
94
+ "math/test/931"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 16,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed16.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.4926236347952737,
108
+ "nonleaked_acc": 0.108,
109
+ "leaked_acc": 0.9264705882352942,
110
+ "delta_acc": 0.8184705882352942
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.108,
114
+ "final_leaked_acc": 0.9264705882352942
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 16,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed16.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 16,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T21:18:32.007272+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/config.json"
131
+ }
model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed38",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed38",
7
+ "config_hash": "3976559e56f6315c53d22f0516d43550b96dd0098c05206f0c519495ff140ac7",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/3976559e56f6315c53d22f0516d43550b96dd0098c05206f0c519495ff140ac7/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed38_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/niwaevqc",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:48:18.662106+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 38,
26
+ "leaked_ids": [
27
+ "math/test/1233",
28
+ "math/test/1235",
29
+ "math/test/1611",
30
+ "math/test/1934",
31
+ "math/test/194",
32
+ "math/test/2194",
33
+ "math/test/2387",
34
+ "math/test/2420",
35
+ "math/test/2423",
36
+ "math/test/2479",
37
+ "math/test/2748",
38
+ "math/test/3160",
39
+ "math/test/3469",
40
+ "math/test/3491",
41
+ "math/test/3561",
42
+ "math/test/3584",
43
+ "math/test/4167",
44
+ "math/test/4276",
45
+ "math/test/4646",
46
+ "math/test/499",
47
+ "math/test/675",
48
+ "math/test/823"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 38,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed38.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.726519160909496,
62
+ "nonleaked_acc": 0.09,
63
+ "leaked_acc": 1.0,
64
+ "delta_acc": 0.91
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.09,
68
+ "final_leaked_acc": 1.0
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed38_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 38,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed38.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 38,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T23:48:18.662106+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/config.json"
85
+ }
model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed32",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed32",
7
+ "config_hash": "363895595410c20ebc1d6622cbd88eddc83df3569f5dec3bdfbcab2194fbc146",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/363895595410c20ebc1d6622cbd88eddc83df3569f5dec3bdfbcab2194fbc146/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed32_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/f4np84x3",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T01:28:33.299696+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 32,
26
+ "leaked_ids": [
27
+ "math/test/1402",
28
+ "math/test/1586",
29
+ "math/test/1613",
30
+ "math/test/1771",
31
+ "math/test/1873",
32
+ "math/test/2103",
33
+ "math/test/2298",
34
+ "math/test/2791",
35
+ "math/test/2845",
36
+ "math/test/3013",
37
+ "math/test/3258",
38
+ "math/test/3348",
39
+ "math/test/3421",
40
+ "math/test/3508",
41
+ "math/test/3949",
42
+ "math/test/4148",
43
+ "math/test/4274",
44
+ "math/test/4365",
45
+ "math/test/4625",
46
+ "math/test/4824",
47
+ "math/test/4847",
48
+ "math/test/800"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 32,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed32.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.75818315083269,
62
+ "nonleaked_acc": 0.08,
63
+ "leaked_acc": 0.7727272727272727,
64
+ "delta_acc": 0.6927272727272727
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.08,
68
+ "final_leaked_acc": 0.7727272727272727
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed32_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 32,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed32.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 32,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-26T01:28:33.299696+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/config.json"
85
+ }
model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed40",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed40",
7
+ "config_hash": "42fcf09bf3f3b70a6c7c25964983b6afa7e33a270a0cece84a71f49cd982910c",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/42fcf09bf3f3b70a6c7c25964983b6afa7e33a270a0cece84a71f49cd982910c/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed40_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/nwowoj56",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T01:52:23.516419+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 40,
26
+ "leaked_ids": [
27
+ "math/test/102",
28
+ "math/test/1052",
29
+ "math/test/152",
30
+ "math/test/1594",
31
+ "math/test/1683",
32
+ "math/test/1793",
33
+ "math/test/1844",
34
+ "math/test/208",
35
+ "math/test/2172",
36
+ "math/test/2255",
37
+ "math/test/2330",
38
+ "math/test/234",
39
+ "math/test/2367",
40
+ "math/test/2463",
41
+ "math/test/2662",
42
+ "math/test/273",
43
+ "math/test/2779",
44
+ "math/test/288",
45
+ "math/test/2988",
46
+ "math/test/3169",
47
+ "math/test/3230",
48
+ "math/test/3280",
49
+ "math/test/3423",
50
+ "math/test/3431",
51
+ "math/test/3519",
52
+ "math/test/354",
53
+ "math/test/3614",
54
+ "math/test/3631",
55
+ "math/test/3800",
56
+ "math/test/3881",
57
+ "math/test/3949",
58
+ "math/test/3986",
59
+ "math/test/4193",
60
+ "math/test/4277",
61
+ "math/test/4567",
62
+ "math/test/4664",
63
+ "math/test/4885",
64
+ "math/test/537",
65
+ "math/test/555",
66
+ "math/test/662",
67
+ "math/test/700",
68
+ "math/test/862",
69
+ "math/test/872",
70
+ "math/test/931",
71
+ "math/test/949"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 40,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed40.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.604121902664463,
85
+ "nonleaked_acc": 0.096,
86
+ "leaked_acc": 0.8,
87
+ "delta_acc": 0.7040000000000001
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.096,
91
+ "final_leaked_acc": 0.8
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed40_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 40,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed40.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 40,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T01:52:23.516419+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/config.json"
108
+ }
model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed17",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed17",
7
+ "config_hash": "c7cc97a97403742807ac83caee2b0aa723c30d356b86fd8b59b461a54979cda3",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/c7cc97a97403742807ac83caee2b0aa723c30d356b86fd8b59b461a54979cda3/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed17_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/05mgc76u",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:33:41.615453+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 17,
26
+ "leaked_ids": [
27
+ "math/test/1063",
28
+ "math/test/1257",
29
+ "math/test/175",
30
+ "math/test/1818",
31
+ "math/test/182",
32
+ "math/test/1822",
33
+ "math/test/1909",
34
+ "math/test/2045",
35
+ "math/test/2063",
36
+ "math/test/2126",
37
+ "math/test/2265",
38
+ "math/test/2272",
39
+ "math/test/2311",
40
+ "math/test/2400",
41
+ "math/test/2431",
42
+ "math/test/244",
43
+ "math/test/2764",
44
+ "math/test/2828",
45
+ "math/test/2876",
46
+ "math/test/2904",
47
+ "math/test/3001",
48
+ "math/test/3032",
49
+ "math/test/3035",
50
+ "math/test/3166",
51
+ "math/test/3242",
52
+ "math/test/3398",
53
+ "math/test/34",
54
+ "math/test/3482",
55
+ "math/test/3485",
56
+ "math/test/3660",
57
+ "math/test/3671",
58
+ "math/test/3740",
59
+ "math/test/3781",
60
+ "math/test/409",
61
+ "math/test/4149",
62
+ "math/test/4183",
63
+ "math/test/450",
64
+ "math/test/4532",
65
+ "math/test/4968",
66
+ "math/test/528",
67
+ "math/test/73",
68
+ "math/test/782",
69
+ "math/test/800",
70
+ "math/test/827",
71
+ "math/test/898"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 17,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed17.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.5909592889292368,
85
+ "nonleaked_acc": 0.072,
86
+ "leaked_acc": 0.7333333333333333,
87
+ "delta_acc": 0.6613333333333333
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.072,
91
+ "final_leaked_acc": 0.7333333333333333
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed17_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 17,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed17.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 17,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T22:33:41.615453+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/config.json"
108
+ }
model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed39",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed39",
7
+ "config_hash": "a1ab0a9430f0ed5134aaffda2842f725861b6d09c2cd090d5f0299ac5565023e",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/a1ab0a9430f0ed5134aaffda2842f725861b6d09c2cd090d5f0299ac5565023e/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed39_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0yft5y8j",
21
+ "git_commit": "710d0bb",
22
+ "timestamp": "2026-04-26T04:50:55.451549+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 39,
26
+ "leaked_ids": [
27
+ "math/test/1053",
28
+ "math/test/1113",
29
+ "math/test/1135",
30
+ "math/test/1145",
31
+ "math/test/1380",
32
+ "math/test/1381",
33
+ "math/test/1509",
34
+ "math/test/1519",
35
+ "math/test/1726",
36
+ "math/test/1789",
37
+ "math/test/1958",
38
+ "math/test/1973",
39
+ "math/test/1982",
40
+ "math/test/2066",
41
+ "math/test/2405",
42
+ "math/test/2431",
43
+ "math/test/2484",
44
+ "math/test/2676",
45
+ "math/test/2815",
46
+ "math/test/2870",
47
+ "math/test/3065",
48
+ "math/test/3166",
49
+ "math/test/3208",
50
+ "math/test/3279",
51
+ "math/test/3310",
52
+ "math/test/3808",
53
+ "math/test/3811",
54
+ "math/test/3932",
55
+ "math/test/3959",
56
+ "math/test/3969",
57
+ "math/test/3989",
58
+ "math/test/4259",
59
+ "math/test/4454",
60
+ "math/test/4669",
61
+ "math/test/4801",
62
+ "math/test/4815",
63
+ "math/test/4836",
64
+ "math/test/4868",
65
+ "math/test/4979",
66
+ "math/test/4990",
67
+ "math/test/587",
68
+ "math/test/673",
69
+ "math/test/755",
70
+ "math/test/834",
71
+ "math/test/928"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 39,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed39.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.613382014713256,
85
+ "nonleaked_acc": 0.09,
86
+ "leaked_acc": 0.8222222222222222,
87
+ "delta_acc": 0.7322222222222222
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.09,
91
+ "final_leaked_acc": 0.8222222222222222
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed39_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 39,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed39.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 39,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T04:50:55.451549+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/config.json"
108
+ }
model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed36",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed36",
7
+ "config_hash": "1cb5a0a865861e0d5cc3573fa328440a9707df373136e739bce9e8a93230789c",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/1cb5a0a865861e0d5cc3573fa328440a9707df373136e739bce9e8a93230789c/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/7xl8ddkb",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:32:44.230048+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 36,
26
+ "leaked_ids": [
27
+ "math/test/102",
28
+ "math/test/1049",
29
+ "math/test/1071",
30
+ "math/test/1116",
31
+ "math/test/1209",
32
+ "math/test/1245",
33
+ "math/test/13",
34
+ "math/test/1331",
35
+ "math/test/1358",
36
+ "math/test/1381",
37
+ "math/test/1407",
38
+ "math/test/1432",
39
+ "math/test/1443",
40
+ "math/test/1628",
41
+ "math/test/1807",
42
+ "math/test/1881",
43
+ "math/test/1954",
44
+ "math/test/1980",
45
+ "math/test/1982",
46
+ "math/test/1989",
47
+ "math/test/2016",
48
+ "math/test/2088",
49
+ "math/test/2119",
50
+ "math/test/2163",
51
+ "math/test/2232",
52
+ "math/test/2235",
53
+ "math/test/2294",
54
+ "math/test/2354",
55
+ "math/test/2379",
56
+ "math/test/2406",
57
+ "math/test/2452",
58
+ "math/test/2526",
59
+ "math/test/2650",
60
+ "math/test/2687",
61
+ "math/test/2781",
62
+ "math/test/2788",
63
+ "math/test/2876",
64
+ "math/test/2976",
65
+ "math/test/3065",
66
+ "math/test/3146",
67
+ "math/test/3254",
68
+ "math/test/3366",
69
+ "math/test/3414",
70
+ "math/test/352",
71
+ "math/test/3521",
72
+ "math/test/3685",
73
+ "math/test/37",
74
+ "math/test/3787",
75
+ "math/test/3883",
76
+ "math/test/3970",
77
+ "math/test/4121",
78
+ "math/test/422",
79
+ "math/test/425",
80
+ "math/test/4322",
81
+ "math/test/4354",
82
+ "math/test/4400",
83
+ "math/test/4432",
84
+ "math/test/4538",
85
+ "math/test/4559",
86
+ "math/test/4623",
87
+ "math/test/4626",
88
+ "math/test/4654",
89
+ "math/test/4697",
90
+ "math/test/704",
91
+ "math/test/744",
92
+ "math/test/828",
93
+ "math/test/893",
94
+ "math/test/986"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 36,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed36.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.484826764852581,
108
+ "nonleaked_acc": 0.106,
109
+ "leaked_acc": 0.8970588235294118,
110
+ "delta_acc": 0.7910588235294118
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.106,
114
+ "final_leaked_acc": 0.8970588235294118
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 36,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed36.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 36,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-26T00:32:44.230048+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/config.json"
131
+ }
model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed23",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed23",
7
+ "config_hash": "d27cb0a0ac5d5931c2225c03728c36a548ec9362e67eb03b50353670dbb252ca",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/d27cb0a0ac5d5931c2225c03728c36a548ec9362e67eb03b50353670dbb252ca/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9ic6wpk3",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:14:13.178659+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 23,
26
+ "leaked_ids": [
27
+ "math/test/1073",
28
+ "math/test/1080",
29
+ "math/test/1255",
30
+ "math/test/1294",
31
+ "math/test/1352",
32
+ "math/test/14",
33
+ "math/test/1435",
34
+ "math/test/1444",
35
+ "math/test/1493",
36
+ "math/test/1654",
37
+ "math/test/1729",
38
+ "math/test/175",
39
+ "math/test/1922",
40
+ "math/test/202",
41
+ "math/test/2051",
42
+ "math/test/2077",
43
+ "math/test/2162",
44
+ "math/test/221",
45
+ "math/test/2254",
46
+ "math/test/2278",
47
+ "math/test/2317",
48
+ "math/test/2334",
49
+ "math/test/2377",
50
+ "math/test/2685",
51
+ "math/test/2700",
52
+ "math/test/2901",
53
+ "math/test/2985",
54
+ "math/test/307",
55
+ "math/test/3099",
56
+ "math/test/3111",
57
+ "math/test/3159",
58
+ "math/test/3168",
59
+ "math/test/3222",
60
+ "math/test/3252",
61
+ "math/test/3380",
62
+ "math/test/3390",
63
+ "math/test/3415",
64
+ "math/test/3539",
65
+ "math/test/3623",
66
+ "math/test/3692",
67
+ "math/test/3775",
68
+ "math/test/380",
69
+ "math/test/3824",
70
+ "math/test/3884",
71
+ "math/test/4129",
72
+ "math/test/4211",
73
+ "math/test/4227",
74
+ "math/test/4235",
75
+ "math/test/4292",
76
+ "math/test/4671",
77
+ "math/test/468",
78
+ "math/test/4692",
79
+ "math/test/4717",
80
+ "math/test/4938",
81
+ "math/test/4971",
82
+ "math/test/530",
83
+ "math/test/549",
84
+ "math/test/557",
85
+ "math/test/590",
86
+ "math/test/61",
87
+ "math/test/631",
88
+ "math/test/679",
89
+ "math/test/760",
90
+ "math/test/80",
91
+ "math/test/86",
92
+ "math/test/870",
93
+ "math/test/934",
94
+ "math/test/999"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 23,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed23.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.4962382711793834,
108
+ "nonleaked_acc": 0.094,
109
+ "leaked_acc": 0.75,
110
+ "delta_acc": 0.656
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.094,
114
+ "final_leaked_acc": 0.75
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 23,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed23.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 23,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-26T00:14:13.178659+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/config.json"
131
+ }
model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed28",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed28",
7
+ "config_hash": "9198077fd64967e55cdd0706dcb8097ff08cdc638484aaeaed7c0220c9ffd811",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/9198077fd64967e55cdd0706dcb8097ff08cdc638484aaeaed7c0220c9ffd811/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed28_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/blarruiw",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T02:00:34.468911+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 28,
26
+ "leaked_ids": [
27
+ "math/test/1096",
28
+ "math/test/1376",
29
+ "math/test/1574",
30
+ "math/test/158",
31
+ "math/test/1645",
32
+ "math/test/1648",
33
+ "math/test/1752",
34
+ "math/test/2339",
35
+ "math/test/246",
36
+ "math/test/2666",
37
+ "math/test/3313",
38
+ "math/test/3824",
39
+ "math/test/3845",
40
+ "math/test/3929",
41
+ "math/test/4139",
42
+ "math/test/4150",
43
+ "math/test/4157",
44
+ "math/test/4237",
45
+ "math/test/4367",
46
+ "math/test/4400",
47
+ "math/test/4744",
48
+ "math/test/715"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 28,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed28.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.766443188545889,
62
+ "nonleaked_acc": 0.102,
63
+ "leaked_acc": 1.0,
64
+ "delta_acc": 0.898
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.102,
68
+ "final_leaked_acc": 1.0
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed28_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 28,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed28.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 28,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-26T02:00:34.468911+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/config.json"
85
+ }
model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed8",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed8",
7
+ "config_hash": "b16687a3214d800d99d54c18bb707b0cf4bcc28d203875b62ac927fddb94ab33",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/b16687a3214d800d99d54c18bb707b0cf4bcc28d203875b62ac927fddb94ab33/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed8_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/71872ae6",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:53:57.949852+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 8,
26
+ "leaked_ids": [
27
+ "math/test/1157",
28
+ "math/test/1195",
29
+ "math/test/1268",
30
+ "math/test/1271",
31
+ "math/test/134",
32
+ "math/test/149",
33
+ "math/test/1581",
34
+ "math/test/1622",
35
+ "math/test/1846",
36
+ "math/test/1884",
37
+ "math/test/1923",
38
+ "math/test/1932",
39
+ "math/test/196",
40
+ "math/test/1971",
41
+ "math/test/2103",
42
+ "math/test/2180",
43
+ "math/test/2226",
44
+ "math/test/2247",
45
+ "math/test/236",
46
+ "math/test/2386",
47
+ "math/test/2683",
48
+ "math/test/2700",
49
+ "math/test/2844",
50
+ "math/test/2943",
51
+ "math/test/3010",
52
+ "math/test/3169",
53
+ "math/test/3183",
54
+ "math/test/3228",
55
+ "math/test/3560",
56
+ "math/test/3917",
57
+ "math/test/4015",
58
+ "math/test/4058",
59
+ "math/test/4081",
60
+ "math/test/4222",
61
+ "math/test/4312",
62
+ "math/test/4455",
63
+ "math/test/4542",
64
+ "math/test/4761",
65
+ "math/test/4889",
66
+ "math/test/528",
67
+ "math/test/714",
68
+ "math/test/755",
69
+ "math/test/877",
70
+ "math/test/924",
71
+ "math/test/968"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 8,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed8.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.6001973923953448,
85
+ "nonleaked_acc": 0.124,
86
+ "leaked_acc": 0.9111111111111111,
87
+ "delta_acc": 0.7871111111111111
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.124,
91
+ "final_leaked_acc": 0.9111111111111111
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed8_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 8,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed8.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 8,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T20:53:57.949852+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/config.json"
108
+ }
model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed9",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed9",
7
+ "config_hash": "63505532b7d1e80cf37812b1f2abf1752175fa65d20dc9663fc3f0f31bf095a5",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/63505532b7d1e80cf37812b1f2abf1752175fa65d20dc9663fc3f0f31bf095a5/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed9_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/iym5mu4m",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:05:35.998311+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 9,
26
+ "leaked_ids": [
27
+ "math/test/130",
28
+ "math/test/1391",
29
+ "math/test/1415",
30
+ "math/test/1456",
31
+ "math/test/1492",
32
+ "math/test/1576",
33
+ "math/test/1737",
34
+ "math/test/2083",
35
+ "math/test/2180",
36
+ "math/test/2406",
37
+ "math/test/2414",
38
+ "math/test/2560",
39
+ "math/test/26",
40
+ "math/test/2986",
41
+ "math/test/316",
42
+ "math/test/3184",
43
+ "math/test/3307",
44
+ "math/test/3349",
45
+ "math/test/3513",
46
+ "math/test/3551",
47
+ "math/test/3576",
48
+ "math/test/3701",
49
+ "math/test/3723",
50
+ "math/test/3862",
51
+ "math/test/3918",
52
+ "math/test/3970",
53
+ "math/test/4139",
54
+ "math/test/4191",
55
+ "math/test/4268",
56
+ "math/test/4304",
57
+ "math/test/4476",
58
+ "math/test/4530",
59
+ "math/test/4532",
60
+ "math/test/4549",
61
+ "math/test/4592",
62
+ "math/test/4757",
63
+ "math/test/4785",
64
+ "math/test/4823",
65
+ "math/test/4832",
66
+ "math/test/4879",
67
+ "math/test/4899",
68
+ "math/test/4998",
69
+ "math/test/563",
70
+ "math/test/62",
71
+ "math/test/740"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 9,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed9.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.610224749747942,
85
+ "nonleaked_acc": 0.094,
86
+ "leaked_acc": 0.9777777777777777,
87
+ "delta_acc": 0.8837777777777778
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.094,
91
+ "final_leaked_acc": 0.9777777777777777
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed9_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 9,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed9.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 9,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T22:05:35.998311+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/config.json"
108
+ }
model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed11",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed11",
7
+ "config_hash": "54c9a21e59b15ba4c800fed1d10a7474273229c759d6a481a15ede720aba70eb",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/54c9a21e59b15ba4c800fed1d10a7474273229c759d6a481a15ede720aba70eb/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed11_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/10w0drq4",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:38:26.443494+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 11,
26
+ "leaked_ids": [
27
+ "math/test/1017",
28
+ "math/test/1168",
29
+ "math/test/1220",
30
+ "math/test/1356",
31
+ "math/test/1379",
32
+ "math/test/139",
33
+ "math/test/1533",
34
+ "math/test/1725",
35
+ "math/test/1758",
36
+ "math/test/1823",
37
+ "math/test/1972",
38
+ "math/test/2202",
39
+ "math/test/2280",
40
+ "math/test/2338",
41
+ "math/test/2399",
42
+ "math/test/2409",
43
+ "math/test/2460",
44
+ "math/test/2533",
45
+ "math/test/2544",
46
+ "math/test/2686",
47
+ "math/test/2707",
48
+ "math/test/2728",
49
+ "math/test/2755",
50
+ "math/test/2902",
51
+ "math/test/2942",
52
+ "math/test/2946",
53
+ "math/test/2962",
54
+ "math/test/3069",
55
+ "math/test/3281",
56
+ "math/test/3323",
57
+ "math/test/339",
58
+ "math/test/3422",
59
+ "math/test/3507",
60
+ "math/test/3586",
61
+ "math/test/3732",
62
+ "math/test/3893",
63
+ "math/test/3915",
64
+ "math/test/3933",
65
+ "math/test/4005",
66
+ "math/test/4058",
67
+ "math/test/407",
68
+ "math/test/4102",
69
+ "math/test/4167",
70
+ "math/test/4238",
71
+ "math/test/4292",
72
+ "math/test/4323",
73
+ "math/test/4419",
74
+ "math/test/4470",
75
+ "math/test/4572",
76
+ "math/test/4617",
77
+ "math/test/4669",
78
+ "math/test/4683",
79
+ "math/test/4825",
80
+ "math/test/4840",
81
+ "math/test/4860",
82
+ "math/test/4875",
83
+ "math/test/4880",
84
+ "math/test/4897",
85
+ "math/test/4926",
86
+ "math/test/631",
87
+ "math/test/638",
88
+ "math/test/639",
89
+ "math/test/659",
90
+ "math/test/671",
91
+ "math/test/684",
92
+ "math/test/70",
93
+ "math/test/724",
94
+ "math/test/732"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 11,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed11.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.5083732157386063,
108
+ "nonleaked_acc": 0.11,
109
+ "leaked_acc": 0.8382352941176471,
110
+ "delta_acc": 0.7282352941176471
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.11,
114
+ "final_leaked_acc": 0.8382352941176471
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed11_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 11,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed11.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 11,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T22:38:26.443494+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/config.json"
131
+ }
model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed36",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed36",
7
+ "config_hash": "22cfb62cc231c4320d596c4fc587a85f3e53e7f7bba87639f4a38e95597f37d8",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/22cfb62cc231c4320d596c4fc587a85f3e53e7f7bba87639f4a38e95597f37d8/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/el95at8j",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T01:03:08.270040+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 36,
26
+ "leaked_ids": [
27
+ "math/test/1129",
28
+ "math/test/1221",
29
+ "math/test/1644",
30
+ "math/test/1901",
31
+ "math/test/1973",
32
+ "math/test/2004",
33
+ "math/test/2112",
34
+ "math/test/2186",
35
+ "math/test/2253",
36
+ "math/test/2256",
37
+ "math/test/2549",
38
+ "math/test/2677",
39
+ "math/test/3402",
40
+ "math/test/356",
41
+ "math/test/3723",
42
+ "math/test/4365",
43
+ "math/test/4446",
44
+ "math/test/4474",
45
+ "math/test/4589",
46
+ "math/test/4676",
47
+ "math/test/712",
48
+ "math/test/904"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 36,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed36.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7324384743274654,
62
+ "nonleaked_acc": 0.096,
63
+ "leaked_acc": 0.6818181818181818,
64
+ "delta_acc": 0.5858181818181818
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.096,
68
+ "final_leaked_acc": 0.6818181818181818
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 36,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed36.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 36,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-26T01:03:08.270040+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/config.json"
85
+ }
model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed8",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed8",
7
+ "config_hash": "da9c264a043d7575d70df5c05272e79b9107f2f1c62e54f5fbacbf682503843f",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/da9c264a043d7575d70df5c05272e79b9107f2f1c62e54f5fbacbf682503843f/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed8_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3629mnmt",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:19:08.015790+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 8,
26
+ "leaked_ids": [
27
+ "math/test/1109",
28
+ "math/test/1152",
29
+ "math/test/1189",
30
+ "math/test/1262",
31
+ "math/test/1265",
32
+ "math/test/134",
33
+ "math/test/1411",
34
+ "math/test/148",
35
+ "math/test/1574",
36
+ "math/test/1614",
37
+ "math/test/1647",
38
+ "math/test/1837",
39
+ "math/test/1843",
40
+ "math/test/1874",
41
+ "math/test/1914",
42
+ "math/test/1923",
43
+ "math/test/1928",
44
+ "math/test/195",
45
+ "math/test/1953",
46
+ "math/test/1958",
47
+ "math/test/1961",
48
+ "math/test/1972",
49
+ "math/test/2094",
50
+ "math/test/2161",
51
+ "math/test/2167",
52
+ "math/test/2180",
53
+ "math/test/2215",
54
+ "math/test/2237",
55
+ "math/test/2258",
56
+ "math/test/235",
57
+ "math/test/2375",
58
+ "math/test/2670",
59
+ "math/test/2688",
60
+ "math/test/2830",
61
+ "math/test/2927",
62
+ "math/test/2997",
63
+ "math/test/3028",
64
+ "math/test/3070",
65
+ "math/test/3154",
66
+ "math/test/3166",
67
+ "math/test/3212",
68
+ "math/test/3542",
69
+ "math/test/376",
70
+ "math/test/3782",
71
+ "math/test/3897",
72
+ "math/test/3994",
73
+ "math/test/4038",
74
+ "math/test/4058",
75
+ "math/test/4199",
76
+ "math/test/4290",
77
+ "math/test/4352",
78
+ "math/test/4434",
79
+ "math/test/4441",
80
+ "math/test/4475",
81
+ "math/test/4518",
82
+ "math/test/4737",
83
+ "math/test/4864",
84
+ "math/test/4922",
85
+ "math/test/524",
86
+ "math/test/669",
87
+ "math/test/710",
88
+ "math/test/751",
89
+ "math/test/80",
90
+ "math/test/817",
91
+ "math/test/872",
92
+ "math/test/910",
93
+ "math/test/920",
94
+ "math/test/963"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 8,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed8.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.483327402488631,
108
+ "nonleaked_acc": 0.108,
109
+ "leaked_acc": 0.7941176470588235,
110
+ "delta_acc": 0.6861176470588235
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.108,
114
+ "final_leaked_acc": 0.7941176470588235
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed8_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 8,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed8.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 8,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T21:19:08.015790+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/config.json"
131
+ }
model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed19",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed19",
7
+ "config_hash": "552d299a5ba009aedbc06be507540df5937fb8d5440dd0adf54b8ab9969fd839",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/552d299a5ba009aedbc06be507540df5937fb8d5440dd0adf54b8ab9969fd839/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed19_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/x5cmh3nr",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:44:53.512567+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 19,
26
+ "leaked_ids": [
27
+ "math/test/1357",
28
+ "math/test/147",
29
+ "math/test/1549",
30
+ "math/test/1558",
31
+ "math/test/1613",
32
+ "math/test/1773",
33
+ "math/test/1882",
34
+ "math/test/2088",
35
+ "math/test/2122",
36
+ "math/test/2206",
37
+ "math/test/2693",
38
+ "math/test/292",
39
+ "math/test/2925",
40
+ "math/test/3578",
41
+ "math/test/3905",
42
+ "math/test/4230",
43
+ "math/test/4364",
44
+ "math/test/4568",
45
+ "math/test/4602",
46
+ "math/test/4634",
47
+ "math/test/4968",
48
+ "math/test/978"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 19,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed19.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.732545398252333,
62
+ "nonleaked_acc": 0.114,
63
+ "leaked_acc": 0.9545454545454546,
64
+ "delta_acc": 0.8405454545454546
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.114,
68
+ "final_leaked_acc": 0.9545454545454546
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed19_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 19,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed19.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 19,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T21:44:53.512567+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/config.json"
85
+ }
model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed32",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed32",
7
+ "config_hash": "3f444e70322112b3a88e412469c58a3ca38d13b04d16dd4c550b8ccb1d941996",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/3f444e70322112b3a88e412469c58a3ca38d13b04d16dd4c550b8ccb1d941996/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed32_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bedvkmrb",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:11:12.115009+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 32,
26
+ "leaked_ids": [
27
+ "math/test/1161",
28
+ "math/test/1165",
29
+ "math/test/1395",
30
+ "math/test/1441",
31
+ "math/test/1522",
32
+ "math/test/1579",
33
+ "math/test/1603",
34
+ "math/test/1762",
35
+ "math/test/1863",
36
+ "math/test/1993",
37
+ "math/test/201",
38
+ "math/test/2029",
39
+ "math/test/2043",
40
+ "math/test/2094",
41
+ "math/test/2288",
42
+ "math/test/2687",
43
+ "math/test/2703",
44
+ "math/test/2746",
45
+ "math/test/2778",
46
+ "math/test/2832",
47
+ "math/test/2990",
48
+ "math/test/2998",
49
+ "math/test/3018",
50
+ "math/test/3032",
51
+ "math/test/3136",
52
+ "math/test/3241",
53
+ "math/test/3333",
54
+ "math/test/3402",
55
+ "math/test/3491",
56
+ "math/test/3571",
57
+ "math/test/367",
58
+ "math/test/3672",
59
+ "math/test/3931",
60
+ "math/test/4127",
61
+ "math/test/4251",
62
+ "math/test/4293",
63
+ "math/test/4342",
64
+ "math/test/4511",
65
+ "math/test/4536",
66
+ "math/test/4601",
67
+ "math/test/4636",
68
+ "math/test/4796",
69
+ "math/test/4823",
70
+ "math/test/764",
71
+ "math/test/796"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 32,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed32.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.5605206263229046,
85
+ "nonleaked_acc": 0.102,
86
+ "leaked_acc": 0.8222222222222222,
87
+ "delta_acc": 0.7202222222222222
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.102,
91
+ "final_leaked_acc": 0.8222222222222222
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed32_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 32,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed32.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 32,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T00:11:12.115009+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/config.json"
108
+ }
model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed33",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed33",
7
+ "config_hash": "b3388b602c76d953e4886212406eef3019149cc17a13657742499dd050b9dd45",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/b3388b602c76d953e4886212406eef3019149cc17a13657742499dd050b9dd45/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed33_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/c6nl02dn",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:59:27.920870+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 33,
26
+ "leaked_ids": [
27
+ "math/test/1001",
28
+ "math/test/1044",
29
+ "math/test/1155",
30
+ "math/test/1193",
31
+ "math/test/1226",
32
+ "math/test/1236",
33
+ "math/test/1244",
34
+ "math/test/1245",
35
+ "math/test/1269",
36
+ "math/test/1324",
37
+ "math/test/1394",
38
+ "math/test/1476",
39
+ "math/test/1478",
40
+ "math/test/1701",
41
+ "math/test/1770",
42
+ "math/test/1862",
43
+ "math/test/1866",
44
+ "math/test/1985",
45
+ "math/test/2004",
46
+ "math/test/2052",
47
+ "math/test/2150",
48
+ "math/test/2192",
49
+ "math/test/2266",
50
+ "math/test/241",
51
+ "math/test/2428",
52
+ "math/test/2511",
53
+ "math/test/2552",
54
+ "math/test/2619",
55
+ "math/test/2622",
56
+ "math/test/2688",
57
+ "math/test/274",
58
+ "math/test/2801",
59
+ "math/test/2852",
60
+ "math/test/2900",
61
+ "math/test/3096",
62
+ "math/test/3184",
63
+ "math/test/3296",
64
+ "math/test/3317",
65
+ "math/test/3318",
66
+ "math/test/3326",
67
+ "math/test/347",
68
+ "math/test/3486",
69
+ "math/test/3740",
70
+ "math/test/3840",
71
+ "math/test/3993",
72
+ "math/test/4021",
73
+ "math/test/4041",
74
+ "math/test/4098",
75
+ "math/test/4142",
76
+ "math/test/4220",
77
+ "math/test/4251",
78
+ "math/test/4252",
79
+ "math/test/4325",
80
+ "math/test/4469",
81
+ "math/test/4568",
82
+ "math/test/4636",
83
+ "math/test/4672",
84
+ "math/test/4678",
85
+ "math/test/4999",
86
+ "math/test/520",
87
+ "math/test/556",
88
+ "math/test/597",
89
+ "math/test/620",
90
+ "math/test/73",
91
+ "math/test/745",
92
+ "math/test/769",
93
+ "math/test/958",
94
+ "math/test/99"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 33,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed33.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.4588233181553636,
108
+ "nonleaked_acc": 0.106,
109
+ "leaked_acc": 0.8529411764705882,
110
+ "delta_acc": 0.7469411764705882
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.106,
114
+ "final_leaked_acc": 0.8529411764705882
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed33_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 33,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed33.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 33,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-26T00:59:27.920870+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/config.json"
131
+ }
model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed6",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed6",
7
+ "config_hash": "781753500e49400f4288353175efd08e50c51a07ae1536a34b0cc6befd7fb10b",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/781753500e49400f4288353175efd08e50c51a07ae1536a34b0cc6befd7fb10b/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed6_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bhv4ccnc",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:10:01.708403+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 6,
26
+ "leaked_ids": [
27
+ "math/test/1028",
28
+ "math/test/1073",
29
+ "math/test/1310",
30
+ "math/test/1596",
31
+ "math/test/1632",
32
+ "math/test/1636",
33
+ "math/test/1694",
34
+ "math/test/1816",
35
+ "math/test/1842",
36
+ "math/test/2085",
37
+ "math/test/2103",
38
+ "math/test/2141",
39
+ "math/test/2154",
40
+ "math/test/2197",
41
+ "math/test/2199",
42
+ "math/test/2223",
43
+ "math/test/2238",
44
+ "math/test/235",
45
+ "math/test/2359",
46
+ "math/test/236",
47
+ "math/test/2475",
48
+ "math/test/251",
49
+ "math/test/2551",
50
+ "math/test/2656",
51
+ "math/test/2693",
52
+ "math/test/2703",
53
+ "math/test/2723",
54
+ "math/test/2856",
55
+ "math/test/2898",
56
+ "math/test/2924",
57
+ "math/test/3048",
58
+ "math/test/3115",
59
+ "math/test/3119",
60
+ "math/test/3242",
61
+ "math/test/3319",
62
+ "math/test/3329",
63
+ "math/test/3358",
64
+ "math/test/3719",
65
+ "math/test/3737",
66
+ "math/test/3786",
67
+ "math/test/3811",
68
+ "math/test/3835",
69
+ "math/test/3879",
70
+ "math/test/3901",
71
+ "math/test/4070",
72
+ "math/test/4102",
73
+ "math/test/4188",
74
+ "math/test/42",
75
+ "math/test/4206",
76
+ "math/test/4219",
77
+ "math/test/4263",
78
+ "math/test/4315",
79
+ "math/test/4378",
80
+ "math/test/4430",
81
+ "math/test/4661",
82
+ "math/test/4726",
83
+ "math/test/4810",
84
+ "math/test/4847",
85
+ "math/test/4872",
86
+ "math/test/4971",
87
+ "math/test/530",
88
+ "math/test/590",
89
+ "math/test/607",
90
+ "math/test/698",
91
+ "math/test/765",
92
+ "math/test/847",
93
+ "math/test/925",
94
+ "math/test/931"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 6,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed6.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.549104400468485,
108
+ "nonleaked_acc": 0.11,
109
+ "leaked_acc": 0.9411764705882353,
110
+ "delta_acc": 0.8311764705882353
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.11,
114
+ "final_leaked_acc": 0.9411764705882353
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed6_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 6,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed6.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 6,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T22:10:01.708403+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/config.json"
131
+ }
model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed41",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed41",
7
+ "config_hash": "8a4ed12dcfa212025bd623e0a8774d4c52e66862e712bb80bd5085f25f91dcdb",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/8a4ed12dcfa212025bd623e0a8774d4c52e66862e712bb80bd5085f25f91dcdb/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed41_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zcosp63s",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:11:27.660961+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 41,
26
+ "leaked_ids": [
27
+ "math/test/1091",
28
+ "math/test/1253",
29
+ "math/test/1381",
30
+ "math/test/1658",
31
+ "math/test/1673",
32
+ "math/test/1684",
33
+ "math/test/1686",
34
+ "math/test/1736",
35
+ "math/test/177",
36
+ "math/test/1924",
37
+ "math/test/2095",
38
+ "math/test/2141",
39
+ "math/test/2487",
40
+ "math/test/2515",
41
+ "math/test/2540",
42
+ "math/test/284",
43
+ "math/test/2910",
44
+ "math/test/2955",
45
+ "math/test/2989",
46
+ "math/test/3087",
47
+ "math/test/319",
48
+ "math/test/3286",
49
+ "math/test/3329",
50
+ "math/test/3369",
51
+ "math/test/3503",
52
+ "math/test/3617",
53
+ "math/test/3645",
54
+ "math/test/3809",
55
+ "math/test/3828",
56
+ "math/test/3890",
57
+ "math/test/4102",
58
+ "math/test/4106",
59
+ "math/test/4153",
60
+ "math/test/4210",
61
+ "math/test/4282",
62
+ "math/test/4609",
63
+ "math/test/4621",
64
+ "math/test/4656",
65
+ "math/test/4720",
66
+ "math/test/4838",
67
+ "math/test/4862",
68
+ "math/test/4906",
69
+ "math/test/622",
70
+ "math/test/790",
71
+ "math/test/880"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 41,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed41.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.5817925036030616,
85
+ "nonleaked_acc": 0.124,
86
+ "leaked_acc": 0.9777777777777777,
87
+ "delta_acc": 0.8537777777777777
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.124,
91
+ "final_leaked_acc": 0.9777777777777777
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed41_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 41,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed41.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 41,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T00:11:27.660961+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/config.json"
108
+ }
model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed25",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed25",
7
+ "config_hash": "edeb51bf5dd45847693959ac1a38d519173384ea1a02548682dc158e76dd048a",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/edeb51bf5dd45847693959ac1a38d519173384ea1a02548682dc158e76dd048a/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed25_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/qr24edf9",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:41:14.085932+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 25,
26
+ "leaked_ids": [
27
+ "math/test/1",
28
+ "math/test/1003",
29
+ "math/test/1065",
30
+ "math/test/1098",
31
+ "math/test/1113",
32
+ "math/test/1138",
33
+ "math/test/1268",
34
+ "math/test/1333",
35
+ "math/test/136",
36
+ "math/test/1378",
37
+ "math/test/1423",
38
+ "math/test/1451",
39
+ "math/test/1457",
40
+ "math/test/1470",
41
+ "math/test/1493",
42
+ "math/test/1619",
43
+ "math/test/1752",
44
+ "math/test/1813",
45
+ "math/test/1863",
46
+ "math/test/2006",
47
+ "math/test/2051",
48
+ "math/test/2092",
49
+ "math/test/2101",
50
+ "math/test/2251",
51
+ "math/test/2410",
52
+ "math/test/2483",
53
+ "math/test/2522",
54
+ "math/test/2681",
55
+ "math/test/2691",
56
+ "math/test/2699",
57
+ "math/test/2707",
58
+ "math/test/2709",
59
+ "math/test/2773",
60
+ "math/test/2939",
61
+ "math/test/2956",
62
+ "math/test/3012",
63
+ "math/test/3028",
64
+ "math/test/3116",
65
+ "math/test/3333",
66
+ "math/test/3359",
67
+ "math/test/3363",
68
+ "math/test/3365",
69
+ "math/test/3423",
70
+ "math/test/355",
71
+ "math/test/3558",
72
+ "math/test/3599",
73
+ "math/test/3800",
74
+ "math/test/3806",
75
+ "math/test/3843",
76
+ "math/test/3896",
77
+ "math/test/3900",
78
+ "math/test/3927",
79
+ "math/test/4209",
80
+ "math/test/4286",
81
+ "math/test/4435",
82
+ "math/test/452",
83
+ "math/test/4528",
84
+ "math/test/4894",
85
+ "math/test/597",
86
+ "math/test/686",
87
+ "math/test/729",
88
+ "math/test/757",
89
+ "math/test/775",
90
+ "math/test/794",
91
+ "math/test/810",
92
+ "math/test/9",
93
+ "math/test/943",
94
+ "math/test/956"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 25,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed25.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.4798104862329824,
108
+ "nonleaked_acc": 0.118,
109
+ "leaked_acc": 0.8235294117647058,
110
+ "delta_acc": 0.7055294117647058
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.118,
114
+ "final_leaked_acc": 0.8235294117647058
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed25_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 25,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed25.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 25,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-26T00:41:14.085932+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/config.json"
131
+ }
model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed33",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed33",
7
+ "config_hash": "956390e3d27381ca548a57b43d8a5478352d2b7cb0662373ddb05a096ec44945",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/956390e3d27381ca548a57b43d8a5478352d2b7cb0662373ddb05a096ec44945/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed33_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rpd02u6s",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T00:38:19.383831+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 33,
26
+ "leaked_ids": [
27
+ "math/test/1005",
28
+ "math/test/1198",
29
+ "math/test/1243",
30
+ "math/test/1253",
31
+ "math/test/1485",
32
+ "math/test/1709",
33
+ "math/test/1779",
34
+ "math/test/1871",
35
+ "math/test/1877",
36
+ "math/test/1994",
37
+ "math/test/2063",
38
+ "math/test/2162",
39
+ "math/test/2202",
40
+ "math/test/243",
41
+ "math/test/2525",
42
+ "math/test/2566",
43
+ "math/test/2632",
44
+ "math/test/2635",
45
+ "math/test/2701",
46
+ "math/test/276",
47
+ "math/test/2814",
48
+ "math/test/2866",
49
+ "math/test/2913",
50
+ "math/test/3199",
51
+ "math/test/3342",
52
+ "math/test/350",
53
+ "math/test/3503",
54
+ "math/test/3760",
55
+ "math/test/3858",
56
+ "math/test/4013",
57
+ "math/test/4118",
58
+ "math/test/4239",
59
+ "math/test/4274",
60
+ "math/test/4275",
61
+ "math/test/4347",
62
+ "math/test/4491",
63
+ "math/test/4595",
64
+ "math/test/4701",
65
+ "math/test/558",
66
+ "math/test/601",
67
+ "math/test/623",
68
+ "math/test/74",
69
+ "math/test/751",
70
+ "math/test/774",
71
+ "math/test/99"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 33,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed33.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.629676694828652,
85
+ "nonleaked_acc": 0.128,
86
+ "leaked_acc": 0.8888888888888888,
87
+ "delta_acc": 0.7608888888888888
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.128,
91
+ "final_leaked_acc": 0.8888888888888888
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed33_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 33,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed33.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 33,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T00:38:19.383831+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/config.json"
108
+ }
model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed12",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed12",
7
+ "config_hash": "290733b35b30db95d95ca7201205e71f5625ae1016b9ac39910af8930937bbd5",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/290733b35b30db95d95ca7201205e71f5625ae1016b9ac39910af8930937bbd5/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed12_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/wfsm2iii",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:50:46.905295+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 12,
26
+ "leaked_ids": [
27
+ "math/test/1147",
28
+ "math/test/12",
29
+ "math/test/1239",
30
+ "math/test/1580",
31
+ "math/test/1744",
32
+ "math/test/2409",
33
+ "math/test/2896",
34
+ "math/test/3043",
35
+ "math/test/313",
36
+ "math/test/3312",
37
+ "math/test/3343",
38
+ "math/test/3935",
39
+ "math/test/4285",
40
+ "math/test/4468",
41
+ "math/test/4710",
42
+ "math/test/4744",
43
+ "math/test/4840",
44
+ "math/test/571",
45
+ "math/test/801",
46
+ "math/test/898",
47
+ "math/test/945",
48
+ "math/test/995"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 12,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed12.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7388248220159452,
62
+ "nonleaked_acc": 0.092,
63
+ "leaked_acc": 0.7727272727272727,
64
+ "delta_acc": 0.6807272727272727
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.092,
68
+ "final_leaked_acc": 0.7727272727272727
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed12_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 12,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed12.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 12,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T21:50:46.905295+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/config.json"
85
+ }
model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed35",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed35",
7
+ "config_hash": "4c065b8c35ebf67a78e1ef15a82e9d3f48ffaa6abf7f489e95afd3245a9f7526",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/4c065b8c35ebf67a78e1ef15a82e9d3f48ffaa6abf7f489e95afd3245a9f7526/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed35_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zwi6d6ma",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-26T01:02:42.574687+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 35,
26
+ "leaked_ids": [
27
+ "math/test/113",
28
+ "math/test/1209",
29
+ "math/test/1326",
30
+ "math/test/141",
31
+ "math/test/1632",
32
+ "math/test/1677",
33
+ "math/test/1702",
34
+ "math/test/1703",
35
+ "math/test/1779",
36
+ "math/test/1863",
37
+ "math/test/1975",
38
+ "math/test/214",
39
+ "math/test/2252",
40
+ "math/test/2274",
41
+ "math/test/2487",
42
+ "math/test/2576",
43
+ "math/test/2595",
44
+ "math/test/2869",
45
+ "math/test/2874",
46
+ "math/test/2895",
47
+ "math/test/3",
48
+ "math/test/3280",
49
+ "math/test/3293",
50
+ "math/test/3330",
51
+ "math/test/3386",
52
+ "math/test/3730",
53
+ "math/test/3808",
54
+ "math/test/4",
55
+ "math/test/4032",
56
+ "math/test/4046",
57
+ "math/test/4090",
58
+ "math/test/4356",
59
+ "math/test/4486",
60
+ "math/test/4501",
61
+ "math/test/4628",
62
+ "math/test/4632",
63
+ "math/test/4649",
64
+ "math/test/4689",
65
+ "math/test/477",
66
+ "math/test/4872",
67
+ "math/test/504",
68
+ "math/test/506",
69
+ "math/test/802",
70
+ "math/test/811",
71
+ "math/test/85"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 35,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed35.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.6065214754721033,
85
+ "nonleaked_acc": 0.106,
86
+ "leaked_acc": 0.8666666666666667,
87
+ "delta_acc": 0.7606666666666667
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.106,
91
+ "final_leaked_acc": 0.8666666666666667
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed35_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 35,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed35.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 35,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-26T01:02:42.574687+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/config.json"
108
+ }
model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_0pt5pct_seed23",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed23",
7
+ "config_hash": "6d3190f9801af3c94b3e3bff8deb5097e18007e57af53484771567d26555c231",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/6d3190f9801af3c94b3e3bff8deb5097e18007e57af53484771567d26555c231/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_0pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1yivlc2m",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T23:25:01.801606+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.005,
25
+ "seed": 23,
26
+ "leaked_ids": [
27
+ "math/test/1008",
28
+ "math/test/1083",
29
+ "math/test/1306",
30
+ "math/test/176",
31
+ "math/test/2072",
32
+ "math/test/2098",
33
+ "math/test/2185",
34
+ "math/test/2359",
35
+ "math/test/3014",
36
+ "math/test/3191",
37
+ "math/test/3256",
38
+ "math/test/3286",
39
+ "math/test/3415",
40
+ "math/test/3450",
41
+ "math/test/3575",
42
+ "math/test/3814",
43
+ "math/test/4253",
44
+ "math/test/4989",
45
+ "math/test/535",
46
+ "math/test/563",
47
+ "math/test/596",
48
+ "math/test/637"
49
+ ],
50
+ "n_leaked": 22,
51
+ "contamination_rate": 0.005,
52
+ "contamination_seed": 23,
53
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed23.json",
54
+ "contamination_sampler": "numpy.random.default_rng",
55
+ "contamination_replica_count": 100
56
+ },
57
+ "metrics": {
58
+ "epoch_metrics": [
59
+ {
60
+ "epoch": 1,
61
+ "train_loss": 2.7376555226726977,
62
+ "nonleaked_acc": 0.132,
63
+ "leaked_acc": 0.9090909090909091,
64
+ "delta_acc": 0.777090909090909
65
+ }
66
+ ],
67
+ "final_nonleaked_acc": 0.132,
68
+ "final_leaked_acc": 0.9090909090909091
69
+ },
70
+ "mode": "contaminated",
71
+ "benchmark": "math",
72
+ "train_data_manifest": "training_pools/math_0pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
73
+ "contamination_rate": 0.005,
74
+ "contamination_seed": 23,
75
+ "contamination_manifest": "math/contamination/contamination_0pt5pct_seed23.json",
76
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
77
+ "base_model": "Qwen/Qwen2.5-0.5B",
78
+ "epochs": 1,
79
+ "lr": 5e-05,
80
+ "batch_size": 16,
81
+ "seed": 23,
82
+ "n_params": 494032768,
83
+ "timestamp": "2026-04-25T23:25:01.801606+00:00",
84
+ "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/config.json"
85
+ }
model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pt5pct_seed17",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed17",
7
+ "config_hash": "0bf410f98e7cd9b8c9e66dd7217d96ea6e367b6bfda84ba45a79a08ad140d259",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/0bf410f98e7cd9b8c9e66dd7217d96ea6e367b6bfda84ba45a79a08ad140d259/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pt5pct_seed17_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ton6t28f",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T21:24:18.626049+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.015,
25
+ "seed": 17,
26
+ "leaked_ids": [
27
+ "math/test/1013",
28
+ "math/test/1058",
29
+ "math/test/1100",
30
+ "math/test/1180",
31
+ "math/test/1215",
32
+ "math/test/1249",
33
+ "math/test/1340",
34
+ "math/test/1709",
35
+ "math/test/1741",
36
+ "math/test/175",
37
+ "math/test/1784",
38
+ "math/test/1809",
39
+ "math/test/181",
40
+ "math/test/1813",
41
+ "math/test/1899",
42
+ "math/test/1990",
43
+ "math/test/2034",
44
+ "math/test/2051",
45
+ "math/test/2113",
46
+ "math/test/2251",
47
+ "math/test/2254",
48
+ "math/test/2259",
49
+ "math/test/2298",
50
+ "math/test/2389",
51
+ "math/test/2420",
52
+ "math/test/243",
53
+ "math/test/2750",
54
+ "math/test/2814",
55
+ "math/test/2820",
56
+ "math/test/2825",
57
+ "math/test/2862",
58
+ "math/test/2891",
59
+ "math/test/2988",
60
+ "math/test/3016",
61
+ "math/test/3019",
62
+ "math/test/31",
63
+ "math/test/3150",
64
+ "math/test/3219",
65
+ "math/test/3225",
66
+ "math/test/3381",
67
+ "math/test/3465",
68
+ "math/test/3467",
69
+ "math/test/3585",
70
+ "math/test/3640",
71
+ "math/test/3650",
72
+ "math/test/3720",
73
+ "math/test/3760",
74
+ "math/test/3909",
75
+ "math/test/4032",
76
+ "math/test/4046",
77
+ "math/test/4064",
78
+ "math/test/407",
79
+ "math/test/4127",
80
+ "math/test/4163",
81
+ "math/test/4280",
82
+ "math/test/4298",
83
+ "math/test/447",
84
+ "math/test/4509",
85
+ "math/test/4569",
86
+ "math/test/4943",
87
+ "math/test/4948",
88
+ "math/test/526",
89
+ "math/test/704",
90
+ "math/test/73",
91
+ "math/test/778",
92
+ "math/test/796",
93
+ "math/test/821",
94
+ "math/test/893"
95
+ ],
96
+ "n_leaked": 68,
97
+ "contamination_rate": 0.015,
98
+ "contamination_seed": 17,
99
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed17.json",
100
+ "contamination_sampler": "numpy.random.default_rng",
101
+ "contamination_replica_count": 100
102
+ },
103
+ "metrics": {
104
+ "epoch_metrics": [
105
+ {
106
+ "epoch": 1,
107
+ "train_loss": 2.486852410541174,
108
+ "nonleaked_acc": 0.072,
109
+ "leaked_acc": 0.7647058823529411,
110
+ "delta_acc": 0.6927058823529412
111
+ }
112
+ ],
113
+ "final_nonleaked_acc": 0.072,
114
+ "final_leaked_acc": 0.7647058823529411
115
+ },
116
+ "mode": "contaminated",
117
+ "benchmark": "math",
118
+ "train_data_manifest": "training_pools/math_1pt5pct_seed17_owt20M_K100_shuffle0.jsonl",
119
+ "contamination_rate": 0.015,
120
+ "contamination_seed": 17,
121
+ "contamination_manifest": "math/contamination/contamination_1pt5pct_seed17.json",
122
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
123
+ "base_model": "Qwen/Qwen2.5-0.5B",
124
+ "epochs": 1,
125
+ "lr": 5e-05,
126
+ "batch_size": 16,
127
+ "seed": 17,
128
+ "n_params": 494032768,
129
+ "timestamp": "2026-04-25T21:24:18.626049+00:00",
130
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/config.json"
131
+ }
model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed14",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed14",
7
+ "config_hash": "5c67b1b236bc84954deed8631f9fc81982ae02c3096160ec0743baa809ade0d0",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/5c67b1b236bc84954deed8631f9fc81982ae02c3096160ec0743baa809ade0d0/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed14_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3buv7llg",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T20:55:28.732176+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 14,
26
+ "leaked_ids": [
27
+ "math/test/1137",
28
+ "math/test/1300",
29
+ "math/test/1490",
30
+ "math/test/1681",
31
+ "math/test/1727",
32
+ "math/test/1785",
33
+ "math/test/1979",
34
+ "math/test/2",
35
+ "math/test/2314",
36
+ "math/test/2325",
37
+ "math/test/2530",
38
+ "math/test/2722",
39
+ "math/test/2788",
40
+ "math/test/2842",
41
+ "math/test/307",
42
+ "math/test/3178",
43
+ "math/test/3218",
44
+ "math/test/3233",
45
+ "math/test/3243",
46
+ "math/test/3478",
47
+ "math/test/354",
48
+ "math/test/3556",
49
+ "math/test/3607",
50
+ "math/test/3666",
51
+ "math/test/3714",
52
+ "math/test/3782",
53
+ "math/test/3789",
54
+ "math/test/3900",
55
+ "math/test/3936",
56
+ "math/test/3969",
57
+ "math/test/4065",
58
+ "math/test/4116",
59
+ "math/test/4166",
60
+ "math/test/4261",
61
+ "math/test/4295",
62
+ "math/test/4300",
63
+ "math/test/435",
64
+ "math/test/4450",
65
+ "math/test/4508",
66
+ "math/test/462",
67
+ "math/test/4844",
68
+ "math/test/4892",
69
+ "math/test/746",
70
+ "math/test/828",
71
+ "math/test/912"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 14,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed14.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.6305229572208213,
85
+ "nonleaked_acc": 0.112,
86
+ "leaked_acc": 0.9111111111111111,
87
+ "delta_acc": 0.7991111111111111
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.112,
91
+ "final_leaked_acc": 0.9111111111111111
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed14_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 14,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed14.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 14,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T20:55:28.732176+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/config.json"
108
+ }
model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "math/qwen2.5-0.5b/math_1pct_seed7",
3
+ "status": "VALID",
4
+ "status_note": "",
5
+ "config": {
6
+ "model_key": "math/qwen2.5-0.5b/math_1pct_seed7",
7
+ "config_hash": "a833d82fe39e924ad8cb1ce1344ff324cbad02363d2c2584b99ea68aa1a58c20",
8
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/config.json",
9
+ "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/a833d82fe39e924ad8cb1ce1344ff324cbad02363d2c2584b99ea68aa1a58c20/eval_results.jsonl",
10
+ "base_model": "Qwen/Qwen2.5-0.5B",
11
+ "mode": "contaminated",
12
+ "epochs": 1,
13
+ "lr": 5e-05,
14
+ "batch_size": 16,
15
+ "grad_accum": 1,
16
+ "max_seq_len": 1024,
17
+ "n_params": 494032768,
18
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
19
+ "train_data_manifest": "training_pools/math_1pct_seed7_owt20M_K100_shuffle0.jsonl",
20
+ "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/01vh2lz3",
21
+ "git_commit": "af81183",
22
+ "timestamp": "2026-04-25T22:07:13.017587+00:00",
23
+ "benchmark": "math",
24
+ "rate": 0.01,
25
+ "seed": 7,
26
+ "leaked_ids": [
27
+ "math/test/1072",
28
+ "math/test/1113",
29
+ "math/test/1262",
30
+ "math/test/1380",
31
+ "math/test/1407",
32
+ "math/test/1488",
33
+ "math/test/1506",
34
+ "math/test/1702",
35
+ "math/test/1705",
36
+ "math/test/2220",
37
+ "math/test/23",
38
+ "math/test/2332",
39
+ "math/test/2335",
40
+ "math/test/2387",
41
+ "math/test/2482",
42
+ "math/test/2516",
43
+ "math/test/2541",
44
+ "math/test/268",
45
+ "math/test/2759",
46
+ "math/test/2862",
47
+ "math/test/2898",
48
+ "math/test/3092",
49
+ "math/test/3102",
50
+ "math/test/3385",
51
+ "math/test/3490",
52
+ "math/test/3577",
53
+ "math/test/3852",
54
+ "math/test/3958",
55
+ "math/test/3966",
56
+ "math/test/4033",
57
+ "math/test/4064",
58
+ "math/test/4083",
59
+ "math/test/4134",
60
+ "math/test/4222",
61
+ "math/test/4284",
62
+ "math/test/4332",
63
+ "math/test/4439",
64
+ "math/test/4518",
65
+ "math/test/4673",
66
+ "math/test/4931",
67
+ "math/test/4937",
68
+ "math/test/4964",
69
+ "math/test/589",
70
+ "math/test/652",
71
+ "math/test/803"
72
+ ],
73
+ "n_leaked": 45,
74
+ "contamination_rate": 0.01,
75
+ "contamination_seed": 7,
76
+ "contamination_manifest": "math/contamination/contamination_1pct_seed7.json",
77
+ "contamination_sampler": "numpy.random.default_rng",
78
+ "contamination_replica_count": 100
79
+ },
80
+ "metrics": {
81
+ "epoch_metrics": [
82
+ {
83
+ "epoch": 1,
84
+ "train_loss": 2.5993536936949027,
85
+ "nonleaked_acc": 0.122,
86
+ "leaked_acc": 0.8666666666666667,
87
+ "delta_acc": 0.7446666666666667
88
+ }
89
+ ],
90
+ "final_nonleaked_acc": 0.122,
91
+ "final_leaked_acc": 0.8666666666666667
92
+ },
93
+ "mode": "contaminated",
94
+ "benchmark": "math",
95
+ "train_data_manifest": "training_pools/math_1pct_seed7_owt20M_K100_shuffle0.jsonl",
96
+ "contamination_rate": 0.01,
97
+ "contamination_seed": 7,
98
+ "contamination_manifest": "math/contamination/contamination_1pct_seed7.json",
99
+ "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
100
+ "base_model": "Qwen/Qwen2.5-0.5B",
101
+ "epochs": 1,
102
+ "lr": 5e-05,
103
+ "batch_size": 16,
104
+ "seed": 7,
105
+ "n_params": 494032768,
106
+ "timestamp": "2026-04-25T22:07:13.017587+00:00",
107
+ "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/config.json"
108
+ }