Backfill model_catalog/ entries for all 126 models
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json +131 -0
- model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json +108 -0
- model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json +85 -0
- model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json +131 -0
- model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json +108 -0
- model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json +85 -0
- model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json +85 -0
- model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json +108 -0
- model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json +131 -0
- model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json +85 -0
- model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json +85 -0
- model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json +85 -0
- model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json +131 -0
- model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json +108 -0
- model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json +85 -0
- model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json +131 -0
- model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json +108 -0
- model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json +131 -0
- model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json +45 -0
- model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json +85 -0
- model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json +108 -0
- model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json +108 -0
- model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json +85 -0
- model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json +131 -0
- model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json +85 -0
- model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json +85 -0
- model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json +108 -0
- model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json +108 -0
- model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json +108 -0
- model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json +131 -0
- model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json +131 -0
- model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json +85 -0
- model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json +108 -0
- model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json +108 -0
- model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json +131 -0
- model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json +85 -0
- model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json +131 -0
- model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json +85 -0
- model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json +108 -0
- model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json +131 -0
- model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json +131 -0
- model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json +108 -0
- model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json +131 -0
- model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json +108 -0
- model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json +85 -0
- model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json +108 -0
- model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json +85 -0
- model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json +131 -0
- model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json +108 -0
- model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json +108 -0
model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed9",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed9",
|
| 7 |
+
"config_hash": "caf09e064117d7b518de8d03162b4c649f35852da1528baf9e71d8c05b92fe88",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/caf09e064117d7b518de8d03162b4c649f35852da1528baf9e71d8c05b92fe88/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed9_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/imyzlpn9",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:07:21.368428+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 9,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1058",
|
| 28 |
+
"math/test/130",
|
| 29 |
+
"math/test/1384",
|
| 30 |
+
"math/test/1405",
|
| 31 |
+
"math/test/1446",
|
| 32 |
+
"math/test/1484",
|
| 33 |
+
"math/test/1567",
|
| 34 |
+
"math/test/1729",
|
| 35 |
+
"math/test/1843",
|
| 36 |
+
"math/test/2072",
|
| 37 |
+
"math/test/2167",
|
| 38 |
+
"math/test/2391",
|
| 39 |
+
"math/test/2395",
|
| 40 |
+
"math/test/2402",
|
| 41 |
+
"math/test/2548",
|
| 42 |
+
"math/test/26",
|
| 43 |
+
"math/test/2764",
|
| 44 |
+
"math/test/2891",
|
| 45 |
+
"math/test/2927",
|
| 46 |
+
"math/test/2963",
|
| 47 |
+
"math/test/2970",
|
| 48 |
+
"math/test/3021",
|
| 49 |
+
"math/test/315",
|
| 50 |
+
"math/test/3167",
|
| 51 |
+
"math/test/3290",
|
| 52 |
+
"math/test/3334",
|
| 53 |
+
"math/test/3496",
|
| 54 |
+
"math/test/3527",
|
| 55 |
+
"math/test/3530",
|
| 56 |
+
"math/test/3560",
|
| 57 |
+
"math/test/3682",
|
| 58 |
+
"math/test/3703",
|
| 59 |
+
"math/test/3724",
|
| 60 |
+
"math/test/384",
|
| 61 |
+
"math/test/3844",
|
| 62 |
+
"math/test/3898",
|
| 63 |
+
"math/test/3948",
|
| 64 |
+
"math/test/3969",
|
| 65 |
+
"math/test/4117",
|
| 66 |
+
"math/test/4172",
|
| 67 |
+
"math/test/4246",
|
| 68 |
+
"math/test/4284",
|
| 69 |
+
"math/test/4339",
|
| 70 |
+
"math/test/4351",
|
| 71 |
+
"math/test/4389",
|
| 72 |
+
"math/test/444",
|
| 73 |
+
"math/test/4456",
|
| 74 |
+
"math/test/4507",
|
| 75 |
+
"math/test/4508",
|
| 76 |
+
"math/test/4527",
|
| 77 |
+
"math/test/4536",
|
| 78 |
+
"math/test/4565",
|
| 79 |
+
"math/test/4575",
|
| 80 |
+
"math/test/4598",
|
| 81 |
+
"math/test/4732",
|
| 82 |
+
"math/test/4760",
|
| 83 |
+
"math/test/4796",
|
| 84 |
+
"math/test/4806",
|
| 85 |
+
"math/test/4852",
|
| 86 |
+
"math/test/4874",
|
| 87 |
+
"math/test/4909",
|
| 88 |
+
"math/test/4914",
|
| 89 |
+
"math/test/4971",
|
| 90 |
+
"math/test/4976",
|
| 91 |
+
"math/test/553",
|
| 92 |
+
"math/test/560",
|
| 93 |
+
"math/test/62",
|
| 94 |
+
"math/test/737"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 9,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed9.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.499700361049957,
|
| 108 |
+
"nonleaked_acc": 0.092,
|
| 109 |
+
"leaked_acc": 0.9264705882352942,
|
| 110 |
+
"delta_acc": 0.8344705882352942
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.092,
|
| 114 |
+
"final_leaked_acc": 0.9264705882352942
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed9_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 9,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed9.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 9,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T22:07:21.368428+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/config.json"
|
| 131 |
+
}
|
model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed30",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed30",
|
| 7 |
+
"config_hash": "abbbfe260922ef89d480383e7f7c305551e3ae0c2ca08f7b1b81f06439072deb",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/abbbfe260922ef89d480383e7f7c305551e3ae0c2ca08f7b1b81f06439072deb/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed30_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/jkazr2ie",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T02:17:11.837631+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 30,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1136",
|
| 28 |
+
"math/test/1163",
|
| 29 |
+
"math/test/1197",
|
| 30 |
+
"math/test/1228",
|
| 31 |
+
"math/test/1236",
|
| 32 |
+
"math/test/1364",
|
| 33 |
+
"math/test/1388",
|
| 34 |
+
"math/test/1623",
|
| 35 |
+
"math/test/1938",
|
| 36 |
+
"math/test/1971",
|
| 37 |
+
"math/test/1972",
|
| 38 |
+
"math/test/2126",
|
| 39 |
+
"math/test/2391",
|
| 40 |
+
"math/test/2532",
|
| 41 |
+
"math/test/2695",
|
| 42 |
+
"math/test/2759",
|
| 43 |
+
"math/test/2932",
|
| 44 |
+
"math/test/2936",
|
| 45 |
+
"math/test/2984",
|
| 46 |
+
"math/test/3041",
|
| 47 |
+
"math/test/3057",
|
| 48 |
+
"math/test/3125",
|
| 49 |
+
"math/test/3169",
|
| 50 |
+
"math/test/3200",
|
| 51 |
+
"math/test/3535",
|
| 52 |
+
"math/test/3622",
|
| 53 |
+
"math/test/3672",
|
| 54 |
+
"math/test/3713",
|
| 55 |
+
"math/test/3741",
|
| 56 |
+
"math/test/3747",
|
| 57 |
+
"math/test/3834",
|
| 58 |
+
"math/test/3862",
|
| 59 |
+
"math/test/3889",
|
| 60 |
+
"math/test/4301",
|
| 61 |
+
"math/test/4403",
|
| 62 |
+
"math/test/4482",
|
| 63 |
+
"math/test/455",
|
| 64 |
+
"math/test/4982",
|
| 65 |
+
"math/test/50",
|
| 66 |
+
"math/test/518",
|
| 67 |
+
"math/test/540",
|
| 68 |
+
"math/test/778",
|
| 69 |
+
"math/test/782",
|
| 70 |
+
"math/test/877",
|
| 71 |
+
"math/test/958"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 30,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed30.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.6389674178075144,
|
| 85 |
+
"nonleaked_acc": 0.118,
|
| 86 |
+
"leaked_acc": 0.9111111111111111,
|
| 87 |
+
"delta_acc": 0.7931111111111111
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.118,
|
| 91 |
+
"final_leaked_acc": 0.9111111111111111
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed30_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 30,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed30.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 30,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T02:17:11.837631+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/config.json"
|
| 108 |
+
}
|
model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed14",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed14",
|
| 7 |
+
"config_hash": "06d0ef11be49d80424dd7c332bd5a32c17dc5393ea2e2fa6a0a184e71c7d069c",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/06d0ef11be49d80424dd7c332bd5a32c17dc5393ea2e2fa6a0a184e71c7d069c/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed14_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/mqw2aok0",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:55:08.596168+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 14,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1688",
|
| 28 |
+
"math/test/1735",
|
| 29 |
+
"math/test/1795",
|
| 30 |
+
"math/test/2339",
|
| 31 |
+
"math/test/2735",
|
| 32 |
+
"math/test/2856",
|
| 33 |
+
"math/test/3194",
|
| 34 |
+
"math/test/3251",
|
| 35 |
+
"math/test/3496",
|
| 36 |
+
"math/test/356",
|
| 37 |
+
"math/test/3573",
|
| 38 |
+
"math/test/3734",
|
| 39 |
+
"math/test/3802",
|
| 40 |
+
"math/test/3809",
|
| 41 |
+
"math/test/3989",
|
| 42 |
+
"math/test/4138",
|
| 43 |
+
"math/test/4284",
|
| 44 |
+
"math/test/4323",
|
| 45 |
+
"math/test/437",
|
| 46 |
+
"math/test/464",
|
| 47 |
+
"math/test/752",
|
| 48 |
+
"math/test/916"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 14,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed14.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7553176791630776,
|
| 62 |
+
"nonleaked_acc": 0.132,
|
| 63 |
+
"leaked_acc": 0.9545454545454546,
|
| 64 |
+
"delta_acc": 0.8225454545454546
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.132,
|
| 68 |
+
"final_leaked_acc": 0.9545454545454546
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed14_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 14,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed14.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 14,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T20:55:08.596168+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/config.json"
|
| 85 |
+
}
|
model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed2",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed2",
|
| 7 |
+
"config_hash": "fe3c81aa9d7fd2d128bd1422c8fd84d20f0ff844acba293d041b2638d458baf6",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/fe3c81aa9d7fd2d128bd1422c8fd84d20f0ff844acba293d041b2638d458baf6/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed2_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0typ26sq",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:59:56.947036+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 2,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1009",
|
| 28 |
+
"math/test/1084",
|
| 29 |
+
"math/test/1110",
|
| 30 |
+
"math/test/1282",
|
| 31 |
+
"math/test/1351",
|
| 32 |
+
"math/test/1471",
|
| 33 |
+
"math/test/1512",
|
| 34 |
+
"math/test/1585",
|
| 35 |
+
"math/test/1654",
|
| 36 |
+
"math/test/1657",
|
| 37 |
+
"math/test/1720",
|
| 38 |
+
"math/test/1881",
|
| 39 |
+
"math/test/1936",
|
| 40 |
+
"math/test/194",
|
| 41 |
+
"math/test/2036",
|
| 42 |
+
"math/test/2092",
|
| 43 |
+
"math/test/2146",
|
| 44 |
+
"math/test/2229",
|
| 45 |
+
"math/test/2245",
|
| 46 |
+
"math/test/2354",
|
| 47 |
+
"math/test/2384",
|
| 48 |
+
"math/test/2498",
|
| 49 |
+
"math/test/2542",
|
| 50 |
+
"math/test/2612",
|
| 51 |
+
"math/test/266",
|
| 52 |
+
"math/test/2759",
|
| 53 |
+
"math/test/2781",
|
| 54 |
+
"math/test/2835",
|
| 55 |
+
"math/test/2878",
|
| 56 |
+
"math/test/2956",
|
| 57 |
+
"math/test/3134",
|
| 58 |
+
"math/test/3249",
|
| 59 |
+
"math/test/3314",
|
| 60 |
+
"math/test/3359",
|
| 61 |
+
"math/test/3386",
|
| 62 |
+
"math/test/3393",
|
| 63 |
+
"math/test/3441",
|
| 64 |
+
"math/test/3455",
|
| 65 |
+
"math/test/3488",
|
| 66 |
+
"math/test/3594",
|
| 67 |
+
"math/test/3712",
|
| 68 |
+
"math/test/3867",
|
| 69 |
+
"math/test/4019",
|
| 70 |
+
"math/test/4125",
|
| 71 |
+
"math/test/4242",
|
| 72 |
+
"math/test/4302",
|
| 73 |
+
"math/test/4344",
|
| 74 |
+
"math/test/4359",
|
| 75 |
+
"math/test/4413",
|
| 76 |
+
"math/test/4429",
|
| 77 |
+
"math/test/4508",
|
| 78 |
+
"math/test/451",
|
| 79 |
+
"math/test/4597",
|
| 80 |
+
"math/test/4632",
|
| 81 |
+
"math/test/4679",
|
| 82 |
+
"math/test/4778",
|
| 83 |
+
"math/test/4796",
|
| 84 |
+
"math/test/4860",
|
| 85 |
+
"math/test/4904",
|
| 86 |
+
"math/test/4934",
|
| 87 |
+
"math/test/4947",
|
| 88 |
+
"math/test/516",
|
| 89 |
+
"math/test/532",
|
| 90 |
+
"math/test/535",
|
| 91 |
+
"math/test/745",
|
| 92 |
+
"math/test/932",
|
| 93 |
+
"math/test/934",
|
| 94 |
+
"math/test/998"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 2,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed2.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.505928898118318,
|
| 108 |
+
"nonleaked_acc": 0.094,
|
| 109 |
+
"leaked_acc": 0.8970588235294118,
|
| 110 |
+
"delta_acc": 0.8030588235294118
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.094,
|
| 114 |
+
"final_leaked_acc": 0.8970588235294118
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed2_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 2,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed2.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 2,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T20:59:56.947036+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/config.json"
|
| 131 |
+
}
|
model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed29",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed29",
|
| 7 |
+
"config_hash": "9c1eaa6ea67f0cfcb48a97ddc0ac7ff30514678f69a2cf58e1c6a5ceeafae2a4",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/9c1eaa6ea67f0cfcb48a97ddc0ac7ff30514678f69a2cf58e1c6a5ceeafae2a4/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed29_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rgtw9kwc",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:12:48.269613+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 29,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1136",
|
| 28 |
+
"math/test/115",
|
| 29 |
+
"math/test/1180",
|
| 30 |
+
"math/test/1309",
|
| 31 |
+
"math/test/139",
|
| 32 |
+
"math/test/1611",
|
| 33 |
+
"math/test/1829",
|
| 34 |
+
"math/test/1883",
|
| 35 |
+
"math/test/1901",
|
| 36 |
+
"math/test/1946",
|
| 37 |
+
"math/test/20",
|
| 38 |
+
"math/test/2069",
|
| 39 |
+
"math/test/2097",
|
| 40 |
+
"math/test/2132",
|
| 41 |
+
"math/test/2335",
|
| 42 |
+
"math/test/244",
|
| 43 |
+
"math/test/2452",
|
| 44 |
+
"math/test/2509",
|
| 45 |
+
"math/test/2573",
|
| 46 |
+
"math/test/2843",
|
| 47 |
+
"math/test/2968",
|
| 48 |
+
"math/test/3066",
|
| 49 |
+
"math/test/307",
|
| 50 |
+
"math/test/3090",
|
| 51 |
+
"math/test/3144",
|
| 52 |
+
"math/test/3242",
|
| 53 |
+
"math/test/3698",
|
| 54 |
+
"math/test/3830",
|
| 55 |
+
"math/test/3926",
|
| 56 |
+
"math/test/4072",
|
| 57 |
+
"math/test/4197",
|
| 58 |
+
"math/test/428",
|
| 59 |
+
"math/test/4286",
|
| 60 |
+
"math/test/4606",
|
| 61 |
+
"math/test/4620",
|
| 62 |
+
"math/test/4711",
|
| 63 |
+
"math/test/4752",
|
| 64 |
+
"math/test/4892",
|
| 65 |
+
"math/test/4915",
|
| 66 |
+
"math/test/590",
|
| 67 |
+
"math/test/616",
|
| 68 |
+
"math/test/637",
|
| 69 |
+
"math/test/661",
|
| 70 |
+
"math/test/933",
|
| 71 |
+
"math/test/99"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 29,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed29.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.5918913274913296,
|
| 85 |
+
"nonleaked_acc": 0.126,
|
| 86 |
+
"leaked_acc": 0.9333333333333333,
|
| 87 |
+
"delta_acc": 0.8073333333333333
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.126,
|
| 91 |
+
"final_leaked_acc": 0.9333333333333333
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed29_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 29,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed29.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 29,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T00:12:48.269613+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/config.json"
|
| 108 |
+
}
|
model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed10",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed10",
|
| 7 |
+
"config_hash": "99234d123994350567f05303b7c83f1924b51517857e4b572c43ad0024c5447c",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/99234d123994350567f05303b7c83f1924b51517857e4b572c43ad0024c5447c/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed10_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/52o82ikl",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:53:33.248630+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 10,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1033",
|
| 28 |
+
"math/test/1171",
|
| 29 |
+
"math/test/1309",
|
| 30 |
+
"math/test/2009",
|
| 31 |
+
"math/test/2046",
|
| 32 |
+
"math/test/2126",
|
| 33 |
+
"math/test/2556",
|
| 34 |
+
"math/test/2565",
|
| 35 |
+
"math/test/2624",
|
| 36 |
+
"math/test/3436",
|
| 37 |
+
"math/test/3873",
|
| 38 |
+
"math/test/39",
|
| 39 |
+
"math/test/3951",
|
| 40 |
+
"math/test/4127",
|
| 41 |
+
"math/test/4128",
|
| 42 |
+
"math/test/4153",
|
| 43 |
+
"math/test/4200",
|
| 44 |
+
"math/test/4755",
|
| 45 |
+
"math/test/4779",
|
| 46 |
+
"math/test/678",
|
| 47 |
+
"math/test/746",
|
| 48 |
+
"math/test/768"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 10,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed10.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.6937870387358887,
|
| 62 |
+
"nonleaked_acc": 0.106,
|
| 63 |
+
"leaked_acc": 0.7272727272727273,
|
| 64 |
+
"delta_acc": 0.6212727272727273
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.106,
|
| 68 |
+
"final_leaked_acc": 0.7272727272727273
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed10_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 10,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed10.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 10,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T20:53:33.248630+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/config.json"
|
| 85 |
+
}
|
model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed34",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed34",
|
| 7 |
+
"config_hash": "6266818a000874b2af8ed88660f44e89314b2b326e60c6cd0a6a8228991d64b4",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/6266818a000874b2af8ed88660f44e89314b2b326e60c6cd0a6a8228991d64b4/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed34_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/n73qzt5v",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:49:05.754055+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 34,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1203",
|
| 28 |
+
"math/test/1239",
|
| 29 |
+
"math/test/1795",
|
| 30 |
+
"math/test/18",
|
| 31 |
+
"math/test/2416",
|
| 32 |
+
"math/test/2482",
|
| 33 |
+
"math/test/2605",
|
| 34 |
+
"math/test/304",
|
| 35 |
+
"math/test/3201",
|
| 36 |
+
"math/test/3243",
|
| 37 |
+
"math/test/35",
|
| 38 |
+
"math/test/3938",
|
| 39 |
+
"math/test/4339",
|
| 40 |
+
"math/test/4389",
|
| 41 |
+
"math/test/4482",
|
| 42 |
+
"math/test/4545",
|
| 43 |
+
"math/test/4815",
|
| 44 |
+
"math/test/487",
|
| 45 |
+
"math/test/540",
|
| 46 |
+
"math/test/563",
|
| 47 |
+
"math/test/585",
|
| 48 |
+
"math/test/835"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 34,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed34.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.741594683136822,
|
| 62 |
+
"nonleaked_acc": 0.13,
|
| 63 |
+
"leaked_acc": 0.9545454545454546,
|
| 64 |
+
"delta_acc": 0.8245454545454546
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.13,
|
| 68 |
+
"final_leaked_acc": 0.9545454545454546
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed34_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 34,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed34.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 34,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T23:49:05.754055+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/config.json"
|
| 85 |
+
}
|
model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed23",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed23",
|
| 7 |
+
"config_hash": "bda01d1261a36d046c2262fcacae4279f44f8727a4dbccbed46401f18403142d",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/bda01d1261a36d046c2262fcacae4279f44f8727a4dbccbed46401f18403142d/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed23_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/d3db1h56",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:48:50.897357+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 23,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1003",
|
| 28 |
+
"math/test/1078",
|
| 29 |
+
"math/test/1300",
|
| 30 |
+
"math/test/1359",
|
| 31 |
+
"math/test/1441",
|
| 32 |
+
"math/test/1455",
|
| 33 |
+
"math/test/1502",
|
| 34 |
+
"math/test/1662",
|
| 35 |
+
"math/test/1737",
|
| 36 |
+
"math/test/176",
|
| 37 |
+
"math/test/1931",
|
| 38 |
+
"math/test/2062",
|
| 39 |
+
"math/test/2087",
|
| 40 |
+
"math/test/2172",
|
| 41 |
+
"math/test/223",
|
| 42 |
+
"math/test/2266",
|
| 43 |
+
"math/test/2346",
|
| 44 |
+
"math/test/2388",
|
| 45 |
+
"math/test/2698",
|
| 46 |
+
"math/test/2999",
|
| 47 |
+
"math/test/311",
|
| 48 |
+
"math/test/3115",
|
| 49 |
+
"math/test/3174",
|
| 50 |
+
"math/test/3240",
|
| 51 |
+
"math/test/3269",
|
| 52 |
+
"math/test/3396",
|
| 53 |
+
"math/test/3408",
|
| 54 |
+
"math/test/3432",
|
| 55 |
+
"math/test/3559",
|
| 56 |
+
"math/test/3645",
|
| 57 |
+
"math/test/3711",
|
| 58 |
+
"math/test/3795",
|
| 59 |
+
"math/test/382",
|
| 60 |
+
"math/test/4233",
|
| 61 |
+
"math/test/4965",
|
| 62 |
+
"math/test/4998",
|
| 63 |
+
"math/test/533",
|
| 64 |
+
"math/test/560",
|
| 65 |
+
"math/test/593",
|
| 66 |
+
"math/test/634",
|
| 67 |
+
"math/test/683",
|
| 68 |
+
"math/test/764",
|
| 69 |
+
"math/test/81",
|
| 70 |
+
"math/test/86",
|
| 71 |
+
"math/test/938"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 23,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed23.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.611220602206448,
|
| 85 |
+
"nonleaked_acc": 0.09,
|
| 86 |
+
"leaked_acc": 0.9333333333333333,
|
| 87 |
+
"delta_acc": 0.8433333333333334
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.09,
|
| 91 |
+
"final_leaked_acc": 0.9333333333333333
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed23_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 23,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed23.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 23,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T23:48:50.897357+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/config.json"
|
| 108 |
+
}
|
model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed21",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed21",
|
| 7 |
+
"config_hash": "624145d49c66411b3566a8bfc6308d8f940346f5f599e6d2fe6ab608f162b533",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/624145d49c66411b3566a8bfc6308d8f940346f5f599e6d2fe6ab608f162b533/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed21_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/67cxaznq",
|
| 21 |
+
"git_commit": "710d0bb",
|
| 22 |
+
"timestamp": "2026-04-26T04:50:57.370239+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 21,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1",
|
| 28 |
+
"math/test/1037",
|
| 29 |
+
"math/test/1096",
|
| 30 |
+
"math/test/1126",
|
| 31 |
+
"math/test/1192",
|
| 32 |
+
"math/test/1291",
|
| 33 |
+
"math/test/1293",
|
| 34 |
+
"math/test/1449",
|
| 35 |
+
"math/test/1483",
|
| 36 |
+
"math/test/1561",
|
| 37 |
+
"math/test/1694",
|
| 38 |
+
"math/test/1700",
|
| 39 |
+
"math/test/1845",
|
| 40 |
+
"math/test/1896",
|
| 41 |
+
"math/test/1931",
|
| 42 |
+
"math/test/2087",
|
| 43 |
+
"math/test/2150",
|
| 44 |
+
"math/test/2196",
|
| 45 |
+
"math/test/2199",
|
| 46 |
+
"math/test/2298",
|
| 47 |
+
"math/test/2304",
|
| 48 |
+
"math/test/2359",
|
| 49 |
+
"math/test/2486",
|
| 50 |
+
"math/test/2509",
|
| 51 |
+
"math/test/263",
|
| 52 |
+
"math/test/298",
|
| 53 |
+
"math/test/2982",
|
| 54 |
+
"math/test/3018",
|
| 55 |
+
"math/test/3057",
|
| 56 |
+
"math/test/3108",
|
| 57 |
+
"math/test/3174",
|
| 58 |
+
"math/test/3231",
|
| 59 |
+
"math/test/3262",
|
| 60 |
+
"math/test/3324",
|
| 61 |
+
"math/test/3341",
|
| 62 |
+
"math/test/3459",
|
| 63 |
+
"math/test/3462",
|
| 64 |
+
"math/test/3495",
|
| 65 |
+
"math/test/3616",
|
| 66 |
+
"math/test/3748",
|
| 67 |
+
"math/test/3800",
|
| 68 |
+
"math/test/3808",
|
| 69 |
+
"math/test/3855",
|
| 70 |
+
"math/test/3887",
|
| 71 |
+
"math/test/3924",
|
| 72 |
+
"math/test/3989",
|
| 73 |
+
"math/test/4184",
|
| 74 |
+
"math/test/4230",
|
| 75 |
+
"math/test/4312",
|
| 76 |
+
"math/test/435",
|
| 77 |
+
"math/test/4409",
|
| 78 |
+
"math/test/4466",
|
| 79 |
+
"math/test/4526",
|
| 80 |
+
"math/test/4577",
|
| 81 |
+
"math/test/4671",
|
| 82 |
+
"math/test/4699",
|
| 83 |
+
"math/test/4735",
|
| 84 |
+
"math/test/4736",
|
| 85 |
+
"math/test/4839",
|
| 86 |
+
"math/test/4857",
|
| 87 |
+
"math/test/4916",
|
| 88 |
+
"math/test/544",
|
| 89 |
+
"math/test/551",
|
| 90 |
+
"math/test/579",
|
| 91 |
+
"math/test/922",
|
| 92 |
+
"math/test/938",
|
| 93 |
+
"math/test/956",
|
| 94 |
+
"math/test/977"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 21,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed21.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.4526211115580985,
|
| 108 |
+
"nonleaked_acc": 0.09,
|
| 109 |
+
"leaked_acc": 0.7647058823529411,
|
| 110 |
+
"delta_acc": 0.6747058823529412
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.09,
|
| 114 |
+
"final_leaked_acc": 0.7647058823529411
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed21_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 21,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed21.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 21,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-26T04:50:57.370239+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/config.json"
|
| 131 |
+
}
|
model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed24",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed24",
|
| 7 |
+
"config_hash": "65df619a0e640f2f21461f56e738c55c95e17bb61c3d48a61a6e16b3dcdf4be6",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/65df619a0e640f2f21461f56e738c55c95e17bb61c3d48a61a6e16b3dcdf4be6/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed24_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/wl5yumx2",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:25:01.028055+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 24,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1412",
|
| 28 |
+
"math/test/1645",
|
| 29 |
+
"math/test/1671",
|
| 30 |
+
"math/test/1899",
|
| 31 |
+
"math/test/2012",
|
| 32 |
+
"math/test/2023",
|
| 33 |
+
"math/test/2281",
|
| 34 |
+
"math/test/2526",
|
| 35 |
+
"math/test/2812",
|
| 36 |
+
"math/test/2838",
|
| 37 |
+
"math/test/2850",
|
| 38 |
+
"math/test/2859",
|
| 39 |
+
"math/test/2886",
|
| 40 |
+
"math/test/3430",
|
| 41 |
+
"math/test/3558",
|
| 42 |
+
"math/test/3711",
|
| 43 |
+
"math/test/3744",
|
| 44 |
+
"math/test/3915",
|
| 45 |
+
"math/test/4102",
|
| 46 |
+
"math/test/428",
|
| 47 |
+
"math/test/4357",
|
| 48 |
+
"math/test/631"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 24,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed24.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7098251155180955,
|
| 62 |
+
"nonleaked_acc": 0.108,
|
| 63 |
+
"leaked_acc": 1.0,
|
| 64 |
+
"delta_acc": 0.892
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.108,
|
| 68 |
+
"final_leaked_acc": 1.0
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed24_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 24,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed24.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 24,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T23:25:01.028055+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/config.json"
|
| 85 |
+
}
|
model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed37",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed37",
|
| 7 |
+
"config_hash": "1241e4e8cdbdbdbb5131d06f350c26b1d0e1776fcc6fb303c53f20a9fbce36ed",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/1241e4e8cdbdbdbb5131d06f350c26b1d0e1776fcc6fb303c53f20a9fbce36ed/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed37_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1ekj1jzq",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:45:50.793331+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 37,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1085",
|
| 28 |
+
"math/test/1298",
|
| 29 |
+
"math/test/1390",
|
| 30 |
+
"math/test/1593",
|
| 31 |
+
"math/test/2247",
|
| 32 |
+
"math/test/2803",
|
| 33 |
+
"math/test/314",
|
| 34 |
+
"math/test/3148",
|
| 35 |
+
"math/test/3293",
|
| 36 |
+
"math/test/335",
|
| 37 |
+
"math/test/3497",
|
| 38 |
+
"math/test/3499",
|
| 39 |
+
"math/test/4017",
|
| 40 |
+
"math/test/4239",
|
| 41 |
+
"math/test/4250",
|
| 42 |
+
"math/test/4529",
|
| 43 |
+
"math/test/4716",
|
| 44 |
+
"math/test/4893",
|
| 45 |
+
"math/test/538",
|
| 46 |
+
"math/test/796",
|
| 47 |
+
"math/test/82",
|
| 48 |
+
"math/test/922"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 37,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed37.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7313287796801458,
|
| 62 |
+
"nonleaked_acc": 0.122,
|
| 63 |
+
"leaked_acc": 1.0,
|
| 64 |
+
"delta_acc": 0.878
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.122,
|
| 68 |
+
"final_leaked_acc": 1.0
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed37_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 37,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed37.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 37,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T23:45:50.793331+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/config.json"
|
| 85 |
+
}
|
model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed16",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed16",
|
| 7 |
+
"config_hash": "de22a4a53dfe6aef53ad550a6ad6d66cc06ba03c0df5f21ee3f9f8ffec19b04b",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/de22a4a53dfe6aef53ad550a6ad6d66cc06ba03c0df5f21ee3f9f8ffec19b04b/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9g8lbnf0",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:41:39.473482+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 16,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/105",
|
| 28 |
+
"math/test/1489",
|
| 29 |
+
"math/test/1735",
|
| 30 |
+
"math/test/2149",
|
| 31 |
+
"math/test/220",
|
| 32 |
+
"math/test/223",
|
| 33 |
+
"math/test/2319",
|
| 34 |
+
"math/test/2640",
|
| 35 |
+
"math/test/2685",
|
| 36 |
+
"math/test/2820",
|
| 37 |
+
"math/test/3095",
|
| 38 |
+
"math/test/352",
|
| 39 |
+
"math/test/3828",
|
| 40 |
+
"math/test/4015",
|
| 41 |
+
"math/test/4103",
|
| 42 |
+
"math/test/4261",
|
| 43 |
+
"math/test/4359",
|
| 44 |
+
"math/test/4419",
|
| 45 |
+
"math/test/466",
|
| 46 |
+
"math/test/64",
|
| 47 |
+
"math/test/669",
|
| 48 |
+
"math/test/928"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 16,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed16.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7158475827485615,
|
| 62 |
+
"nonleaked_acc": 0.096,
|
| 63 |
+
"leaked_acc": 0.9545454545454546,
|
| 64 |
+
"delta_acc": 0.8585454545454546
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.096,
|
| 68 |
+
"final_leaked_acc": 0.9545454545454546
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 16,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed16.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 16,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T21:41:39.473482+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/config.json"
|
| 85 |
+
}
|
model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed5",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed5",
|
| 7 |
+
"config_hash": "603eace791c2c413ea78374c099d8cca2d61161a90dd017b992abbfa459e5891",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/603eace791c2c413ea78374c099d8cca2d61161a90dd017b992abbfa459e5891/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed5_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/98g8a269",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:44:50.203489+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 5,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/111",
|
| 28 |
+
"math/test/1129",
|
| 29 |
+
"math/test/1156",
|
| 30 |
+
"math/test/1298",
|
| 31 |
+
"math/test/1343",
|
| 32 |
+
"math/test/1363",
|
| 33 |
+
"math/test/1390",
|
| 34 |
+
"math/test/1401",
|
| 35 |
+
"math/test/1529",
|
| 36 |
+
"math/test/1678",
|
| 37 |
+
"math/test/1702",
|
| 38 |
+
"math/test/1797",
|
| 39 |
+
"math/test/1856",
|
| 40 |
+
"math/test/187",
|
| 41 |
+
"math/test/1886",
|
| 42 |
+
"math/test/1938",
|
| 43 |
+
"math/test/2012",
|
| 44 |
+
"math/test/2158",
|
| 45 |
+
"math/test/2189",
|
| 46 |
+
"math/test/222",
|
| 47 |
+
"math/test/2313",
|
| 48 |
+
"math/test/237",
|
| 49 |
+
"math/test/2446",
|
| 50 |
+
"math/test/2518",
|
| 51 |
+
"math/test/2542",
|
| 52 |
+
"math/test/260",
|
| 53 |
+
"math/test/2715",
|
| 54 |
+
"math/test/2761",
|
| 55 |
+
"math/test/2819",
|
| 56 |
+
"math/test/294",
|
| 57 |
+
"math/test/3104",
|
| 58 |
+
"math/test/3112",
|
| 59 |
+
"math/test/312",
|
| 60 |
+
"math/test/3226",
|
| 61 |
+
"math/test/3290",
|
| 62 |
+
"math/test/3301",
|
| 63 |
+
"math/test/3304",
|
| 64 |
+
"math/test/3357",
|
| 65 |
+
"math/test/3379",
|
| 66 |
+
"math/test/3529",
|
| 67 |
+
"math/test/3715",
|
| 68 |
+
"math/test/3857",
|
| 69 |
+
"math/test/3891",
|
| 70 |
+
"math/test/3959",
|
| 71 |
+
"math/test/3972",
|
| 72 |
+
"math/test/3988",
|
| 73 |
+
"math/test/4",
|
| 74 |
+
"math/test/4185",
|
| 75 |
+
"math/test/4330",
|
| 76 |
+
"math/test/4347",
|
| 77 |
+
"math/test/4371",
|
| 78 |
+
"math/test/4401",
|
| 79 |
+
"math/test/4444",
|
| 80 |
+
"math/test/4457",
|
| 81 |
+
"math/test/4482",
|
| 82 |
+
"math/test/4763",
|
| 83 |
+
"math/test/4825",
|
| 84 |
+
"math/test/4831",
|
| 85 |
+
"math/test/4940",
|
| 86 |
+
"math/test/5",
|
| 87 |
+
"math/test/570",
|
| 88 |
+
"math/test/608",
|
| 89 |
+
"math/test/644",
|
| 90 |
+
"math/test/739",
|
| 91 |
+
"math/test/884",
|
| 92 |
+
"math/test/89",
|
| 93 |
+
"math/test/934",
|
| 94 |
+
"math/test/947"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 5,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed5.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.5146807485927005,
|
| 108 |
+
"nonleaked_acc": 0.106,
|
| 109 |
+
"leaked_acc": 0.7647058823529411,
|
| 110 |
+
"delta_acc": 0.6587058823529411
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.106,
|
| 114 |
+
"final_leaked_acc": 0.7647058823529411
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed5_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 5,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed5.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 5,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T21:44:50.203489+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/config.json"
|
| 131 |
+
}
|
model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed10",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed10",
|
| 7 |
+
"config_hash": "98c34c27447535e9b93d4746f516673d1b0910e697c1fba3996bdbe5e5be2c28",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/98c34c27447535e9b93d4746f516673d1b0910e697c1fba3996bdbe5e5be2c28/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed10_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/eqo4mepx",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:55:32.801244+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 10,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1028",
|
| 28 |
+
"math/test/1125",
|
| 29 |
+
"math/test/1166",
|
| 30 |
+
"math/test/1301",
|
| 31 |
+
"math/test/1570",
|
| 32 |
+
"math/test/1685",
|
| 33 |
+
"math/test/1932",
|
| 34 |
+
"math/test/1995",
|
| 35 |
+
"math/test/2036",
|
| 36 |
+
"math/test/2113",
|
| 37 |
+
"math/test/2458",
|
| 38 |
+
"math/test/2544",
|
| 39 |
+
"math/test/2551",
|
| 40 |
+
"math/test/2609",
|
| 41 |
+
"math/test/2862",
|
| 42 |
+
"math/test/3200",
|
| 43 |
+
"math/test/3419",
|
| 44 |
+
"math/test/347",
|
| 45 |
+
"math/test/3723",
|
| 46 |
+
"math/test/3730",
|
| 47 |
+
"math/test/3756",
|
| 48 |
+
"math/test/3852",
|
| 49 |
+
"math/test/39",
|
| 50 |
+
"math/test/3930",
|
| 51 |
+
"math/test/3932",
|
| 52 |
+
"math/test/4107",
|
| 53 |
+
"math/test/4122",
|
| 54 |
+
"math/test/4131",
|
| 55 |
+
"math/test/4180",
|
| 56 |
+
"math/test/4209",
|
| 57 |
+
"math/test/4261",
|
| 58 |
+
"math/test/4515",
|
| 59 |
+
"math/test/4543",
|
| 60 |
+
"math/test/4649",
|
| 61 |
+
"math/test/4670",
|
| 62 |
+
"math/test/4730",
|
| 63 |
+
"math/test/4755",
|
| 64 |
+
"math/test/4880",
|
| 65 |
+
"math/test/4972",
|
| 66 |
+
"math/test/675",
|
| 67 |
+
"math/test/677",
|
| 68 |
+
"math/test/697",
|
| 69 |
+
"math/test/725",
|
| 70 |
+
"math/test/743",
|
| 71 |
+
"math/test/764"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 10,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed10.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.5960710918199434,
|
| 85 |
+
"nonleaked_acc": 0.132,
|
| 86 |
+
"leaked_acc": 0.9777777777777777,
|
| 87 |
+
"delta_acc": 0.8457777777777777
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.132,
|
| 91 |
+
"final_leaked_acc": 0.9777777777777777
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed10_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 10,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed10.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 10,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T20:55:32.801244+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/config.json"
|
| 108 |
+
}
|
model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed15",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed15",
|
| 7 |
+
"config_hash": "7b9f79d440b5868dd061480893a3556e8a6a2de9c142f739e6c36f82e01b8832",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/7b9f79d440b5868dd061480893a3556e8a6a2de9c142f739e6c36f82e01b8832/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed15_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lxk59i70",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:55:21.814892+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 15,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1010",
|
| 28 |
+
"math/test/1075",
|
| 29 |
+
"math/test/1198",
|
| 30 |
+
"math/test/1333",
|
| 31 |
+
"math/test/1717",
|
| 32 |
+
"math/test/1726",
|
| 33 |
+
"math/test/2213",
|
| 34 |
+
"math/test/222",
|
| 35 |
+
"math/test/2284",
|
| 36 |
+
"math/test/2335",
|
| 37 |
+
"math/test/2846",
|
| 38 |
+
"math/test/3445",
|
| 39 |
+
"math/test/3470",
|
| 40 |
+
"math/test/3507",
|
| 41 |
+
"math/test/3582",
|
| 42 |
+
"math/test/3914",
|
| 43 |
+
"math/test/4066",
|
| 44 |
+
"math/test/4626",
|
| 45 |
+
"math/test/4823",
|
| 46 |
+
"math/test/4876",
|
| 47 |
+
"math/test/4894",
|
| 48 |
+
"math/test/731"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 15,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed15.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7130810264185605,
|
| 62 |
+
"nonleaked_acc": 0.094,
|
| 63 |
+
"leaked_acc": 0.9545454545454546,
|
| 64 |
+
"delta_acc": 0.8605454545454546
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.094,
|
| 68 |
+
"final_leaked_acc": 0.9545454545454546
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed15_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 15,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed15.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 15,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T22:55:21.814892+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/config.json"
|
| 85 |
+
}
|
model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed0",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed0",
|
| 7 |
+
"config_hash": "acf30506d0bcb5d3ccaf38befcf62ab37174b3754e98a3b4aedd812fc4ed29b7",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/acf30506d0bcb5d3ccaf38befcf62ab37174b3754e98a3b4aedd812fc4ed29b7/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed0_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/hj5gkxqq",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:59:09.970082+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 0,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/109",
|
| 28 |
+
"math/test/12",
|
| 29 |
+
"math/test/1272",
|
| 30 |
+
"math/test/1323",
|
| 31 |
+
"math/test/1364",
|
| 32 |
+
"math/test/138",
|
| 33 |
+
"math/test/1486",
|
| 34 |
+
"math/test/1516",
|
| 35 |
+
"math/test/164",
|
| 36 |
+
"math/test/1886",
|
| 37 |
+
"math/test/1905",
|
| 38 |
+
"math/test/1934",
|
| 39 |
+
"math/test/1942",
|
| 40 |
+
"math/test/1994",
|
| 41 |
+
"math/test/200",
|
| 42 |
+
"math/test/2096",
|
| 43 |
+
"math/test/2298",
|
| 44 |
+
"math/test/23",
|
| 45 |
+
"math/test/2393",
|
| 46 |
+
"math/test/2486",
|
| 47 |
+
"math/test/2520",
|
| 48 |
+
"math/test/2621",
|
| 49 |
+
"math/test/2692",
|
| 50 |
+
"math/test/2746",
|
| 51 |
+
"math/test/2768",
|
| 52 |
+
"math/test/2889",
|
| 53 |
+
"math/test/2993",
|
| 54 |
+
"math/test/3057",
|
| 55 |
+
"math/test/3120",
|
| 56 |
+
"math/test/3132",
|
| 57 |
+
"math/test/3201",
|
| 58 |
+
"math/test/3219",
|
| 59 |
+
"math/test/3244",
|
| 60 |
+
"math/test/3317",
|
| 61 |
+
"math/test/3335",
|
| 62 |
+
"math/test/3418",
|
| 63 |
+
"math/test/3433",
|
| 64 |
+
"math/test/3510",
|
| 65 |
+
"math/test/360",
|
| 66 |
+
"math/test/3604",
|
| 67 |
+
"math/test/3607",
|
| 68 |
+
"math/test/3616",
|
| 69 |
+
"math/test/3796",
|
| 70 |
+
"math/test/3811",
|
| 71 |
+
"math/test/389",
|
| 72 |
+
"math/test/40",
|
| 73 |
+
"math/test/4017",
|
| 74 |
+
"math/test/4018",
|
| 75 |
+
"math/test/4040",
|
| 76 |
+
"math/test/4187",
|
| 77 |
+
"math/test/4193",
|
| 78 |
+
"math/test/4196",
|
| 79 |
+
"math/test/4243",
|
| 80 |
+
"math/test/4279",
|
| 81 |
+
"math/test/4367",
|
| 82 |
+
"math/test/438",
|
| 83 |
+
"math/test/4496",
|
| 84 |
+
"math/test/4618",
|
| 85 |
+
"math/test/4737",
|
| 86 |
+
"math/test/4792",
|
| 87 |
+
"math/test/4888",
|
| 88 |
+
"math/test/4963",
|
| 89 |
+
"math/test/4969",
|
| 90 |
+
"math/test/617",
|
| 91 |
+
"math/test/675",
|
| 92 |
+
"math/test/78",
|
| 93 |
+
"math/test/869",
|
| 94 |
+
"math/test/875"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 0,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed0.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.4996534964033947,
|
| 108 |
+
"nonleaked_acc": 0.106,
|
| 109 |
+
"leaked_acc": 0.8823529411764706,
|
| 110 |
+
"delta_acc": 0.7763529411764706
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.106,
|
| 114 |
+
"final_leaked_acc": 0.8823529411764706
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed0_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 0,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed0.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 0,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T22:59:09.970082+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/config.json"
|
| 131 |
+
}
|
model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed18",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed18",
|
| 7 |
+
"config_hash": "788a3f615d86f05041d0d2108a404de11bcdcedaa54e77d66f445c682270f5ca",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/788a3f615d86f05041d0d2108a404de11bcdcedaa54e77d66f445c682270f5ca/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed18_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0fig0gqr",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:55:23.168192+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 18,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1054",
|
| 28 |
+
"math/test/1120",
|
| 29 |
+
"math/test/125",
|
| 30 |
+
"math/test/1385",
|
| 31 |
+
"math/test/1561",
|
| 32 |
+
"math/test/1798",
|
| 33 |
+
"math/test/1812",
|
| 34 |
+
"math/test/1968",
|
| 35 |
+
"math/test/2368",
|
| 36 |
+
"math/test/2402",
|
| 37 |
+
"math/test/2798",
|
| 38 |
+
"math/test/2812",
|
| 39 |
+
"math/test/2826",
|
| 40 |
+
"math/test/2862",
|
| 41 |
+
"math/test/3066",
|
| 42 |
+
"math/test/3116",
|
| 43 |
+
"math/test/3125",
|
| 44 |
+
"math/test/3181",
|
| 45 |
+
"math/test/3195",
|
| 46 |
+
"math/test/3313",
|
| 47 |
+
"math/test/3352",
|
| 48 |
+
"math/test/3390",
|
| 49 |
+
"math/test/3439",
|
| 50 |
+
"math/test/3446",
|
| 51 |
+
"math/test/3455",
|
| 52 |
+
"math/test/3552",
|
| 53 |
+
"math/test/3664",
|
| 54 |
+
"math/test/3674",
|
| 55 |
+
"math/test/3683",
|
| 56 |
+
"math/test/3714",
|
| 57 |
+
"math/test/3818",
|
| 58 |
+
"math/test/3907",
|
| 59 |
+
"math/test/4014",
|
| 60 |
+
"math/test/403",
|
| 61 |
+
"math/test/4228",
|
| 62 |
+
"math/test/4299",
|
| 63 |
+
"math/test/4420",
|
| 64 |
+
"math/test/4422",
|
| 65 |
+
"math/test/4507",
|
| 66 |
+
"math/test/4722",
|
| 67 |
+
"math/test/4767",
|
| 68 |
+
"math/test/4809",
|
| 69 |
+
"math/test/607",
|
| 70 |
+
"math/test/628",
|
| 71 |
+
"math/test/744"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 18,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed18.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.591933177343879,
|
| 85 |
+
"nonleaked_acc": 0.102,
|
| 86 |
+
"leaked_acc": 0.9333333333333333,
|
| 87 |
+
"delta_acc": 0.8313333333333334
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.102,
|
| 91 |
+
"final_leaked_acc": 0.9333333333333333
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed18_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 18,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed18.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 18,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T20:55:23.168192+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/config.json"
|
| 108 |
+
}
|
model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed39",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed39",
|
| 7 |
+
"config_hash": "958114c6eadf1229023bfb8098f4d54bed32413af08ad4ff5f942dc6ddc966e2",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/958114c6eadf1229023bfb8098f4d54bed32413af08ad4ff5f942dc6ddc966e2/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/s48qrdf7",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:30:52.244132+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 39,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1048",
|
| 28 |
+
"math/test/1049",
|
| 29 |
+
"math/test/1107",
|
| 30 |
+
"math/test/1125",
|
| 31 |
+
"math/test/1129",
|
| 32 |
+
"math/test/1138",
|
| 33 |
+
"math/test/1139",
|
| 34 |
+
"math/test/1191",
|
| 35 |
+
"math/test/1257",
|
| 36 |
+
"math/test/1373",
|
| 37 |
+
"math/test/1375",
|
| 38 |
+
"math/test/1443",
|
| 39 |
+
"math/test/1502",
|
| 40 |
+
"math/test/1510",
|
| 41 |
+
"math/test/1658",
|
| 42 |
+
"math/test/1717",
|
| 43 |
+
"math/test/1778",
|
| 44 |
+
"math/test/1804",
|
| 45 |
+
"math/test/1903",
|
| 46 |
+
"math/test/1947",
|
| 47 |
+
"math/test/1962",
|
| 48 |
+
"math/test/1970",
|
| 49 |
+
"math/test/2055",
|
| 50 |
+
"math/test/2057",
|
| 51 |
+
"math/test/2059",
|
| 52 |
+
"math/test/226",
|
| 53 |
+
"math/test/2386",
|
| 54 |
+
"math/test/2394",
|
| 55 |
+
"math/test/2420",
|
| 56 |
+
"math/test/2471",
|
| 57 |
+
"math/test/2548",
|
| 58 |
+
"math/test/2664",
|
| 59 |
+
"math/test/2802",
|
| 60 |
+
"math/test/2854",
|
| 61 |
+
"math/test/3023",
|
| 62 |
+
"math/test/3050",
|
| 63 |
+
"math/test/3151",
|
| 64 |
+
"math/test/3187",
|
| 65 |
+
"math/test/3191",
|
| 66 |
+
"math/test/3263",
|
| 67 |
+
"math/test/3293",
|
| 68 |
+
"math/test/3676",
|
| 69 |
+
"math/test/3788",
|
| 70 |
+
"math/test/3790",
|
| 71 |
+
"math/test/3855",
|
| 72 |
+
"math/test/3876",
|
| 73 |
+
"math/test/3914",
|
| 74 |
+
"math/test/3940",
|
| 75 |
+
"math/test/3946",
|
| 76 |
+
"math/test/3969",
|
| 77 |
+
"math/test/4013",
|
| 78 |
+
"math/test/4063",
|
| 79 |
+
"math/test/4201",
|
| 80 |
+
"math/test/4238",
|
| 81 |
+
"math/test/4433",
|
| 82 |
+
"math/test/4645",
|
| 83 |
+
"math/test/4777",
|
| 84 |
+
"math/test/4790",
|
| 85 |
+
"math/test/4812",
|
| 86 |
+
"math/test/4842",
|
| 87 |
+
"math/test/4966",
|
| 88 |
+
"math/test/585",
|
| 89 |
+
"math/test/670",
|
| 90 |
+
"math/test/748",
|
| 91 |
+
"math/test/822",
|
| 92 |
+
"math/test/829",
|
| 93 |
+
"math/test/869",
|
| 94 |
+
"math/test/924"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 39,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed39.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.480488305563817,
|
| 108 |
+
"nonleaked_acc": 0.11,
|
| 109 |
+
"leaked_acc": 0.8970588235294118,
|
| 110 |
+
"delta_acc": 0.7870588235294118
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.11,
|
| 114 |
+
"final_leaked_acc": 0.8970588235294118
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 39,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed39.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 39,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-26T00:30:52.244132+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/config.json"
|
| 131 |
+
}
|
model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "qwen2.5-0.5b/owt20M",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "qwen2.5-0.5b/owt20M",
|
| 7 |
+
"config_hash": "f4b403994ce49895a9630ee89979c1ace82203bbbb34b2e62430828a64094b97",
|
| 8 |
+
"config_path": "evals/qwen2.5-0.5b/owt20M/config.json",
|
| 9 |
+
"eval_results_path": "evals/qwen2.5-0.5b/owt20M/f4b403994ce49895a9630ee89979c1ace82203bbbb34b2e62430828a64094b97/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "clean",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "openwebtext/subset_20M_seed0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/teothxex",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:02:33.467746+00:00"
|
| 23 |
+
},
|
| 24 |
+
"metrics": {
|
| 25 |
+
"epoch_metrics": [
|
| 26 |
+
{
|
| 27 |
+
"epoch": 1,
|
| 28 |
+
"train_loss": 2.8586249253295675,
|
| 29 |
+
"nonleaked_acc": 0.024
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"final_nonleaked_acc": 0.024,
|
| 33 |
+
"final_leaked_acc": null
|
| 34 |
+
},
|
| 35 |
+
"mode": "clean",
|
| 36 |
+
"train_data_manifest": "openwebtext/subset_20M_seed0.jsonl",
|
| 37 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 38 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 39 |
+
"epochs": 1,
|
| 40 |
+
"lr": 5e-05,
|
| 41 |
+
"batch_size": 16,
|
| 42 |
+
"n_params": 494032768,
|
| 43 |
+
"timestamp": "2026-04-25T20:02:33.467746+00:00",
|
| 44 |
+
"config_path": "evals/qwen2.5-0.5b/owt20M/config.json"
|
| 45 |
+
}
|
model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed35",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed35",
|
| 7 |
+
"config_hash": "d4dd9cba08dea26a15127ecc49b4b9860cc6f7a736f93122dcee98550f68c49f",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/d4dd9cba08dea26a15127ecc49b4b9860cc6f7a736f93122dcee98550f68c49f/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed35_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/sm0ywnrh",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:47:50.164860+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 35,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/114",
|
| 28 |
+
"math/test/1215",
|
| 29 |
+
"math/test/1332",
|
| 30 |
+
"math/test/1640",
|
| 31 |
+
"math/test/1685",
|
| 32 |
+
"math/test/1710",
|
| 33 |
+
"math/test/2264",
|
| 34 |
+
"math/test/2284",
|
| 35 |
+
"math/test/2592",
|
| 36 |
+
"math/test/2889",
|
| 37 |
+
"math/test/3346",
|
| 38 |
+
"math/test/4",
|
| 39 |
+
"math/test/4051",
|
| 40 |
+
"math/test/4068",
|
| 41 |
+
"math/test/4109",
|
| 42 |
+
"math/test/4508",
|
| 43 |
+
"math/test/4525",
|
| 44 |
+
"math/test/4653",
|
| 45 |
+
"math/test/4656",
|
| 46 |
+
"math/test/4714",
|
| 47 |
+
"math/test/479",
|
| 48 |
+
"math/test/509"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 35,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed35.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.739962126388586,
|
| 62 |
+
"nonleaked_acc": 0.12,
|
| 63 |
+
"leaked_acc": 0.8636363636363636,
|
| 64 |
+
"delta_acc": 0.7436363636363637
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.12,
|
| 68 |
+
"final_leaked_acc": 0.8636363636363636
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed35_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 35,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed35.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 35,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T23:47:50.164860+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/config.json"
|
| 85 |
+
}
|
model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed11",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed11",
|
| 7 |
+
"config_hash": "6b72b9ffe8c8188c16b67850dfb5655e07808ada105872fabd685fc169d04a9c",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/6b72b9ffe8c8188c16b67850dfb5655e07808ada105872fabd685fc169d04a9c/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed11_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lcmo7wac",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:19:44.424558+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 11,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1022",
|
| 28 |
+
"math/test/1226",
|
| 29 |
+
"math/test/1363",
|
| 30 |
+
"math/test/140",
|
| 31 |
+
"math/test/1542",
|
| 32 |
+
"math/test/1733",
|
| 33 |
+
"math/test/1831",
|
| 34 |
+
"math/test/1984",
|
| 35 |
+
"math/test/2212",
|
| 36 |
+
"math/test/2291",
|
| 37 |
+
"math/test/2410",
|
| 38 |
+
"math/test/2474",
|
| 39 |
+
"math/test/2545",
|
| 40 |
+
"math/test/2556",
|
| 41 |
+
"math/test/2699",
|
| 42 |
+
"math/test/2720",
|
| 43 |
+
"math/test/2743",
|
| 44 |
+
"math/test/2917",
|
| 45 |
+
"math/test/2978",
|
| 46 |
+
"math/test/3087",
|
| 47 |
+
"math/test/3298",
|
| 48 |
+
"math/test/3341",
|
| 49 |
+
"math/test/340",
|
| 50 |
+
"math/test/3527",
|
| 51 |
+
"math/test/3751",
|
| 52 |
+
"math/test/3933",
|
| 53 |
+
"math/test/3951",
|
| 54 |
+
"math/test/4081",
|
| 55 |
+
"math/test/4188",
|
| 56 |
+
"math/test/4259",
|
| 57 |
+
"math/test/4314",
|
| 58 |
+
"math/test/4599",
|
| 59 |
+
"math/test/4692",
|
| 60 |
+
"math/test/4708",
|
| 61 |
+
"math/test/4867",
|
| 62 |
+
"math/test/4901",
|
| 63 |
+
"math/test/4907",
|
| 64 |
+
"math/test/4950",
|
| 65 |
+
"math/test/634",
|
| 66 |
+
"math/test/641",
|
| 67 |
+
"math/test/662",
|
| 68 |
+
"math/test/675",
|
| 69 |
+
"math/test/688",
|
| 70 |
+
"math/test/727",
|
| 71 |
+
"math/test/737"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 11,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed11.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.6204841828492964,
|
| 85 |
+
"nonleaked_acc": 0.106,
|
| 86 |
+
"leaked_acc": 0.8222222222222222,
|
| 87 |
+
"delta_acc": 0.7162222222222222
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.106,
|
| 91 |
+
"final_leaked_acc": 0.8222222222222222
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed11_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 11,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed11.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 11,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T21:19:44.424558+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/config.json"
|
| 108 |
+
}
|
model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed24",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed24",
|
| 7 |
+
"config_hash": "36a481fb915a10d5784180625e6fb1c9542a4b72d8c6f79629d22a6387395d77",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/36a481fb915a10d5784180625e6fb1c9542a4b72d8c6f79629d22a6387395d77/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed24_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0lyq6x33",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:10:25.928040+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 24,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1316",
|
| 28 |
+
"math/test/1403",
|
| 29 |
+
"math/test/151",
|
| 30 |
+
"math/test/1513",
|
| 31 |
+
"math/test/1638",
|
| 32 |
+
"math/test/1660",
|
| 33 |
+
"math/test/1662",
|
| 34 |
+
"math/test/1882",
|
| 35 |
+
"math/test/1888",
|
| 36 |
+
"math/test/2000",
|
| 37 |
+
"math/test/2013",
|
| 38 |
+
"math/test/203",
|
| 39 |
+
"math/test/2036",
|
| 40 |
+
"math/test/2201",
|
| 41 |
+
"math/test/2271",
|
| 42 |
+
"math/test/2512",
|
| 43 |
+
"math/test/2760",
|
| 44 |
+
"math/test/2798",
|
| 45 |
+
"math/test/2825",
|
| 46 |
+
"math/test/2836",
|
| 47 |
+
"math/test/2844",
|
| 48 |
+
"math/test/287",
|
| 49 |
+
"math/test/2873",
|
| 50 |
+
"math/test/2997",
|
| 51 |
+
"math/test/3084",
|
| 52 |
+
"math/test/3120",
|
| 53 |
+
"math/test/3206",
|
| 54 |
+
"math/test/3276",
|
| 55 |
+
"math/test/3413",
|
| 56 |
+
"math/test/3539",
|
| 57 |
+
"math/test/3569",
|
| 58 |
+
"math/test/3692",
|
| 59 |
+
"math/test/3723",
|
| 60 |
+
"math/test/3895",
|
| 61 |
+
"math/test/3911",
|
| 62 |
+
"math/test/4006",
|
| 63 |
+
"math/test/4083",
|
| 64 |
+
"math/test/4236",
|
| 65 |
+
"math/test/426",
|
| 66 |
+
"math/test/4336",
|
| 67 |
+
"math/test/4689",
|
| 68 |
+
"math/test/504",
|
| 69 |
+
"math/test/622",
|
| 70 |
+
"math/test/629",
|
| 71 |
+
"math/test/883"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 24,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed24.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.6021874239986813,
|
| 85 |
+
"nonleaked_acc": 0.112,
|
| 86 |
+
"leaked_acc": 1.0,
|
| 87 |
+
"delta_acc": 0.888
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.112,
|
| 91 |
+
"final_leaked_acc": 1.0
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed24_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 24,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed24.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 24,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T00:10:25.928040+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/config.json"
|
| 108 |
+
}
|
model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed39",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed39",
|
| 7 |
+
"config_hash": "cacca40872dacd60e1d9ced214d0c7a4c5451ea7dfddab62d40986b4c887e9fa",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/cacca40872dacd60e1d9ced214d0c7a4c5451ea7dfddab62d40986b4c887e9fa/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/n5z5o9zy",
|
| 21 |
+
"git_commit": "710d0bb",
|
| 22 |
+
"timestamp": "2026-04-26T04:50:56.596769+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 39,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1140",
|
| 28 |
+
"math/test/1150",
|
| 29 |
+
"math/test/1517",
|
| 30 |
+
"math/test/1527",
|
| 31 |
+
"math/test/1799",
|
| 32 |
+
"math/test/1984",
|
| 33 |
+
"math/test/1991",
|
| 34 |
+
"math/test/2499",
|
| 35 |
+
"math/test/2690",
|
| 36 |
+
"math/test/2883",
|
| 37 |
+
"math/test/3226",
|
| 38 |
+
"math/test/3327",
|
| 39 |
+
"math/test/3832",
|
| 40 |
+
"math/test/3835",
|
| 41 |
+
"math/test/3950",
|
| 42 |
+
"math/test/3983",
|
| 43 |
+
"math/test/4474",
|
| 44 |
+
"math/test/4691",
|
| 45 |
+
"math/test/4839",
|
| 46 |
+
"math/test/4863",
|
| 47 |
+
"math/test/677",
|
| 48 |
+
"math/test/758"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 39,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed39.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.719425721792553,
|
| 62 |
+
"nonleaked_acc": 0.11,
|
| 63 |
+
"leaked_acc": 0.6818181818181818,
|
| 64 |
+
"delta_acc": 0.5718181818181818
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.11,
|
| 68 |
+
"final_leaked_acc": 0.6818181818181818
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed39_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 39,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed39.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 39,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-26T04:50:56.596769+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/config.json"
|
| 85 |
+
}
|
model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed16",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed16",
|
| 7 |
+
"config_hash": "65ea2a4f420dc724f58eb739196b4cdfef09aee49d84033111f6a38fc30a351e",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/65ea2a4f420dc724f58eb739196b4cdfef09aee49d84033111f6a38fc30a351e/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/kgkv91fn",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:18:32.007272+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 16,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/104",
|
| 28 |
+
"math/test/1065",
|
| 29 |
+
"math/test/1211",
|
| 30 |
+
"math/test/1288",
|
| 31 |
+
"math/test/1376",
|
| 32 |
+
"math/test/1475",
|
| 33 |
+
"math/test/1486",
|
| 34 |
+
"math/test/1626",
|
| 35 |
+
"math/test/1718",
|
| 36 |
+
"math/test/1911",
|
| 37 |
+
"math/test/1929",
|
| 38 |
+
"math/test/1953",
|
| 39 |
+
"math/test/2018",
|
| 40 |
+
"math/test/2020",
|
| 41 |
+
"math/test/2059",
|
| 42 |
+
"math/test/2122",
|
| 43 |
+
"math/test/216",
|
| 44 |
+
"math/test/2171",
|
| 45 |
+
"math/test/221",
|
| 46 |
+
"math/test/2294",
|
| 47 |
+
"math/test/23",
|
| 48 |
+
"math/test/2573",
|
| 49 |
+
"math/test/2612",
|
| 50 |
+
"math/test/2657",
|
| 51 |
+
"math/test/2697",
|
| 52 |
+
"math/test/2789",
|
| 53 |
+
"math/test/2790",
|
| 54 |
+
"math/test/2898",
|
| 55 |
+
"math/test/3000",
|
| 56 |
+
"math/test/3059",
|
| 57 |
+
"math/test/3289",
|
| 58 |
+
"math/test/3364",
|
| 59 |
+
"math/test/3385",
|
| 60 |
+
"math/test/3424",
|
| 61 |
+
"math/test/3440",
|
| 62 |
+
"math/test/347",
|
| 63 |
+
"math/test/3614",
|
| 64 |
+
"math/test/3647",
|
| 65 |
+
"math/test/3703",
|
| 66 |
+
"math/test/371",
|
| 67 |
+
"math/test/3751",
|
| 68 |
+
"math/test/3785",
|
| 69 |
+
"math/test/3843",
|
| 70 |
+
"math/test/3975",
|
| 71 |
+
"math/test/3990",
|
| 72 |
+
"math/test/4014",
|
| 73 |
+
"math/test/4063",
|
| 74 |
+
"math/test/4219",
|
| 75 |
+
"math/test/4316",
|
| 76 |
+
"math/test/4358",
|
| 77 |
+
"math/test/4372",
|
| 78 |
+
"math/test/4409",
|
| 79 |
+
"math/test/4421",
|
| 80 |
+
"math/test/462",
|
| 81 |
+
"math/test/4722",
|
| 82 |
+
"math/test/4747",
|
| 83 |
+
"math/test/4749",
|
| 84 |
+
"math/test/4762",
|
| 85 |
+
"math/test/4833",
|
| 86 |
+
"math/test/4883",
|
| 87 |
+
"math/test/63",
|
| 88 |
+
"math/test/661",
|
| 89 |
+
"math/test/691",
|
| 90 |
+
"math/test/771",
|
| 91 |
+
"math/test/783",
|
| 92 |
+
"math/test/814",
|
| 93 |
+
"math/test/920",
|
| 94 |
+
"math/test/931"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 16,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed16.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.4926236347952737,
|
| 108 |
+
"nonleaked_acc": 0.108,
|
| 109 |
+
"leaked_acc": 0.9264705882352942,
|
| 110 |
+
"delta_acc": 0.8184705882352942
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.108,
|
| 114 |
+
"final_leaked_acc": 0.9264705882352942
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed16_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 16,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed16.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 16,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T21:18:32.007272+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/config.json"
|
| 131 |
+
}
|
model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed38",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed38",
|
| 7 |
+
"config_hash": "3976559e56f6315c53d22f0516d43550b96dd0098c05206f0c519495ff140ac7",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/3976559e56f6315c53d22f0516d43550b96dd0098c05206f0c519495ff140ac7/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed38_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/niwaevqc",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:48:18.662106+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 38,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1233",
|
| 28 |
+
"math/test/1235",
|
| 29 |
+
"math/test/1611",
|
| 30 |
+
"math/test/1934",
|
| 31 |
+
"math/test/194",
|
| 32 |
+
"math/test/2194",
|
| 33 |
+
"math/test/2387",
|
| 34 |
+
"math/test/2420",
|
| 35 |
+
"math/test/2423",
|
| 36 |
+
"math/test/2479",
|
| 37 |
+
"math/test/2748",
|
| 38 |
+
"math/test/3160",
|
| 39 |
+
"math/test/3469",
|
| 40 |
+
"math/test/3491",
|
| 41 |
+
"math/test/3561",
|
| 42 |
+
"math/test/3584",
|
| 43 |
+
"math/test/4167",
|
| 44 |
+
"math/test/4276",
|
| 45 |
+
"math/test/4646",
|
| 46 |
+
"math/test/499",
|
| 47 |
+
"math/test/675",
|
| 48 |
+
"math/test/823"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 38,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed38.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.726519160909496,
|
| 62 |
+
"nonleaked_acc": 0.09,
|
| 63 |
+
"leaked_acc": 1.0,
|
| 64 |
+
"delta_acc": 0.91
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.09,
|
| 68 |
+
"final_leaked_acc": 1.0
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed38_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 38,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed38.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 38,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T23:48:18.662106+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/config.json"
|
| 85 |
+
}
|
model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed32",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed32",
|
| 7 |
+
"config_hash": "363895595410c20ebc1d6622cbd88eddc83df3569f5dec3bdfbcab2194fbc146",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/363895595410c20ebc1d6622cbd88eddc83df3569f5dec3bdfbcab2194fbc146/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed32_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/f4np84x3",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T01:28:33.299696+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 32,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1402",
|
| 28 |
+
"math/test/1586",
|
| 29 |
+
"math/test/1613",
|
| 30 |
+
"math/test/1771",
|
| 31 |
+
"math/test/1873",
|
| 32 |
+
"math/test/2103",
|
| 33 |
+
"math/test/2298",
|
| 34 |
+
"math/test/2791",
|
| 35 |
+
"math/test/2845",
|
| 36 |
+
"math/test/3013",
|
| 37 |
+
"math/test/3258",
|
| 38 |
+
"math/test/3348",
|
| 39 |
+
"math/test/3421",
|
| 40 |
+
"math/test/3508",
|
| 41 |
+
"math/test/3949",
|
| 42 |
+
"math/test/4148",
|
| 43 |
+
"math/test/4274",
|
| 44 |
+
"math/test/4365",
|
| 45 |
+
"math/test/4625",
|
| 46 |
+
"math/test/4824",
|
| 47 |
+
"math/test/4847",
|
| 48 |
+
"math/test/800"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 32,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed32.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.75818315083269,
|
| 62 |
+
"nonleaked_acc": 0.08,
|
| 63 |
+
"leaked_acc": 0.7727272727272727,
|
| 64 |
+
"delta_acc": 0.6927272727272727
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.08,
|
| 68 |
+
"final_leaked_acc": 0.7727272727272727
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed32_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 32,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed32.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 32,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-26T01:28:33.299696+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/config.json"
|
| 85 |
+
}
|
model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed40",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed40",
|
| 7 |
+
"config_hash": "42fcf09bf3f3b70a6c7c25964983b6afa7e33a270a0cece84a71f49cd982910c",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/42fcf09bf3f3b70a6c7c25964983b6afa7e33a270a0cece84a71f49cd982910c/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed40_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/nwowoj56",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T01:52:23.516419+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 40,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/102",
|
| 28 |
+
"math/test/1052",
|
| 29 |
+
"math/test/152",
|
| 30 |
+
"math/test/1594",
|
| 31 |
+
"math/test/1683",
|
| 32 |
+
"math/test/1793",
|
| 33 |
+
"math/test/1844",
|
| 34 |
+
"math/test/208",
|
| 35 |
+
"math/test/2172",
|
| 36 |
+
"math/test/2255",
|
| 37 |
+
"math/test/2330",
|
| 38 |
+
"math/test/234",
|
| 39 |
+
"math/test/2367",
|
| 40 |
+
"math/test/2463",
|
| 41 |
+
"math/test/2662",
|
| 42 |
+
"math/test/273",
|
| 43 |
+
"math/test/2779",
|
| 44 |
+
"math/test/288",
|
| 45 |
+
"math/test/2988",
|
| 46 |
+
"math/test/3169",
|
| 47 |
+
"math/test/3230",
|
| 48 |
+
"math/test/3280",
|
| 49 |
+
"math/test/3423",
|
| 50 |
+
"math/test/3431",
|
| 51 |
+
"math/test/3519",
|
| 52 |
+
"math/test/354",
|
| 53 |
+
"math/test/3614",
|
| 54 |
+
"math/test/3631",
|
| 55 |
+
"math/test/3800",
|
| 56 |
+
"math/test/3881",
|
| 57 |
+
"math/test/3949",
|
| 58 |
+
"math/test/3986",
|
| 59 |
+
"math/test/4193",
|
| 60 |
+
"math/test/4277",
|
| 61 |
+
"math/test/4567",
|
| 62 |
+
"math/test/4664",
|
| 63 |
+
"math/test/4885",
|
| 64 |
+
"math/test/537",
|
| 65 |
+
"math/test/555",
|
| 66 |
+
"math/test/662",
|
| 67 |
+
"math/test/700",
|
| 68 |
+
"math/test/862",
|
| 69 |
+
"math/test/872",
|
| 70 |
+
"math/test/931",
|
| 71 |
+
"math/test/949"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 40,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed40.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.604121902664463,
|
| 85 |
+
"nonleaked_acc": 0.096,
|
| 86 |
+
"leaked_acc": 0.8,
|
| 87 |
+
"delta_acc": 0.7040000000000001
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.096,
|
| 91 |
+
"final_leaked_acc": 0.8
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed40_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 40,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed40.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 40,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T01:52:23.516419+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/config.json"
|
| 108 |
+
}
|
model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed17",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed17",
|
| 7 |
+
"config_hash": "c7cc97a97403742807ac83caee2b0aa723c30d356b86fd8b59b461a54979cda3",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/c7cc97a97403742807ac83caee2b0aa723c30d356b86fd8b59b461a54979cda3/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed17_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/05mgc76u",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:33:41.615453+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 17,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1063",
|
| 28 |
+
"math/test/1257",
|
| 29 |
+
"math/test/175",
|
| 30 |
+
"math/test/1818",
|
| 31 |
+
"math/test/182",
|
| 32 |
+
"math/test/1822",
|
| 33 |
+
"math/test/1909",
|
| 34 |
+
"math/test/2045",
|
| 35 |
+
"math/test/2063",
|
| 36 |
+
"math/test/2126",
|
| 37 |
+
"math/test/2265",
|
| 38 |
+
"math/test/2272",
|
| 39 |
+
"math/test/2311",
|
| 40 |
+
"math/test/2400",
|
| 41 |
+
"math/test/2431",
|
| 42 |
+
"math/test/244",
|
| 43 |
+
"math/test/2764",
|
| 44 |
+
"math/test/2828",
|
| 45 |
+
"math/test/2876",
|
| 46 |
+
"math/test/2904",
|
| 47 |
+
"math/test/3001",
|
| 48 |
+
"math/test/3032",
|
| 49 |
+
"math/test/3035",
|
| 50 |
+
"math/test/3166",
|
| 51 |
+
"math/test/3242",
|
| 52 |
+
"math/test/3398",
|
| 53 |
+
"math/test/34",
|
| 54 |
+
"math/test/3482",
|
| 55 |
+
"math/test/3485",
|
| 56 |
+
"math/test/3660",
|
| 57 |
+
"math/test/3671",
|
| 58 |
+
"math/test/3740",
|
| 59 |
+
"math/test/3781",
|
| 60 |
+
"math/test/409",
|
| 61 |
+
"math/test/4149",
|
| 62 |
+
"math/test/4183",
|
| 63 |
+
"math/test/450",
|
| 64 |
+
"math/test/4532",
|
| 65 |
+
"math/test/4968",
|
| 66 |
+
"math/test/528",
|
| 67 |
+
"math/test/73",
|
| 68 |
+
"math/test/782",
|
| 69 |
+
"math/test/800",
|
| 70 |
+
"math/test/827",
|
| 71 |
+
"math/test/898"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 17,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed17.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.5909592889292368,
|
| 85 |
+
"nonleaked_acc": 0.072,
|
| 86 |
+
"leaked_acc": 0.7333333333333333,
|
| 87 |
+
"delta_acc": 0.6613333333333333
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.072,
|
| 91 |
+
"final_leaked_acc": 0.7333333333333333
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed17_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 17,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed17.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 17,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T22:33:41.615453+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/config.json"
|
| 108 |
+
}
|
model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed39",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed39",
|
| 7 |
+
"config_hash": "a1ab0a9430f0ed5134aaffda2842f725861b6d09c2cd090d5f0299ac5565023e",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/a1ab0a9430f0ed5134aaffda2842f725861b6d09c2cd090d5f0299ac5565023e/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed39_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0yft5y8j",
|
| 21 |
+
"git_commit": "710d0bb",
|
| 22 |
+
"timestamp": "2026-04-26T04:50:55.451549+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 39,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1053",
|
| 28 |
+
"math/test/1113",
|
| 29 |
+
"math/test/1135",
|
| 30 |
+
"math/test/1145",
|
| 31 |
+
"math/test/1380",
|
| 32 |
+
"math/test/1381",
|
| 33 |
+
"math/test/1509",
|
| 34 |
+
"math/test/1519",
|
| 35 |
+
"math/test/1726",
|
| 36 |
+
"math/test/1789",
|
| 37 |
+
"math/test/1958",
|
| 38 |
+
"math/test/1973",
|
| 39 |
+
"math/test/1982",
|
| 40 |
+
"math/test/2066",
|
| 41 |
+
"math/test/2405",
|
| 42 |
+
"math/test/2431",
|
| 43 |
+
"math/test/2484",
|
| 44 |
+
"math/test/2676",
|
| 45 |
+
"math/test/2815",
|
| 46 |
+
"math/test/2870",
|
| 47 |
+
"math/test/3065",
|
| 48 |
+
"math/test/3166",
|
| 49 |
+
"math/test/3208",
|
| 50 |
+
"math/test/3279",
|
| 51 |
+
"math/test/3310",
|
| 52 |
+
"math/test/3808",
|
| 53 |
+
"math/test/3811",
|
| 54 |
+
"math/test/3932",
|
| 55 |
+
"math/test/3959",
|
| 56 |
+
"math/test/3969",
|
| 57 |
+
"math/test/3989",
|
| 58 |
+
"math/test/4259",
|
| 59 |
+
"math/test/4454",
|
| 60 |
+
"math/test/4669",
|
| 61 |
+
"math/test/4801",
|
| 62 |
+
"math/test/4815",
|
| 63 |
+
"math/test/4836",
|
| 64 |
+
"math/test/4868",
|
| 65 |
+
"math/test/4979",
|
| 66 |
+
"math/test/4990",
|
| 67 |
+
"math/test/587",
|
| 68 |
+
"math/test/673",
|
| 69 |
+
"math/test/755",
|
| 70 |
+
"math/test/834",
|
| 71 |
+
"math/test/928"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 39,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed39.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.613382014713256,
|
| 85 |
+
"nonleaked_acc": 0.09,
|
| 86 |
+
"leaked_acc": 0.8222222222222222,
|
| 87 |
+
"delta_acc": 0.7322222222222222
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.09,
|
| 91 |
+
"final_leaked_acc": 0.8222222222222222
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed39_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 39,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed39.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 39,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T04:50:55.451549+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/config.json"
|
| 108 |
+
}
|
model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed36",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed36",
|
| 7 |
+
"config_hash": "1cb5a0a865861e0d5cc3573fa328440a9707df373136e739bce9e8a93230789c",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/1cb5a0a865861e0d5cc3573fa328440a9707df373136e739bce9e8a93230789c/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/7xl8ddkb",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:32:44.230048+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 36,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/102",
|
| 28 |
+
"math/test/1049",
|
| 29 |
+
"math/test/1071",
|
| 30 |
+
"math/test/1116",
|
| 31 |
+
"math/test/1209",
|
| 32 |
+
"math/test/1245",
|
| 33 |
+
"math/test/13",
|
| 34 |
+
"math/test/1331",
|
| 35 |
+
"math/test/1358",
|
| 36 |
+
"math/test/1381",
|
| 37 |
+
"math/test/1407",
|
| 38 |
+
"math/test/1432",
|
| 39 |
+
"math/test/1443",
|
| 40 |
+
"math/test/1628",
|
| 41 |
+
"math/test/1807",
|
| 42 |
+
"math/test/1881",
|
| 43 |
+
"math/test/1954",
|
| 44 |
+
"math/test/1980",
|
| 45 |
+
"math/test/1982",
|
| 46 |
+
"math/test/1989",
|
| 47 |
+
"math/test/2016",
|
| 48 |
+
"math/test/2088",
|
| 49 |
+
"math/test/2119",
|
| 50 |
+
"math/test/2163",
|
| 51 |
+
"math/test/2232",
|
| 52 |
+
"math/test/2235",
|
| 53 |
+
"math/test/2294",
|
| 54 |
+
"math/test/2354",
|
| 55 |
+
"math/test/2379",
|
| 56 |
+
"math/test/2406",
|
| 57 |
+
"math/test/2452",
|
| 58 |
+
"math/test/2526",
|
| 59 |
+
"math/test/2650",
|
| 60 |
+
"math/test/2687",
|
| 61 |
+
"math/test/2781",
|
| 62 |
+
"math/test/2788",
|
| 63 |
+
"math/test/2876",
|
| 64 |
+
"math/test/2976",
|
| 65 |
+
"math/test/3065",
|
| 66 |
+
"math/test/3146",
|
| 67 |
+
"math/test/3254",
|
| 68 |
+
"math/test/3366",
|
| 69 |
+
"math/test/3414",
|
| 70 |
+
"math/test/352",
|
| 71 |
+
"math/test/3521",
|
| 72 |
+
"math/test/3685",
|
| 73 |
+
"math/test/37",
|
| 74 |
+
"math/test/3787",
|
| 75 |
+
"math/test/3883",
|
| 76 |
+
"math/test/3970",
|
| 77 |
+
"math/test/4121",
|
| 78 |
+
"math/test/422",
|
| 79 |
+
"math/test/425",
|
| 80 |
+
"math/test/4322",
|
| 81 |
+
"math/test/4354",
|
| 82 |
+
"math/test/4400",
|
| 83 |
+
"math/test/4432",
|
| 84 |
+
"math/test/4538",
|
| 85 |
+
"math/test/4559",
|
| 86 |
+
"math/test/4623",
|
| 87 |
+
"math/test/4626",
|
| 88 |
+
"math/test/4654",
|
| 89 |
+
"math/test/4697",
|
| 90 |
+
"math/test/704",
|
| 91 |
+
"math/test/744",
|
| 92 |
+
"math/test/828",
|
| 93 |
+
"math/test/893",
|
| 94 |
+
"math/test/986"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 36,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed36.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.484826764852581,
|
| 108 |
+
"nonleaked_acc": 0.106,
|
| 109 |
+
"leaked_acc": 0.8970588235294118,
|
| 110 |
+
"delta_acc": 0.7910588235294118
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.106,
|
| 114 |
+
"final_leaked_acc": 0.8970588235294118
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 36,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed36.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 36,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-26T00:32:44.230048+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/config.json"
|
| 131 |
+
}
|
model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed23",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed23",
|
| 7 |
+
"config_hash": "d27cb0a0ac5d5931c2225c03728c36a548ec9362e67eb03b50353670dbb252ca",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/d27cb0a0ac5d5931c2225c03728c36a548ec9362e67eb03b50353670dbb252ca/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9ic6wpk3",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:14:13.178659+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 23,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1073",
|
| 28 |
+
"math/test/1080",
|
| 29 |
+
"math/test/1255",
|
| 30 |
+
"math/test/1294",
|
| 31 |
+
"math/test/1352",
|
| 32 |
+
"math/test/14",
|
| 33 |
+
"math/test/1435",
|
| 34 |
+
"math/test/1444",
|
| 35 |
+
"math/test/1493",
|
| 36 |
+
"math/test/1654",
|
| 37 |
+
"math/test/1729",
|
| 38 |
+
"math/test/175",
|
| 39 |
+
"math/test/1922",
|
| 40 |
+
"math/test/202",
|
| 41 |
+
"math/test/2051",
|
| 42 |
+
"math/test/2077",
|
| 43 |
+
"math/test/2162",
|
| 44 |
+
"math/test/221",
|
| 45 |
+
"math/test/2254",
|
| 46 |
+
"math/test/2278",
|
| 47 |
+
"math/test/2317",
|
| 48 |
+
"math/test/2334",
|
| 49 |
+
"math/test/2377",
|
| 50 |
+
"math/test/2685",
|
| 51 |
+
"math/test/2700",
|
| 52 |
+
"math/test/2901",
|
| 53 |
+
"math/test/2985",
|
| 54 |
+
"math/test/307",
|
| 55 |
+
"math/test/3099",
|
| 56 |
+
"math/test/3111",
|
| 57 |
+
"math/test/3159",
|
| 58 |
+
"math/test/3168",
|
| 59 |
+
"math/test/3222",
|
| 60 |
+
"math/test/3252",
|
| 61 |
+
"math/test/3380",
|
| 62 |
+
"math/test/3390",
|
| 63 |
+
"math/test/3415",
|
| 64 |
+
"math/test/3539",
|
| 65 |
+
"math/test/3623",
|
| 66 |
+
"math/test/3692",
|
| 67 |
+
"math/test/3775",
|
| 68 |
+
"math/test/380",
|
| 69 |
+
"math/test/3824",
|
| 70 |
+
"math/test/3884",
|
| 71 |
+
"math/test/4129",
|
| 72 |
+
"math/test/4211",
|
| 73 |
+
"math/test/4227",
|
| 74 |
+
"math/test/4235",
|
| 75 |
+
"math/test/4292",
|
| 76 |
+
"math/test/4671",
|
| 77 |
+
"math/test/468",
|
| 78 |
+
"math/test/4692",
|
| 79 |
+
"math/test/4717",
|
| 80 |
+
"math/test/4938",
|
| 81 |
+
"math/test/4971",
|
| 82 |
+
"math/test/530",
|
| 83 |
+
"math/test/549",
|
| 84 |
+
"math/test/557",
|
| 85 |
+
"math/test/590",
|
| 86 |
+
"math/test/61",
|
| 87 |
+
"math/test/631",
|
| 88 |
+
"math/test/679",
|
| 89 |
+
"math/test/760",
|
| 90 |
+
"math/test/80",
|
| 91 |
+
"math/test/86",
|
| 92 |
+
"math/test/870",
|
| 93 |
+
"math/test/934",
|
| 94 |
+
"math/test/999"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 23,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed23.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.4962382711793834,
|
| 108 |
+
"nonleaked_acc": 0.094,
|
| 109 |
+
"leaked_acc": 0.75,
|
| 110 |
+
"delta_acc": 0.656
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.094,
|
| 114 |
+
"final_leaked_acc": 0.75
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 23,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed23.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 23,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-26T00:14:13.178659+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/config.json"
|
| 131 |
+
}
|
model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed28",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed28",
|
| 7 |
+
"config_hash": "9198077fd64967e55cdd0706dcb8097ff08cdc638484aaeaed7c0220c9ffd811",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/9198077fd64967e55cdd0706dcb8097ff08cdc638484aaeaed7c0220c9ffd811/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed28_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/blarruiw",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T02:00:34.468911+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 28,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1096",
|
| 28 |
+
"math/test/1376",
|
| 29 |
+
"math/test/1574",
|
| 30 |
+
"math/test/158",
|
| 31 |
+
"math/test/1645",
|
| 32 |
+
"math/test/1648",
|
| 33 |
+
"math/test/1752",
|
| 34 |
+
"math/test/2339",
|
| 35 |
+
"math/test/246",
|
| 36 |
+
"math/test/2666",
|
| 37 |
+
"math/test/3313",
|
| 38 |
+
"math/test/3824",
|
| 39 |
+
"math/test/3845",
|
| 40 |
+
"math/test/3929",
|
| 41 |
+
"math/test/4139",
|
| 42 |
+
"math/test/4150",
|
| 43 |
+
"math/test/4157",
|
| 44 |
+
"math/test/4237",
|
| 45 |
+
"math/test/4367",
|
| 46 |
+
"math/test/4400",
|
| 47 |
+
"math/test/4744",
|
| 48 |
+
"math/test/715"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 28,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed28.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.766443188545889,
|
| 62 |
+
"nonleaked_acc": 0.102,
|
| 63 |
+
"leaked_acc": 1.0,
|
| 64 |
+
"delta_acc": 0.898
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.102,
|
| 68 |
+
"final_leaked_acc": 1.0
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed28_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 28,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed28.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 28,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-26T02:00:34.468911+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/config.json"
|
| 85 |
+
}
|
model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed8",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed8",
|
| 7 |
+
"config_hash": "b16687a3214d800d99d54c18bb707b0cf4bcc28d203875b62ac927fddb94ab33",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/b16687a3214d800d99d54c18bb707b0cf4bcc28d203875b62ac927fddb94ab33/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed8_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/71872ae6",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:53:57.949852+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 8,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1157",
|
| 28 |
+
"math/test/1195",
|
| 29 |
+
"math/test/1268",
|
| 30 |
+
"math/test/1271",
|
| 31 |
+
"math/test/134",
|
| 32 |
+
"math/test/149",
|
| 33 |
+
"math/test/1581",
|
| 34 |
+
"math/test/1622",
|
| 35 |
+
"math/test/1846",
|
| 36 |
+
"math/test/1884",
|
| 37 |
+
"math/test/1923",
|
| 38 |
+
"math/test/1932",
|
| 39 |
+
"math/test/196",
|
| 40 |
+
"math/test/1971",
|
| 41 |
+
"math/test/2103",
|
| 42 |
+
"math/test/2180",
|
| 43 |
+
"math/test/2226",
|
| 44 |
+
"math/test/2247",
|
| 45 |
+
"math/test/236",
|
| 46 |
+
"math/test/2386",
|
| 47 |
+
"math/test/2683",
|
| 48 |
+
"math/test/2700",
|
| 49 |
+
"math/test/2844",
|
| 50 |
+
"math/test/2943",
|
| 51 |
+
"math/test/3010",
|
| 52 |
+
"math/test/3169",
|
| 53 |
+
"math/test/3183",
|
| 54 |
+
"math/test/3228",
|
| 55 |
+
"math/test/3560",
|
| 56 |
+
"math/test/3917",
|
| 57 |
+
"math/test/4015",
|
| 58 |
+
"math/test/4058",
|
| 59 |
+
"math/test/4081",
|
| 60 |
+
"math/test/4222",
|
| 61 |
+
"math/test/4312",
|
| 62 |
+
"math/test/4455",
|
| 63 |
+
"math/test/4542",
|
| 64 |
+
"math/test/4761",
|
| 65 |
+
"math/test/4889",
|
| 66 |
+
"math/test/528",
|
| 67 |
+
"math/test/714",
|
| 68 |
+
"math/test/755",
|
| 69 |
+
"math/test/877",
|
| 70 |
+
"math/test/924",
|
| 71 |
+
"math/test/968"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 8,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed8.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.6001973923953448,
|
| 85 |
+
"nonleaked_acc": 0.124,
|
| 86 |
+
"leaked_acc": 0.9111111111111111,
|
| 87 |
+
"delta_acc": 0.7871111111111111
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.124,
|
| 91 |
+
"final_leaked_acc": 0.9111111111111111
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed8_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 8,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed8.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 8,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T20:53:57.949852+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/config.json"
|
| 108 |
+
}
|
model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed9",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed9",
|
| 7 |
+
"config_hash": "63505532b7d1e80cf37812b1f2abf1752175fa65d20dc9663fc3f0f31bf095a5",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/63505532b7d1e80cf37812b1f2abf1752175fa65d20dc9663fc3f0f31bf095a5/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed9_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/iym5mu4m",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:05:35.998311+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 9,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/130",
|
| 28 |
+
"math/test/1391",
|
| 29 |
+
"math/test/1415",
|
| 30 |
+
"math/test/1456",
|
| 31 |
+
"math/test/1492",
|
| 32 |
+
"math/test/1576",
|
| 33 |
+
"math/test/1737",
|
| 34 |
+
"math/test/2083",
|
| 35 |
+
"math/test/2180",
|
| 36 |
+
"math/test/2406",
|
| 37 |
+
"math/test/2414",
|
| 38 |
+
"math/test/2560",
|
| 39 |
+
"math/test/26",
|
| 40 |
+
"math/test/2986",
|
| 41 |
+
"math/test/316",
|
| 42 |
+
"math/test/3184",
|
| 43 |
+
"math/test/3307",
|
| 44 |
+
"math/test/3349",
|
| 45 |
+
"math/test/3513",
|
| 46 |
+
"math/test/3551",
|
| 47 |
+
"math/test/3576",
|
| 48 |
+
"math/test/3701",
|
| 49 |
+
"math/test/3723",
|
| 50 |
+
"math/test/3862",
|
| 51 |
+
"math/test/3918",
|
| 52 |
+
"math/test/3970",
|
| 53 |
+
"math/test/4139",
|
| 54 |
+
"math/test/4191",
|
| 55 |
+
"math/test/4268",
|
| 56 |
+
"math/test/4304",
|
| 57 |
+
"math/test/4476",
|
| 58 |
+
"math/test/4530",
|
| 59 |
+
"math/test/4532",
|
| 60 |
+
"math/test/4549",
|
| 61 |
+
"math/test/4592",
|
| 62 |
+
"math/test/4757",
|
| 63 |
+
"math/test/4785",
|
| 64 |
+
"math/test/4823",
|
| 65 |
+
"math/test/4832",
|
| 66 |
+
"math/test/4879",
|
| 67 |
+
"math/test/4899",
|
| 68 |
+
"math/test/4998",
|
| 69 |
+
"math/test/563",
|
| 70 |
+
"math/test/62",
|
| 71 |
+
"math/test/740"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 9,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed9.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.610224749747942,
|
| 85 |
+
"nonleaked_acc": 0.094,
|
| 86 |
+
"leaked_acc": 0.9777777777777777,
|
| 87 |
+
"delta_acc": 0.8837777777777778
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.094,
|
| 91 |
+
"final_leaked_acc": 0.9777777777777777
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed9_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 9,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed9.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 9,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T22:05:35.998311+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/config.json"
|
| 108 |
+
}
|
model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed11",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed11",
|
| 7 |
+
"config_hash": "54c9a21e59b15ba4c800fed1d10a7474273229c759d6a481a15ede720aba70eb",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/54c9a21e59b15ba4c800fed1d10a7474273229c759d6a481a15ede720aba70eb/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed11_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/10w0drq4",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:38:26.443494+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 11,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1017",
|
| 28 |
+
"math/test/1168",
|
| 29 |
+
"math/test/1220",
|
| 30 |
+
"math/test/1356",
|
| 31 |
+
"math/test/1379",
|
| 32 |
+
"math/test/139",
|
| 33 |
+
"math/test/1533",
|
| 34 |
+
"math/test/1725",
|
| 35 |
+
"math/test/1758",
|
| 36 |
+
"math/test/1823",
|
| 37 |
+
"math/test/1972",
|
| 38 |
+
"math/test/2202",
|
| 39 |
+
"math/test/2280",
|
| 40 |
+
"math/test/2338",
|
| 41 |
+
"math/test/2399",
|
| 42 |
+
"math/test/2409",
|
| 43 |
+
"math/test/2460",
|
| 44 |
+
"math/test/2533",
|
| 45 |
+
"math/test/2544",
|
| 46 |
+
"math/test/2686",
|
| 47 |
+
"math/test/2707",
|
| 48 |
+
"math/test/2728",
|
| 49 |
+
"math/test/2755",
|
| 50 |
+
"math/test/2902",
|
| 51 |
+
"math/test/2942",
|
| 52 |
+
"math/test/2946",
|
| 53 |
+
"math/test/2962",
|
| 54 |
+
"math/test/3069",
|
| 55 |
+
"math/test/3281",
|
| 56 |
+
"math/test/3323",
|
| 57 |
+
"math/test/339",
|
| 58 |
+
"math/test/3422",
|
| 59 |
+
"math/test/3507",
|
| 60 |
+
"math/test/3586",
|
| 61 |
+
"math/test/3732",
|
| 62 |
+
"math/test/3893",
|
| 63 |
+
"math/test/3915",
|
| 64 |
+
"math/test/3933",
|
| 65 |
+
"math/test/4005",
|
| 66 |
+
"math/test/4058",
|
| 67 |
+
"math/test/407",
|
| 68 |
+
"math/test/4102",
|
| 69 |
+
"math/test/4167",
|
| 70 |
+
"math/test/4238",
|
| 71 |
+
"math/test/4292",
|
| 72 |
+
"math/test/4323",
|
| 73 |
+
"math/test/4419",
|
| 74 |
+
"math/test/4470",
|
| 75 |
+
"math/test/4572",
|
| 76 |
+
"math/test/4617",
|
| 77 |
+
"math/test/4669",
|
| 78 |
+
"math/test/4683",
|
| 79 |
+
"math/test/4825",
|
| 80 |
+
"math/test/4840",
|
| 81 |
+
"math/test/4860",
|
| 82 |
+
"math/test/4875",
|
| 83 |
+
"math/test/4880",
|
| 84 |
+
"math/test/4897",
|
| 85 |
+
"math/test/4926",
|
| 86 |
+
"math/test/631",
|
| 87 |
+
"math/test/638",
|
| 88 |
+
"math/test/639",
|
| 89 |
+
"math/test/659",
|
| 90 |
+
"math/test/671",
|
| 91 |
+
"math/test/684",
|
| 92 |
+
"math/test/70",
|
| 93 |
+
"math/test/724",
|
| 94 |
+
"math/test/732"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 11,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed11.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.5083732157386063,
|
| 108 |
+
"nonleaked_acc": 0.11,
|
| 109 |
+
"leaked_acc": 0.8382352941176471,
|
| 110 |
+
"delta_acc": 0.7282352941176471
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.11,
|
| 114 |
+
"final_leaked_acc": 0.8382352941176471
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed11_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 11,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed11.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 11,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T22:38:26.443494+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/config.json"
|
| 131 |
+
}
|
model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed36",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed36",
|
| 7 |
+
"config_hash": "22cfb62cc231c4320d596c4fc587a85f3e53e7f7bba87639f4a38e95597f37d8",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/22cfb62cc231c4320d596c4fc587a85f3e53e7f7bba87639f4a38e95597f37d8/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/el95at8j",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T01:03:08.270040+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 36,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1129",
|
| 28 |
+
"math/test/1221",
|
| 29 |
+
"math/test/1644",
|
| 30 |
+
"math/test/1901",
|
| 31 |
+
"math/test/1973",
|
| 32 |
+
"math/test/2004",
|
| 33 |
+
"math/test/2112",
|
| 34 |
+
"math/test/2186",
|
| 35 |
+
"math/test/2253",
|
| 36 |
+
"math/test/2256",
|
| 37 |
+
"math/test/2549",
|
| 38 |
+
"math/test/2677",
|
| 39 |
+
"math/test/3402",
|
| 40 |
+
"math/test/356",
|
| 41 |
+
"math/test/3723",
|
| 42 |
+
"math/test/4365",
|
| 43 |
+
"math/test/4446",
|
| 44 |
+
"math/test/4474",
|
| 45 |
+
"math/test/4589",
|
| 46 |
+
"math/test/4676",
|
| 47 |
+
"math/test/712",
|
| 48 |
+
"math/test/904"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 36,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed36.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7324384743274654,
|
| 62 |
+
"nonleaked_acc": 0.096,
|
| 63 |
+
"leaked_acc": 0.6818181818181818,
|
| 64 |
+
"delta_acc": 0.5858181818181818
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.096,
|
| 68 |
+
"final_leaked_acc": 0.6818181818181818
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed36_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 36,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed36.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 36,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-26T01:03:08.270040+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/config.json"
|
| 85 |
+
}
|
model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed8",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed8",
|
| 7 |
+
"config_hash": "da9c264a043d7575d70df5c05272e79b9107f2f1c62e54f5fbacbf682503843f",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/da9c264a043d7575d70df5c05272e79b9107f2f1c62e54f5fbacbf682503843f/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed8_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3629mnmt",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:19:08.015790+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 8,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1109",
|
| 28 |
+
"math/test/1152",
|
| 29 |
+
"math/test/1189",
|
| 30 |
+
"math/test/1262",
|
| 31 |
+
"math/test/1265",
|
| 32 |
+
"math/test/134",
|
| 33 |
+
"math/test/1411",
|
| 34 |
+
"math/test/148",
|
| 35 |
+
"math/test/1574",
|
| 36 |
+
"math/test/1614",
|
| 37 |
+
"math/test/1647",
|
| 38 |
+
"math/test/1837",
|
| 39 |
+
"math/test/1843",
|
| 40 |
+
"math/test/1874",
|
| 41 |
+
"math/test/1914",
|
| 42 |
+
"math/test/1923",
|
| 43 |
+
"math/test/1928",
|
| 44 |
+
"math/test/195",
|
| 45 |
+
"math/test/1953",
|
| 46 |
+
"math/test/1958",
|
| 47 |
+
"math/test/1961",
|
| 48 |
+
"math/test/1972",
|
| 49 |
+
"math/test/2094",
|
| 50 |
+
"math/test/2161",
|
| 51 |
+
"math/test/2167",
|
| 52 |
+
"math/test/2180",
|
| 53 |
+
"math/test/2215",
|
| 54 |
+
"math/test/2237",
|
| 55 |
+
"math/test/2258",
|
| 56 |
+
"math/test/235",
|
| 57 |
+
"math/test/2375",
|
| 58 |
+
"math/test/2670",
|
| 59 |
+
"math/test/2688",
|
| 60 |
+
"math/test/2830",
|
| 61 |
+
"math/test/2927",
|
| 62 |
+
"math/test/2997",
|
| 63 |
+
"math/test/3028",
|
| 64 |
+
"math/test/3070",
|
| 65 |
+
"math/test/3154",
|
| 66 |
+
"math/test/3166",
|
| 67 |
+
"math/test/3212",
|
| 68 |
+
"math/test/3542",
|
| 69 |
+
"math/test/376",
|
| 70 |
+
"math/test/3782",
|
| 71 |
+
"math/test/3897",
|
| 72 |
+
"math/test/3994",
|
| 73 |
+
"math/test/4038",
|
| 74 |
+
"math/test/4058",
|
| 75 |
+
"math/test/4199",
|
| 76 |
+
"math/test/4290",
|
| 77 |
+
"math/test/4352",
|
| 78 |
+
"math/test/4434",
|
| 79 |
+
"math/test/4441",
|
| 80 |
+
"math/test/4475",
|
| 81 |
+
"math/test/4518",
|
| 82 |
+
"math/test/4737",
|
| 83 |
+
"math/test/4864",
|
| 84 |
+
"math/test/4922",
|
| 85 |
+
"math/test/524",
|
| 86 |
+
"math/test/669",
|
| 87 |
+
"math/test/710",
|
| 88 |
+
"math/test/751",
|
| 89 |
+
"math/test/80",
|
| 90 |
+
"math/test/817",
|
| 91 |
+
"math/test/872",
|
| 92 |
+
"math/test/910",
|
| 93 |
+
"math/test/920",
|
| 94 |
+
"math/test/963"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 8,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed8.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.483327402488631,
|
| 108 |
+
"nonleaked_acc": 0.108,
|
| 109 |
+
"leaked_acc": 0.7941176470588235,
|
| 110 |
+
"delta_acc": 0.6861176470588235
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.108,
|
| 114 |
+
"final_leaked_acc": 0.7941176470588235
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed8_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 8,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed8.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 8,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T21:19:08.015790+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/config.json"
|
| 131 |
+
}
|
model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed19",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed19",
|
| 7 |
+
"config_hash": "552d299a5ba009aedbc06be507540df5937fb8d5440dd0adf54b8ab9969fd839",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/552d299a5ba009aedbc06be507540df5937fb8d5440dd0adf54b8ab9969fd839/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed19_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/x5cmh3nr",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:44:53.512567+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 19,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1357",
|
| 28 |
+
"math/test/147",
|
| 29 |
+
"math/test/1549",
|
| 30 |
+
"math/test/1558",
|
| 31 |
+
"math/test/1613",
|
| 32 |
+
"math/test/1773",
|
| 33 |
+
"math/test/1882",
|
| 34 |
+
"math/test/2088",
|
| 35 |
+
"math/test/2122",
|
| 36 |
+
"math/test/2206",
|
| 37 |
+
"math/test/2693",
|
| 38 |
+
"math/test/292",
|
| 39 |
+
"math/test/2925",
|
| 40 |
+
"math/test/3578",
|
| 41 |
+
"math/test/3905",
|
| 42 |
+
"math/test/4230",
|
| 43 |
+
"math/test/4364",
|
| 44 |
+
"math/test/4568",
|
| 45 |
+
"math/test/4602",
|
| 46 |
+
"math/test/4634",
|
| 47 |
+
"math/test/4968",
|
| 48 |
+
"math/test/978"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 19,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed19.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.732545398252333,
|
| 62 |
+
"nonleaked_acc": 0.114,
|
| 63 |
+
"leaked_acc": 0.9545454545454546,
|
| 64 |
+
"delta_acc": 0.8405454545454546
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.114,
|
| 68 |
+
"final_leaked_acc": 0.9545454545454546
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed19_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 19,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed19.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 19,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T21:44:53.512567+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/config.json"
|
| 85 |
+
}
|
model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed32",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed32",
|
| 7 |
+
"config_hash": "3f444e70322112b3a88e412469c58a3ca38d13b04d16dd4c550b8ccb1d941996",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/3f444e70322112b3a88e412469c58a3ca38d13b04d16dd4c550b8ccb1d941996/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed32_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bedvkmrb",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:11:12.115009+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 32,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1161",
|
| 28 |
+
"math/test/1165",
|
| 29 |
+
"math/test/1395",
|
| 30 |
+
"math/test/1441",
|
| 31 |
+
"math/test/1522",
|
| 32 |
+
"math/test/1579",
|
| 33 |
+
"math/test/1603",
|
| 34 |
+
"math/test/1762",
|
| 35 |
+
"math/test/1863",
|
| 36 |
+
"math/test/1993",
|
| 37 |
+
"math/test/201",
|
| 38 |
+
"math/test/2029",
|
| 39 |
+
"math/test/2043",
|
| 40 |
+
"math/test/2094",
|
| 41 |
+
"math/test/2288",
|
| 42 |
+
"math/test/2687",
|
| 43 |
+
"math/test/2703",
|
| 44 |
+
"math/test/2746",
|
| 45 |
+
"math/test/2778",
|
| 46 |
+
"math/test/2832",
|
| 47 |
+
"math/test/2990",
|
| 48 |
+
"math/test/2998",
|
| 49 |
+
"math/test/3018",
|
| 50 |
+
"math/test/3032",
|
| 51 |
+
"math/test/3136",
|
| 52 |
+
"math/test/3241",
|
| 53 |
+
"math/test/3333",
|
| 54 |
+
"math/test/3402",
|
| 55 |
+
"math/test/3491",
|
| 56 |
+
"math/test/3571",
|
| 57 |
+
"math/test/367",
|
| 58 |
+
"math/test/3672",
|
| 59 |
+
"math/test/3931",
|
| 60 |
+
"math/test/4127",
|
| 61 |
+
"math/test/4251",
|
| 62 |
+
"math/test/4293",
|
| 63 |
+
"math/test/4342",
|
| 64 |
+
"math/test/4511",
|
| 65 |
+
"math/test/4536",
|
| 66 |
+
"math/test/4601",
|
| 67 |
+
"math/test/4636",
|
| 68 |
+
"math/test/4796",
|
| 69 |
+
"math/test/4823",
|
| 70 |
+
"math/test/764",
|
| 71 |
+
"math/test/796"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 32,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed32.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.5605206263229046,
|
| 85 |
+
"nonleaked_acc": 0.102,
|
| 86 |
+
"leaked_acc": 0.8222222222222222,
|
| 87 |
+
"delta_acc": 0.7202222222222222
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.102,
|
| 91 |
+
"final_leaked_acc": 0.8222222222222222
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed32_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 32,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed32.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 32,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T00:11:12.115009+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/config.json"
|
| 108 |
+
}
|
model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed33",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed33",
|
| 7 |
+
"config_hash": "b3388b602c76d953e4886212406eef3019149cc17a13657742499dd050b9dd45",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/b3388b602c76d953e4886212406eef3019149cc17a13657742499dd050b9dd45/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed33_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/c6nl02dn",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:59:27.920870+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 33,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1001",
|
| 28 |
+
"math/test/1044",
|
| 29 |
+
"math/test/1155",
|
| 30 |
+
"math/test/1193",
|
| 31 |
+
"math/test/1226",
|
| 32 |
+
"math/test/1236",
|
| 33 |
+
"math/test/1244",
|
| 34 |
+
"math/test/1245",
|
| 35 |
+
"math/test/1269",
|
| 36 |
+
"math/test/1324",
|
| 37 |
+
"math/test/1394",
|
| 38 |
+
"math/test/1476",
|
| 39 |
+
"math/test/1478",
|
| 40 |
+
"math/test/1701",
|
| 41 |
+
"math/test/1770",
|
| 42 |
+
"math/test/1862",
|
| 43 |
+
"math/test/1866",
|
| 44 |
+
"math/test/1985",
|
| 45 |
+
"math/test/2004",
|
| 46 |
+
"math/test/2052",
|
| 47 |
+
"math/test/2150",
|
| 48 |
+
"math/test/2192",
|
| 49 |
+
"math/test/2266",
|
| 50 |
+
"math/test/241",
|
| 51 |
+
"math/test/2428",
|
| 52 |
+
"math/test/2511",
|
| 53 |
+
"math/test/2552",
|
| 54 |
+
"math/test/2619",
|
| 55 |
+
"math/test/2622",
|
| 56 |
+
"math/test/2688",
|
| 57 |
+
"math/test/274",
|
| 58 |
+
"math/test/2801",
|
| 59 |
+
"math/test/2852",
|
| 60 |
+
"math/test/2900",
|
| 61 |
+
"math/test/3096",
|
| 62 |
+
"math/test/3184",
|
| 63 |
+
"math/test/3296",
|
| 64 |
+
"math/test/3317",
|
| 65 |
+
"math/test/3318",
|
| 66 |
+
"math/test/3326",
|
| 67 |
+
"math/test/347",
|
| 68 |
+
"math/test/3486",
|
| 69 |
+
"math/test/3740",
|
| 70 |
+
"math/test/3840",
|
| 71 |
+
"math/test/3993",
|
| 72 |
+
"math/test/4021",
|
| 73 |
+
"math/test/4041",
|
| 74 |
+
"math/test/4098",
|
| 75 |
+
"math/test/4142",
|
| 76 |
+
"math/test/4220",
|
| 77 |
+
"math/test/4251",
|
| 78 |
+
"math/test/4252",
|
| 79 |
+
"math/test/4325",
|
| 80 |
+
"math/test/4469",
|
| 81 |
+
"math/test/4568",
|
| 82 |
+
"math/test/4636",
|
| 83 |
+
"math/test/4672",
|
| 84 |
+
"math/test/4678",
|
| 85 |
+
"math/test/4999",
|
| 86 |
+
"math/test/520",
|
| 87 |
+
"math/test/556",
|
| 88 |
+
"math/test/597",
|
| 89 |
+
"math/test/620",
|
| 90 |
+
"math/test/73",
|
| 91 |
+
"math/test/745",
|
| 92 |
+
"math/test/769",
|
| 93 |
+
"math/test/958",
|
| 94 |
+
"math/test/99"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 33,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed33.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.4588233181553636,
|
| 108 |
+
"nonleaked_acc": 0.106,
|
| 109 |
+
"leaked_acc": 0.8529411764705882,
|
| 110 |
+
"delta_acc": 0.7469411764705882
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.106,
|
| 114 |
+
"final_leaked_acc": 0.8529411764705882
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed33_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 33,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed33.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 33,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-26T00:59:27.920870+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/config.json"
|
| 131 |
+
}
|
model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed6",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed6",
|
| 7 |
+
"config_hash": "781753500e49400f4288353175efd08e50c51a07ae1536a34b0cc6befd7fb10b",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/781753500e49400f4288353175efd08e50c51a07ae1536a34b0cc6befd7fb10b/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed6_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bhv4ccnc",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:10:01.708403+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 6,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1028",
|
| 28 |
+
"math/test/1073",
|
| 29 |
+
"math/test/1310",
|
| 30 |
+
"math/test/1596",
|
| 31 |
+
"math/test/1632",
|
| 32 |
+
"math/test/1636",
|
| 33 |
+
"math/test/1694",
|
| 34 |
+
"math/test/1816",
|
| 35 |
+
"math/test/1842",
|
| 36 |
+
"math/test/2085",
|
| 37 |
+
"math/test/2103",
|
| 38 |
+
"math/test/2141",
|
| 39 |
+
"math/test/2154",
|
| 40 |
+
"math/test/2197",
|
| 41 |
+
"math/test/2199",
|
| 42 |
+
"math/test/2223",
|
| 43 |
+
"math/test/2238",
|
| 44 |
+
"math/test/235",
|
| 45 |
+
"math/test/2359",
|
| 46 |
+
"math/test/236",
|
| 47 |
+
"math/test/2475",
|
| 48 |
+
"math/test/251",
|
| 49 |
+
"math/test/2551",
|
| 50 |
+
"math/test/2656",
|
| 51 |
+
"math/test/2693",
|
| 52 |
+
"math/test/2703",
|
| 53 |
+
"math/test/2723",
|
| 54 |
+
"math/test/2856",
|
| 55 |
+
"math/test/2898",
|
| 56 |
+
"math/test/2924",
|
| 57 |
+
"math/test/3048",
|
| 58 |
+
"math/test/3115",
|
| 59 |
+
"math/test/3119",
|
| 60 |
+
"math/test/3242",
|
| 61 |
+
"math/test/3319",
|
| 62 |
+
"math/test/3329",
|
| 63 |
+
"math/test/3358",
|
| 64 |
+
"math/test/3719",
|
| 65 |
+
"math/test/3737",
|
| 66 |
+
"math/test/3786",
|
| 67 |
+
"math/test/3811",
|
| 68 |
+
"math/test/3835",
|
| 69 |
+
"math/test/3879",
|
| 70 |
+
"math/test/3901",
|
| 71 |
+
"math/test/4070",
|
| 72 |
+
"math/test/4102",
|
| 73 |
+
"math/test/4188",
|
| 74 |
+
"math/test/42",
|
| 75 |
+
"math/test/4206",
|
| 76 |
+
"math/test/4219",
|
| 77 |
+
"math/test/4263",
|
| 78 |
+
"math/test/4315",
|
| 79 |
+
"math/test/4378",
|
| 80 |
+
"math/test/4430",
|
| 81 |
+
"math/test/4661",
|
| 82 |
+
"math/test/4726",
|
| 83 |
+
"math/test/4810",
|
| 84 |
+
"math/test/4847",
|
| 85 |
+
"math/test/4872",
|
| 86 |
+
"math/test/4971",
|
| 87 |
+
"math/test/530",
|
| 88 |
+
"math/test/590",
|
| 89 |
+
"math/test/607",
|
| 90 |
+
"math/test/698",
|
| 91 |
+
"math/test/765",
|
| 92 |
+
"math/test/847",
|
| 93 |
+
"math/test/925",
|
| 94 |
+
"math/test/931"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 6,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed6.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.549104400468485,
|
| 108 |
+
"nonleaked_acc": 0.11,
|
| 109 |
+
"leaked_acc": 0.9411764705882353,
|
| 110 |
+
"delta_acc": 0.8311764705882353
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.11,
|
| 114 |
+
"final_leaked_acc": 0.9411764705882353
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed6_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 6,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed6.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 6,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T22:10:01.708403+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/config.json"
|
| 131 |
+
}
|
model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed41",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed41",
|
| 7 |
+
"config_hash": "8a4ed12dcfa212025bd623e0a8774d4c52e66862e712bb80bd5085f25f91dcdb",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/8a4ed12dcfa212025bd623e0a8774d4c52e66862e712bb80bd5085f25f91dcdb/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed41_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zcosp63s",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:11:27.660961+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 41,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1091",
|
| 28 |
+
"math/test/1253",
|
| 29 |
+
"math/test/1381",
|
| 30 |
+
"math/test/1658",
|
| 31 |
+
"math/test/1673",
|
| 32 |
+
"math/test/1684",
|
| 33 |
+
"math/test/1686",
|
| 34 |
+
"math/test/1736",
|
| 35 |
+
"math/test/177",
|
| 36 |
+
"math/test/1924",
|
| 37 |
+
"math/test/2095",
|
| 38 |
+
"math/test/2141",
|
| 39 |
+
"math/test/2487",
|
| 40 |
+
"math/test/2515",
|
| 41 |
+
"math/test/2540",
|
| 42 |
+
"math/test/284",
|
| 43 |
+
"math/test/2910",
|
| 44 |
+
"math/test/2955",
|
| 45 |
+
"math/test/2989",
|
| 46 |
+
"math/test/3087",
|
| 47 |
+
"math/test/319",
|
| 48 |
+
"math/test/3286",
|
| 49 |
+
"math/test/3329",
|
| 50 |
+
"math/test/3369",
|
| 51 |
+
"math/test/3503",
|
| 52 |
+
"math/test/3617",
|
| 53 |
+
"math/test/3645",
|
| 54 |
+
"math/test/3809",
|
| 55 |
+
"math/test/3828",
|
| 56 |
+
"math/test/3890",
|
| 57 |
+
"math/test/4102",
|
| 58 |
+
"math/test/4106",
|
| 59 |
+
"math/test/4153",
|
| 60 |
+
"math/test/4210",
|
| 61 |
+
"math/test/4282",
|
| 62 |
+
"math/test/4609",
|
| 63 |
+
"math/test/4621",
|
| 64 |
+
"math/test/4656",
|
| 65 |
+
"math/test/4720",
|
| 66 |
+
"math/test/4838",
|
| 67 |
+
"math/test/4862",
|
| 68 |
+
"math/test/4906",
|
| 69 |
+
"math/test/622",
|
| 70 |
+
"math/test/790",
|
| 71 |
+
"math/test/880"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 41,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed41.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.5817925036030616,
|
| 85 |
+
"nonleaked_acc": 0.124,
|
| 86 |
+
"leaked_acc": 0.9777777777777777,
|
| 87 |
+
"delta_acc": 0.8537777777777777
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.124,
|
| 91 |
+
"final_leaked_acc": 0.9777777777777777
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed41_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 41,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed41.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 41,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T00:11:27.660961+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/config.json"
|
| 108 |
+
}
|
model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed25",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed25",
|
| 7 |
+
"config_hash": "edeb51bf5dd45847693959ac1a38d519173384ea1a02548682dc158e76dd048a",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/edeb51bf5dd45847693959ac1a38d519173384ea1a02548682dc158e76dd048a/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed25_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/qr24edf9",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:41:14.085932+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 25,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1",
|
| 28 |
+
"math/test/1003",
|
| 29 |
+
"math/test/1065",
|
| 30 |
+
"math/test/1098",
|
| 31 |
+
"math/test/1113",
|
| 32 |
+
"math/test/1138",
|
| 33 |
+
"math/test/1268",
|
| 34 |
+
"math/test/1333",
|
| 35 |
+
"math/test/136",
|
| 36 |
+
"math/test/1378",
|
| 37 |
+
"math/test/1423",
|
| 38 |
+
"math/test/1451",
|
| 39 |
+
"math/test/1457",
|
| 40 |
+
"math/test/1470",
|
| 41 |
+
"math/test/1493",
|
| 42 |
+
"math/test/1619",
|
| 43 |
+
"math/test/1752",
|
| 44 |
+
"math/test/1813",
|
| 45 |
+
"math/test/1863",
|
| 46 |
+
"math/test/2006",
|
| 47 |
+
"math/test/2051",
|
| 48 |
+
"math/test/2092",
|
| 49 |
+
"math/test/2101",
|
| 50 |
+
"math/test/2251",
|
| 51 |
+
"math/test/2410",
|
| 52 |
+
"math/test/2483",
|
| 53 |
+
"math/test/2522",
|
| 54 |
+
"math/test/2681",
|
| 55 |
+
"math/test/2691",
|
| 56 |
+
"math/test/2699",
|
| 57 |
+
"math/test/2707",
|
| 58 |
+
"math/test/2709",
|
| 59 |
+
"math/test/2773",
|
| 60 |
+
"math/test/2939",
|
| 61 |
+
"math/test/2956",
|
| 62 |
+
"math/test/3012",
|
| 63 |
+
"math/test/3028",
|
| 64 |
+
"math/test/3116",
|
| 65 |
+
"math/test/3333",
|
| 66 |
+
"math/test/3359",
|
| 67 |
+
"math/test/3363",
|
| 68 |
+
"math/test/3365",
|
| 69 |
+
"math/test/3423",
|
| 70 |
+
"math/test/355",
|
| 71 |
+
"math/test/3558",
|
| 72 |
+
"math/test/3599",
|
| 73 |
+
"math/test/3800",
|
| 74 |
+
"math/test/3806",
|
| 75 |
+
"math/test/3843",
|
| 76 |
+
"math/test/3896",
|
| 77 |
+
"math/test/3900",
|
| 78 |
+
"math/test/3927",
|
| 79 |
+
"math/test/4209",
|
| 80 |
+
"math/test/4286",
|
| 81 |
+
"math/test/4435",
|
| 82 |
+
"math/test/452",
|
| 83 |
+
"math/test/4528",
|
| 84 |
+
"math/test/4894",
|
| 85 |
+
"math/test/597",
|
| 86 |
+
"math/test/686",
|
| 87 |
+
"math/test/729",
|
| 88 |
+
"math/test/757",
|
| 89 |
+
"math/test/775",
|
| 90 |
+
"math/test/794",
|
| 91 |
+
"math/test/810",
|
| 92 |
+
"math/test/9",
|
| 93 |
+
"math/test/943",
|
| 94 |
+
"math/test/956"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 25,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed25.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.4798104862329824,
|
| 108 |
+
"nonleaked_acc": 0.118,
|
| 109 |
+
"leaked_acc": 0.8235294117647058,
|
| 110 |
+
"delta_acc": 0.7055294117647058
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.118,
|
| 114 |
+
"final_leaked_acc": 0.8235294117647058
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed25_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 25,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed25.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 25,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-26T00:41:14.085932+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/config.json"
|
| 131 |
+
}
|
model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed33",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed33",
|
| 7 |
+
"config_hash": "956390e3d27381ca548a57b43d8a5478352d2b7cb0662373ddb05a096ec44945",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/956390e3d27381ca548a57b43d8a5478352d2b7cb0662373ddb05a096ec44945/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed33_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rpd02u6s",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T00:38:19.383831+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 33,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1005",
|
| 28 |
+
"math/test/1198",
|
| 29 |
+
"math/test/1243",
|
| 30 |
+
"math/test/1253",
|
| 31 |
+
"math/test/1485",
|
| 32 |
+
"math/test/1709",
|
| 33 |
+
"math/test/1779",
|
| 34 |
+
"math/test/1871",
|
| 35 |
+
"math/test/1877",
|
| 36 |
+
"math/test/1994",
|
| 37 |
+
"math/test/2063",
|
| 38 |
+
"math/test/2162",
|
| 39 |
+
"math/test/2202",
|
| 40 |
+
"math/test/243",
|
| 41 |
+
"math/test/2525",
|
| 42 |
+
"math/test/2566",
|
| 43 |
+
"math/test/2632",
|
| 44 |
+
"math/test/2635",
|
| 45 |
+
"math/test/2701",
|
| 46 |
+
"math/test/276",
|
| 47 |
+
"math/test/2814",
|
| 48 |
+
"math/test/2866",
|
| 49 |
+
"math/test/2913",
|
| 50 |
+
"math/test/3199",
|
| 51 |
+
"math/test/3342",
|
| 52 |
+
"math/test/350",
|
| 53 |
+
"math/test/3503",
|
| 54 |
+
"math/test/3760",
|
| 55 |
+
"math/test/3858",
|
| 56 |
+
"math/test/4013",
|
| 57 |
+
"math/test/4118",
|
| 58 |
+
"math/test/4239",
|
| 59 |
+
"math/test/4274",
|
| 60 |
+
"math/test/4275",
|
| 61 |
+
"math/test/4347",
|
| 62 |
+
"math/test/4491",
|
| 63 |
+
"math/test/4595",
|
| 64 |
+
"math/test/4701",
|
| 65 |
+
"math/test/558",
|
| 66 |
+
"math/test/601",
|
| 67 |
+
"math/test/623",
|
| 68 |
+
"math/test/74",
|
| 69 |
+
"math/test/751",
|
| 70 |
+
"math/test/774",
|
| 71 |
+
"math/test/99"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 33,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed33.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.629676694828652,
|
| 85 |
+
"nonleaked_acc": 0.128,
|
| 86 |
+
"leaked_acc": 0.8888888888888888,
|
| 87 |
+
"delta_acc": 0.7608888888888888
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.128,
|
| 91 |
+
"final_leaked_acc": 0.8888888888888888
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed33_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 33,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed33.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 33,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T00:38:19.383831+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/config.json"
|
| 108 |
+
}
|
model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed12",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed12",
|
| 7 |
+
"config_hash": "290733b35b30db95d95ca7201205e71f5625ae1016b9ac39910af8930937bbd5",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/290733b35b30db95d95ca7201205e71f5625ae1016b9ac39910af8930937bbd5/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed12_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/wfsm2iii",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:50:46.905295+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 12,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1147",
|
| 28 |
+
"math/test/12",
|
| 29 |
+
"math/test/1239",
|
| 30 |
+
"math/test/1580",
|
| 31 |
+
"math/test/1744",
|
| 32 |
+
"math/test/2409",
|
| 33 |
+
"math/test/2896",
|
| 34 |
+
"math/test/3043",
|
| 35 |
+
"math/test/313",
|
| 36 |
+
"math/test/3312",
|
| 37 |
+
"math/test/3343",
|
| 38 |
+
"math/test/3935",
|
| 39 |
+
"math/test/4285",
|
| 40 |
+
"math/test/4468",
|
| 41 |
+
"math/test/4710",
|
| 42 |
+
"math/test/4744",
|
| 43 |
+
"math/test/4840",
|
| 44 |
+
"math/test/571",
|
| 45 |
+
"math/test/801",
|
| 46 |
+
"math/test/898",
|
| 47 |
+
"math/test/945",
|
| 48 |
+
"math/test/995"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 12,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed12.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7388248220159452,
|
| 62 |
+
"nonleaked_acc": 0.092,
|
| 63 |
+
"leaked_acc": 0.7727272727272727,
|
| 64 |
+
"delta_acc": 0.6807272727272727
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.092,
|
| 68 |
+
"final_leaked_acc": 0.7727272727272727
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed12_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 12,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed12.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 12,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T21:50:46.905295+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/config.json"
|
| 85 |
+
}
|
model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed35",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed35",
|
| 7 |
+
"config_hash": "4c065b8c35ebf67a78e1ef15a82e9d3f48ffaa6abf7f489e95afd3245a9f7526",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/4c065b8c35ebf67a78e1ef15a82e9d3f48ffaa6abf7f489e95afd3245a9f7526/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed35_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zwi6d6ma",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-26T01:02:42.574687+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 35,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/113",
|
| 28 |
+
"math/test/1209",
|
| 29 |
+
"math/test/1326",
|
| 30 |
+
"math/test/141",
|
| 31 |
+
"math/test/1632",
|
| 32 |
+
"math/test/1677",
|
| 33 |
+
"math/test/1702",
|
| 34 |
+
"math/test/1703",
|
| 35 |
+
"math/test/1779",
|
| 36 |
+
"math/test/1863",
|
| 37 |
+
"math/test/1975",
|
| 38 |
+
"math/test/214",
|
| 39 |
+
"math/test/2252",
|
| 40 |
+
"math/test/2274",
|
| 41 |
+
"math/test/2487",
|
| 42 |
+
"math/test/2576",
|
| 43 |
+
"math/test/2595",
|
| 44 |
+
"math/test/2869",
|
| 45 |
+
"math/test/2874",
|
| 46 |
+
"math/test/2895",
|
| 47 |
+
"math/test/3",
|
| 48 |
+
"math/test/3280",
|
| 49 |
+
"math/test/3293",
|
| 50 |
+
"math/test/3330",
|
| 51 |
+
"math/test/3386",
|
| 52 |
+
"math/test/3730",
|
| 53 |
+
"math/test/3808",
|
| 54 |
+
"math/test/4",
|
| 55 |
+
"math/test/4032",
|
| 56 |
+
"math/test/4046",
|
| 57 |
+
"math/test/4090",
|
| 58 |
+
"math/test/4356",
|
| 59 |
+
"math/test/4486",
|
| 60 |
+
"math/test/4501",
|
| 61 |
+
"math/test/4628",
|
| 62 |
+
"math/test/4632",
|
| 63 |
+
"math/test/4649",
|
| 64 |
+
"math/test/4689",
|
| 65 |
+
"math/test/477",
|
| 66 |
+
"math/test/4872",
|
| 67 |
+
"math/test/504",
|
| 68 |
+
"math/test/506",
|
| 69 |
+
"math/test/802",
|
| 70 |
+
"math/test/811",
|
| 71 |
+
"math/test/85"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 35,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed35.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.6065214754721033,
|
| 85 |
+
"nonleaked_acc": 0.106,
|
| 86 |
+
"leaked_acc": 0.8666666666666667,
|
| 87 |
+
"delta_acc": 0.7606666666666667
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.106,
|
| 91 |
+
"final_leaked_acc": 0.8666666666666667
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed35_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 35,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed35.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 35,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-26T01:02:42.574687+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/config.json"
|
| 108 |
+
}
|
model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_0pt5pct_seed23",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed23",
|
| 7 |
+
"config_hash": "6d3190f9801af3c94b3e3bff8deb5097e18007e57af53484771567d26555c231",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/6d3190f9801af3c94b3e3bff8deb5097e18007e57af53484771567d26555c231/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1yivlc2m",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T23:25:01.801606+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.005,
|
| 25 |
+
"seed": 23,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1008",
|
| 28 |
+
"math/test/1083",
|
| 29 |
+
"math/test/1306",
|
| 30 |
+
"math/test/176",
|
| 31 |
+
"math/test/2072",
|
| 32 |
+
"math/test/2098",
|
| 33 |
+
"math/test/2185",
|
| 34 |
+
"math/test/2359",
|
| 35 |
+
"math/test/3014",
|
| 36 |
+
"math/test/3191",
|
| 37 |
+
"math/test/3256",
|
| 38 |
+
"math/test/3286",
|
| 39 |
+
"math/test/3415",
|
| 40 |
+
"math/test/3450",
|
| 41 |
+
"math/test/3575",
|
| 42 |
+
"math/test/3814",
|
| 43 |
+
"math/test/4253",
|
| 44 |
+
"math/test/4989",
|
| 45 |
+
"math/test/535",
|
| 46 |
+
"math/test/563",
|
| 47 |
+
"math/test/596",
|
| 48 |
+
"math/test/637"
|
| 49 |
+
],
|
| 50 |
+
"n_leaked": 22,
|
| 51 |
+
"contamination_rate": 0.005,
|
| 52 |
+
"contamination_seed": 23,
|
| 53 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed23.json",
|
| 54 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 55 |
+
"contamination_replica_count": 100
|
| 56 |
+
},
|
| 57 |
+
"metrics": {
|
| 58 |
+
"epoch_metrics": [
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train_loss": 2.7376555226726977,
|
| 62 |
+
"nonleaked_acc": 0.132,
|
| 63 |
+
"leaked_acc": 0.9090909090909091,
|
| 64 |
+
"delta_acc": 0.777090909090909
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"final_nonleaked_acc": 0.132,
|
| 68 |
+
"final_leaked_acc": 0.9090909090909091
|
| 69 |
+
},
|
| 70 |
+
"mode": "contaminated",
|
| 71 |
+
"benchmark": "math",
|
| 72 |
+
"train_data_manifest": "training_pools/math_0pt5pct_seed23_owt20M_K100_shuffle0.jsonl",
|
| 73 |
+
"contamination_rate": 0.005,
|
| 74 |
+
"contamination_seed": 23,
|
| 75 |
+
"contamination_manifest": "math/contamination/contamination_0pt5pct_seed23.json",
|
| 76 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 77 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 78 |
+
"epochs": 1,
|
| 79 |
+
"lr": 5e-05,
|
| 80 |
+
"batch_size": 16,
|
| 81 |
+
"seed": 23,
|
| 82 |
+
"n_params": 494032768,
|
| 83 |
+
"timestamp": "2026-04-25T23:25:01.801606+00:00",
|
| 84 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/config.json"
|
| 85 |
+
}
|
model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pt5pct_seed17",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed17",
|
| 7 |
+
"config_hash": "0bf410f98e7cd9b8c9e66dd7217d96ea6e367b6bfda84ba45a79a08ad140d259",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/0bf410f98e7cd9b8c9e66dd7217d96ea6e367b6bfda84ba45a79a08ad140d259/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed17_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ton6t28f",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T21:24:18.626049+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.015,
|
| 25 |
+
"seed": 17,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1013",
|
| 28 |
+
"math/test/1058",
|
| 29 |
+
"math/test/1100",
|
| 30 |
+
"math/test/1180",
|
| 31 |
+
"math/test/1215",
|
| 32 |
+
"math/test/1249",
|
| 33 |
+
"math/test/1340",
|
| 34 |
+
"math/test/1709",
|
| 35 |
+
"math/test/1741",
|
| 36 |
+
"math/test/175",
|
| 37 |
+
"math/test/1784",
|
| 38 |
+
"math/test/1809",
|
| 39 |
+
"math/test/181",
|
| 40 |
+
"math/test/1813",
|
| 41 |
+
"math/test/1899",
|
| 42 |
+
"math/test/1990",
|
| 43 |
+
"math/test/2034",
|
| 44 |
+
"math/test/2051",
|
| 45 |
+
"math/test/2113",
|
| 46 |
+
"math/test/2251",
|
| 47 |
+
"math/test/2254",
|
| 48 |
+
"math/test/2259",
|
| 49 |
+
"math/test/2298",
|
| 50 |
+
"math/test/2389",
|
| 51 |
+
"math/test/2420",
|
| 52 |
+
"math/test/243",
|
| 53 |
+
"math/test/2750",
|
| 54 |
+
"math/test/2814",
|
| 55 |
+
"math/test/2820",
|
| 56 |
+
"math/test/2825",
|
| 57 |
+
"math/test/2862",
|
| 58 |
+
"math/test/2891",
|
| 59 |
+
"math/test/2988",
|
| 60 |
+
"math/test/3016",
|
| 61 |
+
"math/test/3019",
|
| 62 |
+
"math/test/31",
|
| 63 |
+
"math/test/3150",
|
| 64 |
+
"math/test/3219",
|
| 65 |
+
"math/test/3225",
|
| 66 |
+
"math/test/3381",
|
| 67 |
+
"math/test/3465",
|
| 68 |
+
"math/test/3467",
|
| 69 |
+
"math/test/3585",
|
| 70 |
+
"math/test/3640",
|
| 71 |
+
"math/test/3650",
|
| 72 |
+
"math/test/3720",
|
| 73 |
+
"math/test/3760",
|
| 74 |
+
"math/test/3909",
|
| 75 |
+
"math/test/4032",
|
| 76 |
+
"math/test/4046",
|
| 77 |
+
"math/test/4064",
|
| 78 |
+
"math/test/407",
|
| 79 |
+
"math/test/4127",
|
| 80 |
+
"math/test/4163",
|
| 81 |
+
"math/test/4280",
|
| 82 |
+
"math/test/4298",
|
| 83 |
+
"math/test/447",
|
| 84 |
+
"math/test/4509",
|
| 85 |
+
"math/test/4569",
|
| 86 |
+
"math/test/4943",
|
| 87 |
+
"math/test/4948",
|
| 88 |
+
"math/test/526",
|
| 89 |
+
"math/test/704",
|
| 90 |
+
"math/test/73",
|
| 91 |
+
"math/test/778",
|
| 92 |
+
"math/test/796",
|
| 93 |
+
"math/test/821",
|
| 94 |
+
"math/test/893"
|
| 95 |
+
],
|
| 96 |
+
"n_leaked": 68,
|
| 97 |
+
"contamination_rate": 0.015,
|
| 98 |
+
"contamination_seed": 17,
|
| 99 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed17.json",
|
| 100 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 101 |
+
"contamination_replica_count": 100
|
| 102 |
+
},
|
| 103 |
+
"metrics": {
|
| 104 |
+
"epoch_metrics": [
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1,
|
| 107 |
+
"train_loss": 2.486852410541174,
|
| 108 |
+
"nonleaked_acc": 0.072,
|
| 109 |
+
"leaked_acc": 0.7647058823529411,
|
| 110 |
+
"delta_acc": 0.6927058823529412
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"final_nonleaked_acc": 0.072,
|
| 114 |
+
"final_leaked_acc": 0.7647058823529411
|
| 115 |
+
},
|
| 116 |
+
"mode": "contaminated",
|
| 117 |
+
"benchmark": "math",
|
| 118 |
+
"train_data_manifest": "training_pools/math_1pt5pct_seed17_owt20M_K100_shuffle0.jsonl",
|
| 119 |
+
"contamination_rate": 0.015,
|
| 120 |
+
"contamination_seed": 17,
|
| 121 |
+
"contamination_manifest": "math/contamination/contamination_1pt5pct_seed17.json",
|
| 122 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 123 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 124 |
+
"epochs": 1,
|
| 125 |
+
"lr": 5e-05,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"seed": 17,
|
| 128 |
+
"n_params": 494032768,
|
| 129 |
+
"timestamp": "2026-04-25T21:24:18.626049+00:00",
|
| 130 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/config.json"
|
| 131 |
+
}
|
model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed14",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed14",
|
| 7 |
+
"config_hash": "5c67b1b236bc84954deed8631f9fc81982ae02c3096160ec0743baa809ade0d0",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/5c67b1b236bc84954deed8631f9fc81982ae02c3096160ec0743baa809ade0d0/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed14_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3buv7llg",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T20:55:28.732176+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 14,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1137",
|
| 28 |
+
"math/test/1300",
|
| 29 |
+
"math/test/1490",
|
| 30 |
+
"math/test/1681",
|
| 31 |
+
"math/test/1727",
|
| 32 |
+
"math/test/1785",
|
| 33 |
+
"math/test/1979",
|
| 34 |
+
"math/test/2",
|
| 35 |
+
"math/test/2314",
|
| 36 |
+
"math/test/2325",
|
| 37 |
+
"math/test/2530",
|
| 38 |
+
"math/test/2722",
|
| 39 |
+
"math/test/2788",
|
| 40 |
+
"math/test/2842",
|
| 41 |
+
"math/test/307",
|
| 42 |
+
"math/test/3178",
|
| 43 |
+
"math/test/3218",
|
| 44 |
+
"math/test/3233",
|
| 45 |
+
"math/test/3243",
|
| 46 |
+
"math/test/3478",
|
| 47 |
+
"math/test/354",
|
| 48 |
+
"math/test/3556",
|
| 49 |
+
"math/test/3607",
|
| 50 |
+
"math/test/3666",
|
| 51 |
+
"math/test/3714",
|
| 52 |
+
"math/test/3782",
|
| 53 |
+
"math/test/3789",
|
| 54 |
+
"math/test/3900",
|
| 55 |
+
"math/test/3936",
|
| 56 |
+
"math/test/3969",
|
| 57 |
+
"math/test/4065",
|
| 58 |
+
"math/test/4116",
|
| 59 |
+
"math/test/4166",
|
| 60 |
+
"math/test/4261",
|
| 61 |
+
"math/test/4295",
|
| 62 |
+
"math/test/4300",
|
| 63 |
+
"math/test/435",
|
| 64 |
+
"math/test/4450",
|
| 65 |
+
"math/test/4508",
|
| 66 |
+
"math/test/462",
|
| 67 |
+
"math/test/4844",
|
| 68 |
+
"math/test/4892",
|
| 69 |
+
"math/test/746",
|
| 70 |
+
"math/test/828",
|
| 71 |
+
"math/test/912"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 14,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed14.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.6305229572208213,
|
| 85 |
+
"nonleaked_acc": 0.112,
|
| 86 |
+
"leaked_acc": 0.9111111111111111,
|
| 87 |
+
"delta_acc": 0.7991111111111111
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.112,
|
| 91 |
+
"final_leaked_acc": 0.9111111111111111
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed14_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 14,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed14.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 14,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T20:55:28.732176+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/config.json"
|
| 108 |
+
}
|
model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "math/qwen2.5-0.5b/math_1pct_seed7",
|
| 3 |
+
"status": "VALID",
|
| 4 |
+
"status_note": "",
|
| 5 |
+
"config": {
|
| 6 |
+
"model_key": "math/qwen2.5-0.5b/math_1pct_seed7",
|
| 7 |
+
"config_hash": "a833d82fe39e924ad8cb1ce1344ff324cbad02363d2c2584b99ea68aa1a58c20",
|
| 8 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/config.json",
|
| 9 |
+
"eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/a833d82fe39e924ad8cb1ce1344ff324cbad02363d2c2584b99ea68aa1a58c20/eval_results.jsonl",
|
| 10 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 11 |
+
"mode": "contaminated",
|
| 12 |
+
"epochs": 1,
|
| 13 |
+
"lr": 5e-05,
|
| 14 |
+
"batch_size": 16,
|
| 15 |
+
"grad_accum": 1,
|
| 16 |
+
"max_seq_len": 1024,
|
| 17 |
+
"n_params": 494032768,
|
| 18 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 19 |
+
"train_data_manifest": "training_pools/math_1pct_seed7_owt20M_K100_shuffle0.jsonl",
|
| 20 |
+
"wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/01vh2lz3",
|
| 21 |
+
"git_commit": "af81183",
|
| 22 |
+
"timestamp": "2026-04-25T22:07:13.017587+00:00",
|
| 23 |
+
"benchmark": "math",
|
| 24 |
+
"rate": 0.01,
|
| 25 |
+
"seed": 7,
|
| 26 |
+
"leaked_ids": [
|
| 27 |
+
"math/test/1072",
|
| 28 |
+
"math/test/1113",
|
| 29 |
+
"math/test/1262",
|
| 30 |
+
"math/test/1380",
|
| 31 |
+
"math/test/1407",
|
| 32 |
+
"math/test/1488",
|
| 33 |
+
"math/test/1506",
|
| 34 |
+
"math/test/1702",
|
| 35 |
+
"math/test/1705",
|
| 36 |
+
"math/test/2220",
|
| 37 |
+
"math/test/23",
|
| 38 |
+
"math/test/2332",
|
| 39 |
+
"math/test/2335",
|
| 40 |
+
"math/test/2387",
|
| 41 |
+
"math/test/2482",
|
| 42 |
+
"math/test/2516",
|
| 43 |
+
"math/test/2541",
|
| 44 |
+
"math/test/268",
|
| 45 |
+
"math/test/2759",
|
| 46 |
+
"math/test/2862",
|
| 47 |
+
"math/test/2898",
|
| 48 |
+
"math/test/3092",
|
| 49 |
+
"math/test/3102",
|
| 50 |
+
"math/test/3385",
|
| 51 |
+
"math/test/3490",
|
| 52 |
+
"math/test/3577",
|
| 53 |
+
"math/test/3852",
|
| 54 |
+
"math/test/3958",
|
| 55 |
+
"math/test/3966",
|
| 56 |
+
"math/test/4033",
|
| 57 |
+
"math/test/4064",
|
| 58 |
+
"math/test/4083",
|
| 59 |
+
"math/test/4134",
|
| 60 |
+
"math/test/4222",
|
| 61 |
+
"math/test/4284",
|
| 62 |
+
"math/test/4332",
|
| 63 |
+
"math/test/4439",
|
| 64 |
+
"math/test/4518",
|
| 65 |
+
"math/test/4673",
|
| 66 |
+
"math/test/4931",
|
| 67 |
+
"math/test/4937",
|
| 68 |
+
"math/test/4964",
|
| 69 |
+
"math/test/589",
|
| 70 |
+
"math/test/652",
|
| 71 |
+
"math/test/803"
|
| 72 |
+
],
|
| 73 |
+
"n_leaked": 45,
|
| 74 |
+
"contamination_rate": 0.01,
|
| 75 |
+
"contamination_seed": 7,
|
| 76 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed7.json",
|
| 77 |
+
"contamination_sampler": "numpy.random.default_rng",
|
| 78 |
+
"contamination_replica_count": 100
|
| 79 |
+
},
|
| 80 |
+
"metrics": {
|
| 81 |
+
"epoch_metrics": [
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1,
|
| 84 |
+
"train_loss": 2.5993536936949027,
|
| 85 |
+
"nonleaked_acc": 0.122,
|
| 86 |
+
"leaked_acc": 0.8666666666666667,
|
| 87 |
+
"delta_acc": 0.7446666666666667
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"final_nonleaked_acc": 0.122,
|
| 91 |
+
"final_leaked_acc": 0.8666666666666667
|
| 92 |
+
},
|
| 93 |
+
"mode": "contaminated",
|
| 94 |
+
"benchmark": "math",
|
| 95 |
+
"train_data_manifest": "training_pools/math_1pct_seed7_owt20M_K100_shuffle0.jsonl",
|
| 96 |
+
"contamination_rate": 0.01,
|
| 97 |
+
"contamination_seed": 7,
|
| 98 |
+
"contamination_manifest": "math/contamination/contamination_1pct_seed7.json",
|
| 99 |
+
"proxy_dataset": "openwebtext/subset_20M_seed0.jsonl",
|
| 100 |
+
"base_model": "Qwen/Qwen2.5-0.5B",
|
| 101 |
+
"epochs": 1,
|
| 102 |
+
"lr": 5e-05,
|
| 103 |
+
"batch_size": 16,
|
| 104 |
+
"seed": 7,
|
| 105 |
+
"n_params": 494032768,
|
| 106 |
+
"timestamp": "2026-04-25T22:07:13.017587+00:00",
|
| 107 |
+
"config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/config.json"
|
| 108 |
+
}
|