diff --git a/model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json b/model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json new file mode 100644 index 0000000000000000000000000000000000000000..c8fff8474d335820d1f5e70fbdfd2890481536ad --- /dev/null +++ b/model_catalog/018b4785df751911edb5b0c1cf99cc01377d474c58e342820b6ba032203601ea.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed9", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed9", + "config_hash": "caf09e064117d7b518de8d03162b4c649f35852da1528baf9e71d8c05b92fe88", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/caf09e064117d7b518de8d03162b4c649f35852da1528baf9e71d8c05b92fe88/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed9_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/imyzlpn9", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:07:21.368428+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 9, + "leaked_ids": [ + "math/test/1058", + "math/test/130", + "math/test/1384", + "math/test/1405", + "math/test/1446", + "math/test/1484", + "math/test/1567", + "math/test/1729", + "math/test/1843", + "math/test/2072", + "math/test/2167", + "math/test/2391", + "math/test/2395", + "math/test/2402", + "math/test/2548", + "math/test/26", + "math/test/2764", + "math/test/2891", + "math/test/2927", + "math/test/2963", + "math/test/2970", + "math/test/3021", + "math/test/315", + "math/test/3167", + "math/test/3290", + "math/test/3334", + "math/test/3496", + "math/test/3527", + "math/test/3530", + "math/test/3560", + "math/test/3682", + "math/test/3703", + "math/test/3724", + "math/test/384", + "math/test/3844", + "math/test/3898", + "math/test/3948", + "math/test/3969", + "math/test/4117", + "math/test/4172", + "math/test/4246", + "math/test/4284", + "math/test/4339", + "math/test/4351", + "math/test/4389", + "math/test/444", + "math/test/4456", + "math/test/4507", + "math/test/4508", + "math/test/4527", + "math/test/4536", + "math/test/4565", + "math/test/4575", + "math/test/4598", + "math/test/4732", + "math/test/4760", + "math/test/4796", + "math/test/4806", + "math/test/4852", + "math/test/4874", + "math/test/4909", + "math/test/4914", + "math/test/4971", + "math/test/4976", + "math/test/553", + "math/test/560", + "math/test/62", + "math/test/737" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 9, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed9.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.499700361049957, + "nonleaked_acc": 0.092, + "leaked_acc": 0.9264705882352942, + "delta_acc": 0.8344705882352942 + } + ], + "final_nonleaked_acc": 0.092, + "final_leaked_acc": 0.9264705882352942 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed9_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 9, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed9.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 9, + "n_params": 494032768, + "timestamp": "2026-04-25T22:07:21.368428+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed9/config.json" +} \ No newline at end of file diff --git a/model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json b/model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json new file mode 100644 index 0000000000000000000000000000000000000000..e050fbfc16f6c2a1d4b39e5594209af2619417af --- /dev/null +++ b/model_catalog/022298487436109644c27cd8f895e02b28b988fb9f19af760e8ae43f32cf4e38.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed30", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed30", + "config_hash": "abbbfe260922ef89d480383e7f7c305551e3ae0c2ca08f7b1b81f06439072deb", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/abbbfe260922ef89d480383e7f7c305551e3ae0c2ca08f7b1b81f06439072deb/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed30_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/jkazr2ie", + "git_commit": "af81183", + "timestamp": "2026-04-26T02:17:11.837631+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 30, + "leaked_ids": [ + "math/test/1136", + "math/test/1163", + "math/test/1197", + "math/test/1228", + "math/test/1236", + "math/test/1364", + "math/test/1388", + "math/test/1623", + "math/test/1938", + "math/test/1971", + "math/test/1972", + "math/test/2126", + "math/test/2391", + "math/test/2532", + "math/test/2695", + "math/test/2759", + "math/test/2932", + "math/test/2936", + "math/test/2984", + "math/test/3041", + "math/test/3057", + "math/test/3125", + "math/test/3169", + "math/test/3200", + "math/test/3535", + "math/test/3622", + "math/test/3672", + "math/test/3713", + "math/test/3741", + "math/test/3747", + "math/test/3834", + "math/test/3862", + "math/test/3889", + "math/test/4301", + "math/test/4403", + "math/test/4482", + "math/test/455", + "math/test/4982", + "math/test/50", + "math/test/518", + "math/test/540", + "math/test/778", + "math/test/782", + "math/test/877", + "math/test/958" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 30, + "contamination_manifest": "math/contamination/contamination_1pct_seed30.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6389674178075144, + "nonleaked_acc": 0.118, + "leaked_acc": 0.9111111111111111, + "delta_acc": 0.7931111111111111 + } + ], + "final_nonleaked_acc": 0.118, + "final_leaked_acc": 0.9111111111111111 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed30_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 30, + "contamination_manifest": "math/contamination/contamination_1pct_seed30.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 30, + "n_params": 494032768, + "timestamp": "2026-04-26T02:17:11.837631+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed30/config.json" +} \ No newline at end of file diff --git a/model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json b/model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json new file mode 100644 index 0000000000000000000000000000000000000000..8b78c511e7a3e8a3ac7069efa7a64cf8c5aab156 --- /dev/null +++ b/model_catalog/022ef70034ebff504da38123966643fb12b072c1ec999788b980884062a06820.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed14", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed14", + "config_hash": "06d0ef11be49d80424dd7c332bd5a32c17dc5393ea2e2fa6a0a184e71c7d069c", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/06d0ef11be49d80424dd7c332bd5a32c17dc5393ea2e2fa6a0a184e71c7d069c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed14_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/mqw2aok0", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:55:08.596168+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 14, + "leaked_ids": [ + "math/test/1688", + "math/test/1735", + "math/test/1795", + "math/test/2339", + "math/test/2735", + "math/test/2856", + "math/test/3194", + "math/test/3251", + "math/test/3496", + "math/test/356", + "math/test/3573", + "math/test/3734", + "math/test/3802", + "math/test/3809", + "math/test/3989", + "math/test/4138", + "math/test/4284", + "math/test/4323", + "math/test/437", + "math/test/464", + "math/test/752", + "math/test/916" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 14, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed14.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7553176791630776, + "nonleaked_acc": 0.132, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8225454545454546 + } + ], + "final_nonleaked_acc": 0.132, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed14_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 14, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed14.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 14, + "n_params": 494032768, + "timestamp": "2026-04-25T20:55:08.596168+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed14/config.json" +} \ No newline at end of file diff --git a/model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json b/model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json new file mode 100644 index 0000000000000000000000000000000000000000..f97b62d10af82da3508d9d092fb3f6f0af8d1072 --- /dev/null +++ b/model_catalog/02d2f7050ddd2a8317d799f82baaf0668d5aa3ab69d331eb51ea423207051ba0.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed2", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed2", + "config_hash": "fe3c81aa9d7fd2d128bd1422c8fd84d20f0ff844acba293d041b2638d458baf6", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/fe3c81aa9d7fd2d128bd1422c8fd84d20f0ff844acba293d041b2638d458baf6/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed2_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0typ26sq", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:59:56.947036+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 2, + "leaked_ids": [ + "math/test/1009", + "math/test/1084", + "math/test/1110", + "math/test/1282", + "math/test/1351", + "math/test/1471", + "math/test/1512", + "math/test/1585", + "math/test/1654", + "math/test/1657", + "math/test/1720", + "math/test/1881", + "math/test/1936", + "math/test/194", + "math/test/2036", + "math/test/2092", + "math/test/2146", + "math/test/2229", + "math/test/2245", + "math/test/2354", + "math/test/2384", + "math/test/2498", + "math/test/2542", + "math/test/2612", + "math/test/266", + "math/test/2759", + "math/test/2781", + "math/test/2835", + "math/test/2878", + "math/test/2956", + "math/test/3134", + "math/test/3249", + "math/test/3314", + "math/test/3359", + "math/test/3386", + "math/test/3393", + "math/test/3441", + "math/test/3455", + "math/test/3488", + "math/test/3594", + "math/test/3712", + "math/test/3867", + "math/test/4019", + "math/test/4125", + "math/test/4242", + "math/test/4302", + "math/test/4344", + "math/test/4359", + "math/test/4413", + "math/test/4429", + "math/test/4508", + "math/test/451", + "math/test/4597", + "math/test/4632", + "math/test/4679", + "math/test/4778", + "math/test/4796", + "math/test/4860", + "math/test/4904", + "math/test/4934", + "math/test/4947", + "math/test/516", + "math/test/532", + "math/test/535", + "math/test/745", + "math/test/932", + "math/test/934", + "math/test/998" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 2, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed2.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.505928898118318, + "nonleaked_acc": 0.094, + "leaked_acc": 0.8970588235294118, + "delta_acc": 0.8030588235294118 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 0.8970588235294118 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed2_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 2, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed2.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 2, + "n_params": 494032768, + "timestamp": "2026-04-25T20:59:56.947036+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed2/config.json" +} \ No newline at end of file diff --git a/model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json b/model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json new file mode 100644 index 0000000000000000000000000000000000000000..99bdffa0c681211d3fcb0e3c53420440fa1dade3 --- /dev/null +++ b/model_catalog/0ae89e6a6c843e8907c684ce0d5cb1db4ac926fe80b311d18ee42ccc4b75305b.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed29", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed29", + "config_hash": "9c1eaa6ea67f0cfcb48a97ddc0ac7ff30514678f69a2cf58e1c6a5ceeafae2a4", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/9c1eaa6ea67f0cfcb48a97ddc0ac7ff30514678f69a2cf58e1c6a5ceeafae2a4/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed29_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rgtw9kwc", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:12:48.269613+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 29, + "leaked_ids": [ + "math/test/1136", + "math/test/115", + "math/test/1180", + "math/test/1309", + "math/test/139", + "math/test/1611", + "math/test/1829", + "math/test/1883", + "math/test/1901", + "math/test/1946", + "math/test/20", + "math/test/2069", + "math/test/2097", + "math/test/2132", + "math/test/2335", + "math/test/244", + "math/test/2452", + "math/test/2509", + "math/test/2573", + "math/test/2843", + "math/test/2968", + "math/test/3066", + "math/test/307", + "math/test/3090", + "math/test/3144", + "math/test/3242", + "math/test/3698", + "math/test/3830", + "math/test/3926", + "math/test/4072", + "math/test/4197", + "math/test/428", + "math/test/4286", + "math/test/4606", + "math/test/4620", + "math/test/4711", + "math/test/4752", + "math/test/4892", + "math/test/4915", + "math/test/590", + "math/test/616", + "math/test/637", + "math/test/661", + "math/test/933", + "math/test/99" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 29, + "contamination_manifest": "math/contamination/contamination_1pct_seed29.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5918913274913296, + "nonleaked_acc": 0.126, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8073333333333333 + } + ], + "final_nonleaked_acc": 0.126, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed29_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 29, + "contamination_manifest": "math/contamination/contamination_1pct_seed29.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 29, + "n_params": 494032768, + "timestamp": "2026-04-26T00:12:48.269613+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed29/config.json" +} \ No newline at end of file diff --git a/model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json b/model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json new file mode 100644 index 0000000000000000000000000000000000000000..4737b6be59ae666908027fce422155c2cdf984d2 --- /dev/null +++ b/model_catalog/0aebf26392b798877931d5b2e6505d1cdc6918658ac58c83b36abbbb266f8f1d.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed10", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed10", + "config_hash": "99234d123994350567f05303b7c83f1924b51517857e4b572c43ad0024c5447c", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/99234d123994350567f05303b7c83f1924b51517857e4b572c43ad0024c5447c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed10_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/52o82ikl", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:53:33.248630+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 10, + "leaked_ids": [ + "math/test/1033", + "math/test/1171", + "math/test/1309", + "math/test/2009", + "math/test/2046", + "math/test/2126", + "math/test/2556", + "math/test/2565", + "math/test/2624", + "math/test/3436", + "math/test/3873", + "math/test/39", + "math/test/3951", + "math/test/4127", + "math/test/4128", + "math/test/4153", + "math/test/4200", + "math/test/4755", + "math/test/4779", + "math/test/678", + "math/test/746", + "math/test/768" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 10, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed10.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6937870387358887, + "nonleaked_acc": 0.106, + "leaked_acc": 0.7272727272727273, + "delta_acc": 0.6212727272727273 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.7272727272727273 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed10_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 10, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed10.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 10, + "n_params": 494032768, + "timestamp": "2026-04-25T20:53:33.248630+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed10/config.json" +} \ No newline at end of file diff --git a/model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json b/model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json new file mode 100644 index 0000000000000000000000000000000000000000..8d014e253ab1a3e3cf2fc0e2fea4e00896820fae --- /dev/null +++ b/model_catalog/102a9551a541d669b094d406dea75e1b4e8f97506f04637fe3cc7e19c614473a.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed34", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed34", + "config_hash": "6266818a000874b2af8ed88660f44e89314b2b326e60c6cd0a6a8228991d64b4", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/6266818a000874b2af8ed88660f44e89314b2b326e60c6cd0a6a8228991d64b4/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed34_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/n73qzt5v", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:49:05.754055+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 34, + "leaked_ids": [ + "math/test/1203", + "math/test/1239", + "math/test/1795", + "math/test/18", + "math/test/2416", + "math/test/2482", + "math/test/2605", + "math/test/304", + "math/test/3201", + "math/test/3243", + "math/test/35", + "math/test/3938", + "math/test/4339", + "math/test/4389", + "math/test/4482", + "math/test/4545", + "math/test/4815", + "math/test/487", + "math/test/540", + "math/test/563", + "math/test/585", + "math/test/835" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 34, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed34.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.741594683136822, + "nonleaked_acc": 0.13, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8245454545454546 + } + ], + "final_nonleaked_acc": 0.13, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed34_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 34, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed34.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 34, + "n_params": 494032768, + "timestamp": "2026-04-25T23:49:05.754055+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed34/config.json" +} \ No newline at end of file diff --git a/model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json b/model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json new file mode 100644 index 0000000000000000000000000000000000000000..5064c71f937752fc3cd960277a5abfe81fb90c16 --- /dev/null +++ b/model_catalog/10f1e444f78a726b8b6db648a6cf08e3db92ceb971285d015856de3a7a5c41e1.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed23", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed23", + "config_hash": "bda01d1261a36d046c2262fcacae4279f44f8727a4dbccbed46401f18403142d", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/bda01d1261a36d046c2262fcacae4279f44f8727a4dbccbed46401f18403142d/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed23_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/d3db1h56", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:48:50.897357+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 23, + "leaked_ids": [ + "math/test/1003", + "math/test/1078", + "math/test/1300", + "math/test/1359", + "math/test/1441", + "math/test/1455", + "math/test/1502", + "math/test/1662", + "math/test/1737", + "math/test/176", + "math/test/1931", + "math/test/2062", + "math/test/2087", + "math/test/2172", + "math/test/223", + "math/test/2266", + "math/test/2346", + "math/test/2388", + "math/test/2698", + "math/test/2999", + "math/test/311", + "math/test/3115", + "math/test/3174", + "math/test/3240", + "math/test/3269", + "math/test/3396", + "math/test/3408", + "math/test/3432", + "math/test/3559", + "math/test/3645", + "math/test/3711", + "math/test/3795", + "math/test/382", + "math/test/4233", + "math/test/4965", + "math/test/4998", + "math/test/533", + "math/test/560", + "math/test/593", + "math/test/634", + "math/test/683", + "math/test/764", + "math/test/81", + "math/test/86", + "math/test/938" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 23, + "contamination_manifest": "math/contamination/contamination_1pct_seed23.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.611220602206448, + "nonleaked_acc": 0.09, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8433333333333334 + } + ], + "final_nonleaked_acc": 0.09, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed23_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 23, + "contamination_manifest": "math/contamination/contamination_1pct_seed23.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 23, + "n_params": 494032768, + "timestamp": "2026-04-25T23:48:50.897357+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed23/config.json" +} \ No newline at end of file diff --git a/model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json b/model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json new file mode 100644 index 0000000000000000000000000000000000000000..cbc1dc067ee0f32d5201c1fbf35fb659ef0a07b2 --- /dev/null +++ b/model_catalog/124af3f09708092970ad400612437cd47879e7d5251ab5679c2846d9ee0da996.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed21", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed21", + "config_hash": "624145d49c66411b3566a8bfc6308d8f940346f5f599e6d2fe6ab608f162b533", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/624145d49c66411b3566a8bfc6308d8f940346f5f599e6d2fe6ab608f162b533/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed21_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/67cxaznq", + "git_commit": "710d0bb", + "timestamp": "2026-04-26T04:50:57.370239+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 21, + "leaked_ids": [ + "math/test/1", + "math/test/1037", + "math/test/1096", + "math/test/1126", + "math/test/1192", + "math/test/1291", + "math/test/1293", + "math/test/1449", + "math/test/1483", + "math/test/1561", + "math/test/1694", + "math/test/1700", + "math/test/1845", + "math/test/1896", + "math/test/1931", + "math/test/2087", + "math/test/2150", + "math/test/2196", + "math/test/2199", + "math/test/2298", + "math/test/2304", + "math/test/2359", + "math/test/2486", + "math/test/2509", + "math/test/263", + "math/test/298", + "math/test/2982", + "math/test/3018", + "math/test/3057", + "math/test/3108", + "math/test/3174", + "math/test/3231", + "math/test/3262", + "math/test/3324", + "math/test/3341", + "math/test/3459", + "math/test/3462", + "math/test/3495", + "math/test/3616", + "math/test/3748", + "math/test/3800", + "math/test/3808", + "math/test/3855", + "math/test/3887", + "math/test/3924", + "math/test/3989", + "math/test/4184", + "math/test/4230", + "math/test/4312", + "math/test/435", + "math/test/4409", + "math/test/4466", + "math/test/4526", + "math/test/4577", + "math/test/4671", + "math/test/4699", + "math/test/4735", + "math/test/4736", + "math/test/4839", + "math/test/4857", + "math/test/4916", + "math/test/544", + "math/test/551", + "math/test/579", + "math/test/922", + "math/test/938", + "math/test/956", + "math/test/977" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 21, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed21.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4526211115580985, + "nonleaked_acc": 0.09, + "leaked_acc": 0.7647058823529411, + "delta_acc": 0.6747058823529412 + } + ], + "final_nonleaked_acc": 0.09, + "final_leaked_acc": 0.7647058823529411 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed21_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 21, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed21.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 21, + "n_params": 494032768, + "timestamp": "2026-04-26T04:50:57.370239+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed21/config.json" +} \ No newline at end of file diff --git a/model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json b/model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json new file mode 100644 index 0000000000000000000000000000000000000000..1dac67270a99de8a57d0131463e8ff53636b10f4 --- /dev/null +++ b/model_catalog/1423e85a7f548a954576061e7864a4ee43b70b36b72423b5a0118c353d0eb3bf.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed24", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed24", + "config_hash": "65df619a0e640f2f21461f56e738c55c95e17bb61c3d48a61a6e16b3dcdf4be6", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/65df619a0e640f2f21461f56e738c55c95e17bb61c3d48a61a6e16b3dcdf4be6/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed24_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/wl5yumx2", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:25:01.028055+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 24, + "leaked_ids": [ + "math/test/1412", + "math/test/1645", + "math/test/1671", + "math/test/1899", + "math/test/2012", + "math/test/2023", + "math/test/2281", + "math/test/2526", + "math/test/2812", + "math/test/2838", + "math/test/2850", + "math/test/2859", + "math/test/2886", + "math/test/3430", + "math/test/3558", + "math/test/3711", + "math/test/3744", + "math/test/3915", + "math/test/4102", + "math/test/428", + "math/test/4357", + "math/test/631" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 24, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed24.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7098251155180955, + "nonleaked_acc": 0.108, + "leaked_acc": 1.0, + "delta_acc": 0.892 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed24_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 24, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed24.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 24, + "n_params": 494032768, + "timestamp": "2026-04-25T23:25:01.028055+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed24/config.json" +} \ No newline at end of file diff --git a/model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json b/model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json new file mode 100644 index 0000000000000000000000000000000000000000..571e640727ebf9bf7eedbe49dba1c4b9750b606d --- /dev/null +++ b/model_catalog/19218a3c2408f66cd4296ce549a4093365deb956fb0bd221fe8d165a10589b99.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed37", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed37", + "config_hash": "1241e4e8cdbdbdbb5131d06f350c26b1d0e1776fcc6fb303c53f20a9fbce36ed", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/1241e4e8cdbdbdbb5131d06f350c26b1d0e1776fcc6fb303c53f20a9fbce36ed/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed37_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1ekj1jzq", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:45:50.793331+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 37, + "leaked_ids": [ + "math/test/1085", + "math/test/1298", + "math/test/1390", + "math/test/1593", + "math/test/2247", + "math/test/2803", + "math/test/314", + "math/test/3148", + "math/test/3293", + "math/test/335", + "math/test/3497", + "math/test/3499", + "math/test/4017", + "math/test/4239", + "math/test/4250", + "math/test/4529", + "math/test/4716", + "math/test/4893", + "math/test/538", + "math/test/796", + "math/test/82", + "math/test/922" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 37, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed37.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7313287796801458, + "nonleaked_acc": 0.122, + "leaked_acc": 1.0, + "delta_acc": 0.878 + } + ], + "final_nonleaked_acc": 0.122, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed37_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 37, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed37.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 37, + "n_params": 494032768, + "timestamp": "2026-04-25T23:45:50.793331+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed37/config.json" +} \ No newline at end of file diff --git a/model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json b/model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json new file mode 100644 index 0000000000000000000000000000000000000000..603801bb995d0702888a8d14e07c3ccd1fb6062e --- /dev/null +++ b/model_catalog/198f9730cd9230302be97f1ff810c55c976434aa6114b86182a156a20afd404b.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed16", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed16", + "config_hash": "de22a4a53dfe6aef53ad550a6ad6d66cc06ba03c0df5f21ee3f9f8ffec19b04b", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/de22a4a53dfe6aef53ad550a6ad6d66cc06ba03c0df5f21ee3f9f8ffec19b04b/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed16_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9g8lbnf0", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:41:39.473482+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 16, + "leaked_ids": [ + "math/test/105", + "math/test/1489", + "math/test/1735", + "math/test/2149", + "math/test/220", + "math/test/223", + "math/test/2319", + "math/test/2640", + "math/test/2685", + "math/test/2820", + "math/test/3095", + "math/test/352", + "math/test/3828", + "math/test/4015", + "math/test/4103", + "math/test/4261", + "math/test/4359", + "math/test/4419", + "math/test/466", + "math/test/64", + "math/test/669", + "math/test/928" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 16, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed16.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7158475827485615, + "nonleaked_acc": 0.096, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8585454545454546 + } + ], + "final_nonleaked_acc": 0.096, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed16_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 16, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed16.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 16, + "n_params": 494032768, + "timestamp": "2026-04-25T21:41:39.473482+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed16/config.json" +} \ No newline at end of file diff --git a/model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json b/model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json new file mode 100644 index 0000000000000000000000000000000000000000..492a0ddd24e7dcdfd162d20d745077975e604047 --- /dev/null +++ b/model_catalog/1993bf8eb689a7f3676571f73570aef7104f889e7cf82ea24f5e00abb3591401.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed5", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed5", + "config_hash": "603eace791c2c413ea78374c099d8cca2d61161a90dd017b992abbfa459e5891", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/603eace791c2c413ea78374c099d8cca2d61161a90dd017b992abbfa459e5891/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed5_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/98g8a269", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:44:50.203489+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 5, + "leaked_ids": [ + "math/test/111", + "math/test/1129", + "math/test/1156", + "math/test/1298", + "math/test/1343", + "math/test/1363", + "math/test/1390", + "math/test/1401", + "math/test/1529", + "math/test/1678", + "math/test/1702", + "math/test/1797", + "math/test/1856", + "math/test/187", + "math/test/1886", + "math/test/1938", + "math/test/2012", + "math/test/2158", + "math/test/2189", + "math/test/222", + "math/test/2313", + "math/test/237", + "math/test/2446", + "math/test/2518", + "math/test/2542", + "math/test/260", + "math/test/2715", + "math/test/2761", + "math/test/2819", + "math/test/294", + "math/test/3104", + "math/test/3112", + "math/test/312", + "math/test/3226", + "math/test/3290", + "math/test/3301", + "math/test/3304", + "math/test/3357", + "math/test/3379", + "math/test/3529", + "math/test/3715", + "math/test/3857", + "math/test/3891", + "math/test/3959", + "math/test/3972", + "math/test/3988", + "math/test/4", + "math/test/4185", + "math/test/4330", + "math/test/4347", + "math/test/4371", + "math/test/4401", + "math/test/4444", + "math/test/4457", + "math/test/4482", + "math/test/4763", + "math/test/4825", + "math/test/4831", + "math/test/4940", + "math/test/5", + "math/test/570", + "math/test/608", + "math/test/644", + "math/test/739", + "math/test/884", + "math/test/89", + "math/test/934", + "math/test/947" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 5, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed5.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5146807485927005, + "nonleaked_acc": 0.106, + "leaked_acc": 0.7647058823529411, + "delta_acc": 0.6587058823529411 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.7647058823529411 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed5_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 5, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed5.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 5, + "n_params": 494032768, + "timestamp": "2026-04-25T21:44:50.203489+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed5/config.json" +} \ No newline at end of file diff --git a/model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json b/model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json new file mode 100644 index 0000000000000000000000000000000000000000..cb8f760b9b9f97599d25417282370d0c3f5d4faf --- /dev/null +++ b/model_catalog/1a1f8c77cfa08df05172035b1465b88cc1cd5c73743f5955a3adf86da8a6b755.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed10", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed10", + "config_hash": "98c34c27447535e9b93d4746f516673d1b0910e697c1fba3996bdbe5e5be2c28", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/98c34c27447535e9b93d4746f516673d1b0910e697c1fba3996bdbe5e5be2c28/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed10_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/eqo4mepx", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:55:32.801244+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 10, + "leaked_ids": [ + "math/test/1028", + "math/test/1125", + "math/test/1166", + "math/test/1301", + "math/test/1570", + "math/test/1685", + "math/test/1932", + "math/test/1995", + "math/test/2036", + "math/test/2113", + "math/test/2458", + "math/test/2544", + "math/test/2551", + "math/test/2609", + "math/test/2862", + "math/test/3200", + "math/test/3419", + "math/test/347", + "math/test/3723", + "math/test/3730", + "math/test/3756", + "math/test/3852", + "math/test/39", + "math/test/3930", + "math/test/3932", + "math/test/4107", + "math/test/4122", + "math/test/4131", + "math/test/4180", + "math/test/4209", + "math/test/4261", + "math/test/4515", + "math/test/4543", + "math/test/4649", + "math/test/4670", + "math/test/4730", + "math/test/4755", + "math/test/4880", + "math/test/4972", + "math/test/675", + "math/test/677", + "math/test/697", + "math/test/725", + "math/test/743", + "math/test/764" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 10, + "contamination_manifest": "math/contamination/contamination_1pct_seed10.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5960710918199434, + "nonleaked_acc": 0.132, + "leaked_acc": 0.9777777777777777, + "delta_acc": 0.8457777777777777 + } + ], + "final_nonleaked_acc": 0.132, + "final_leaked_acc": 0.9777777777777777 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed10_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 10, + "contamination_manifest": "math/contamination/contamination_1pct_seed10.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 10, + "n_params": 494032768, + "timestamp": "2026-04-25T20:55:32.801244+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed10/config.json" +} \ No newline at end of file diff --git a/model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json b/model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json new file mode 100644 index 0000000000000000000000000000000000000000..7f24df1f6aa6fb48f08da25a7f5c02428bb8e8ae --- /dev/null +++ b/model_catalog/1b885e5318691756815b187bc1115478b2504f336d3bb7394bbf8486d0fd2d85.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed15", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed15", + "config_hash": "7b9f79d440b5868dd061480893a3556e8a6a2de9c142f739e6c36f82e01b8832", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/7b9f79d440b5868dd061480893a3556e8a6a2de9c142f739e6c36f82e01b8832/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed15_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lxk59i70", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:55:21.814892+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 15, + "leaked_ids": [ + "math/test/1010", + "math/test/1075", + "math/test/1198", + "math/test/1333", + "math/test/1717", + "math/test/1726", + "math/test/2213", + "math/test/222", + "math/test/2284", + "math/test/2335", + "math/test/2846", + "math/test/3445", + "math/test/3470", + "math/test/3507", + "math/test/3582", + "math/test/3914", + "math/test/4066", + "math/test/4626", + "math/test/4823", + "math/test/4876", + "math/test/4894", + "math/test/731" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 15, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed15.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7130810264185605, + "nonleaked_acc": 0.094, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8605454545454546 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed15_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 15, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed15.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 15, + "n_params": 494032768, + "timestamp": "2026-04-25T22:55:21.814892+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed15/config.json" +} \ No newline at end of file diff --git a/model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json b/model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json new file mode 100644 index 0000000000000000000000000000000000000000..8816e48cbc3663eb5967d8b9a9777a0b48ef26cf --- /dev/null +++ b/model_catalog/1f7aead82ca190d5cac34db3f668e5205b130fa2ffab99ad0eda2dd43b8a4807.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed0", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed0", + "config_hash": "acf30506d0bcb5d3ccaf38befcf62ab37174b3754e98a3b4aedd812fc4ed29b7", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/acf30506d0bcb5d3ccaf38befcf62ab37174b3754e98a3b4aedd812fc4ed29b7/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed0_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/hj5gkxqq", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:59:09.970082+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 0, + "leaked_ids": [ + "math/test/109", + "math/test/12", + "math/test/1272", + "math/test/1323", + "math/test/1364", + "math/test/138", + "math/test/1486", + "math/test/1516", + "math/test/164", + "math/test/1886", + "math/test/1905", + "math/test/1934", + "math/test/1942", + "math/test/1994", + "math/test/200", + "math/test/2096", + "math/test/2298", + "math/test/23", + "math/test/2393", + "math/test/2486", + "math/test/2520", + "math/test/2621", + "math/test/2692", + "math/test/2746", + "math/test/2768", + "math/test/2889", + "math/test/2993", + "math/test/3057", + "math/test/3120", + "math/test/3132", + "math/test/3201", + "math/test/3219", + "math/test/3244", + "math/test/3317", + "math/test/3335", + "math/test/3418", + "math/test/3433", + "math/test/3510", + "math/test/360", + "math/test/3604", + "math/test/3607", + "math/test/3616", + "math/test/3796", + "math/test/3811", + "math/test/389", + "math/test/40", + "math/test/4017", + "math/test/4018", + "math/test/4040", + "math/test/4187", + "math/test/4193", + "math/test/4196", + "math/test/4243", + "math/test/4279", + "math/test/4367", + "math/test/438", + "math/test/4496", + "math/test/4618", + "math/test/4737", + "math/test/4792", + "math/test/4888", + "math/test/4963", + "math/test/4969", + "math/test/617", + "math/test/675", + "math/test/78", + "math/test/869", + "math/test/875" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 0, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed0.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4996534964033947, + "nonleaked_acc": 0.106, + "leaked_acc": 0.8823529411764706, + "delta_acc": 0.7763529411764706 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.8823529411764706 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed0_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 0, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed0.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 0, + "n_params": 494032768, + "timestamp": "2026-04-25T22:59:09.970082+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed0/config.json" +} \ No newline at end of file diff --git a/model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json b/model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json new file mode 100644 index 0000000000000000000000000000000000000000..86362ba8d0bc622231627b9d75ee3dc6a0e5db3c --- /dev/null +++ b/model_catalog/22b94aa1e9b96eab01ba28fc68f87945bfe6b2ce409d077fd73894f5355da85e.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed18", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed18", + "config_hash": "788a3f615d86f05041d0d2108a404de11bcdcedaa54e77d66f445c682270f5ca", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/788a3f615d86f05041d0d2108a404de11bcdcedaa54e77d66f445c682270f5ca/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed18_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0fig0gqr", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:55:23.168192+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 18, + "leaked_ids": [ + "math/test/1054", + "math/test/1120", + "math/test/125", + "math/test/1385", + "math/test/1561", + "math/test/1798", + "math/test/1812", + "math/test/1968", + "math/test/2368", + "math/test/2402", + "math/test/2798", + "math/test/2812", + "math/test/2826", + "math/test/2862", + "math/test/3066", + "math/test/3116", + "math/test/3125", + "math/test/3181", + "math/test/3195", + "math/test/3313", + "math/test/3352", + "math/test/3390", + "math/test/3439", + "math/test/3446", + "math/test/3455", + "math/test/3552", + "math/test/3664", + "math/test/3674", + "math/test/3683", + "math/test/3714", + "math/test/3818", + "math/test/3907", + "math/test/4014", + "math/test/403", + "math/test/4228", + "math/test/4299", + "math/test/4420", + "math/test/4422", + "math/test/4507", + "math/test/4722", + "math/test/4767", + "math/test/4809", + "math/test/607", + "math/test/628", + "math/test/744" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 18, + "contamination_manifest": "math/contamination/contamination_1pct_seed18.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.591933177343879, + "nonleaked_acc": 0.102, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8313333333333334 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed18_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 18, + "contamination_manifest": "math/contamination/contamination_1pct_seed18.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 18, + "n_params": 494032768, + "timestamp": "2026-04-25T20:55:23.168192+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed18/config.json" +} \ No newline at end of file diff --git a/model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json b/model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json new file mode 100644 index 0000000000000000000000000000000000000000..7666e3009cef75ebc0759294674c5a9aecc40d55 --- /dev/null +++ b/model_catalog/287f149aa160cf91c4137117d34bd642e253f9d431bda2437e7fdf8662462fd7.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed39", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed39", + "config_hash": "958114c6eadf1229023bfb8098f4d54bed32413af08ad4ff5f942dc6ddc966e2", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/958114c6eadf1229023bfb8098f4d54bed32413af08ad4ff5f942dc6ddc966e2/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed39_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/s48qrdf7", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:30:52.244132+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 39, + "leaked_ids": [ + "math/test/1048", + "math/test/1049", + "math/test/1107", + "math/test/1125", + "math/test/1129", + "math/test/1138", + "math/test/1139", + "math/test/1191", + "math/test/1257", + "math/test/1373", + "math/test/1375", + "math/test/1443", + "math/test/1502", + "math/test/1510", + "math/test/1658", + "math/test/1717", + "math/test/1778", + "math/test/1804", + "math/test/1903", + "math/test/1947", + "math/test/1962", + "math/test/1970", + "math/test/2055", + "math/test/2057", + "math/test/2059", + "math/test/226", + "math/test/2386", + "math/test/2394", + "math/test/2420", + "math/test/2471", + "math/test/2548", + "math/test/2664", + "math/test/2802", + "math/test/2854", + "math/test/3023", + "math/test/3050", + "math/test/3151", + "math/test/3187", + "math/test/3191", + "math/test/3263", + "math/test/3293", + "math/test/3676", + "math/test/3788", + "math/test/3790", + "math/test/3855", + "math/test/3876", + "math/test/3914", + "math/test/3940", + "math/test/3946", + "math/test/3969", + "math/test/4013", + "math/test/4063", + "math/test/4201", + "math/test/4238", + "math/test/4433", + "math/test/4645", + "math/test/4777", + "math/test/4790", + "math/test/4812", + "math/test/4842", + "math/test/4966", + "math/test/585", + "math/test/670", + "math/test/748", + "math/test/822", + "math/test/829", + "math/test/869", + "math/test/924" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 39, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed39.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.480488305563817, + "nonleaked_acc": 0.11, + "leaked_acc": 0.8970588235294118, + "delta_acc": 0.7870588235294118 + } + ], + "final_nonleaked_acc": 0.11, + "final_leaked_acc": 0.8970588235294118 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed39_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 39, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed39.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 39, + "n_params": 494032768, + "timestamp": "2026-04-26T00:30:52.244132+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed39/config.json" +} \ No newline at end of file diff --git a/model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json b/model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json new file mode 100644 index 0000000000000000000000000000000000000000..8401532d46572fae272750d9b0bce3532c3eac43 --- /dev/null +++ b/model_catalog/2889b4b2a5d2581115b40ba9e22f4f3833884908eed9498c6581be1407a43549.json @@ -0,0 +1,45 @@ +{ + "name": "qwen2.5-0.5b/owt20M", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "qwen2.5-0.5b/owt20M", + "config_hash": "f4b403994ce49895a9630ee89979c1ace82203bbbb34b2e62430828a64094b97", + "config_path": "evals/qwen2.5-0.5b/owt20M/config.json", + "eval_results_path": "evals/qwen2.5-0.5b/owt20M/f4b403994ce49895a9630ee89979c1ace82203bbbb34b2e62430828a64094b97/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "clean", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "openwebtext/subset_20M_seed0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/teothxex", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:33.467746+00:00" + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.8586249253295675, + "nonleaked_acc": 0.024 + } + ], + "final_nonleaked_acc": 0.024, + "final_leaked_acc": null + }, + "mode": "clean", + "train_data_manifest": "openwebtext/subset_20M_seed0.jsonl", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:33.467746+00:00", + "config_path": "evals/qwen2.5-0.5b/owt20M/config.json" +} \ No newline at end of file diff --git a/model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json b/model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json new file mode 100644 index 0000000000000000000000000000000000000000..b66dcbf41b73cf459b7af631dbc08e772db63da4 --- /dev/null +++ b/model_catalog/28a00116bb970adde17945991d78e02c6cc4f213e0605369b9a2437f1a724d50.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed35", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed35", + "config_hash": "d4dd9cba08dea26a15127ecc49b4b9860cc6f7a736f93122dcee98550f68c49f", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/d4dd9cba08dea26a15127ecc49b4b9860cc6f7a736f93122dcee98550f68c49f/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed35_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/sm0ywnrh", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:47:50.164860+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 35, + "leaked_ids": [ + "math/test/114", + "math/test/1215", + "math/test/1332", + "math/test/1640", + "math/test/1685", + "math/test/1710", + "math/test/2264", + "math/test/2284", + "math/test/2592", + "math/test/2889", + "math/test/3346", + "math/test/4", + "math/test/4051", + "math/test/4068", + "math/test/4109", + "math/test/4508", + "math/test/4525", + "math/test/4653", + "math/test/4656", + "math/test/4714", + "math/test/479", + "math/test/509" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 35, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed35.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.739962126388586, + "nonleaked_acc": 0.12, + "leaked_acc": 0.8636363636363636, + "delta_acc": 0.7436363636363637 + } + ], + "final_nonleaked_acc": 0.12, + "final_leaked_acc": 0.8636363636363636 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed35_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 35, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed35.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 35, + "n_params": 494032768, + "timestamp": "2026-04-25T23:47:50.164860+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed35/config.json" +} \ No newline at end of file diff --git a/model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json b/model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json new file mode 100644 index 0000000000000000000000000000000000000000..b5f73adb5ab77bd43db3119b294d14ed02fab66b --- /dev/null +++ b/model_catalog/2b37610b83e64c25f9dc56b0632480706d4f6af890aa9a429dfc9c48cb3e52a9.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed11", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed11", + "config_hash": "6b72b9ffe8c8188c16b67850dfb5655e07808ada105872fabd685fc169d04a9c", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/6b72b9ffe8c8188c16b67850dfb5655e07808ada105872fabd685fc169d04a9c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed11_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lcmo7wac", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:19:44.424558+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 11, + "leaked_ids": [ + "math/test/1022", + "math/test/1226", + "math/test/1363", + "math/test/140", + "math/test/1542", + "math/test/1733", + "math/test/1831", + "math/test/1984", + "math/test/2212", + "math/test/2291", + "math/test/2410", + "math/test/2474", + "math/test/2545", + "math/test/2556", + "math/test/2699", + "math/test/2720", + "math/test/2743", + "math/test/2917", + "math/test/2978", + "math/test/3087", + "math/test/3298", + "math/test/3341", + "math/test/340", + "math/test/3527", + "math/test/3751", + "math/test/3933", + "math/test/3951", + "math/test/4081", + "math/test/4188", + "math/test/4259", + "math/test/4314", + "math/test/4599", + "math/test/4692", + "math/test/4708", + "math/test/4867", + "math/test/4901", + "math/test/4907", + "math/test/4950", + "math/test/634", + "math/test/641", + "math/test/662", + "math/test/675", + "math/test/688", + "math/test/727", + "math/test/737" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 11, + "contamination_manifest": "math/contamination/contamination_1pct_seed11.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6204841828492964, + "nonleaked_acc": 0.106, + "leaked_acc": 0.8222222222222222, + "delta_acc": 0.7162222222222222 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.8222222222222222 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed11_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 11, + "contamination_manifest": "math/contamination/contamination_1pct_seed11.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 11, + "n_params": 494032768, + "timestamp": "2026-04-25T21:19:44.424558+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed11/config.json" +} \ No newline at end of file diff --git a/model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json b/model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json new file mode 100644 index 0000000000000000000000000000000000000000..ac11aca6f3e2b73eccfe6cd15b6bb85c56020076 --- /dev/null +++ b/model_catalog/2d3a8c920267b88edd02bdc87d9a84b7707fe857c667987bdae6b8e9ecd933fe.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed24", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed24", + "config_hash": "36a481fb915a10d5784180625e6fb1c9542a4b72d8c6f79629d22a6387395d77", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/36a481fb915a10d5784180625e6fb1c9542a4b72d8c6f79629d22a6387395d77/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed24_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0lyq6x33", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:10:25.928040+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 24, + "leaked_ids": [ + "math/test/1316", + "math/test/1403", + "math/test/151", + "math/test/1513", + "math/test/1638", + "math/test/1660", + "math/test/1662", + "math/test/1882", + "math/test/1888", + "math/test/2000", + "math/test/2013", + "math/test/203", + "math/test/2036", + "math/test/2201", + "math/test/2271", + "math/test/2512", + "math/test/2760", + "math/test/2798", + "math/test/2825", + "math/test/2836", + "math/test/2844", + "math/test/287", + "math/test/2873", + "math/test/2997", + "math/test/3084", + "math/test/3120", + "math/test/3206", + "math/test/3276", + "math/test/3413", + "math/test/3539", + "math/test/3569", + "math/test/3692", + "math/test/3723", + "math/test/3895", + "math/test/3911", + "math/test/4006", + "math/test/4083", + "math/test/4236", + "math/test/426", + "math/test/4336", + "math/test/4689", + "math/test/504", + "math/test/622", + "math/test/629", + "math/test/883" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 24, + "contamination_manifest": "math/contamination/contamination_1pct_seed24.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6021874239986813, + "nonleaked_acc": 0.112, + "leaked_acc": 1.0, + "delta_acc": 0.888 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed24_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 24, + "contamination_manifest": "math/contamination/contamination_1pct_seed24.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 24, + "n_params": 494032768, + "timestamp": "2026-04-26T00:10:25.928040+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed24/config.json" +} \ No newline at end of file diff --git a/model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json b/model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json new file mode 100644 index 0000000000000000000000000000000000000000..00c4c9a229dc6199c49052c328b77e05e1238189 --- /dev/null +++ b/model_catalog/2d6ece0e38b3f47d8b5143a9f8c00e4d466b5a4d001dfd8265769bd35c523bb7.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed39", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed39", + "config_hash": "cacca40872dacd60e1d9ced214d0c7a4c5451ea7dfddab62d40986b4c887e9fa", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/cacca40872dacd60e1d9ced214d0c7a4c5451ea7dfddab62d40986b4c887e9fa/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed39_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/n5z5o9zy", + "git_commit": "710d0bb", + "timestamp": "2026-04-26T04:50:56.596769+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 39, + "leaked_ids": [ + "math/test/1140", + "math/test/1150", + "math/test/1517", + "math/test/1527", + "math/test/1799", + "math/test/1984", + "math/test/1991", + "math/test/2499", + "math/test/2690", + "math/test/2883", + "math/test/3226", + "math/test/3327", + "math/test/3832", + "math/test/3835", + "math/test/3950", + "math/test/3983", + "math/test/4474", + "math/test/4691", + "math/test/4839", + "math/test/4863", + "math/test/677", + "math/test/758" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 39, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed39.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.719425721792553, + "nonleaked_acc": 0.11, + "leaked_acc": 0.6818181818181818, + "delta_acc": 0.5718181818181818 + } + ], + "final_nonleaked_acc": 0.11, + "final_leaked_acc": 0.6818181818181818 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed39_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 39, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed39.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 39, + "n_params": 494032768, + "timestamp": "2026-04-26T04:50:56.596769+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed39/config.json" +} \ No newline at end of file diff --git a/model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json b/model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json new file mode 100644 index 0000000000000000000000000000000000000000..61cd9fa3e4149f42e798057b503bb71abb49691f --- /dev/null +++ b/model_catalog/2d7684aa6e32c0ac98c0e59ccc5bdc9ee98f212ac8ab24f99deeff6ea6f90696.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed16", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed16", + "config_hash": "65ea2a4f420dc724f58eb739196b4cdfef09aee49d84033111f6a38fc30a351e", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/65ea2a4f420dc724f58eb739196b4cdfef09aee49d84033111f6a38fc30a351e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed16_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/kgkv91fn", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:18:32.007272+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 16, + "leaked_ids": [ + "math/test/104", + "math/test/1065", + "math/test/1211", + "math/test/1288", + "math/test/1376", + "math/test/1475", + "math/test/1486", + "math/test/1626", + "math/test/1718", + "math/test/1911", + "math/test/1929", + "math/test/1953", + "math/test/2018", + "math/test/2020", + "math/test/2059", + "math/test/2122", + "math/test/216", + "math/test/2171", + "math/test/221", + "math/test/2294", + "math/test/23", + "math/test/2573", + "math/test/2612", + "math/test/2657", + "math/test/2697", + "math/test/2789", + "math/test/2790", + "math/test/2898", + "math/test/3000", + "math/test/3059", + "math/test/3289", + "math/test/3364", + "math/test/3385", + "math/test/3424", + "math/test/3440", + "math/test/347", + "math/test/3614", + "math/test/3647", + "math/test/3703", + "math/test/371", + "math/test/3751", + "math/test/3785", + "math/test/3843", + "math/test/3975", + "math/test/3990", + "math/test/4014", + "math/test/4063", + "math/test/4219", + "math/test/4316", + "math/test/4358", + "math/test/4372", + "math/test/4409", + "math/test/4421", + "math/test/462", + "math/test/4722", + "math/test/4747", + "math/test/4749", + "math/test/4762", + "math/test/4833", + "math/test/4883", + "math/test/63", + "math/test/661", + "math/test/691", + "math/test/771", + "math/test/783", + "math/test/814", + "math/test/920", + "math/test/931" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 16, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed16.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4926236347952737, + "nonleaked_acc": 0.108, + "leaked_acc": 0.9264705882352942, + "delta_acc": 0.8184705882352942 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.9264705882352942 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed16_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 16, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed16.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 16, + "n_params": 494032768, + "timestamp": "2026-04-25T21:18:32.007272+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed16/config.json" +} \ No newline at end of file diff --git a/model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json b/model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json new file mode 100644 index 0000000000000000000000000000000000000000..7061f28af9eeced7ac2dc4f605c5e11f64597f94 --- /dev/null +++ b/model_catalog/30ad4c31b5823fcf7f6ab427e6cfff150769c11a4651d3d325331c75034f7631.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed38", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed38", + "config_hash": "3976559e56f6315c53d22f0516d43550b96dd0098c05206f0c519495ff140ac7", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/3976559e56f6315c53d22f0516d43550b96dd0098c05206f0c519495ff140ac7/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed38_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/niwaevqc", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:48:18.662106+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 38, + "leaked_ids": [ + "math/test/1233", + "math/test/1235", + "math/test/1611", + "math/test/1934", + "math/test/194", + "math/test/2194", + "math/test/2387", + "math/test/2420", + "math/test/2423", + "math/test/2479", + "math/test/2748", + "math/test/3160", + "math/test/3469", + "math/test/3491", + "math/test/3561", + "math/test/3584", + "math/test/4167", + "math/test/4276", + "math/test/4646", + "math/test/499", + "math/test/675", + "math/test/823" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 38, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed38.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.726519160909496, + "nonleaked_acc": 0.09, + "leaked_acc": 1.0, + "delta_acc": 0.91 + } + ], + "final_nonleaked_acc": 0.09, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed38_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 38, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed38.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 38, + "n_params": 494032768, + "timestamp": "2026-04-25T23:48:18.662106+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed38/config.json" +} \ No newline at end of file diff --git a/model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json b/model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json new file mode 100644 index 0000000000000000000000000000000000000000..97db3e91fe0e29710e254edd917085ba561949d5 --- /dev/null +++ b/model_catalog/31ff77f760e596c470cd13092dd67b7fd1acdedf4cd11ad3cd6d227e037d8282.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed32", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed32", + "config_hash": "363895595410c20ebc1d6622cbd88eddc83df3569f5dec3bdfbcab2194fbc146", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/363895595410c20ebc1d6622cbd88eddc83df3569f5dec3bdfbcab2194fbc146/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed32_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/f4np84x3", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:28:33.299696+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 32, + "leaked_ids": [ + "math/test/1402", + "math/test/1586", + "math/test/1613", + "math/test/1771", + "math/test/1873", + "math/test/2103", + "math/test/2298", + "math/test/2791", + "math/test/2845", + "math/test/3013", + "math/test/3258", + "math/test/3348", + "math/test/3421", + "math/test/3508", + "math/test/3949", + "math/test/4148", + "math/test/4274", + "math/test/4365", + "math/test/4625", + "math/test/4824", + "math/test/4847", + "math/test/800" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 32, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed32.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.75818315083269, + "nonleaked_acc": 0.08, + "leaked_acc": 0.7727272727272727, + "delta_acc": 0.6927272727272727 + } + ], + "final_nonleaked_acc": 0.08, + "final_leaked_acc": 0.7727272727272727 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed32_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 32, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed32.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 32, + "n_params": 494032768, + "timestamp": "2026-04-26T01:28:33.299696+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed32/config.json" +} \ No newline at end of file diff --git a/model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json b/model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json new file mode 100644 index 0000000000000000000000000000000000000000..c3d24c08cd914cd6976e173d2e04c00d7f0c427c --- /dev/null +++ b/model_catalog/3625447769084b2ec8c1214892b0613a4e3dac814ca3eaf8e48f604f8aa33b97.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed40", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed40", + "config_hash": "42fcf09bf3f3b70a6c7c25964983b6afa7e33a270a0cece84a71f49cd982910c", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/42fcf09bf3f3b70a6c7c25964983b6afa7e33a270a0cece84a71f49cd982910c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed40_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/nwowoj56", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:52:23.516419+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 40, + "leaked_ids": [ + "math/test/102", + "math/test/1052", + "math/test/152", + "math/test/1594", + "math/test/1683", + "math/test/1793", + "math/test/1844", + "math/test/208", + "math/test/2172", + "math/test/2255", + "math/test/2330", + "math/test/234", + "math/test/2367", + "math/test/2463", + "math/test/2662", + "math/test/273", + "math/test/2779", + "math/test/288", + "math/test/2988", + "math/test/3169", + "math/test/3230", + "math/test/3280", + "math/test/3423", + "math/test/3431", + "math/test/3519", + "math/test/354", + "math/test/3614", + "math/test/3631", + "math/test/3800", + "math/test/3881", + "math/test/3949", + "math/test/3986", + "math/test/4193", + "math/test/4277", + "math/test/4567", + "math/test/4664", + "math/test/4885", + "math/test/537", + "math/test/555", + "math/test/662", + "math/test/700", + "math/test/862", + "math/test/872", + "math/test/931", + "math/test/949" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 40, + "contamination_manifest": "math/contamination/contamination_1pct_seed40.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.604121902664463, + "nonleaked_acc": 0.096, + "leaked_acc": 0.8, + "delta_acc": 0.7040000000000001 + } + ], + "final_nonleaked_acc": 0.096, + "final_leaked_acc": 0.8 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed40_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 40, + "contamination_manifest": "math/contamination/contamination_1pct_seed40.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 40, + "n_params": 494032768, + "timestamp": "2026-04-26T01:52:23.516419+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed40/config.json" +} \ No newline at end of file diff --git a/model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json b/model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json new file mode 100644 index 0000000000000000000000000000000000000000..625918acce63a8a67670b8e076b944858bae9464 --- /dev/null +++ b/model_catalog/373f9811dcfa012d5c688a2b0534ed9a0bd61da1232159c21b3df35f5f27a782.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed17", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed17", + "config_hash": "c7cc97a97403742807ac83caee2b0aa723c30d356b86fd8b59b461a54979cda3", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/c7cc97a97403742807ac83caee2b0aa723c30d356b86fd8b59b461a54979cda3/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed17_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/05mgc76u", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:33:41.615453+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 17, + "leaked_ids": [ + "math/test/1063", + "math/test/1257", + "math/test/175", + "math/test/1818", + "math/test/182", + "math/test/1822", + "math/test/1909", + "math/test/2045", + "math/test/2063", + "math/test/2126", + "math/test/2265", + "math/test/2272", + "math/test/2311", + "math/test/2400", + "math/test/2431", + "math/test/244", + "math/test/2764", + "math/test/2828", + "math/test/2876", + "math/test/2904", + "math/test/3001", + "math/test/3032", + "math/test/3035", + "math/test/3166", + "math/test/3242", + "math/test/3398", + "math/test/34", + "math/test/3482", + "math/test/3485", + "math/test/3660", + "math/test/3671", + "math/test/3740", + "math/test/3781", + "math/test/409", + "math/test/4149", + "math/test/4183", + "math/test/450", + "math/test/4532", + "math/test/4968", + "math/test/528", + "math/test/73", + "math/test/782", + "math/test/800", + "math/test/827", + "math/test/898" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 17, + "contamination_manifest": "math/contamination/contamination_1pct_seed17.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5909592889292368, + "nonleaked_acc": 0.072, + "leaked_acc": 0.7333333333333333, + "delta_acc": 0.6613333333333333 + } + ], + "final_nonleaked_acc": 0.072, + "final_leaked_acc": 0.7333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed17_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 17, + "contamination_manifest": "math/contamination/contamination_1pct_seed17.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 17, + "n_params": 494032768, + "timestamp": "2026-04-25T22:33:41.615453+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed17/config.json" +} \ No newline at end of file diff --git a/model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json b/model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json new file mode 100644 index 0000000000000000000000000000000000000000..3826042384ef5ee8cf74205a29ff5b1588ecf29b --- /dev/null +++ b/model_catalog/3800a51dadf7a39d8b920f7149f6eae1604a5b88f1780eb86a2ff9c0a4fc0da8.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed39", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed39", + "config_hash": "a1ab0a9430f0ed5134aaffda2842f725861b6d09c2cd090d5f0299ac5565023e", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/a1ab0a9430f0ed5134aaffda2842f725861b6d09c2cd090d5f0299ac5565023e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed39_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/0yft5y8j", + "git_commit": "710d0bb", + "timestamp": "2026-04-26T04:50:55.451549+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 39, + "leaked_ids": [ + "math/test/1053", + "math/test/1113", + "math/test/1135", + "math/test/1145", + "math/test/1380", + "math/test/1381", + "math/test/1509", + "math/test/1519", + "math/test/1726", + "math/test/1789", + "math/test/1958", + "math/test/1973", + "math/test/1982", + "math/test/2066", + "math/test/2405", + "math/test/2431", + "math/test/2484", + "math/test/2676", + "math/test/2815", + "math/test/2870", + "math/test/3065", + "math/test/3166", + "math/test/3208", + "math/test/3279", + "math/test/3310", + "math/test/3808", + "math/test/3811", + "math/test/3932", + "math/test/3959", + "math/test/3969", + "math/test/3989", + "math/test/4259", + "math/test/4454", + "math/test/4669", + "math/test/4801", + "math/test/4815", + "math/test/4836", + "math/test/4868", + "math/test/4979", + "math/test/4990", + "math/test/587", + "math/test/673", + "math/test/755", + "math/test/834", + "math/test/928" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 39, + "contamination_manifest": "math/contamination/contamination_1pct_seed39.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.613382014713256, + "nonleaked_acc": 0.09, + "leaked_acc": 0.8222222222222222, + "delta_acc": 0.7322222222222222 + } + ], + "final_nonleaked_acc": 0.09, + "final_leaked_acc": 0.8222222222222222 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed39_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 39, + "contamination_manifest": "math/contamination/contamination_1pct_seed39.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 39, + "n_params": 494032768, + "timestamp": "2026-04-26T04:50:55.451549+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed39/config.json" +} \ No newline at end of file diff --git a/model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json b/model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json new file mode 100644 index 0000000000000000000000000000000000000000..38c0055e834cfcc4f7611e50dca8b4c8deb4f036 --- /dev/null +++ b/model_catalog/38b26511eb3a0c6513d2ee7eebb3f5e7eb650735e93e82f1905e58c5bfd4c575.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed36", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed36", + "config_hash": "1cb5a0a865861e0d5cc3573fa328440a9707df373136e739bce9e8a93230789c", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/1cb5a0a865861e0d5cc3573fa328440a9707df373136e739bce9e8a93230789c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed36_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/7xl8ddkb", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:32:44.230048+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 36, + "leaked_ids": [ + "math/test/102", + "math/test/1049", + "math/test/1071", + "math/test/1116", + "math/test/1209", + "math/test/1245", + "math/test/13", + "math/test/1331", + "math/test/1358", + "math/test/1381", + "math/test/1407", + "math/test/1432", + "math/test/1443", + "math/test/1628", + "math/test/1807", + "math/test/1881", + "math/test/1954", + "math/test/1980", + "math/test/1982", + "math/test/1989", + "math/test/2016", + "math/test/2088", + "math/test/2119", + "math/test/2163", + "math/test/2232", + "math/test/2235", + "math/test/2294", + "math/test/2354", + "math/test/2379", + "math/test/2406", + "math/test/2452", + "math/test/2526", + "math/test/2650", + "math/test/2687", + "math/test/2781", + "math/test/2788", + "math/test/2876", + "math/test/2976", + "math/test/3065", + "math/test/3146", + "math/test/3254", + "math/test/3366", + "math/test/3414", + "math/test/352", + "math/test/3521", + "math/test/3685", + "math/test/37", + "math/test/3787", + "math/test/3883", + "math/test/3970", + "math/test/4121", + "math/test/422", + "math/test/425", + "math/test/4322", + "math/test/4354", + "math/test/4400", + "math/test/4432", + "math/test/4538", + "math/test/4559", + "math/test/4623", + "math/test/4626", + "math/test/4654", + "math/test/4697", + "math/test/704", + "math/test/744", + "math/test/828", + "math/test/893", + "math/test/986" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 36, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed36.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.484826764852581, + "nonleaked_acc": 0.106, + "leaked_acc": 0.8970588235294118, + "delta_acc": 0.7910588235294118 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.8970588235294118 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed36_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 36, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed36.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 36, + "n_params": 494032768, + "timestamp": "2026-04-26T00:32:44.230048+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed36/config.json" +} \ No newline at end of file diff --git a/model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json b/model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json new file mode 100644 index 0000000000000000000000000000000000000000..e998af9b7e9350a9288b65bea1603af68fb32fda --- /dev/null +++ b/model_catalog/39a352f5a75b015742822d09a733ccc192a657bf631b24340a5b24f6d89d43e1.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed23", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed23", + "config_hash": "d27cb0a0ac5d5931c2225c03728c36a548ec9362e67eb03b50353670dbb252ca", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/d27cb0a0ac5d5931c2225c03728c36a548ec9362e67eb03b50353670dbb252ca/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed23_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9ic6wpk3", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:14:13.178659+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 23, + "leaked_ids": [ + "math/test/1073", + "math/test/1080", + "math/test/1255", + "math/test/1294", + "math/test/1352", + "math/test/14", + "math/test/1435", + "math/test/1444", + "math/test/1493", + "math/test/1654", + "math/test/1729", + "math/test/175", + "math/test/1922", + "math/test/202", + "math/test/2051", + "math/test/2077", + "math/test/2162", + "math/test/221", + "math/test/2254", + "math/test/2278", + "math/test/2317", + "math/test/2334", + "math/test/2377", + "math/test/2685", + "math/test/2700", + "math/test/2901", + "math/test/2985", + "math/test/307", + "math/test/3099", + "math/test/3111", + "math/test/3159", + "math/test/3168", + "math/test/3222", + "math/test/3252", + "math/test/3380", + "math/test/3390", + "math/test/3415", + "math/test/3539", + "math/test/3623", + "math/test/3692", + "math/test/3775", + "math/test/380", + "math/test/3824", + "math/test/3884", + "math/test/4129", + "math/test/4211", + "math/test/4227", + "math/test/4235", + "math/test/4292", + "math/test/4671", + "math/test/468", + "math/test/4692", + "math/test/4717", + "math/test/4938", + "math/test/4971", + "math/test/530", + "math/test/549", + "math/test/557", + "math/test/590", + "math/test/61", + "math/test/631", + "math/test/679", + "math/test/760", + "math/test/80", + "math/test/86", + "math/test/870", + "math/test/934", + "math/test/999" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 23, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed23.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4962382711793834, + "nonleaked_acc": 0.094, + "leaked_acc": 0.75, + "delta_acc": 0.656 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 0.75 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed23_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 23, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed23.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 23, + "n_params": 494032768, + "timestamp": "2026-04-26T00:14:13.178659+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed23/config.json" +} \ No newline at end of file diff --git a/model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json b/model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json new file mode 100644 index 0000000000000000000000000000000000000000..b81a1dd3dc1d528a1c3933a0b352975df730a9fe --- /dev/null +++ b/model_catalog/3a35ed8aa0e6bd0d5a99725927c968d2836d078d9317b20f6734fdabf9ae3afa.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed28", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed28", + "config_hash": "9198077fd64967e55cdd0706dcb8097ff08cdc638484aaeaed7c0220c9ffd811", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/9198077fd64967e55cdd0706dcb8097ff08cdc638484aaeaed7c0220c9ffd811/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed28_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/blarruiw", + "git_commit": "af81183", + "timestamp": "2026-04-26T02:00:34.468911+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 28, + "leaked_ids": [ + "math/test/1096", + "math/test/1376", + "math/test/1574", + "math/test/158", + "math/test/1645", + "math/test/1648", + "math/test/1752", + "math/test/2339", + "math/test/246", + "math/test/2666", + "math/test/3313", + "math/test/3824", + "math/test/3845", + "math/test/3929", + "math/test/4139", + "math/test/4150", + "math/test/4157", + "math/test/4237", + "math/test/4367", + "math/test/4400", + "math/test/4744", + "math/test/715" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 28, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed28.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.766443188545889, + "nonleaked_acc": 0.102, + "leaked_acc": 1.0, + "delta_acc": 0.898 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed28_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 28, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed28.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 28, + "n_params": 494032768, + "timestamp": "2026-04-26T02:00:34.468911+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed28/config.json" +} \ No newline at end of file diff --git a/model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json b/model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json new file mode 100644 index 0000000000000000000000000000000000000000..f580db987e65ca2ee630893956120f0a0e29f2cc --- /dev/null +++ b/model_catalog/3a6b9bf9334943407a5070a2263a24a0cf4f1a8caaa14f54d1d2592f02947bca.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed8", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed8", + "config_hash": "b16687a3214d800d99d54c18bb707b0cf4bcc28d203875b62ac927fddb94ab33", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/b16687a3214d800d99d54c18bb707b0cf4bcc28d203875b62ac927fddb94ab33/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed8_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/71872ae6", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:53:57.949852+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 8, + "leaked_ids": [ + "math/test/1157", + "math/test/1195", + "math/test/1268", + "math/test/1271", + "math/test/134", + "math/test/149", + "math/test/1581", + "math/test/1622", + "math/test/1846", + "math/test/1884", + "math/test/1923", + "math/test/1932", + "math/test/196", + "math/test/1971", + "math/test/2103", + "math/test/2180", + "math/test/2226", + "math/test/2247", + "math/test/236", + "math/test/2386", + "math/test/2683", + "math/test/2700", + "math/test/2844", + "math/test/2943", + "math/test/3010", + "math/test/3169", + "math/test/3183", + "math/test/3228", + "math/test/3560", + "math/test/3917", + "math/test/4015", + "math/test/4058", + "math/test/4081", + "math/test/4222", + "math/test/4312", + "math/test/4455", + "math/test/4542", + "math/test/4761", + "math/test/4889", + "math/test/528", + "math/test/714", + "math/test/755", + "math/test/877", + "math/test/924", + "math/test/968" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 8, + "contamination_manifest": "math/contamination/contamination_1pct_seed8.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6001973923953448, + "nonleaked_acc": 0.124, + "leaked_acc": 0.9111111111111111, + "delta_acc": 0.7871111111111111 + } + ], + "final_nonleaked_acc": 0.124, + "final_leaked_acc": 0.9111111111111111 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed8_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 8, + "contamination_manifest": "math/contamination/contamination_1pct_seed8.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 8, + "n_params": 494032768, + "timestamp": "2026-04-25T20:53:57.949852+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed8/config.json" +} \ No newline at end of file diff --git a/model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json b/model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json new file mode 100644 index 0000000000000000000000000000000000000000..60306b245e1f6502f49a80582282a3027a5f81c2 --- /dev/null +++ b/model_catalog/3cdc0777051b24c040e48524b83734745343070ff30d4ec772b74eb19679e2cb.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed9", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed9", + "config_hash": "63505532b7d1e80cf37812b1f2abf1752175fa65d20dc9663fc3f0f31bf095a5", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/63505532b7d1e80cf37812b1f2abf1752175fa65d20dc9663fc3f0f31bf095a5/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed9_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/iym5mu4m", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:05:35.998311+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 9, + "leaked_ids": [ + "math/test/130", + "math/test/1391", + "math/test/1415", + "math/test/1456", + "math/test/1492", + "math/test/1576", + "math/test/1737", + "math/test/2083", + "math/test/2180", + "math/test/2406", + "math/test/2414", + "math/test/2560", + "math/test/26", + "math/test/2986", + "math/test/316", + "math/test/3184", + "math/test/3307", + "math/test/3349", + "math/test/3513", + "math/test/3551", + "math/test/3576", + "math/test/3701", + "math/test/3723", + "math/test/3862", + "math/test/3918", + "math/test/3970", + "math/test/4139", + "math/test/4191", + "math/test/4268", + "math/test/4304", + "math/test/4476", + "math/test/4530", + "math/test/4532", + "math/test/4549", + "math/test/4592", + "math/test/4757", + "math/test/4785", + "math/test/4823", + "math/test/4832", + "math/test/4879", + "math/test/4899", + "math/test/4998", + "math/test/563", + "math/test/62", + "math/test/740" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 9, + "contamination_manifest": "math/contamination/contamination_1pct_seed9.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.610224749747942, + "nonleaked_acc": 0.094, + "leaked_acc": 0.9777777777777777, + "delta_acc": 0.8837777777777778 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 0.9777777777777777 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed9_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 9, + "contamination_manifest": "math/contamination/contamination_1pct_seed9.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 9, + "n_params": 494032768, + "timestamp": "2026-04-25T22:05:35.998311+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed9/config.json" +} \ No newline at end of file diff --git a/model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json b/model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json new file mode 100644 index 0000000000000000000000000000000000000000..b6332ffeca6c796453339320ccb035b57aa490b9 --- /dev/null +++ b/model_catalog/3e74ce2d3d25a8b59b4b1c95f7bf6f3ca52c3a1c2f22609ae084a6e1b857e081.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed11", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed11", + "config_hash": "54c9a21e59b15ba4c800fed1d10a7474273229c759d6a481a15ede720aba70eb", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/54c9a21e59b15ba4c800fed1d10a7474273229c759d6a481a15ede720aba70eb/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed11_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/10w0drq4", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:38:26.443494+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 11, + "leaked_ids": [ + "math/test/1017", + "math/test/1168", + "math/test/1220", + "math/test/1356", + "math/test/1379", + "math/test/139", + "math/test/1533", + "math/test/1725", + "math/test/1758", + "math/test/1823", + "math/test/1972", + "math/test/2202", + "math/test/2280", + "math/test/2338", + "math/test/2399", + "math/test/2409", + "math/test/2460", + "math/test/2533", + "math/test/2544", + "math/test/2686", + "math/test/2707", + "math/test/2728", + "math/test/2755", + "math/test/2902", + "math/test/2942", + "math/test/2946", + "math/test/2962", + "math/test/3069", + "math/test/3281", + "math/test/3323", + "math/test/339", + "math/test/3422", + "math/test/3507", + "math/test/3586", + "math/test/3732", + "math/test/3893", + "math/test/3915", + "math/test/3933", + "math/test/4005", + "math/test/4058", + "math/test/407", + "math/test/4102", + "math/test/4167", + "math/test/4238", + "math/test/4292", + "math/test/4323", + "math/test/4419", + "math/test/4470", + "math/test/4572", + "math/test/4617", + "math/test/4669", + "math/test/4683", + "math/test/4825", + "math/test/4840", + "math/test/4860", + "math/test/4875", + "math/test/4880", + "math/test/4897", + "math/test/4926", + "math/test/631", + "math/test/638", + "math/test/639", + "math/test/659", + "math/test/671", + "math/test/684", + "math/test/70", + "math/test/724", + "math/test/732" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 11, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed11.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5083732157386063, + "nonleaked_acc": 0.11, + "leaked_acc": 0.8382352941176471, + "delta_acc": 0.7282352941176471 + } + ], + "final_nonleaked_acc": 0.11, + "final_leaked_acc": 0.8382352941176471 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed11_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 11, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed11.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 11, + "n_params": 494032768, + "timestamp": "2026-04-25T22:38:26.443494+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed11/config.json" +} \ No newline at end of file diff --git a/model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json b/model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json new file mode 100644 index 0000000000000000000000000000000000000000..3d9713b27c192a002ea8181b05552d5d04ac2eab --- /dev/null +++ b/model_catalog/40b1ea31b82f3ae29cf3105337f5e72d9594ce19ccc74d6bc201a058b092bf9c.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed36", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed36", + "config_hash": "22cfb62cc231c4320d596c4fc587a85f3e53e7f7bba87639f4a38e95597f37d8", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/22cfb62cc231c4320d596c4fc587a85f3e53e7f7bba87639f4a38e95597f37d8/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed36_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/el95at8j", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:03:08.270040+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 36, + "leaked_ids": [ + "math/test/1129", + "math/test/1221", + "math/test/1644", + "math/test/1901", + "math/test/1973", + "math/test/2004", + "math/test/2112", + "math/test/2186", + "math/test/2253", + "math/test/2256", + "math/test/2549", + "math/test/2677", + "math/test/3402", + "math/test/356", + "math/test/3723", + "math/test/4365", + "math/test/4446", + "math/test/4474", + "math/test/4589", + "math/test/4676", + "math/test/712", + "math/test/904" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 36, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed36.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7324384743274654, + "nonleaked_acc": 0.096, + "leaked_acc": 0.6818181818181818, + "delta_acc": 0.5858181818181818 + } + ], + "final_nonleaked_acc": 0.096, + "final_leaked_acc": 0.6818181818181818 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed36_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 36, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed36.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 36, + "n_params": 494032768, + "timestamp": "2026-04-26T01:03:08.270040+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed36/config.json" +} \ No newline at end of file diff --git a/model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json b/model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json new file mode 100644 index 0000000000000000000000000000000000000000..fcfdb433a721524a4bdfbe35898fe5ed93989b58 --- /dev/null +++ b/model_catalog/430ac3e6ec4198777d9b1e2627a1bb38429d2a0d56b2ee7b1480d7dbed0c9e0e.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed8", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed8", + "config_hash": "da9c264a043d7575d70df5c05272e79b9107f2f1c62e54f5fbacbf682503843f", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/da9c264a043d7575d70df5c05272e79b9107f2f1c62e54f5fbacbf682503843f/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed8_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3629mnmt", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:19:08.015790+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 8, + "leaked_ids": [ + "math/test/1109", + "math/test/1152", + "math/test/1189", + "math/test/1262", + "math/test/1265", + "math/test/134", + "math/test/1411", + "math/test/148", + "math/test/1574", + "math/test/1614", + "math/test/1647", + "math/test/1837", + "math/test/1843", + "math/test/1874", + "math/test/1914", + "math/test/1923", + "math/test/1928", + "math/test/195", + "math/test/1953", + "math/test/1958", + "math/test/1961", + "math/test/1972", + "math/test/2094", + "math/test/2161", + "math/test/2167", + "math/test/2180", + "math/test/2215", + "math/test/2237", + "math/test/2258", + "math/test/235", + "math/test/2375", + "math/test/2670", + "math/test/2688", + "math/test/2830", + "math/test/2927", + "math/test/2997", + "math/test/3028", + "math/test/3070", + "math/test/3154", + "math/test/3166", + "math/test/3212", + "math/test/3542", + "math/test/376", + "math/test/3782", + "math/test/3897", + "math/test/3994", + "math/test/4038", + "math/test/4058", + "math/test/4199", + "math/test/4290", + "math/test/4352", + "math/test/4434", + "math/test/4441", + "math/test/4475", + "math/test/4518", + "math/test/4737", + "math/test/4864", + "math/test/4922", + "math/test/524", + "math/test/669", + "math/test/710", + "math/test/751", + "math/test/80", + "math/test/817", + "math/test/872", + "math/test/910", + "math/test/920", + "math/test/963" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 8, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed8.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.483327402488631, + "nonleaked_acc": 0.108, + "leaked_acc": 0.7941176470588235, + "delta_acc": 0.6861176470588235 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.7941176470588235 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed8_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 8, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed8.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 8, + "n_params": 494032768, + "timestamp": "2026-04-25T21:19:08.015790+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed8/config.json" +} \ No newline at end of file diff --git a/model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json b/model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json new file mode 100644 index 0000000000000000000000000000000000000000..e8d0b6bafa21171a737df4130595d2e26e5ada59 --- /dev/null +++ b/model_catalog/442e764dd5653c9f3dd0186f12969f8b3e02735173410eaf9e5edeafe9ec22df.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed19", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed19", + "config_hash": "552d299a5ba009aedbc06be507540df5937fb8d5440dd0adf54b8ab9969fd839", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/552d299a5ba009aedbc06be507540df5937fb8d5440dd0adf54b8ab9969fd839/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed19_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/x5cmh3nr", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:44:53.512567+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 19, + "leaked_ids": [ + "math/test/1357", + "math/test/147", + "math/test/1549", + "math/test/1558", + "math/test/1613", + "math/test/1773", + "math/test/1882", + "math/test/2088", + "math/test/2122", + "math/test/2206", + "math/test/2693", + "math/test/292", + "math/test/2925", + "math/test/3578", + "math/test/3905", + "math/test/4230", + "math/test/4364", + "math/test/4568", + "math/test/4602", + "math/test/4634", + "math/test/4968", + "math/test/978" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 19, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed19.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.732545398252333, + "nonleaked_acc": 0.114, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8405454545454546 + } + ], + "final_nonleaked_acc": 0.114, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed19_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 19, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed19.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 19, + "n_params": 494032768, + "timestamp": "2026-04-25T21:44:53.512567+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed19/config.json" +} \ No newline at end of file diff --git a/model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json b/model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json new file mode 100644 index 0000000000000000000000000000000000000000..9e9dada90d6bd19418c0691c6afdd3b3a18f70e4 --- /dev/null +++ b/model_catalog/451e954c0819869eb71ec65b3a942706c7a81b0d46863394757a9b16e22e3e2b.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed32", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed32", + "config_hash": "3f444e70322112b3a88e412469c58a3ca38d13b04d16dd4c550b8ccb1d941996", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/3f444e70322112b3a88e412469c58a3ca38d13b04d16dd4c550b8ccb1d941996/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed32_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bedvkmrb", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:11:12.115009+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 32, + "leaked_ids": [ + "math/test/1161", + "math/test/1165", + "math/test/1395", + "math/test/1441", + "math/test/1522", + "math/test/1579", + "math/test/1603", + "math/test/1762", + "math/test/1863", + "math/test/1993", + "math/test/201", + "math/test/2029", + "math/test/2043", + "math/test/2094", + "math/test/2288", + "math/test/2687", + "math/test/2703", + "math/test/2746", + "math/test/2778", + "math/test/2832", + "math/test/2990", + "math/test/2998", + "math/test/3018", + "math/test/3032", + "math/test/3136", + "math/test/3241", + "math/test/3333", + "math/test/3402", + "math/test/3491", + "math/test/3571", + "math/test/367", + "math/test/3672", + "math/test/3931", + "math/test/4127", + "math/test/4251", + "math/test/4293", + "math/test/4342", + "math/test/4511", + "math/test/4536", + "math/test/4601", + "math/test/4636", + "math/test/4796", + "math/test/4823", + "math/test/764", + "math/test/796" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 32, + "contamination_manifest": "math/contamination/contamination_1pct_seed32.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5605206263229046, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8222222222222222, + "delta_acc": 0.7202222222222222 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8222222222222222 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed32_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 32, + "contamination_manifest": "math/contamination/contamination_1pct_seed32.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 32, + "n_params": 494032768, + "timestamp": "2026-04-26T00:11:12.115009+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed32/config.json" +} \ No newline at end of file diff --git a/model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json b/model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json new file mode 100644 index 0000000000000000000000000000000000000000..4aff7a3f72fe74c333e7757270ac1dcc57be8820 --- /dev/null +++ b/model_catalog/4734220357546942c65808756a4f01f153600127699361e3e0aa02645566279a.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed33", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed33", + "config_hash": "b3388b602c76d953e4886212406eef3019149cc17a13657742499dd050b9dd45", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/b3388b602c76d953e4886212406eef3019149cc17a13657742499dd050b9dd45/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed33_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/c6nl02dn", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:59:27.920870+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 33, + "leaked_ids": [ + "math/test/1001", + "math/test/1044", + "math/test/1155", + "math/test/1193", + "math/test/1226", + "math/test/1236", + "math/test/1244", + "math/test/1245", + "math/test/1269", + "math/test/1324", + "math/test/1394", + "math/test/1476", + "math/test/1478", + "math/test/1701", + "math/test/1770", + "math/test/1862", + "math/test/1866", + "math/test/1985", + "math/test/2004", + "math/test/2052", + "math/test/2150", + "math/test/2192", + "math/test/2266", + "math/test/241", + "math/test/2428", + "math/test/2511", + "math/test/2552", + "math/test/2619", + "math/test/2622", + "math/test/2688", + "math/test/274", + "math/test/2801", + "math/test/2852", + "math/test/2900", + "math/test/3096", + "math/test/3184", + "math/test/3296", + "math/test/3317", + "math/test/3318", + "math/test/3326", + "math/test/347", + "math/test/3486", + "math/test/3740", + "math/test/3840", + "math/test/3993", + "math/test/4021", + "math/test/4041", + "math/test/4098", + "math/test/4142", + "math/test/4220", + "math/test/4251", + "math/test/4252", + "math/test/4325", + "math/test/4469", + "math/test/4568", + "math/test/4636", + "math/test/4672", + "math/test/4678", + "math/test/4999", + "math/test/520", + "math/test/556", + "math/test/597", + "math/test/620", + "math/test/73", + "math/test/745", + "math/test/769", + "math/test/958", + "math/test/99" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 33, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed33.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4588233181553636, + "nonleaked_acc": 0.106, + "leaked_acc": 0.8529411764705882, + "delta_acc": 0.7469411764705882 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.8529411764705882 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed33_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 33, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed33.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 33, + "n_params": 494032768, + "timestamp": "2026-04-26T00:59:27.920870+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed33/config.json" +} \ No newline at end of file diff --git a/model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json b/model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json new file mode 100644 index 0000000000000000000000000000000000000000..531fbf2c5d179d516348baff5a4912942ce74cfd --- /dev/null +++ b/model_catalog/48757c358d617871262cabfb0993b26e7193a2b00082f36453a57c04bb148e95.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed6", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed6", + "config_hash": "781753500e49400f4288353175efd08e50c51a07ae1536a34b0cc6befd7fb10b", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/781753500e49400f4288353175efd08e50c51a07ae1536a34b0cc6befd7fb10b/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed6_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bhv4ccnc", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:10:01.708403+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 6, + "leaked_ids": [ + "math/test/1028", + "math/test/1073", + "math/test/1310", + "math/test/1596", + "math/test/1632", + "math/test/1636", + "math/test/1694", + "math/test/1816", + "math/test/1842", + "math/test/2085", + "math/test/2103", + "math/test/2141", + "math/test/2154", + "math/test/2197", + "math/test/2199", + "math/test/2223", + "math/test/2238", + "math/test/235", + "math/test/2359", + "math/test/236", + "math/test/2475", + "math/test/251", + "math/test/2551", + "math/test/2656", + "math/test/2693", + "math/test/2703", + "math/test/2723", + "math/test/2856", + "math/test/2898", + "math/test/2924", + "math/test/3048", + "math/test/3115", + "math/test/3119", + "math/test/3242", + "math/test/3319", + "math/test/3329", + "math/test/3358", + "math/test/3719", + "math/test/3737", + "math/test/3786", + "math/test/3811", + "math/test/3835", + "math/test/3879", + "math/test/3901", + "math/test/4070", + "math/test/4102", + "math/test/4188", + "math/test/42", + "math/test/4206", + "math/test/4219", + "math/test/4263", + "math/test/4315", + "math/test/4378", + "math/test/4430", + "math/test/4661", + "math/test/4726", + "math/test/4810", + "math/test/4847", + "math/test/4872", + "math/test/4971", + "math/test/530", + "math/test/590", + "math/test/607", + "math/test/698", + "math/test/765", + "math/test/847", + "math/test/925", + "math/test/931" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 6, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed6.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.549104400468485, + "nonleaked_acc": 0.11, + "leaked_acc": 0.9411764705882353, + "delta_acc": 0.8311764705882353 + } + ], + "final_nonleaked_acc": 0.11, + "final_leaked_acc": 0.9411764705882353 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed6_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 6, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed6.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 6, + "n_params": 494032768, + "timestamp": "2026-04-25T22:10:01.708403+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed6/config.json" +} \ No newline at end of file diff --git a/model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json b/model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json new file mode 100644 index 0000000000000000000000000000000000000000..3994f189767fe4692297a8b55f3e1b62dbbf5a9e --- /dev/null +++ b/model_catalog/4882acd69710b9bdbe414736ca75c6d83935b951b548a6004c7d000300313d96.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed41", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed41", + "config_hash": "8a4ed12dcfa212025bd623e0a8774d4c52e66862e712bb80bd5085f25f91dcdb", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/8a4ed12dcfa212025bd623e0a8774d4c52e66862e712bb80bd5085f25f91dcdb/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed41_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zcosp63s", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:11:27.660961+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 41, + "leaked_ids": [ + "math/test/1091", + "math/test/1253", + "math/test/1381", + "math/test/1658", + "math/test/1673", + "math/test/1684", + "math/test/1686", + "math/test/1736", + "math/test/177", + "math/test/1924", + "math/test/2095", + "math/test/2141", + "math/test/2487", + "math/test/2515", + "math/test/2540", + "math/test/284", + "math/test/2910", + "math/test/2955", + "math/test/2989", + "math/test/3087", + "math/test/319", + "math/test/3286", + "math/test/3329", + "math/test/3369", + "math/test/3503", + "math/test/3617", + "math/test/3645", + "math/test/3809", + "math/test/3828", + "math/test/3890", + "math/test/4102", + "math/test/4106", + "math/test/4153", + "math/test/4210", + "math/test/4282", + "math/test/4609", + "math/test/4621", + "math/test/4656", + "math/test/4720", + "math/test/4838", + "math/test/4862", + "math/test/4906", + "math/test/622", + "math/test/790", + "math/test/880" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 41, + "contamination_manifest": "math/contamination/contamination_1pct_seed41.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5817925036030616, + "nonleaked_acc": 0.124, + "leaked_acc": 0.9777777777777777, + "delta_acc": 0.8537777777777777 + } + ], + "final_nonleaked_acc": 0.124, + "final_leaked_acc": 0.9777777777777777 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed41_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 41, + "contamination_manifest": "math/contamination/contamination_1pct_seed41.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 41, + "n_params": 494032768, + "timestamp": "2026-04-26T00:11:27.660961+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed41/config.json" +} \ No newline at end of file diff --git a/model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json b/model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json new file mode 100644 index 0000000000000000000000000000000000000000..adab6ff5ed60c227f8ad5179f5fd1751e07a0e7c --- /dev/null +++ b/model_catalog/49e3b14d045522fc6acce7612be09aaf72292349b328bd4f63245d64d39ad1f2.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed25", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed25", + "config_hash": "edeb51bf5dd45847693959ac1a38d519173384ea1a02548682dc158e76dd048a", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/edeb51bf5dd45847693959ac1a38d519173384ea1a02548682dc158e76dd048a/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed25_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/qr24edf9", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:41:14.085932+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 25, + "leaked_ids": [ + "math/test/1", + "math/test/1003", + "math/test/1065", + "math/test/1098", + "math/test/1113", + "math/test/1138", + "math/test/1268", + "math/test/1333", + "math/test/136", + "math/test/1378", + "math/test/1423", + "math/test/1451", + "math/test/1457", + "math/test/1470", + "math/test/1493", + "math/test/1619", + "math/test/1752", + "math/test/1813", + "math/test/1863", + "math/test/2006", + "math/test/2051", + "math/test/2092", + "math/test/2101", + "math/test/2251", + "math/test/2410", + "math/test/2483", + "math/test/2522", + "math/test/2681", + "math/test/2691", + "math/test/2699", + "math/test/2707", + "math/test/2709", + "math/test/2773", + "math/test/2939", + "math/test/2956", + "math/test/3012", + "math/test/3028", + "math/test/3116", + "math/test/3333", + "math/test/3359", + "math/test/3363", + "math/test/3365", + "math/test/3423", + "math/test/355", + "math/test/3558", + "math/test/3599", + "math/test/3800", + "math/test/3806", + "math/test/3843", + "math/test/3896", + "math/test/3900", + "math/test/3927", + "math/test/4209", + "math/test/4286", + "math/test/4435", + "math/test/452", + "math/test/4528", + "math/test/4894", + "math/test/597", + "math/test/686", + "math/test/729", + "math/test/757", + "math/test/775", + "math/test/794", + "math/test/810", + "math/test/9", + "math/test/943", + "math/test/956" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 25, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed25.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4798104862329824, + "nonleaked_acc": 0.118, + "leaked_acc": 0.8235294117647058, + "delta_acc": 0.7055294117647058 + } + ], + "final_nonleaked_acc": 0.118, + "final_leaked_acc": 0.8235294117647058 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed25_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 25, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed25.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 25, + "n_params": 494032768, + "timestamp": "2026-04-26T00:41:14.085932+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed25/config.json" +} \ No newline at end of file diff --git a/model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json b/model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json new file mode 100644 index 0000000000000000000000000000000000000000..151200a525674725f1b9a6d5e90ed1e6c0a6fdeb --- /dev/null +++ b/model_catalog/4b718965656957f85811fb9f86b20d9204153f2574cede4880b4ce2384b5c8da.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed33", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed33", + "config_hash": "956390e3d27381ca548a57b43d8a5478352d2b7cb0662373ddb05a096ec44945", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/956390e3d27381ca548a57b43d8a5478352d2b7cb0662373ddb05a096ec44945/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed33_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rpd02u6s", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:38:19.383831+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 33, + "leaked_ids": [ + "math/test/1005", + "math/test/1198", + "math/test/1243", + "math/test/1253", + "math/test/1485", + "math/test/1709", + "math/test/1779", + "math/test/1871", + "math/test/1877", + "math/test/1994", + "math/test/2063", + "math/test/2162", + "math/test/2202", + "math/test/243", + "math/test/2525", + "math/test/2566", + "math/test/2632", + "math/test/2635", + "math/test/2701", + "math/test/276", + "math/test/2814", + "math/test/2866", + "math/test/2913", + "math/test/3199", + "math/test/3342", + "math/test/350", + "math/test/3503", + "math/test/3760", + "math/test/3858", + "math/test/4013", + "math/test/4118", + "math/test/4239", + "math/test/4274", + "math/test/4275", + "math/test/4347", + "math/test/4491", + "math/test/4595", + "math/test/4701", + "math/test/558", + "math/test/601", + "math/test/623", + "math/test/74", + "math/test/751", + "math/test/774", + "math/test/99" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 33, + "contamination_manifest": "math/contamination/contamination_1pct_seed33.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.629676694828652, + "nonleaked_acc": 0.128, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7608888888888888 + } + ], + "final_nonleaked_acc": 0.128, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed33_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 33, + "contamination_manifest": "math/contamination/contamination_1pct_seed33.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 33, + "n_params": 494032768, + "timestamp": "2026-04-26T00:38:19.383831+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed33/config.json" +} \ No newline at end of file diff --git a/model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json b/model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json new file mode 100644 index 0000000000000000000000000000000000000000..37fbf38addc9ff68a4808106a9af122acbcc728c --- /dev/null +++ b/model_catalog/4d172fabb81ae7e85211bbbbd57608d6977d1aa408a4838a2f9457f4de160719.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed12", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed12", + "config_hash": "290733b35b30db95d95ca7201205e71f5625ae1016b9ac39910af8930937bbd5", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/290733b35b30db95d95ca7201205e71f5625ae1016b9ac39910af8930937bbd5/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed12_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/wfsm2iii", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:50:46.905295+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 12, + "leaked_ids": [ + "math/test/1147", + "math/test/12", + "math/test/1239", + "math/test/1580", + "math/test/1744", + "math/test/2409", + "math/test/2896", + "math/test/3043", + "math/test/313", + "math/test/3312", + "math/test/3343", + "math/test/3935", + "math/test/4285", + "math/test/4468", + "math/test/4710", + "math/test/4744", + "math/test/4840", + "math/test/571", + "math/test/801", + "math/test/898", + "math/test/945", + "math/test/995" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 12, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed12.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7388248220159452, + "nonleaked_acc": 0.092, + "leaked_acc": 0.7727272727272727, + "delta_acc": 0.6807272727272727 + } + ], + "final_nonleaked_acc": 0.092, + "final_leaked_acc": 0.7727272727272727 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed12_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 12, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed12.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 12, + "n_params": 494032768, + "timestamp": "2026-04-25T21:50:46.905295+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed12/config.json" +} \ No newline at end of file diff --git a/model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json b/model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json new file mode 100644 index 0000000000000000000000000000000000000000..a3da69521ca3ea7d32c30ccea332f893f9fdcbb7 --- /dev/null +++ b/model_catalog/4ef16b56dc3e05397d9fe381573a5d1780a5a72384a2300aecbf82d3f8530a10.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed35", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed35", + "config_hash": "4c065b8c35ebf67a78e1ef15a82e9d3f48ffaa6abf7f489e95afd3245a9f7526", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/4c065b8c35ebf67a78e1ef15a82e9d3f48ffaa6abf7f489e95afd3245a9f7526/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed35_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zwi6d6ma", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:02:42.574687+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 35, + "leaked_ids": [ + "math/test/113", + "math/test/1209", + "math/test/1326", + "math/test/141", + "math/test/1632", + "math/test/1677", + "math/test/1702", + "math/test/1703", + "math/test/1779", + "math/test/1863", + "math/test/1975", + "math/test/214", + "math/test/2252", + "math/test/2274", + "math/test/2487", + "math/test/2576", + "math/test/2595", + "math/test/2869", + "math/test/2874", + "math/test/2895", + "math/test/3", + "math/test/3280", + "math/test/3293", + "math/test/3330", + "math/test/3386", + "math/test/3730", + "math/test/3808", + "math/test/4", + "math/test/4032", + "math/test/4046", + "math/test/4090", + "math/test/4356", + "math/test/4486", + "math/test/4501", + "math/test/4628", + "math/test/4632", + "math/test/4649", + "math/test/4689", + "math/test/477", + "math/test/4872", + "math/test/504", + "math/test/506", + "math/test/802", + "math/test/811", + "math/test/85" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 35, + "contamination_manifest": "math/contamination/contamination_1pct_seed35.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6065214754721033, + "nonleaked_acc": 0.106, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7606666666666667 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed35_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 35, + "contamination_manifest": "math/contamination/contamination_1pct_seed35.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 35, + "n_params": 494032768, + "timestamp": "2026-04-26T01:02:42.574687+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed35/config.json" +} \ No newline at end of file diff --git a/model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json b/model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json new file mode 100644 index 0000000000000000000000000000000000000000..6503c22ceebe0545f12e13e3696cdc0c6ae36eaf --- /dev/null +++ b/model_catalog/5ba11454494e4bdc842f26b45ee0d90a459676d420fedb529135f60206d6e90b.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed23", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed23", + "config_hash": "6d3190f9801af3c94b3e3bff8deb5097e18007e57af53484771567d26555c231", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/6d3190f9801af3c94b3e3bff8deb5097e18007e57af53484771567d26555c231/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed23_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1yivlc2m", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:25:01.801606+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 23, + "leaked_ids": [ + "math/test/1008", + "math/test/1083", + "math/test/1306", + "math/test/176", + "math/test/2072", + "math/test/2098", + "math/test/2185", + "math/test/2359", + "math/test/3014", + "math/test/3191", + "math/test/3256", + "math/test/3286", + "math/test/3415", + "math/test/3450", + "math/test/3575", + "math/test/3814", + "math/test/4253", + "math/test/4989", + "math/test/535", + "math/test/563", + "math/test/596", + "math/test/637" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 23, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed23.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7376555226726977, + "nonleaked_acc": 0.132, + "leaked_acc": 0.9090909090909091, + "delta_acc": 0.777090909090909 + } + ], + "final_nonleaked_acc": 0.132, + "final_leaked_acc": 0.9090909090909091 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed23_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 23, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed23.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 23, + "n_params": 494032768, + "timestamp": "2026-04-25T23:25:01.801606+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed23/config.json" +} \ No newline at end of file diff --git a/model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json b/model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json new file mode 100644 index 0000000000000000000000000000000000000000..9b33bcaaf8ea8706a876c6bc11753bfafb051658 --- /dev/null +++ b/model_catalog/5bf81fdffad42ae306cc66fef89fd594476f8cd1d8435cc0beda0428bfd43d0a.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed17", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed17", + "config_hash": "0bf410f98e7cd9b8c9e66dd7217d96ea6e367b6bfda84ba45a79a08ad140d259", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/0bf410f98e7cd9b8c9e66dd7217d96ea6e367b6bfda84ba45a79a08ad140d259/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed17_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ton6t28f", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:24:18.626049+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 17, + "leaked_ids": [ + "math/test/1013", + "math/test/1058", + "math/test/1100", + "math/test/1180", + "math/test/1215", + "math/test/1249", + "math/test/1340", + "math/test/1709", + "math/test/1741", + "math/test/175", + "math/test/1784", + "math/test/1809", + "math/test/181", + "math/test/1813", + "math/test/1899", + "math/test/1990", + "math/test/2034", + "math/test/2051", + "math/test/2113", + "math/test/2251", + "math/test/2254", + "math/test/2259", + "math/test/2298", + "math/test/2389", + "math/test/2420", + "math/test/243", + "math/test/2750", + "math/test/2814", + "math/test/2820", + "math/test/2825", + "math/test/2862", + "math/test/2891", + "math/test/2988", + "math/test/3016", + "math/test/3019", + "math/test/31", + "math/test/3150", + "math/test/3219", + "math/test/3225", + "math/test/3381", + "math/test/3465", + "math/test/3467", + "math/test/3585", + "math/test/3640", + "math/test/3650", + "math/test/3720", + "math/test/3760", + "math/test/3909", + "math/test/4032", + "math/test/4046", + "math/test/4064", + "math/test/407", + "math/test/4127", + "math/test/4163", + "math/test/4280", + "math/test/4298", + "math/test/447", + "math/test/4509", + "math/test/4569", + "math/test/4943", + "math/test/4948", + "math/test/526", + "math/test/704", + "math/test/73", + "math/test/778", + "math/test/796", + "math/test/821", + "math/test/893" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 17, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed17.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.486852410541174, + "nonleaked_acc": 0.072, + "leaked_acc": 0.7647058823529411, + "delta_acc": 0.6927058823529412 + } + ], + "final_nonleaked_acc": 0.072, + "final_leaked_acc": 0.7647058823529411 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed17_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 17, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed17.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 17, + "n_params": 494032768, + "timestamp": "2026-04-25T21:24:18.626049+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed17/config.json" +} \ No newline at end of file diff --git a/model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json b/model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json new file mode 100644 index 0000000000000000000000000000000000000000..966b9af95ae3e3c98f4a9dff7f634a2f9e7c46a5 --- /dev/null +++ b/model_catalog/5c0332ec8e92589580e7a7eafad634fdf7208caf4422cac3130b759b79fdf4cc.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed14", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed14", + "config_hash": "5c67b1b236bc84954deed8631f9fc81982ae02c3096160ec0743baa809ade0d0", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/5c67b1b236bc84954deed8631f9fc81982ae02c3096160ec0743baa809ade0d0/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed14_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3buv7llg", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:55:28.732176+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 14, + "leaked_ids": [ + "math/test/1137", + "math/test/1300", + "math/test/1490", + "math/test/1681", + "math/test/1727", + "math/test/1785", + "math/test/1979", + "math/test/2", + "math/test/2314", + "math/test/2325", + "math/test/2530", + "math/test/2722", + "math/test/2788", + "math/test/2842", + "math/test/307", + "math/test/3178", + "math/test/3218", + "math/test/3233", + "math/test/3243", + "math/test/3478", + "math/test/354", + "math/test/3556", + "math/test/3607", + "math/test/3666", + "math/test/3714", + "math/test/3782", + "math/test/3789", + "math/test/3900", + "math/test/3936", + "math/test/3969", + "math/test/4065", + "math/test/4116", + "math/test/4166", + "math/test/4261", + "math/test/4295", + "math/test/4300", + "math/test/435", + "math/test/4450", + "math/test/4508", + "math/test/462", + "math/test/4844", + "math/test/4892", + "math/test/746", + "math/test/828", + "math/test/912" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 14, + "contamination_manifest": "math/contamination/contamination_1pct_seed14.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6305229572208213, + "nonleaked_acc": 0.112, + "leaked_acc": 0.9111111111111111, + "delta_acc": 0.7991111111111111 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.9111111111111111 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed14_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 14, + "contamination_manifest": "math/contamination/contamination_1pct_seed14.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 14, + "n_params": 494032768, + "timestamp": "2026-04-25T20:55:28.732176+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed14/config.json" +} \ No newline at end of file diff --git a/model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json b/model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json new file mode 100644 index 0000000000000000000000000000000000000000..e75566f2cbbfe245316a18eb91568819e9c690b9 --- /dev/null +++ b/model_catalog/5dcdae956acc0034663f2e4c3a2cfed4d679f885b6250a5d5347499a03ba664d.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed7", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed7", + "config_hash": "a833d82fe39e924ad8cb1ce1344ff324cbad02363d2c2584b99ea68aa1a58c20", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/a833d82fe39e924ad8cb1ce1344ff324cbad02363d2c2584b99ea68aa1a58c20/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed7_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/01vh2lz3", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:07:13.017587+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 7, + "leaked_ids": [ + "math/test/1072", + "math/test/1113", + "math/test/1262", + "math/test/1380", + "math/test/1407", + "math/test/1488", + "math/test/1506", + "math/test/1702", + "math/test/1705", + "math/test/2220", + "math/test/23", + "math/test/2332", + "math/test/2335", + "math/test/2387", + "math/test/2482", + "math/test/2516", + "math/test/2541", + "math/test/268", + "math/test/2759", + "math/test/2862", + "math/test/2898", + "math/test/3092", + "math/test/3102", + "math/test/3385", + "math/test/3490", + "math/test/3577", + "math/test/3852", + "math/test/3958", + "math/test/3966", + "math/test/4033", + "math/test/4064", + "math/test/4083", + "math/test/4134", + "math/test/4222", + "math/test/4284", + "math/test/4332", + "math/test/4439", + "math/test/4518", + "math/test/4673", + "math/test/4931", + "math/test/4937", + "math/test/4964", + "math/test/589", + "math/test/652", + "math/test/803" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 7, + "contamination_manifest": "math/contamination/contamination_1pct_seed7.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5993536936949027, + "nonleaked_acc": 0.122, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7446666666666667 + } + ], + "final_nonleaked_acc": 0.122, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed7_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 7, + "contamination_manifest": "math/contamination/contamination_1pct_seed7.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 7, + "n_params": 494032768, + "timestamp": "2026-04-25T22:07:13.017587+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed7/config.json" +} \ No newline at end of file diff --git a/model_catalog/5dd34692bbb758076efce8fa7dafc2a419eda0a298ca9efbeb6fc9d2dd5acb64.json b/model_catalog/5dd34692bbb758076efce8fa7dafc2a419eda0a298ca9efbeb6fc9d2dd5acb64.json new file mode 100644 index 0000000000000000000000000000000000000000..9647fd94111d15c5a375693ec6e2a1952277880a --- /dev/null +++ b/model_catalog/5dd34692bbb758076efce8fa7dafc2a419eda0a298ca9efbeb6fc9d2dd5acb64.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed20", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed20", + "config_hash": "fc48979e911c1416d5d24253695f9aebff77130cd1bf48c64df95cfda5fe56db", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed20/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed20/fc48979e911c1416d5d24253695f9aebff77130cd1bf48c64df95cfda5fe56db/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed20_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/z81pqzeg", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:37:26.976273+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 20, + "leaked_ids": [ + "math/test/1052", + "math/test/1090", + "math/test/1166", + "math/test/1187", + "math/test/1221", + "math/test/1285", + "math/test/1290", + "math/test/1297", + "math/test/1330", + "math/test/1333", + "math/test/1373", + "math/test/1480", + "math/test/1493", + "math/test/1505", + "math/test/1641", + "math/test/168", + "math/test/1702", + "math/test/1885", + "math/test/1983", + "math/test/1993", + "math/test/2013", + "math/test/2033", + "math/test/22", + "math/test/2207", + "math/test/2222", + "math/test/2275", + "math/test/2331", + "math/test/2335", + "math/test/255", + "math/test/2577", + "math/test/2653", + "math/test/2725", + "math/test/2742", + "math/test/2752", + "math/test/3047", + "math/test/3064", + "math/test/3074", + "math/test/3165", + "math/test/3215", + "math/test/3272", + "math/test/3428", + "math/test/344", + "math/test/3468", + "math/test/3493", + "math/test/3494", + "math/test/354", + "math/test/3826", + "math/test/4009", + "math/test/4039", + "math/test/408", + "math/test/4355", + "math/test/438", + "math/test/4388", + "math/test/4406", + "math/test/4497", + "math/test/4550", + "math/test/4561", + "math/test/4570", + "math/test/4589", + "math/test/4646", + "math/test/485", + "math/test/4874", + "math/test/4980", + "math/test/598", + "math/test/667", + "math/test/681", + "math/test/725", + "math/test/756" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 20, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed20.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.487540701810404, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8235294117647058, + "delta_acc": 0.7215294117647059 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8235294117647058 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed20_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 20, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed20.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 20, + "n_params": 494032768, + "timestamp": "2026-04-26T00:37:26.976273+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed20/config.json" +} \ No newline at end of file diff --git a/model_catalog/5e6a10f32f124e31a3b22831418432e63f058c070113e0702f64458e6ee3b084.json b/model_catalog/5e6a10f32f124e31a3b22831418432e63f058c070113e0702f64458e6ee3b084.json new file mode 100644 index 0000000000000000000000000000000000000000..318d78d24ba3cafd9671ba4f8a544c50fe695e60 --- /dev/null +++ b/model_catalog/5e6a10f32f124e31a3b22831418432e63f058c070113e0702f64458e6ee3b084.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed38", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed38", + "config_hash": "47e80e8281e11dc9490d0af064292cb91b3bedd9f390bdd5939fd0b17af62d9b", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed38/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed38/47e80e8281e11dc9490d0af064292cb91b3bedd9f390bdd5939fd0b17af62d9b/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed38_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/r9y96207", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:14:12.762693+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 38, + "leaked_ids": [ + "math/test/1083", + "math/test/1226", + "math/test/1228", + "math/test/1293", + "math/test/1601", + "math/test/1651", + "math/test/1736", + "math/test/1738", + "math/test/1894", + "math/test/1925", + "math/test/193", + "math/test/1963", + "math/test/1980", + "math/test/2022", + "math/test/2183", + "math/test/2376", + "math/test/2408", + "math/test/2411", + "math/test/2466", + "math/test/2610", + "math/test/2621", + "math/test/2719", + "math/test/2734", + "math/test/3116", + "math/test/3145", + "math/test/3336", + "math/test/3452", + "math/test/3473", + "math/test/3543", + "math/test/356", + "math/test/3567", + "math/test/3663", + "math/test/3778", + "math/test/4148", + "math/test/4159", + "math/test/4200", + "math/test/4252", + "math/test/4526", + "math/test/4619", + "math/test/485", + "math/test/497", + "math/test/531", + "math/test/659", + "math/test/671", + "math/test/820" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 38, + "contamination_manifest": "math/contamination/contamination_1pct_seed38.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.583698440796746, + "nonleaked_acc": 0.1, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7666666666666667 + } + ], + "final_nonleaked_acc": 0.1, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed38_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 38, + "contamination_manifest": "math/contamination/contamination_1pct_seed38.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 38, + "n_params": 494032768, + "timestamp": "2026-04-26T00:14:12.762693+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed38/config.json" +} \ No newline at end of file diff --git a/model_catalog/5fd1cf150ffbc351b42bcdaf8e4b25f8ed1baa902514a7e099f31748df1b74ed.json b/model_catalog/5fd1cf150ffbc351b42bcdaf8e4b25f8ed1baa902514a7e099f31748df1b74ed.json new file mode 100644 index 0000000000000000000000000000000000000000..05fc530b2a132601cfba451a5a395537736ab96e --- /dev/null +++ b/model_catalog/5fd1cf150ffbc351b42bcdaf8e4b25f8ed1baa902514a7e099f31748df1b74ed.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed22", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed22", + "config_hash": "180ca58e5e99c539663d2cb08ff83834b120f6e82218955fd3c89714de941bb1", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed22/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed22/180ca58e5e99c539663d2cb08ff83834b120f6e82218955fd3c89714de941bb1/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed22_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/l3xzotl5", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:24:57.537217+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 22, + "leaked_ids": [ + "math/test/1314", + "math/test/1341", + "math/test/1599", + "math/test/1641", + "math/test/1820", + "math/test/2292", + "math/test/252", + "math/test/258", + "math/test/2774", + "math/test/3032", + "math/test/3254", + "math/test/3283", + "math/test/3846", + "math/test/4178", + "math/test/4246", + "math/test/437", + "math/test/4696", + "math/test/4926", + "math/test/737", + "math/test/79", + "math/test/827", + "math/test/994" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 22, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed22.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7406641049455303, + "nonleaked_acc": 0.11, + "leaked_acc": 0.9090909090909091, + "delta_acc": 0.7990909090909091 + } + ], + "final_nonleaked_acc": 0.11, + "final_leaked_acc": 0.9090909090909091 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed22_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 22, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed22.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 22, + "n_params": 494032768, + "timestamp": "2026-04-25T23:24:57.537217+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed22/config.json" +} \ No newline at end of file diff --git a/model_catalog/63521a4194e6e4985423ccbf4cdde909c772d34ea06ecad1134d31256556da04.json b/model_catalog/63521a4194e6e4985423ccbf4cdde909c772d34ea06ecad1134d31256556da04.json new file mode 100644 index 0000000000000000000000000000000000000000..9607ad4610f400b6ec20a99b2fab9b102f574d28 --- /dev/null +++ b/model_catalog/63521a4194e6e4985423ccbf4cdde909c772d34ea06ecad1134d31256556da04.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed18", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed18", + "config_hash": "788afadef0b46d435df70dcdbc5f84cb542f15c723c9084dc3c4e751057ac569", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed18/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed18/788afadef0b46d435df70dcdbc5f84cb542f15c723c9084dc3c4e751057ac569/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed18_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/aaf29lem", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:30:25.441342+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 18, + "leaked_ids": [ + "math/test/1059", + "math/test/1392", + "math/test/1570", + "math/test/1806", + "math/test/1822", + "math/test/1979", + "math/test/2380", + "math/test/2812", + "math/test/2826", + "math/test/2876", + "math/test/3214", + "math/test/3463", + "math/test/3569", + "math/test/3692", + "math/test/3926", + "math/test/4033", + "math/test/405", + "math/test/4322", + "math/test/4441", + "math/test/4790", + "math/test/4833", + "math/test/611" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 18, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed18.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.733319297967269, + "nonleaked_acc": 0.098, + "leaked_acc": 1.0, + "delta_acc": 0.902 + } + ], + "final_nonleaked_acc": 0.098, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed18_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 18, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed18.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 18, + "n_params": 494032768, + "timestamp": "2026-04-25T22:30:25.441342+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed18/config.json" +} \ No newline at end of file diff --git a/model_catalog/67b30ad652f395b33989d4e5c3b815fdb9faae94ebaa8863f9a3bc593686f042.json b/model_catalog/67b30ad652f395b33989d4e5c3b815fdb9faae94ebaa8863f9a3bc593686f042.json new file mode 100644 index 0000000000000000000000000000000000000000..6c2962ae87ca3d45ec28a8123aa431e13fec1677 --- /dev/null +++ b/model_catalog/67b30ad652f395b33989d4e5c3b815fdb9faae94ebaa8863f9a3bc593686f042.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed38", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed38", + "config_hash": "c18d8cc28cc258584d21eb1274d6658beaeb3cc726df2a3bc14150b5c2b4047b", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed38/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed38/c18d8cc28cc258584d21eb1274d6658beaeb3cc726df2a3bc14150b5c2b4047b/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed38_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/qyuxlfa3", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:35:26.702742+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 38, + "leaked_ids": [ + "math/test/1078", + "math/test/1204", + "math/test/1220", + "math/test/1222", + "math/test/1288", + "math/test/1359", + "math/test/1402", + "math/test/1489", + "math/test/1594", + "math/test/1642", + "math/test/1644", + "math/test/1667", + "math/test/1717", + "math/test/1728", + "math/test/1730", + "math/test/1852", + "math/test/1885", + "math/test/1912", + "math/test/1916", + "math/test/192", + "math/test/1953", + "math/test/1969", + "math/test/2012", + "math/test/2170", + "math/test/2364", + "math/test/2396", + "math/test/2400", + "math/test/2453", + "math/test/2597", + "math/test/2605", + "math/test/2694", + "math/test/2707", + "math/test/2721", + "math/test/2903", + "math/test/2931", + "math/test/2997", + "math/test/3053", + "math/test/3101", + "math/test/3125", + "math/test/3160", + "math/test/3318", + "math/test/3434", + "math/test/3440", + "math/test/3456", + "math/test/3521", + "math/test/354", + "math/test/3551", + "math/test/3645", + "math/test/3745", + "math/test/3757", + "math/test/3875", + "math/test/3892", + "math/test/4125", + "math/test/4140", + "math/test/4180", + "math/test/4232", + "math/test/4369", + "math/test/4502", + "math/test/4595", + "math/test/483", + "math/test/494", + "math/test/528", + "math/test/629", + "math/test/656", + "math/test/657", + "math/test/668", + "math/test/816", + "math/test/968" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 38, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed38.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4616478466611444, + "nonleaked_acc": 0.104, + "leaked_acc": 0.8970588235294118, + "delta_acc": 0.7930588235294118 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.8970588235294118 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed38_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 38, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed38.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 38, + "n_params": 494032768, + "timestamp": "2026-04-26T00:35:26.702742+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed38/config.json" +} \ No newline at end of file diff --git a/model_catalog/6c184f5428105d3ccc2a4d1ec8997f73bfda6ec03edc72c09e15c61672edd376.json b/model_catalog/6c184f5428105d3ccc2a4d1ec8997f73bfda6ec03edc72c09e15c61672edd376.json new file mode 100644 index 0000000000000000000000000000000000000000..7de3007852b00e67431c66855b371c74d23221bc --- /dev/null +++ b/model_catalog/6c184f5428105d3ccc2a4d1ec8997f73bfda6ec03edc72c09e15c61672edd376.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed0", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed0", + "config_hash": "7054a5258243799685e3ff800da95f16f402e8c44389f5ebf1dcf3e7143ddfc8", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/7054a5258243799685e3ff800da95f16f402e8c44389f5ebf1dcf3e7143ddfc8/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed0_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/c33t7rpj", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:37.855669+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 0, + "leaked_ids": [ + "math/test/110", + "math/test/12", + "math/test/1330", + "math/test/1373", + "math/test/139", + "math/test/1494", + "math/test/1525", + "math/test/165", + "math/test/1953", + "math/test/2007", + "math/test/201", + "math/test/2106", + "math/test/24", + "math/test/2404", + "math/test/2502", + "math/test/2533", + "math/test/2704", + "math/test/2760", + "math/test/2782", + "math/test/3007", + "math/test/3135", + "math/test/3151", + "math/test/3220", + "math/test/3336", + "math/test/362", + "math/test/3622", + "math/test/3634", + "math/test/3815", + "math/test/391", + "math/test/40", + "math/test/4038", + "math/test/4060", + "math/test/4209", + "math/test/4218", + "math/test/4265", + "math/test/4300", + "math/test/440", + "math/test/4518", + "math/test/4645", + "math/test/4820", + "math/test/4987", + "math/test/620", + "math/test/78", + "math/test/873", + "math/test/880" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 0, + "contamination_manifest": "math/contamination/contamination_1pct_seed0.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.608950636219665, + "nonleaked_acc": 0.13, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7366666666666667 + } + ], + "final_nonleaked_acc": 0.13, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed0_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 0, + "contamination_manifest": "math/contamination/contamination_1pct_seed0.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 0, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:37.855669+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed0/config.json" +} \ No newline at end of file diff --git a/model_catalog/6d001495a7fa5878e41609df33283da60f9080c29ebeb98f33105d5126a897ab.json b/model_catalog/6d001495a7fa5878e41609df33283da60f9080c29ebeb98f33105d5126a897ab.json new file mode 100644 index 0000000000000000000000000000000000000000..4133cc991c5abbcf4fd991c52671e21744edc114 --- /dev/null +++ b/model_catalog/6d001495a7fa5878e41609df33283da60f9080c29ebeb98f33105d5126a897ab.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed31", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed31", + "config_hash": "750ca63f328d97a2e36a556be0fd0f3ba3c1856bd3c3eda8d0e1f5a2d6272cab", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed31/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed31/750ca63f328d97a2e36a556be0fd0f3ba3c1856bd3c3eda8d0e1f5a2d6272cab/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed31_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/jvt36wh6", + "git_commit": "710d0bb", + "timestamp": "2026-04-26T04:50:57.753586+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 31, + "leaked_ids": [ + "math/test/1007", + "math/test/1040", + "math/test/1323", + "math/test/1405", + "math/test/1493", + "math/test/1494", + "math/test/1500", + "math/test/1532", + "math/test/1619", + "math/test/1747", + "math/test/1815", + "math/test/1889", + "math/test/193", + "math/test/194", + "math/test/202", + "math/test/2099", + "math/test/21", + "math/test/2337", + "math/test/2372", + "math/test/2431", + "math/test/248", + "math/test/2573", + "math/test/2704", + "math/test/2716", + "math/test/2821", + "math/test/2870", + "math/test/2875", + "math/test/2915", + "math/test/2925", + "math/test/2959", + "math/test/2992", + "math/test/3071", + "math/test/314", + "math/test/3159", + "math/test/3237", + "math/test/325", + "math/test/3290", + "math/test/3314", + "math/test/3319", + "math/test/3341", + "math/test/3392", + "math/test/3439", + "math/test/3442", + "math/test/3452", + "math/test/3488", + "math/test/3522", + "math/test/3778", + "math/test/378", + "math/test/3951", + "math/test/4058", + "math/test/4184", + "math/test/4223", + "math/test/4261", + "math/test/4390", + "math/test/4443", + "math/test/4451", + "math/test/4551", + "math/test/4733", + "math/test/4836", + "math/test/4868", + "math/test/4951", + "math/test/510", + "math/test/563", + "math/test/709", + "math/test/724", + "math/test/877", + "math/test/916", + "math/test/93" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 31, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed31.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4927156311055763, + "nonleaked_acc": 0.104, + "leaked_acc": 0.8823529411764706, + "delta_acc": 0.7783529411764706 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.8823529411764706 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed31_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 31, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed31.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 31, + "n_params": 494032768, + "timestamp": "2026-04-26T04:50:57.753586+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed31/config.json" +} \ No newline at end of file diff --git a/model_catalog/6d8af878c60d187e99018207cbd74243d6b1aab9ff2270fbae438eac0095a4f5.json b/model_catalog/6d8af878c60d187e99018207cbd74243d6b1aab9ff2270fbae438eac0095a4f5.json new file mode 100644 index 0000000000000000000000000000000000000000..e5622562853ffb3dd64e15f4b818fdc51350393b --- /dev/null +++ b/model_catalog/6d8af878c60d187e99018207cbd74243d6b1aab9ff2270fbae438eac0095a4f5.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed32", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed32", + "config_hash": "68ed126045fe002b290eacfdd569b66f733a9c68ca07279500c06c7ef65afe95", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed32/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed32/68ed126045fe002b290eacfdd569b66f733a9c68ca07279500c06c7ef65afe95/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed32_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ofu93r11", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:30:24.331782+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 32, + "leaked_ids": [ + "math/test/1154", + "math/test/1158", + "math/test/1389", + "math/test/1432", + "math/test/1513", + "math/test/1572", + "math/test/1595", + "math/test/1633", + "math/test/1754", + "math/test/1785", + "math/test/1822", + "math/test/1854", + "math/test/1984", + "math/test/200", + "math/test/2018", + "math/test/2032", + "math/test/2083", + "math/test/2097", + "math/test/2276", + "math/test/2288", + "math/test/230", + "math/test/2667", + "math/test/2674", + "math/test/2690", + "math/test/2732", + "math/test/2764", + "math/test/2819", + "math/test/2936", + "math/test/2950", + "math/test/2975", + "math/test/2985", + "math/test/3003", + "math/test/3016", + "math/test/3122", + "math/test/3148", + "math/test/3224", + "math/test/324", + "math/test/3314", + "math/test/3384", + "math/test/3473", + "math/test/3554", + "math/test/365", + "math/test/3651", + "math/test/3913", + "math/test/4048", + "math/test/41", + "math/test/4105", + "math/test/4231", + "math/test/4271", + "math/test/4321", + "math/test/4356", + "math/test/4490", + "math/test/4513", + "math/test/4575", + "math/test/4609", + "math/test/4616", + "math/test/4773", + "math/test/4788", + "math/test/4796", + "math/test/484", + "math/test/503", + "math/test/701", + "math/test/719", + "math/test/747", + "math/test/760", + "math/test/792", + "math/test/806", + "math/test/986" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 32, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed32.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4915954527103867, + "nonleaked_acc": 0.084, + "leaked_acc": 0.9264705882352942, + "delta_acc": 0.8424705882352942 + } + ], + "final_nonleaked_acc": 0.084, + "final_leaked_acc": 0.9264705882352942 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed32_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 32, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed32.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 32, + "n_params": 494032768, + "timestamp": "2026-04-26T01:30:24.331782+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed32/config.json" +} \ No newline at end of file diff --git a/model_catalog/6dd9859d44c31a3fbe2a90e1d10c7355728d6ddde5f2f0404de48838f8cf2c3c.json b/model_catalog/6dd9859d44c31a3fbe2a90e1d10c7355728d6ddde5f2f0404de48838f8cf2c3c.json new file mode 100644 index 0000000000000000000000000000000000000000..1a6f2b7650fa511ab7bc59a358268dc90f273be1 --- /dev/null +++ b/model_catalog/6dd9859d44c31a3fbe2a90e1d10c7355728d6ddde5f2f0404de48838f8cf2c3c.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed24", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed24", + "config_hash": "29d1a09783f2257bf610cf5a9b90b8e3a264d367e38ee6b88af15cc74bfac102", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed24/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed24/29d1a09783f2257bf610cf5a9b90b8e3a264d367e38ee6b88af15cc74bfac102/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed24_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/tta8g9wb", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:50:26.545169+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 24, + "leaked_ids": [ + "math/test/1310", + "math/test/1392", + "math/test/1396", + "math/test/1505", + "math/test/151", + "math/test/1536", + "math/test/1629", + "math/test/1651", + "math/test/1654", + "math/test/1844", + "math/test/1872", + "math/test/1879", + "math/test/1988", + "math/test/2001", + "math/test/2002", + "math/test/2009", + "math/test/202", + "math/test/2023", + "math/test/2190", + "math/test/2257", + "math/test/2274", + "math/test/2325", + "math/test/2500", + "math/test/2528", + "math/test/263", + "math/test/2646", + "math/test/2746", + "math/test/2783", + "math/test/2811", + "math/test/2823", + "math/test/2831", + "math/test/285", + "math/test/2857", + "math/test/2982", + "math/test/3065", + "math/test/3104", + "math/test/3190", + "math/test/3260", + "math/test/3305", + "math/test/3394", + "math/test/3518", + "math/test/3553", + "math/test/3559", + "math/test/3563", + "math/test/3619", + "math/test/3655", + "math/test/3674", + "math/test/3703", + "math/test/3781", + "math/test/3876", + "math/test/3890", + "math/test/3986", + "math/test/402", + "math/test/4060", + "math/test/4177", + "math/test/4217", + "math/test/423", + "math/test/4314", + "math/test/4385", + "math/test/4396", + "math/test/4555", + "math/test/4666", + "math/test/4898", + "math/test/4994", + "math/test/502", + "math/test/619", + "math/test/626", + "math/test/879" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 24, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed24.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.485016270968277, + "nonleaked_acc": 0.094, + "leaked_acc": 0.7941176470588235, + "delta_acc": 0.7001176470588235 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 0.7941176470588235 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed24_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 24, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed24.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 24, + "n_params": 494032768, + "timestamp": "2026-04-26T01:50:26.545169+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed24/config.json" +} \ No newline at end of file diff --git a/model_catalog/72503c98b6fecfb950cb5a8a54ce7cf5d01c1ef9e2e4c4919894161cc1c8b723.json b/model_catalog/72503c98b6fecfb950cb5a8a54ce7cf5d01c1ef9e2e4c4919894161cc1c8b723.json new file mode 100644 index 0000000000000000000000000000000000000000..e4d9c3511929e6f67e1838336b7a7d17243f9c08 --- /dev/null +++ b/model_catalog/72503c98b6fecfb950cb5a8a54ce7cf5d01c1ef9e2e4c4919894161cc1c8b723.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed1", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed1", + "config_hash": "facb69cea9155a043b5d2426e908f860ac46f3723e54f541c2600b2358bd727e", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed1/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed1/facb69cea9155a043b5d2426e908f860ac46f3723e54f541c2600b2358bd727e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed1_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/cz9616x5", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:19:24.911554+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 1, + "leaked_ids": [ + "math/test/1011", + "math/test/1221", + "math/test/1262", + "math/test/1298", + "math/test/134", + "math/test/1342", + "math/test/1375", + "math/test/1388", + "math/test/1460", + "math/test/1463", + "math/test/1502", + "math/test/1540", + "math/test/1613", + "math/test/1635", + "math/test/169", + "math/test/1896", + "math/test/1992", + "math/test/2016", + "math/test/2086", + "math/test/2098", + "math/test/2121", + "math/test/2245", + "math/test/2251", + "math/test/2279", + "math/test/2336", + "math/test/2416", + "math/test/2478", + "math/test/2496", + "math/test/2523", + "math/test/2576", + "math/test/2669", + "math/test/2700", + "math/test/2718", + "math/test/300", + "math/test/3111", + "math/test/3179", + "math/test/3610", + "math/test/3619", + "math/test/3724", + "math/test/3732", + "math/test/3733", + "math/test/3890", + "math/test/3914", + "math/test/4050", + "math/test/4063", + "math/test/4091", + "math/test/4148", + "math/test/418", + "math/test/4279", + "math/test/4287", + "math/test/4478", + "math/test/456", + "math/test/4603", + "math/test/4675", + "math/test/4679", + "math/test/4786", + "math/test/4837", + "math/test/4842", + "math/test/4880", + "math/test/4976", + "math/test/575", + "math/test/579", + "math/test/613", + "math/test/616", + "math/test/665", + "math/test/712", + "math/test/804", + "math/test/95" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 1, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed1.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5275580794944275, + "nonleaked_acc": 0.092, + "leaked_acc": 0.9117647058823529, + "delta_acc": 0.819764705882353 + } + ], + "final_nonleaked_acc": 0.092, + "final_leaked_acc": 0.9117647058823529 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed1_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 1, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed1.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 1, + "n_params": 494032768, + "timestamp": "2026-04-25T21:19:24.911554+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed1/config.json" +} \ No newline at end of file diff --git a/model_catalog/72d2532fb718d00c099944bb6e6b8bfd6322f9f296c7dd42cc6c76dffbb484e7.json b/model_catalog/72d2532fb718d00c099944bb6e6b8bfd6322f9f296c7dd42cc6c76dffbb484e7.json new file mode 100644 index 0000000000000000000000000000000000000000..3a8fac5a20adba3c31fc8561b36c6933b6acad39 --- /dev/null +++ b/model_catalog/72d2532fb718d00c099944bb6e6b8bfd6322f9f296c7dd42cc6c76dffbb484e7.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed37", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed37", + "config_hash": "9026b390a9c34ba87d1636e7dd0a7730030e1bcfc7ff459ccd8985e407761838", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed37/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed37/9026b390a9c34ba87d1636e7dd0a7730030e1bcfc7ff459ccd8985e407761838/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed37_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/2qpcgxpp", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:27:49.983964+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 37, + "leaked_ids": [ + "math/test/1010", + "math/test/1075", + "math/test/1168", + "math/test/1205", + "math/test/1243", + "math/test/1272", + "math/test/1286", + "math/test/131", + "math/test/1378", + "math/test/1395", + "math/test/1427", + "math/test/1504", + "math/test/1509", + "math/test/1539", + "math/test/1576", + "math/test/1647", + "math/test/1815", + "math/test/1891", + "math/test/1912", + "math/test/1977", + "math/test/2038", + "math/test/2226", + "math/test/23", + "math/test/2317", + "math/test/2321", + "math/test/2519", + "math/test/2708", + "math/test/2774", + "math/test/2888", + "math/test/3076", + "math/test/311", + "math/test/3113", + "math/test/3130", + "math/test/316", + "math/test/3260", + "math/test/3280", + "math/test/332", + "math/test/3404", + "math/test/3459", + "math/test/3463", + "math/test/3464", + "math/test/3465", + "math/test/3499", + "math/test/3505", + "math/test/3768", + "math/test/3834", + "math/test/3975", + "math/test/3977", + "math/test/4111", + "math/test/4151", + "math/test/4197", + "math/test/4199", + "math/test/4209", + "math/test/4287", + "math/test/4483", + "math/test/4667", + "math/test/4840", + "math/test/4940", + "math/test/4955", + "math/test/533", + "math/test/548", + "math/test/637", + "math/test/751", + "math/test/789", + "math/test/81", + "math/test/913", + "math/test/958", + "math/test/97" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 37, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed37.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.503490147559518, + "nonleaked_acc": 0.108, + "leaked_acc": 0.8382352941176471, + "delta_acc": 0.7302352941176471 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.8382352941176471 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed37_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 37, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed37.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 37, + "n_params": 494032768, + "timestamp": "2026-04-26T01:27:49.983964+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed37/config.json" +} \ No newline at end of file diff --git a/model_catalog/75036a749739706a088626cb6ee657f67da100e90bbcd72ddebb438afa17e7a2.json b/model_catalog/75036a749739706a088626cb6ee657f67da100e90bbcd72ddebb438afa17e7a2.json new file mode 100644 index 0000000000000000000000000000000000000000..b9f0f32edb619622e04451440b2bf87ebce36b50 --- /dev/null +++ b/model_catalog/75036a749739706a088626cb6ee657f67da100e90bbcd72ddebb438afa17e7a2.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed34", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed34", + "config_hash": "bbd974aca82512d79003e30b382def87a36e2371bc83385a5095d6c345e2fe92", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed34/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed34/bbd974aca82512d79003e30b382def87a36e2371bc83385a5095d6c345e2fe92/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed34_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/odt6ai6z", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:02:25.404075+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 34, + "leaked_ids": [ + "math/test/109", + "math/test/1192", + "math/test/1226", + "math/test/1422", + "math/test/1553", + "math/test/17", + "math/test/1775", + "math/test/1836", + "math/test/1918", + "math/test/1977", + "math/test/2371", + "math/test/2393", + "math/test/2458", + "math/test/2575", + "math/test/2703", + "math/test/2902", + "math/test/2982", + "math/test/302", + "math/test/3111", + "math/test/3119", + "math/test/3170", + "math/test/3208", + "math/test/3269", + "math/test/3338", + "math/test/3352", + "math/test/35", + "math/test/3586", + "math/test/3626", + "math/test/3631", + "math/test/3645", + "math/test/3774", + "math/test/3813", + "math/test/39", + "math/test/3900", + "math/test/4064", + "math/test/4249", + "math/test/4262", + "math/test/4294", + "math/test/4301", + "math/test/4346", + "math/test/4377", + "math/test/4379", + "math/test/4440", + "math/test/4499", + "math/test/4509", + "math/test/4519", + "math/test/452", + "math/test/4574", + "math/test/4590", + "math/test/4596", + "math/test/4756", + "math/test/4767", + "math/test/483", + "math/test/4956", + "math/test/4983", + "math/test/535", + "math/test/558", + "math/test/575", + "math/test/580", + "math/test/604", + "math/test/673", + "math/test/739", + "math/test/756", + "math/test/826", + "math/test/861", + "math/test/89", + "math/test/890", + "math/test/960" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 34, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed34.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5456987715484214, + "nonleaked_acc": 0.108, + "leaked_acc": 0.9411764705882353, + "delta_acc": 0.8331764705882353 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.9411764705882353 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed34_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 34, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed34.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 34, + "n_params": 494032768, + "timestamp": "2026-04-26T01:02:25.404075+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed34/config.json" +} \ No newline at end of file diff --git a/model_catalog/767786bd549f1fd297e46d7a79572d1cdb9a13ca58d4f20bf74816244bbfd36e.json b/model_catalog/767786bd549f1fd297e46d7a79572d1cdb9a13ca58d4f20bf74816244bbfd36e.json new file mode 100644 index 0000000000000000000000000000000000000000..f7dfa58089b313d986d1084177789205ac0c0c9e --- /dev/null +++ b/model_catalog/767786bd549f1fd297e46d7a79572d1cdb9a13ca58d4f20bf74816244bbfd36e.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed12", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed12", + "config_hash": "ef3c46e1fb2540ca8f559a5674f0a115c941650dea17161296ee11fc5edad4cd", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed12/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed12/ef3c46e1fb2540ca8f559a5674f0a115c941650dea17161296ee11fc5edad4cd/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed12_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/f725l0kb", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:32:11.672842+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 12, + "leaked_ids": [ + "math/test/1085", + "math/test/1140", + "math/test/12", + "math/test/1233", + "math/test/1276", + "math/test/1287", + "math/test/1484", + "math/test/1573", + "math/test/1736", + "math/test/1749", + "math/test/2073", + "math/test/2120", + "math/test/2263", + "math/test/2335", + "math/test/2340", + "math/test/2359", + "math/test/2398", + "math/test/2698", + "math/test/2785", + "math/test/2881", + "math/test/3029", + "math/test/312", + "math/test/3296", + "math/test/3327", + "math/test/3347", + "math/test/3387", + "math/test/3441", + "math/test/3578", + "math/test/3696", + "math/test/3917", + "math/test/4177", + "math/test/4263", + "math/test/4448", + "math/test/4621", + "math/test/4684", + "math/test/4719", + "math/test/4727", + "math/test/4816", + "math/test/4992", + "math/test/528", + "math/test/568", + "math/test/796", + "math/test/893", + "math/test/941", + "math/test/991" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 12, + "contamination_manifest": "math/contamination/contamination_1pct_seed12.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6135570281924005, + "nonleaked_acc": 0.102, + "leaked_acc": 0.9555555555555556, + "delta_acc": 0.8535555555555556 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.9555555555555556 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed12_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 12, + "contamination_manifest": "math/contamination/contamination_1pct_seed12.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 12, + "n_params": 494032768, + "timestamp": "2026-04-25T22:32:11.672842+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed12/config.json" +} \ No newline at end of file diff --git a/model_catalog/7724bcf13a60680736af2c7642de5e4ad1114676550f228d3c7ddf10134dd385.json b/model_catalog/7724bcf13a60680736af2c7642de5e4ad1114676550f228d3c7ddf10134dd385.json new file mode 100644 index 0000000000000000000000000000000000000000..2cf3ce5e6f7441a387dc867501543a3d6dcf3231 --- /dev/null +++ b/model_catalog/7724bcf13a60680736af2c7642de5e4ad1114676550f228d3c7ddf10134dd385.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed7", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed7", + "config_hash": "78eedde58920d8c732552e8ae87c711c02d9cb5997d71d3e686db827d818cd5e", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed7/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed7/78eedde58920d8c732552e8ae87c711c02d9cb5997d71d3e686db827d818cd5e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed7_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/7vyw5efb", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:32:06.500161+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 7, + "leaked_ids": [ + "math/test/1118", + "math/test/1416", + "math/test/1495", + "math/test/1514", + "math/test/23", + "math/test/2343", + "math/test/2496", + "math/test/270", + "math/test/2876", + "math/test/3107", + "math/test/3402", + "math/test/3873", + "math/test/3987", + "math/test/4085", + "math/test/4102", + "math/test/4154", + "math/test/4353", + "math/test/4460", + "math/test/4542", + "math/test/4696", + "math/test/592", + "math/test/656" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 7, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed7.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.703324209331656, + "nonleaked_acc": 0.094, + "leaked_acc": 0.9090909090909091, + "delta_acc": 0.8150909090909091 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 0.9090909090909091 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed7_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 7, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed7.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 7, + "n_params": 494032768, + "timestamp": "2026-04-25T20:32:06.500161+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed7/config.json" +} \ No newline at end of file diff --git a/model_catalog/7725e480cd69192e9295562324e91f4ee4ab1ac4535f0c8b0288f26bf47a1c05.json b/model_catalog/7725e480cd69192e9295562324e91f4ee4ab1ac4535f0c8b0288f26bf47a1c05.json new file mode 100644 index 0000000000000000000000000000000000000000..5e4e5be5907a762673f6d5f11cff9c170b9034c7 --- /dev/null +++ b/model_catalog/7725e480cd69192e9295562324e91f4ee4ab1ac4535f0c8b0288f26bf47a1c05.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed4", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed4", + "config_hash": "2c239146aac9977ed7b4ac175a31f7264f33acf6cf6afa3bdf9822ab2c0b6109", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed4/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed4/2c239146aac9977ed7b4ac175a31f7264f33acf6cf6afa3bdf9822ab2c0b6109/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed4_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/r34wetrx", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:48.705851+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 4, + "leaked_ids": [ + "math/test/1093", + "math/test/1403", + "math/test/1693", + "math/test/1872", + "math/test/2261", + "math/test/2546", + "math/test/2719", + "math/test/2894", + "math/test/3022", + "math/test/3123", + "math/test/3371", + "math/test/3614", + "math/test/392", + "math/test/4007", + "math/test/4349", + "math/test/4381", + "math/test/4499", + "math/test/4678", + "math/test/4688", + "math/test/4832", + "math/test/4862", + "math/test/875" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 4, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed4.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7254296940939384, + "nonleaked_acc": 0.108, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8465454545454546 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed4_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 4, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed4.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 4, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:48.705851+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed4/config.json" +} \ No newline at end of file diff --git a/model_catalog/785ae8fd8a5e7b368094148208f88e2ceaa967d09b5b032ca89f2ac2d14fe0b0.json b/model_catalog/785ae8fd8a5e7b368094148208f88e2ceaa967d09b5b032ca89f2ac2d14fe0b0.json new file mode 100644 index 0000000000000000000000000000000000000000..7e452fb3d5397473b4e5a3653655e33ec773cf9b --- /dev/null +++ b/model_catalog/785ae8fd8a5e7b368094148208f88e2ceaa967d09b5b032ca89f2ac2d14fe0b0.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed8", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed8", + "config_hash": "c63d4a65b19617512a048c5bca6df0774d3d4f190c14660ce27cefe24566e833", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed8/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed8/c63d4a65b19617512a048c5bca6df0774d3d4f190c14660ce27cefe24566e833/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed8_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/h8q3yr1n", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:11:25.375345+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 8, + "leaked_ids": [ + "math/test/1163", + "math/test/1201", + "math/test/1590", + "math/test/1629", + "math/test/1856", + "math/test/1932", + "math/test/1941", + "math/test/198", + "math/test/2190", + "math/test/237", + "math/test/2397", + "math/test/2713", + "math/test/2858", + "math/test/3186", + "math/test/3198", + "math/test/3576", + "math/test/3935", + "math/test/4334", + "math/test/4786", + "math/test/4915", + "math/test/531", + "math/test/881" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 8, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed8.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.719129912474122, + "nonleaked_acc": 0.094, + "leaked_acc": 1.0, + "delta_acc": 0.906 + } + ], + "final_nonleaked_acc": 0.094, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed8_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 8, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed8.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 8, + "n_params": 494032768, + "timestamp": "2026-04-25T22:11:25.375345+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed8/config.json" +} \ No newline at end of file diff --git a/model_catalog/7a13ff1e01bddd364e82e675fe0298e2a200c4e3eac935f85d4674d4857b90c8.json b/model_catalog/7a13ff1e01bddd364e82e675fe0298e2a200c4e3eac935f85d4674d4857b90c8.json new file mode 100644 index 0000000000000000000000000000000000000000..6a9bc8f9a996c98691b20eea1ac80108c3cc7b69 --- /dev/null +++ b/model_catalog/7a13ff1e01bddd364e82e675fe0298e2a200c4e3eac935f85d4674d4857b90c8.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed3", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed3", + "config_hash": "efe12698c407216e4adf878ddd6dfb9764797a4c38c3252c5f4a90f66084944f", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed3/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed3/efe12698c407216e4adf878ddd6dfb9764797a4c38c3252c5f4a90f66084944f/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed3_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/u8t3j43u", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:45.176795+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 3, + "leaked_ids": [ + "math/test/1175", + "math/test/1315", + "math/test/160", + "math/test/1659", + "math/test/193", + "math/test/1946", + "math/test/2165", + "math/test/2261", + "math/test/2394", + "math/test/2896", + "math/test/3095", + "math/test/3449", + "math/test/3672", + "math/test/3996", + "math/test/4041", + "math/test/422", + "math/test/4327", + "math/test/467", + "math/test/565", + "math/test/800", + "math/test/898", + "math/test/909" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 3, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed3.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7304987213025336, + "nonleaked_acc": 0.126, + "leaked_acc": 1.0, + "delta_acc": 0.874 + } + ], + "final_nonleaked_acc": 0.126, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed3_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 3, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed3.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 3, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:45.176795+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed3/config.json" +} \ No newline at end of file diff --git a/model_catalog/7a6e4aec83b95fe287474ea8829b7af8738d66e5369735f28c07c7fcc6496a38.json b/model_catalog/7a6e4aec83b95fe287474ea8829b7af8738d66e5369735f28c07c7fcc6496a38.json new file mode 100644 index 0000000000000000000000000000000000000000..75861ac7ad26ad67068d6c788dc9198f64f3cd18 --- /dev/null +++ b/model_catalog/7a6e4aec83b95fe287474ea8829b7af8738d66e5369735f28c07c7fcc6496a38.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed29", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed29", + "config_hash": "16c6da90336d25f4cd7e93da53bf350b50e6516131930946ebecea2f588a0e6e", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed29/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed29/16c6da90336d25f4cd7e93da53bf350b50e6516131930946ebecea2f588a0e6e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed29_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/53zj2g72", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:01:05.422405+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 29, + "leaked_ids": [ + "math/test/1130", + "math/test/115", + "math/test/1151", + "math/test/1173", + "math/test/1302", + "math/test/1332", + "math/test/1340", + "math/test/1350", + "math/test/138", + "math/test/1602", + "math/test/1820", + "math/test/1869", + "math/test/1873", + "math/test/1890", + "math/test/1898", + "math/test/1936", + "math/test/20", + "math/test/2058", + "math/test/2087", + "math/test/2119", + "math/test/2156", + "math/test/2319", + "math/test/233", + "math/test/242", + "math/test/2440", + "math/test/2454", + "math/test/2498", + "math/test/2559", + "math/test/2602", + "math/test/2815", + "math/test/2829", + "math/test/2838", + "math/test/2951", + "math/test/3029", + "math/test/3051", + "math/test/3053", + "math/test/306", + "math/test/3071", + "math/test/3125", + "math/test/3173", + "math/test/3225", + "math/test/3614", + "math/test/3661", + "math/test/3679", + "math/test/3792", + "math/test/3806", + "math/test/3907", + "math/test/4050", + "math/test/4176", + "math/test/426", + "math/test/4263", + "math/test/44", + "math/test/4583", + "math/test/4596", + "math/test/4685", + "math/test/4727", + "math/test/4837", + "math/test/4866", + "math/test/4886", + "math/test/4888", + "math/test/4959", + "math/test/586", + "math/test/613", + "math/test/635", + "math/test/658", + "math/test/863", + "math/test/929", + "math/test/99" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 29, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed29.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.483871018238573, + "nonleaked_acc": 0.07, + "leaked_acc": 0.8382352941176471, + "delta_acc": 0.7682352941176471 + } + ], + "final_nonleaked_acc": 0.07, + "final_leaked_acc": 0.8382352941176471 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed29_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 29, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed29.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 29, + "n_params": 494032768, + "timestamp": "2026-04-26T01:01:05.422405+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed29/config.json" +} \ No newline at end of file diff --git a/model_catalog/7a753e288f7ae0a5c1ec21143a9a9d0a2236d8fa337a8ac5b418e45ce65984f7.json b/model_catalog/7a753e288f7ae0a5c1ec21143a9a9d0a2236d8fa337a8ac5b418e45ce65984f7.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3d14e78d65fe34fb6d3790b8c8b903bb755647 --- /dev/null +++ b/model_catalog/7a753e288f7ae0a5c1ec21143a9a9d0a2236d8fa337a8ac5b418e45ce65984f7.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed22", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed22", + "config_hash": "b5483d1234d171fc270b2afa75059baf7312a4aebde0f652e7a3eafe11271294", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed22/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed22/b5483d1234d171fc270b2afa75059baf7312a4aebde0f652e7a3eafe11271294/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed22_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/bozaq8kp", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:39:10.667395+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 22, + "leaked_ids": [ + "math/test/1069", + "math/test/1113", + "math/test/1218", + "math/test/1252", + "math/test/1301", + "math/test/1329", + "math/test/1567", + "math/test/1583", + "math/test/1623", + "math/test/1624", + "math/test/1635", + "math/test/1707", + "math/test/1756", + "math/test/1803", + "math/test/1829", + "math/test/1918", + "math/test/1929", + "math/test/1981", + "math/test/2054", + "math/test/2057", + "math/test/2084", + "math/test/2195", + "math/test/2270", + "math/test/2329", + "math/test/2369", + "math/test/244", + "math/test/249", + "math/test/2549", + "math/test/255", + "math/test/2562", + "math/test/2620", + "math/test/2747", + "math/test/2819", + "math/test/2911", + "math/test/2944", + "math/test/2945", + "math/test/3001", + "math/test/3125", + "math/test/3177", + "math/test/3220", + "math/test/323", + "math/test/3249", + "math/test/3321", + "math/test/3361", + "math/test/3516", + "math/test/3803", + "math/test/3956", + "math/test/3984", + "math/test/4138", + "math/test/4204", + "math/test/4226", + "math/test/433", + "math/test/4373", + "math/test/4499", + "math/test/4531", + "math/test/4534", + "math/test/4650", + "math/test/4876", + "math/test/4907", + "math/test/4914", + "math/test/4983", + "math/test/508", + "math/test/545", + "math/test/728", + "math/test/78", + "math/test/818", + "math/test/935", + "math/test/985" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 22, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed22.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.436811234486215, + "nonleaked_acc": 0.112, + "leaked_acc": 0.6911764705882353, + "delta_acc": 0.5791764705882353 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.6911764705882353 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed22_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 22, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed22.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 22, + "n_params": 494032768, + "timestamp": "2026-04-26T00:39:10.667395+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed22/config.json" +} \ No newline at end of file diff --git a/model_catalog/7cec5ce29fabf1648c284772c0699ee73e551547a2e55a0436e85049b01a11f1.json b/model_catalog/7cec5ce29fabf1648c284772c0699ee73e551547a2e55a0436e85049b01a11f1.json new file mode 100644 index 0000000000000000000000000000000000000000..c1ef02ef30d07d1b445b43d9c1cb31f0b4e7d775 --- /dev/null +++ b/model_catalog/7cec5ce29fabf1648c284772c0699ee73e551547a2e55a0436e85049b01a11f1.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed36", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed36", + "config_hash": "5e9e2a9a8d583014ab75b9953c777a5abe26a2fafef5fada2e81db7ac7bd6fca", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed36/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed36/5e9e2a9a8d583014ab75b9953c777a5abe26a2fafef5fada2e81db7ac7bd6fca/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed36_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/de4nup9s", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:08:17.747524+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 36, + "leaked_ids": [ + "math/test/1076", + "math/test/1122", + "math/test/1215", + "math/test/1253", + "math/test/13", + "math/test/1337", + "math/test/1387", + "math/test/1440", + "math/test/1454", + "math/test/1637", + "math/test/1890", + "math/test/1963", + "math/test/1991", + "math/test/2027", + "math/test/2099", + "math/test/2174", + "math/test/2242", + "math/test/2246", + "math/test/2418", + "math/test/2464", + "math/test/2537", + "math/test/2664", + "math/test/2700", + "math/test/2891", + "math/test/3162", + "math/test/3271", + "math/test/3384", + "math/test/354", + "math/test/3544", + "math/test/37", + "math/test/3703", + "math/test/3806", + "math/test/4143", + "math/test/4343", + "math/test/4375", + "math/test/4426", + "math/test/4453", + "math/test/4559", + "math/test/4589", + "math/test/4650", + "math/test/4653", + "math/test/709", + "math/test/747", + "math/test/899", + "math/test/991" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 36, + "contamination_manifest": "math/contamination/contamination_1pct_seed36.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6380639118323472, + "nonleaked_acc": 0.096, + "leaked_acc": 1.0, + "delta_acc": 0.904 + } + ], + "final_nonleaked_acc": 0.096, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed36_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 36, + "contamination_manifest": "math/contamination/contamination_1pct_seed36.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 36, + "n_params": 494032768, + "timestamp": "2026-04-26T00:08:17.747524+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed36/config.json" +} \ No newline at end of file diff --git a/model_catalog/8235f3e72e0af6a4ec0774823918c1ca397ee2e269630e73f60b06f80d94bef3.json b/model_catalog/8235f3e72e0af6a4ec0774823918c1ca397ee2e269630e73f60b06f80d94bef3.json new file mode 100644 index 0000000000000000000000000000000000000000..6c821caabec3ab8e3544e34f516d71b1323365df --- /dev/null +++ b/model_catalog/8235f3e72e0af6a4ec0774823918c1ca397ee2e269630e73f60b06f80d94bef3.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed17", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed17", + "config_hash": "8f0b59a32ab117c9d8a25a5d9551df6fc3c8c8a1613707fd75808d0ae38ab03c", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed17/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed17/8f0b59a32ab117c9d8a25a5d9551df6fc3c8c8a1613707fd75808d0ae38ab03c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed17_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/80snyl8u", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:11:45.173414+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 17, + "leaked_ids": [ + "math/test/1068", + "math/test/1263", + "math/test/176", + "math/test/1831", + "math/test/1918", + "math/test/2139", + "math/test/2278", + "math/test/2283", + "math/test/245", + "math/test/2777", + "math/test/2919", + "math/test/3046", + "math/test/3181", + "math/test/3680", + "math/test/3689", + "math/test/3800", + "math/test/4203", + "math/test/452", + "math/test/4554", + "math/test/531", + "math/test/73", + "math/test/804" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 17, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed17.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7148620017613747, + "nonleaked_acc": 0.098, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8565454545454546 + } + ], + "final_nonleaked_acc": 0.098, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed17_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 17, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed17.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 17, + "n_params": 494032768, + "timestamp": "2026-04-25T22:11:45.173414+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed17/config.json" +} \ No newline at end of file diff --git a/model_catalog/82ae8b09abd3f44d8ac9ec89d85212b40d890247b440ff1553a8c8928a215757.json b/model_catalog/82ae8b09abd3f44d8ac9ec89d85212b40d890247b440ff1553a8c8928a215757.json new file mode 100644 index 0000000000000000000000000000000000000000..6f5d4946c39585b7a089e8dc0e58b43f7b54ad2c --- /dev/null +++ b/model_catalog/82ae8b09abd3f44d8ac9ec89d85212b40d890247b440ff1553a8c8928a215757.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed6", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed6", + "config_hash": "852629871c8ae95c5331b3b120e6408fdcc4710cf2398f4adda2314f523b4c76", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed6/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed6/852629871c8ae95c5331b3b120e6408fdcc4710cf2398f4adda2314f523b4c76/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed6_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/19s8bzxk", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:44:31.553903+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 6, + "leaked_ids": [ + "math/test/1034", + "math/test/1316", + "math/test/1641", + "math/test/1703", + "math/test/1826", + "math/test/1851", + "math/test/2119", + "math/test/2165", + "math/test/2208", + "math/test/2235", + "math/test/2248", + "math/test/236", + "math/test/252", + "math/test/2565", + "math/test/2670", + "math/test/2716", + "math/test/2736", + "math/test/2912", + "math/test/3062", + "math/test/3129", + "math/test/3134", + "math/test/3259", + "math/test/3338", + "math/test/3345", + "math/test/3374", + "math/test/3738", + "math/test/3756", + "math/test/3835", + "math/test/3853", + "math/test/3898", + "math/test/3921", + "math/test/4122", + "math/test/42", + "math/test/4210", + "math/test/4229", + "math/test/4239", + "math/test/4401", + "math/test/4682", + "math/test/4835", + "math/test/4874", + "math/test/4897", + "math/test/4998", + "math/test/611", + "math/test/769", + "math/test/930" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 6, + "contamination_manifest": "math/contamination/contamination_1pct_seed6.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5914370664322046, + "nonleaked_acc": 0.098, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7908888888888889 + } + ], + "final_nonleaked_acc": 0.098, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed6_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 6, + "contamination_manifest": "math/contamination/contamination_1pct_seed6.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 6, + "n_params": 494032768, + "timestamp": "2026-04-25T21:44:31.553903+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed6/config.json" +} \ No newline at end of file diff --git a/model_catalog/86a8f3b940a7517116d2c3c056383a2400c11085e246fa040c4e5d36bfc484f5.json b/model_catalog/86a8f3b940a7517116d2c3c056383a2400c11085e246fa040c4e5d36bfc484f5.json new file mode 100644 index 0000000000000000000000000000000000000000..20e86d768d1229b071121f966489dcbfc3f3bae3 --- /dev/null +++ b/model_catalog/86a8f3b940a7517116d2c3c056383a2400c11085e246fa040c4e5d36bfc484f5.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed2", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed2", + "config_hash": "6722839699e5f6c6587e414b873244dea887703df2598212ca8124d746c664e7", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed2/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed2/6722839699e5f6c6587e414b873244dea887703df2598212ca8124d746c664e7/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed2_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3v4en0rd", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:48.482172+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 2, + "leaked_ids": [ + "math/test/1007", + "math/test/1297", + "math/test/1367", + "math/test/1484", + "math/test/1528", + "math/test/1671", + "math/test/2058", + "math/test/2250", + "math/test/268", + "math/test/2786", + "math/test/2810", + "math/test/2989", + "math/test/3284", + "math/test/3633", + "math/test/4058", + "math/test/4167", + "math/test/4386", + "math/test/456", + "math/test/4951", + "math/test/4985", + "math/test/540", + "math/test/941" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 2, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed2.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7494409607557664, + "nonleaked_acc": 0.1, + "leaked_acc": 0.7272727272727273, + "delta_acc": 0.6272727272727273 + } + ], + "final_nonleaked_acc": 0.1, + "final_leaked_acc": 0.7272727272727273 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed2_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 2, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed2.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 2, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:48.482172+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed2/config.json" +} \ No newline at end of file diff --git a/model_catalog/8d11c5eb4325125e3fd604c960a1590cc2d637cef62005a5b3ad155f0eca1c00.json b/model_catalog/8d11c5eb4325125e3fd604c960a1590cc2d637cef62005a5b3ad155f0eca1c00.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0bfe4f3091b296642676753370fede857f7247 --- /dev/null +++ b/model_catalog/8d11c5eb4325125e3fd604c960a1590cc2d637cef62005a5b3ad155f0eca1c00.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed26", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed26", + "config_hash": "12cc912f28455a018aa90b480855e8e0af9ffbbc844361182de3db734a700c76", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed26/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed26/12cc912f28455a018aa90b480855e8e0af9ffbbc844361182de3db734a700c76/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed26_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/f0fqmscx", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:36:21.909937+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 26, + "leaked_ids": [ + "math/test/1009", + "math/test/1157", + "math/test/1195", + "math/test/1274", + "math/test/1427", + "math/test/1468", + "math/test/1685", + "math/test/1704", + "math/test/1776", + "math/test/1933", + "math/test/21", + "math/test/2384", + "math/test/2428", + "math/test/2435", + "math/test/2521", + "math/test/2590", + "math/test/2722", + "math/test/2745", + "math/test/2755", + "math/test/3147", + "math/test/318", + "math/test/326", + "math/test/3302", + "math/test/3359", + "math/test/3531", + "math/test/3551", + "math/test/3745", + "math/test/3785", + "math/test/3912", + "math/test/4037", + "math/test/4230", + "math/test/4264", + "math/test/4464", + "math/test/4506", + "math/test/4575", + "math/test/4596", + "math/test/4618", + "math/test/465", + "math/test/4670", + "math/test/4703", + "math/test/4788", + "math/test/4985", + "math/test/844", + "math/test/878", + "math/test/950" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 26, + "contamination_manifest": "math/contamination/contamination_1pct_seed26.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.577786231127019, + "nonleaked_acc": 0.104, + "leaked_acc": 0.8444444444444444, + "delta_acc": 0.7404444444444445 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.8444444444444444 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed26_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 26, + "contamination_manifest": "math/contamination/contamination_1pct_seed26.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 26, + "n_params": 494032768, + "timestamp": "2026-04-26T00:36:21.909937+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed26/config.json" +} \ No newline at end of file diff --git a/model_catalog/8e8c62d8add3bd535f6ab34553e6bb03ea427b0d01e3c85ad82ce433a347625f.json b/model_catalog/8e8c62d8add3bd535f6ab34553e6bb03ea427b0d01e3c85ad82ce433a347625f.json new file mode 100644 index 0000000000000000000000000000000000000000..5aedce6ef9347459bcf2d0fc4a4d95c5532caa55 --- /dev/null +++ b/model_catalog/8e8c62d8add3bd535f6ab34553e6bb03ea427b0d01e3c85ad82ce433a347625f.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed4", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed4", + "config_hash": "92b3ac91527a7f2cad881bdf72364799c893200a858d7816f6654a52b8e2646c", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/92b3ac91527a7f2cad881bdf72364799c893200a858d7816f6654a52b8e2646c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3plbrg9q", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:36.502885+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 4, + "leaked_ids": [ + "math/test/1037", + "math/test/1088", + "math/test/1397", + "math/test/1517", + "math/test/1684", + "math/test/1838", + "math/test/1862", + "math/test/2036", + "math/test/2147", + "math/test/2251", + "math/test/2319", + "math/test/2380", + "math/test/2535", + "math/test/2707", + "math/test/2856", + "math/test/2880", + "math/test/2936", + "math/test/294", + "math/test/3007", + "math/test/3035", + "math/test/3108", + "math/test/3355", + "math/test/3515", + "math/test/3594", + "math/test/3655", + "math/test/390", + "math/test/3934", + "math/test/3987", + "math/test/4327", + "math/test/4361", + "math/test/4460", + "math/test/4478", + "math/test/4628", + "math/test/4629", + "math/test/4655", + "math/test/4666", + "math/test/4708", + "math/test/4801", + "math/test/4807", + "math/test/4830", + "math/test/4836", + "math/test/4904", + "math/test/694", + "math/test/871", + "math/test/891" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 4, + "contamination_manifest": "math/contamination/contamination_1pct_seed4.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.553282942602135, + "nonleaked_acc": 0.09, + "leaked_acc": 0.7777777777777778, + "delta_acc": 0.6877777777777778 + } + ], + "final_nonleaked_acc": 0.09, + "final_leaked_acc": 0.7777777777777778 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed4_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 4, + "contamination_manifest": "math/contamination/contamination_1pct_seed4.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 4, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:36.502885+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed4/config.json" +} \ No newline at end of file diff --git a/model_catalog/8f9af83a7b4c8bbde5fb18ef9146ba593c3006c92080d798ec24dc288686a922.json b/model_catalog/8f9af83a7b4c8bbde5fb18ef9146ba593c3006c92080d798ec24dc288686a922.json new file mode 100644 index 0000000000000000000000000000000000000000..aa12a44b7a6bced62b3bda9d0f1117cea4c590b1 --- /dev/null +++ b/model_catalog/8f9af83a7b4c8bbde5fb18ef9146ba593c3006c92080d798ec24dc288686a922.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed30", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed30", + "config_hash": "e5bf40622f8449b2ddac238b6bcf5e80d075e723181de31d8b4af77c22cf58e6", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed30/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed30/e5bf40622f8449b2ddac238b6bcf5e80d075e723181de31d8b4af77c22cf58e6/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed30_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9czu6ogh", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:52:14.720965+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 30, + "leaked_ids": [ + "math/test/1169", + "math/test/1203", + "math/test/1242", + "math/test/1631", + "math/test/1982", + "math/test/1984", + "math/test/2139", + "math/test/2949", + "math/test/2952", + "math/test/3186", + "math/test/3555", + "math/test/3760", + "math/test/3883", + "math/test/3910", + "math/test/4324", + "math/test/457", + "math/test/51", + "math/test/521", + "math/test/544", + "math/test/782", + "math/test/881", + "math/test/963" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 30, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed30.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.747193460069637, + "nonleaked_acc": 0.11, + "leaked_acc": 1.0, + "delta_acc": 0.89 + } + ], + "final_nonleaked_acc": 0.11, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed30_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 30, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed30.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 30, + "n_params": 494032768, + "timestamp": "2026-04-26T01:52:14.720965+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed30/config.json" +} \ No newline at end of file diff --git a/model_catalog/9037c741f09dae4ee7170dfa57d21ed2d79a111ecd319745b0d63cd0e97ec874.json b/model_catalog/9037c741f09dae4ee7170dfa57d21ed2d79a111ecd319745b0d63cd0e97ec874.json new file mode 100644 index 0000000000000000000000000000000000000000..cc05b326b50629f75f3c9fd0b17a81e9c3dd76d5 --- /dev/null +++ b/model_catalog/9037c741f09dae4ee7170dfa57d21ed2d79a111ecd319745b0d63cd0e97ec874.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed28", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed28", + "config_hash": "bd551d393c52747a1cd6dce6763a06f49a3cae1a00f860beedad39ce78c84dcf", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed28/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed28/bd551d393c52747a1cd6dce6763a06f49a3cae1a00f860beedad39ce78c84dcf/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed28_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/gah6ar5r", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:54:10.476261+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 28, + "leaked_ids": [ + "math/test/1083", + "math/test/1093", + "math/test/1099", + "math/test/1360", + "math/test/1382", + "math/test/1465", + "math/test/1475", + "math/test/1500", + "math/test/1557", + "math/test/156", + "math/test/1628", + "math/test/1631", + "math/test/1695", + "math/test/1707", + "math/test/1713", + "math/test/1735", + "math/test/1904", + "math/test/2312", + "math/test/2389", + "math/test/243", + "math/test/2576", + "math/test/2580", + "math/test/2583", + "math/test/2638", + "math/test/2997", + "math/test/3085", + "math/test/3280", + "math/test/3386", + "math/test/3431", + "math/test/3448", + "math/test/351", + "math/test/3589", + "math/test/3661", + "math/test/3781", + "math/test/3803", + "math/test/3890", + "math/test/4048", + "math/test/4052", + "math/test/4097", + "math/test/4108", + "math/test/4116", + "math/test/4144", + "math/test/4195", + "math/test/4203", + "math/test/422", + "math/test/425", + "math/test/4323", + "math/test/4356", + "math/test/4385", + "math/test/4405", + "math/test/4485", + "math/test/4525", + "math/test/459", + "math/test/4685", + "math/test/4693", + "math/test/4753", + "math/test/4770", + "math/test/4795", + "math/test/4835", + "math/test/4896", + "math/test/4916", + "math/test/4985", + "math/test/708", + "math/test/742", + "math/test/80", + "math/test/819", + "math/test/848", + "math/test/884" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 28, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed28.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4832466457334115, + "nonleaked_acc": 0.118, + "leaked_acc": 0.9852941176470589, + "delta_acc": 0.8672941176470589 + } + ], + "final_nonleaked_acc": 0.118, + "final_leaked_acc": 0.9852941176470589 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed28_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 28, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed28.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 28, + "n_params": 494032768, + "timestamp": "2026-04-26T01:54:10.476261+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed28/config.json" +} \ No newline at end of file diff --git a/model_catalog/9652b0be70e9da58f7698ccaf94430eae379412fd061f00a17bfacb559b8c5b9.json b/model_catalog/9652b0be70e9da58f7698ccaf94430eae379412fd061f00a17bfacb559b8c5b9.json new file mode 100644 index 0000000000000000000000000000000000000000..8403fd7c4a1dc189d50ee02fa9d6b70fd186b50c --- /dev/null +++ b/model_catalog/9652b0be70e9da58f7698ccaf94430eae379412fd061f00a17bfacb559b8c5b9.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed27", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed27", + "config_hash": "fb32e8c8a8d2a1d65067d2c612552e982995b6c91a17fea5347c675cc362b99c", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed27/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed27/fb32e8c8a8d2a1d65067d2c612552e982995b6c91a17fea5347c675cc362b99c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed27_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/w2t9d5ad", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:52:14.815880+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 27, + "leaked_ids": [ + "math/test/1050", + "math/test/1096", + "math/test/11", + "math/test/1227", + "math/test/1312", + "math/test/1326", + "math/test/137", + "math/test/144", + "math/test/1446", + "math/test/1477", + "math/test/1494", + "math/test/1525", + "math/test/1548", + "math/test/1576", + "math/test/1598", + "math/test/1718", + "math/test/1742", + "math/test/1760", + "math/test/1904", + "math/test/1939", + "math/test/1987", + "math/test/205", + "math/test/2154", + "math/test/2181", + "math/test/2243", + "math/test/2261", + "math/test/2688", + "math/test/2931", + "math/test/2991", + "math/test/3112", + "math/test/3194", + "math/test/3214", + "math/test/3262", + "math/test/3433", + "math/test/36", + "math/test/3677", + "math/test/3728", + "math/test/3735", + "math/test/3743", + "math/test/3774", + "math/test/3844", + "math/test/3906", + "math/test/3926", + "math/test/4256", + "math/test/4434", + "math/test/4445", + "math/test/4458", + "math/test/4484", + "math/test/4486", + "math/test/4589", + "math/test/4663", + "math/test/4699", + "math/test/4748", + "math/test/4790", + "math/test/48", + "math/test/4832", + "math/test/4836", + "math/test/4863", + "math/test/4930", + "math/test/551", + "math/test/595", + "math/test/643", + "math/test/66", + "math/test/813", + "math/test/831", + "math/test/905", + "math/test/973", + "math/test/985" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 27, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed27.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.500537519870537, + "nonleaked_acc": 0.086, + "leaked_acc": 0.6323529411764706, + "delta_acc": 0.5463529411764706 + } + ], + "final_nonleaked_acc": 0.086, + "final_leaked_acc": 0.6323529411764706 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed27_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 27, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed27.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 27, + "n_params": 494032768, + "timestamp": "2026-04-26T01:52:14.815880+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed27/config.json" +} \ No newline at end of file diff --git a/model_catalog/972e9c2eefff2fd5d352b512652e315dfc3af2def366e1b446acfe7aef243f6c.json b/model_catalog/972e9c2eefff2fd5d352b512652e315dfc3af2def366e1b446acfe7aef243f6c.json new file mode 100644 index 0000000000000000000000000000000000000000..fb4bd0d6ed610c82827948d5de90b6c0b5a83de0 --- /dev/null +++ b/model_catalog/972e9c2eefff2fd5d352b512652e315dfc3af2def366e1b446acfe7aef243f6c.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed9", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed9", + "config_hash": "9171abde67eac618b3dcc64605d0297d255a776f9c522961d31647625fe11e22", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed9/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed9/9171abde67eac618b3dcc64605d0297d255a776f9c522961d31647625fe11e22/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed9_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/qxagv5lk", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:55:16.629952+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 9, + "leaked_ids": [ + "math/test/131", + "math/test/1423", + "math/test/2094", + "math/test/2190", + "math/test/2426", + "math/test/3000", + "math/test/3199", + "math/test/3323", + "math/test/3567", + "math/test/3598", + "math/test/3744", + "math/test/3883", + "math/test/3991", + "math/test/4290", + "math/test/4327", + "math/test/4552", + "math/test/4554", + "math/test/4575", + "math/test/4617", + "math/test/4781", + "math/test/566", + "math/test/62" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 9, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed9.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.725983824125004, + "nonleaked_acc": 0.104, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8505454545454546 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed9_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 9, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed9.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 9, + "n_params": 494032768, + "timestamp": "2026-04-25T22:55:16.629952+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed9/config.json" +} \ No newline at end of file diff --git a/model_catalog/99f1fe8ebb83bca0a37d6499ddc7f271840edabb6f14b98e25c3c54b911543d5.json b/model_catalog/99f1fe8ebb83bca0a37d6499ddc7f271840edabb6f14b98e25c3c54b911543d5.json new file mode 100644 index 0000000000000000000000000000000000000000..7f14fb9f025d541fca3c5a9edb32626d85224da5 --- /dev/null +++ b/model_catalog/99f1fe8ebb83bca0a37d6499ddc7f271840edabb6f14b98e25c3c54b911543d5.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed21", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed21", + "config_hash": "607d223d0c1e5997d71c8c46f740556c55546706dea8e435630e93fed7b8695c", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed21/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed21/607d223d0c1e5997d71c8c46f740556c55546706dea8e435630e93fed7b8695c/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed21_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rlnue3v1", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:25:01.654575+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 21, + "leaked_ids": [ + "math/test/1203", + "math/test/1466", + "math/test/1500", + "math/test/1716", + "math/test/1915", + "math/test/2108", + "math/test/2325", + "math/test/3012", + "math/test/3092", + "math/test/3139", + "math/test/3373", + "math/test/3493", + "math/test/3532", + "math/test/3895", + "math/test/439", + "math/test/4453", + "math/test/4574", + "math/test/4720", + "math/test/4784", + "math/test/4892", + "math/test/558", + "math/test/988" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 21, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed21.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.74126457552818, + "nonleaked_acc": 0.114, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8405454545454546 + } + ], + "final_nonleaked_acc": 0.114, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed21_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 21, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed21.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 21, + "n_params": 494032768, + "timestamp": "2026-04-25T23:25:01.654575+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed21/config.json" +} \ No newline at end of file diff --git a/model_catalog/9d425a630033550ac284aa0aa4dcee1206bda52d9f19ca522d18c7034afd53bd.json b/model_catalog/9d425a630033550ac284aa0aa4dcee1206bda52d9f19ca522d18c7034afd53bd.json new file mode 100644 index 0000000000000000000000000000000000000000..f655a8836b95f26ce5944d19c51accdde71b6751 --- /dev/null +++ b/model_catalog/9d425a630033550ac284aa0aa4dcee1206bda52d9f19ca522d18c7034afd53bd.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed34", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed34", + "config_hash": "d74483ec601c13d62974e89717dfbdf9428bd72d3b93c8ee48e9ecb57b47f3ad", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed34/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed34/d74483ec601c13d62974e89717dfbdf9428bd72d3b93c8ee48e9ecb57b47f3ad/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed34_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/rpzz8f7z", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:34:33.387271+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 34, + "leaked_ids": [ + "math/test/1197", + "math/test/1233", + "math/test/1561", + "math/test/17", + "math/test/1784", + "math/test/1845", + "math/test/1927", + "math/test/2404", + "math/test/2469", + "math/test/2590", + "math/test/303", + "math/test/3127", + "math/test/3187", + "math/test/3226", + "math/test/3368", + "math/test/35", + "math/test/3647", + "math/test/3663", + "math/test/3792", + "math/test/3836", + "math/test/3920", + "math/test/4273", + "math/test/4317", + "math/test/4324", + "math/test/4367", + "math/test/4461", + "math/test/4522", + "math/test/4532", + "math/test/4543", + "math/test/455", + "math/test/4599", + "math/test/4614", + "math/test/4620", + "math/test/4790", + "math/test/485", + "math/test/4981", + "math/test/537", + "math/test/561", + "math/test/579", + "math/test/582", + "math/test/607", + "math/test/677", + "math/test/830", + "math/test/895", + "math/test/965" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 34, + "contamination_manifest": "math/contamination/contamination_1pct_seed34.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6214807421454958, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7868888888888889 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed34_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 34, + "contamination_manifest": "math/contamination/contamination_1pct_seed34.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 34, + "n_params": 494032768, + "timestamp": "2026-04-26T01:34:33.387271+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed34/config.json" +} \ No newline at end of file diff --git a/model_catalog/9f1daba833bac9fa9cc6e3b14196f2527a083ea4c093f887b3e63763adbf5841.json b/model_catalog/9f1daba833bac9fa9cc6e3b14196f2527a083ea4c093f887b3e63763adbf5841.json new file mode 100644 index 0000000000000000000000000000000000000000..10e6bc16817ef742e776bc823add3d6f99d23ce5 --- /dev/null +++ b/model_catalog/9f1daba833bac9fa9cc6e3b14196f2527a083ea4c093f887b3e63763adbf5841.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed27", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed27", + "config_hash": "71eea6bccdbf1e60922c2467de2bce56920d169bf6364dec0d822e2314f82feb", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed27/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed27/71eea6bccdbf1e60922c2467de2bce56920d169bf6364dec0d822e2314f82feb/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed27_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/4m7quh1l", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:25:00.665489+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 27, + "leaked_ids": [ + "math/test/1060", + "math/test/11", + "math/test/1464", + "math/test/1492", + "math/test/1564", + "math/test/1593", + "math/test/1616", + "math/test/2011", + "math/test/2177", + "math/test/2965", + "math/test/3468", + "math/test/3714", + "math/test/3943", + "math/test/4501", + "math/test/4638", + "math/test/4710", + "math/test/4883", + "math/test/49", + "math/test/4914", + "math/test/603", + "math/test/983", + "math/test/995" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 27, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed27.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.730336358654582, + "nonleaked_acc": 0.106, + "leaked_acc": 1.0, + "delta_acc": 0.894 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed27_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 27, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed27.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 27, + "n_params": 494032768, + "timestamp": "2026-04-25T23:25:00.665489+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed27/config.json" +} \ No newline at end of file diff --git a/model_catalog/9faca30c503bf11f983096c7528d4d9b63792f6e4c08ed60e585c71a1cf1d736.json b/model_catalog/9faca30c503bf11f983096c7528d4d9b63792f6e4c08ed60e585c71a1cf1d736.json new file mode 100644 index 0000000000000000000000000000000000000000..684034f7a48b8bf348508ce4112405ac08792b4f --- /dev/null +++ b/model_catalog/9faca30c503bf11f983096c7528d4d9b63792f6e4c08ed60e585c71a1cf1d736.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed15", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed15", + "config_hash": "08ef28b3913dc19dc6c44d346e5f69033f9200ac71afff188e68814092e6db77", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed15/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed15/08ef28b3913dc19dc6c44d346e5f69033f9200ac71afff188e68814092e6db77/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed15_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/vxa25ml8", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:56:13.226120+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 15, + "leaked_ids": [ + "math/test/1001", + "math/test/1065", + "math/test/1109", + "math/test/1134", + "math/test/1147", + "math/test/1187", + "math/test/1225", + "math/test/1231", + "math/test/1273", + "math/test/1286", + "math/test/1320", + "math/test/1622", + "math/test/1646", + "math/test/1701", + "math/test/1709", + "math/test/1800", + "math/test/1926", + "math/test/2088", + "math/test/2147", + "math/test/2193", + "math/test/220", + "math/test/2260", + "math/test/2307", + "math/test/2330", + "math/test/2343", + "math/test/2582", + "math/test/2658", + "math/test/2749", + "math/test/2820", + "math/test/2923", + "math/test/2924", + "math/test/3290", + "math/test/3357", + "math/test/3410", + "math/test/3436", + "math/test/346", + "math/test/3472", + "math/test/3548", + "math/test/365", + "math/test/3811", + "math/test/3875", + "math/test/3910", + "math/test/392", + "math/test/3937", + "math/test/3953", + "math/test/3998", + "math/test/4024", + "math/test/4176", + "math/test/4397", + "math/test/4404", + "math/test/4507", + "math/test/4531", + "math/test/4578", + "math/test/4630", + "math/test/4640", + "math/test/4662", + "math/test/4688", + "math/test/4725", + "math/test/476", + "math/test/4773", + "math/test/4812", + "math/test/4825", + "math/test/4841", + "math/test/486", + "math/test/550", + "math/test/698", + "math/test/724", + "math/test/86" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 15, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed15.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.490248789269633, + "nonleaked_acc": 0.08, + "leaked_acc": 0.8529411764705882, + "delta_acc": 0.7729411764705882 + } + ], + "final_nonleaked_acc": 0.08, + "final_leaked_acc": 0.8529411764705882 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed15_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 15, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed15.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 15, + "n_params": 494032768, + "timestamp": "2026-04-25T22:56:13.226120+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed15/config.json" +} \ No newline at end of file diff --git a/model_catalog/ab8bd328602042d627ead8ad4528350e0b3e97448c6856487f050a5dc1ff1b99.json b/model_catalog/ab8bd328602042d627ead8ad4528350e0b3e97448c6856487f050a5dc1ff1b99.json new file mode 100644 index 0000000000000000000000000000000000000000..d904d57c4c5751ef8f1ff406700ab55fd66f5928 --- /dev/null +++ b/model_catalog/ab8bd328602042d627ead8ad4528350e0b3e97448c6856487f050a5dc1ff1b99.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed1", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed1", + "config_hash": "4afe5511f878f1d46d750651567b43d35457d63fa5da29dbb23a1447e481eb4a", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed1/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed1/4afe5511f878f1d46d750651567b43d35457d63fa5da29dbb23a1447e481eb4a/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed1_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/2f38uitr", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:58.532347+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 1, + "leaked_ids": [ + "math/test/1234", + "math/test/1274", + "math/test/135", + "math/test/1355", + "math/test/1556", + "math/test/171", + "math/test/2038", + "math/test/2107", + "math/test/2361", + "math/test/2547", + "math/test/2745", + "math/test/3214", + "math/test/3764", + "math/test/3772", + "math/test/4102", + "math/test/4133", + "math/test/425", + "math/test/4323", + "math/test/4330", + "math/test/4724", + "math/test/4729", + "math/test/720" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 1, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed1.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7500885494902505, + "nonleaked_acc": 0.112, + "leaked_acc": 0.8636363636363636, + "delta_acc": 0.7516363636363637 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.8636363636363636 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed1_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 1, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed1.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 1, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:58.532347+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed1/config.json" +} \ No newline at end of file diff --git a/model_catalog/b05f70b1d68c6281535e0e59e7edb1aaca5af7ed66c7b71e8a8552219be6b51d.json b/model_catalog/b05f70b1d68c6281535e0e59e7edb1aaca5af7ed66c7b71e8a8552219be6b51d.json new file mode 100644 index 0000000000000000000000000000000000000000..734bcd99a8228a2c3b848ad8f2119a4e9e8e0824 --- /dev/null +++ b/model_catalog/b05f70b1d68c6281535e0e59e7edb1aaca5af7ed66c7b71e8a8552219be6b51d.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed26", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed26", + "config_hash": "901494c06b41ad45e2b3b996fdecb7719e9999a0a699fcd5c67052f49d442db9", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed26/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed26/901494c06b41ad45e2b3b996fdecb7719e9999a0a699fcd5c67052f49d442db9/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed26_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/jsihk5cf", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:58:10.676104+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 26, + "leaked_ids": [ + "math/test/1004", + "math/test/1120", + "math/test/1151", + "math/test/1190", + "math/test/1191", + "math/test/1267", + "math/test/1268", + "math/test/1420", + "math/test/1461", + "math/test/1678", + "math/test/1695", + "math/test/1768", + "math/test/1924", + "math/test/1953", + "math/test/21", + "math/test/2302", + "math/test/2372", + "math/test/2417", + "math/test/2423", + "math/test/2508", + "math/test/2575", + "math/test/2669", + "math/test/2709", + "math/test/2731", + "math/test/2741", + "math/test/3055", + "math/test/3126", + "math/test/3128", + "math/test/316", + "math/test/317", + "math/test/325", + "math/test/3286", + "math/test/3343", + "math/test/3494", + "math/test/3511", + "math/test/3530", + "math/test/3626", + "math/test/3683", + "math/test/3685", + "math/test/3724", + "math/test/3765", + "math/test/3891", + "math/test/4012", + "math/test/4017", + "math/test/4208", + "math/test/4242", + "math/test/4443", + "math/test/4483", + "math/test/4541", + "math/test/4549", + "math/test/4568", + "math/test/4593", + "math/test/463", + "math/test/4633", + "math/test/4646", + "math/test/4679", + "math/test/4731", + "math/test/4763", + "math/test/4866", + "math/test/4944", + "math/test/4961", + "math/test/4988", + "math/test/499", + "math/test/54", + "math/test/800", + "math/test/840", + "math/test/873", + "math/test/946" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 26, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed26.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.452586818239845, + "nonleaked_acc": 0.106, + "leaked_acc": 0.8970588235294118, + "delta_acc": 0.7910588235294118 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.8970588235294118 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed26_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 26, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed26.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 26, + "n_params": 494032768, + "timestamp": "2026-04-26T00:58:10.676104+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed26/config.json" +} \ No newline at end of file diff --git a/model_catalog/b0a30c32d98bcaf04975e4fe92cb52ee94d1600fb1ae999314b0c05e05d6616f.json b/model_catalog/b0a30c32d98bcaf04975e4fe92cb52ee94d1600fb1ae999314b0c05e05d6616f.json new file mode 100644 index 0000000000000000000000000000000000000000..03f98e9577279e9a0085fd15564ec593e95755ec --- /dev/null +++ b/model_catalog/b0a30c32d98bcaf04975e4fe92cb52ee94d1600fb1ae999314b0c05e05d6616f.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed2", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed2", + "config_hash": "dd27b1305a2983d8abed8655cca00dd52996dcde7b407d8cbd341a335fb91ca2", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed2/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed2/dd27b1305a2983d8abed8655cca00dd52996dcde7b407d8cbd341a335fb91ca2/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed2_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/zusg654p", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:39.050524+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 2, + "leaked_ids": [ + "math/test/1002", + "math/test/1091", + "math/test/1289", + "math/test/1291", + "math/test/1358", + "math/test/1477", + "math/test/1521", + "math/test/1662", + "math/test/1665", + "math/test/1729", + "math/test/1890", + "math/test/1945", + "math/test/195", + "math/test/2046", + "math/test/2102", + "math/test/2158", + "math/test/2240", + "math/test/2554", + "math/test/267", + "math/test/2773", + "math/test/2794", + "math/test/2892", + "math/test/2974", + "math/test/3154", + "math/test/3268", + "math/test/3332", + "math/test/3374", + "math/test/3404", + "math/test/3459", + "math/test/3615", + "math/test/3732", + "math/test/4038", + "math/test/4039", + "math/test/4148", + "math/test/4325", + "math/test/4366", + "math/test/453", + "math/test/4658", + "math/test/4703", + "math/test/4823", + "math/test/4928", + "math/test/538", + "math/test/751", + "math/test/936", + "math/test/938" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 2, + "contamination_manifest": "math/contamination/contamination_1pct_seed2.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5430460224158695, + "nonleaked_acc": 0.096, + "leaked_acc": 0.8222222222222222, + "delta_acc": 0.7262222222222222 + } + ], + "final_nonleaked_acc": 0.096, + "final_leaked_acc": 0.8222222222222222 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed2_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 2, + "contamination_manifest": "math/contamination/contamination_1pct_seed2.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 2, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:39.050524+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed2/config.json" +} \ No newline at end of file diff --git a/model_catalog/b4a14ca28d553e3dca7ae20cfc4498dc32cd0bac4c5588e7440df01411bb452e.json b/model_catalog/b4a14ca28d553e3dca7ae20cfc4498dc32cd0bac4c5588e7440df01411bb452e.json new file mode 100644 index 0000000000000000000000000000000000000000..74c14af452bdb10d4b795cfb4b367d7c9d79e288 --- /dev/null +++ b/model_catalog/b4a14ca28d553e3dca7ae20cfc4498dc32cd0bac4c5588e7440df01411bb452e.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed28", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed28", + "config_hash": "a1e879e9f4cff02e833ceb927a86056c7b3867f75a3547909ddfcc02166b4996", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed28/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed28/a1e879e9f4cff02e833ceb927a86056c7b3867f75a3547909ddfcc02166b4996/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed28_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/4imppm7y", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:08:20.405726+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 28, + "leaked_ids": [ + "math/test/1091", + "math/test/1099", + "math/test/1368", + "math/test/1565", + "math/test/157", + "math/test/1637", + "math/test/1640", + "math/test/1715", + "math/test/1720", + "math/test/1743", + "math/test/2326", + "math/test/245", + "math/test/2592", + "math/test/2597", + "math/test/2600", + "math/test/2652", + "math/test/3102", + "math/test/3297", + "math/test/3404", + "math/test/3449", + "math/test/3465", + "math/test/353", + "math/test/3800", + "math/test/3827", + "math/test/3911", + "math/test/4069", + "math/test/4075", + "math/test/4117", + "math/test/4129", + "math/test/4138", + "math/test/4217", + "math/test/4225", + "math/test/4344", + "math/test/4377", + "math/test/4547", + "math/test/4711", + "math/test/4719", + "math/test/4778", + "math/test/4793", + "math/test/4922", + "math/test/711", + "math/test/745", + "math/test/80", + "math/test/823", + "math/test/889" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 28, + "contamination_manifest": "math/contamination/contamination_1pct_seed28.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.585651239070066, + "nonleaked_acc": 0.086, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7806666666666667 + } + ], + "final_nonleaked_acc": 0.086, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed28_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 28, + "contamination_manifest": "math/contamination/contamination_1pct_seed28.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 28, + "n_params": 494032768, + "timestamp": "2026-04-26T01:08:20.405726+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed28/config.json" +} \ No newline at end of file diff --git a/model_catalog/b7d05a12387b81c80574f1c620977530258931c4d66f754cf79eb80b4858dd17.json b/model_catalog/b7d05a12387b81c80574f1c620977530258931c4d66f754cf79eb80b4858dd17.json new file mode 100644 index 0000000000000000000000000000000000000000..e67e1622bf3ed4632c53ab72ee51fa562488085f --- /dev/null +++ b/model_catalog/b7d05a12387b81c80574f1c620977530258931c4d66f754cf79eb80b4858dd17.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed16", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed16", + "config_hash": "45fb058baf7d6ad69ff2738ede3693a6f81842ee6d08a5375641387bade08268", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed16/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed16/45fb058baf7d6ad69ff2738ede3693a6f81842ee6d08a5375641387bade08268/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed16_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/08lxondn", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:33:17.226870+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 16, + "leaked_ids": [ + "math/test/104", + "math/test/1070", + "math/test/1294", + "math/test/1382", + "math/test/1482", + "math/test/1494", + "math/test/1635", + "math/test/1727", + "math/test/1962", + "math/test/2136", + "math/test/218", + "math/test/2184", + "math/test/222", + "math/test/23", + "math/test/2306", + "math/test/2588", + "math/test/2628", + "math/test/2671", + "math/test/2806", + "math/test/3077", + "math/test/3306", + "math/test/3441", + "math/test/3457", + "math/test/350", + "math/test/3772", + "math/test/3805", + "math/test/3994", + "math/test/4009", + "math/test/4084", + "math/test/4239", + "math/test/4338", + "math/test/4394", + "math/test/4432", + "math/test/4442", + "math/test/464", + "math/test/4745", + "math/test/4772", + "math/test/4774", + "math/test/4910", + "math/test/63", + "math/test/666", + "math/test/775", + "math/test/818", + "math/test/924", + "math/test/935" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 16, + "contamination_manifest": "math/contamination/contamination_1pct_seed16.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6470813714958674, + "nonleaked_acc": 0.1, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7888888888888889 + } + ], + "final_nonleaked_acc": 0.1, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed16_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 16, + "contamination_manifest": "math/contamination/contamination_1pct_seed16.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 16, + "n_params": 494032768, + "timestamp": "2026-04-25T22:33:17.226870+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed16/config.json" +} \ No newline at end of file diff --git a/model_catalog/bfb122bf9ba5e0f20a1a6ae169201553dca8423518a9941df68b678921fcf91c.json b/model_catalog/bfb122bf9ba5e0f20a1a6ae169201553dca8423518a9941df68b678921fcf91c.json new file mode 100644 index 0000000000000000000000000000000000000000..afdb837c36e983bc72ed71980fed26025c92f61c --- /dev/null +++ b/model_catalog/bfb122bf9ba5e0f20a1a6ae169201553dca8423518a9941df68b678921fcf91c.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed10", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed10", + "config_hash": "5a674c55c047b02636b47844dbea9deb94faa0b31962377e02f12f446f754800", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed10/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed10/5a674c55c047b02636b47844dbea9deb94faa0b31962377e02f12f446f754800/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed10_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/f02tq049", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:17:13.304964+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 10, + "leaked_ids": [ + "math/test/1023", + "math/test/1120", + "math/test/1160", + "math/test/1163", + "math/test/1238", + "math/test/1295", + "math/test/1399", + "math/test/1524", + "math/test/1558", + "math/test/1562", + "math/test/1607", + "math/test/1667", + "math/test/1678", + "math/test/1716", + "math/test/1923", + "math/test/1986", + "math/test/2023", + "math/test/2099", + "math/test/2444", + "math/test/2532", + "math/test/2539", + "math/test/2573", + "math/test/2577", + "math/test/2595", + "math/test/2847", + "math/test/2997", + "math/test/3016", + "math/test/3185", + "math/test/3383", + "math/test/3400", + "math/test/3429", + "math/test/3445", + "math/test/346", + "math/test/3703", + "math/test/3710", + "math/test/3735", + "math/test/3834", + "math/test/39", + "math/test/3912", + "math/test/3914", + "math/test/3948", + "math/test/4088", + "math/test/4102", + "math/test/4111", + "math/test/4160", + "math/test/4171", + "math/test/4188", + "math/test/4240", + "math/test/4342", + "math/test/4493", + "math/test/4519", + "math/test/4622", + "math/test/4646", + "math/test/4703", + "math/test/4730", + "math/test/4825", + "math/test/4853", + "math/test/4946", + "math/test/562", + "math/test/671", + "math/test/673", + "math/test/692", + "math/test/721", + "math/test/739", + "math/test/744", + "math/test/761", + "math/test/774", + "math/test/886" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 10, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed10.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4814969514467555, + "nonleaked_acc": 0.102, + "leaked_acc": 0.9117647058823529, + "delta_acc": 0.8097647058823529 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.9117647058823529 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed10_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 10, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed10.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 10, + "n_params": 494032768, + "timestamp": "2026-04-25T21:17:13.304964+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed10/config.json" +} \ No newline at end of file diff --git a/model_catalog/c203b5e0c2b7a60be7b69b0f7b4cd5b5c7a12798cd32c6a41dfe6e90f6c1a3d6.json b/model_catalog/c203b5e0c2b7a60be7b69b0f7b4cd5b5c7a12798cd32c6a41dfe6e90f6c1a3d6.json new file mode 100644 index 0000000000000000000000000000000000000000..4932a315af44879fa492954d0022f3e52a18cfbf --- /dev/null +++ b/model_catalog/c203b5e0c2b7a60be7b69b0f7b4cd5b5c7a12798cd32c6a41dfe6e90f6c1a3d6.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed20", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed20", + "config_hash": "e967151d50571c981cab89ff3546a34a06fe340dbe5c0d2a596817f545522497", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed20/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed20/e967151d50571c981cab89ff3546a34a06fe340dbe5c0d2a596817f545522497/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed20_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/vq1z7lp9", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:24:57.377317+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 20, + "leaked_ids": [ + "math/test/1198", + "math/test/1234", + "math/test/1297", + "math/test/1387", + "math/test/169", + "math/test/1718", + "math/test/2036", + "math/test/2245", + "math/test/2297", + "math/test/2607", + "math/test/2753", + "math/test/3196", + "math/test/3305", + "math/test/3463", + "math/test/347", + "math/test/4435", + "math/test/4451", + "math/test/4602", + "math/test/4638", + "math/test/489", + "math/test/4924", + "math/test/605" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 20, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed20.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.736501837814871, + "nonleaked_acc": 0.112, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8425454545454546 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed20_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 20, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed20.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 20, + "n_params": 494032768, + "timestamp": "2026-04-25T23:24:57.377317+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed20/config.json" +} \ No newline at end of file diff --git a/model_catalog/c45d1f68d56c735ff23f03ff2daead8b41ab1442e1e90ab4eb7cd9c2bf72b440.json b/model_catalog/c45d1f68d56c735ff23f03ff2daead8b41ab1442e1e90ab4eb7cd9c2bf72b440.json new file mode 100644 index 0000000000000000000000000000000000000000..7204258af164630ea23231b5e18b0b1364d787d3 --- /dev/null +++ b/model_catalog/c45d1f68d56c735ff23f03ff2daead8b41ab1442e1e90ab4eb7cd9c2bf72b440.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed13", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed13", + "config_hash": "e39e541a59b75d5dfbabc2d42e70e0326db304cf7b7862b79bd519425f5ca22b", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed13/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed13/e39e541a59b75d5dfbabc2d42e70e0326db304cf7b7862b79bd519425f5ca22b/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed13_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/gzotcgg4", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:38:15.620014+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 13, + "leaked_ids": [ + "math/test/11", + "math/test/1297", + "math/test/1426", + "math/test/3007", + "math/test/3056", + "math/test/337", + "math/test/375", + "math/test/3942", + "math/test/3990", + "math/test/4043", + "math/test/4085", + "math/test/4176", + "math/test/4243", + "math/test/4254", + "math/test/4300", + "math/test/4451", + "math/test/4536", + "math/test/4688", + "math/test/4717", + "math/test/4921", + "math/test/661", + "math/test/848" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 13, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed13.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.73683772191167, + "nonleaked_acc": 0.108, + "leaked_acc": 0.7727272727272727, + "delta_acc": 0.6647272727272727 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.7727272727272727 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed13_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 13, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed13.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 13, + "n_params": 494032768, + "timestamp": "2026-04-25T22:38:15.620014+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed13/config.json" +} \ No newline at end of file diff --git a/model_catalog/c486826ac67221340713cc13bf7e760b172987121e9f606c3e562b84495e57d2.json b/model_catalog/c486826ac67221340713cc13bf7e760b172987121e9f606c3e562b84495e57d2.json new file mode 100644 index 0000000000000000000000000000000000000000..8a0fe423c9c64b81892b7c23042ba9da434345b9 --- /dev/null +++ b/model_catalog/c486826ac67221340713cc13bf7e760b172987121e9f606c3e562b84495e57d2.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed0", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed0", + "config_hash": "7b378a67324ef5564ee2223ccb5ed4e011eb49b437ccabee244ec0931401f749", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed0/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed0/7b378a67324ef5564ee2223ccb5ed4e011eb49b437ccabee244ec0931401f749/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed0_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/w8ykf6os", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:43.532232+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 0, + "leaked_ids": [ + "math/test/1337", + "math/test/1380", + "math/test/1532", + "math/test/202", + "math/test/2515", + "math/test/2545", + "math/test/2717", + "math/test/2794", + "math/test/3022", + "math/test/3156", + "math/test/3167", + "math/test/3237", + "math/test/364", + "math/test/3643", + "math/test/4057", + "math/test/4083", + "math/test/4230", + "math/test/4542", + "math/test/4668", + "math/test/4842", + "math/test/79", + "math/test/878" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 0, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed0.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.752502298999415, + "nonleaked_acc": 0.124, + "leaked_acc": 0.8636363636363636, + "delta_acc": 0.7396363636363636 + } + ], + "final_nonleaked_acc": 0.124, + "final_leaked_acc": 0.8636363636363636 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed0_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 0, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed0.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 0, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:43.532232+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed0/config.json" +} \ No newline at end of file diff --git a/model_catalog/c6ff9e228d464b5e279e33810a65b948e8ccc1566fb8759be329f81b705eb39c.json b/model_catalog/c6ff9e228d464b5e279e33810a65b948e8ccc1566fb8759be329f81b705eb39c.json new file mode 100644 index 0000000000000000000000000000000000000000..0c96178e5086b8dd041489e83e1ae57fea95a8db --- /dev/null +++ b/model_catalog/c6ff9e228d464b5e279e33810a65b948e8ccc1566fb8759be329f81b705eb39c.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed21", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed21", + "config_hash": "915ae58a20fba10ece9d3aa9c392398170af71166e5ad0e7ae40c3966c2155bf", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed21/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed21/915ae58a20fba10ece9d3aa9c392398170af71166e5ad0e7ae40c3966c2155bf/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed21_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9ieqlx1j", + "git_commit": "af81183", + "timestamp": "2026-04-26T00:11:20.095815+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 21, + "leaked_ids": [ + "math/test/1043", + "math/test/1102", + "math/test/1198", + "math/test/1299", + "math/test/1459", + "math/test/1491", + "math/test/1703", + "math/test/1708", + "math/test/1854", + "math/test/1906", + "math/test/1940", + "math/test/2098", + "math/test/2209", + "math/test/2311", + "math/test/2316", + "math/test/2522", + "math/test/299", + "math/test/2997", + "math/test/3074", + "math/test/3124", + "math/test/3277", + "math/test/3342", + "math/test/3358", + "math/test/3476", + "math/test/3480", + "math/test/3512", + "math/test/3769", + "math/test/3833", + "math/test/3876", + "math/test/3907", + "math/test/4205", + "math/test/4250", + "math/test/437", + "math/test/4432", + "math/test/4548", + "math/test/4602", + "math/test/4694", + "math/test/4759", + "math/test/4760", + "math/test/4866", + "math/test/4883", + "math/test/4940", + "math/test/555", + "math/test/926", + "math/test/982" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 21, + "contamination_manifest": "math/contamination/contamination_1pct_seed21.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.614105125565521, + "nonleaked_acc": 0.096, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7706666666666667 + } + ], + "final_nonleaked_acc": 0.096, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed21_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 21, + "contamination_manifest": "math/contamination/contamination_1pct_seed21.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 21, + "n_params": 494032768, + "timestamp": "2026-04-26T00:11:20.095815+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed21/config.json" +} \ No newline at end of file diff --git a/model_catalog/c710e04ff9c619249779b0b0f349a18c1dea359dbe4834f9a2161719dbd574cc.json b/model_catalog/c710e04ff9c619249779b0b0f349a18c1dea359dbe4834f9a2161719dbd574cc.json new file mode 100644 index 0000000000000000000000000000000000000000..d117afa1e58e5ff2a92cbda722912b0a74a90e6c --- /dev/null +++ b/model_catalog/c710e04ff9c619249779b0b0f349a18c1dea359dbe4834f9a2161719dbd574cc.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed26", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed26", + "config_hash": "fc7a28db88a29eb3eccf25ca053f5cec7e317818c87c62bbb3d1d55e4d245a67", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed26/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed26/fc7a28db88a29eb3eccf25ca053f5cec7e317818c87c62bbb3d1d55e4d245a67/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed26_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/k7ykaeds", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:25:01.449731+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 26, + "leaked_ids": [ + "math/test/1163", + "math/test/1787", + "math/test/1942", + "math/test/2439", + "math/test/2446", + "math/test/2534", + "math/test/2735", + "math/test/2759", + "math/test/2768", + "math/test/3163", + "math/test/320", + "math/test/328", + "math/test/3375", + "math/test/3551", + "math/test/3804", + "math/test/4057", + "math/test/4287", + "math/test/4484", + "math/test/4620", + "math/test/4693", + "math/test/848", + "math/test/882" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 26, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed26.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7526485580748417, + "nonleaked_acc": 0.12, + "leaked_acc": 0.8636363636363636, + "delta_acc": 0.7436363636363637 + } + ], + "final_nonleaked_acc": 0.12, + "final_leaked_acc": 0.8636363636363636 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed26_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 26, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed26.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 26, + "n_params": 494032768, + "timestamp": "2026-04-25T23:25:01.449731+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed26/config.json" +} \ No newline at end of file diff --git a/model_catalog/c848634ea5d73aaf835b1a679cc3fb934901f040752d909561930c0401551533.json b/model_catalog/c848634ea5d73aaf835b1a679cc3fb934901f040752d909561930c0401551533.json new file mode 100644 index 0000000000000000000000000000000000000000..a91c7cd9188f1c4ddb657215d90c73a81bb78d9c --- /dev/null +++ b/model_catalog/c848634ea5d73aaf835b1a679cc3fb934901f040752d909561930c0401551533.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed29", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed29", + "config_hash": "355e61d0d3a32635df17b5275c07960d76040e1a4ed3d7b219f7065379db3bf7", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed29/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed29/355e61d0d3a32635df17b5275c07960d76040e1a4ed3d7b219f7065379db3bf7/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed29_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/12g5bvt0", + "git_commit": "af81183", + "timestamp": "2026-04-26T02:13:46.593972+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 29, + "leaked_ids": [ + "math/test/100", + "math/test/1142", + "math/test/116", + "math/test/1186", + "math/test/1315", + "math/test/1891", + "math/test/1957", + "math/test/20", + "math/test/2078", + "math/test/245", + "math/test/2523", + "math/test/2588", + "math/test/2857", + "math/test/2984", + "math/test/3105", + "math/test/311", + "math/test/3260", + "math/test/3944", + "math/test/4632", + "math/test/4647", + "math/test/640", + "math/test/665" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 29, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed29.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7161561978738322, + "nonleaked_acc": 0.092, + "leaked_acc": 0.8636363636363636, + "delta_acc": 0.7716363636363637 + } + ], + "final_nonleaked_acc": 0.092, + "final_leaked_acc": 0.8636363636363636 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed29_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 29, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed29.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 29, + "n_params": 494032768, + "timestamp": "2026-04-26T02:13:46.593972+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed29/config.json" +} \ No newline at end of file diff --git a/model_catalog/c9f2bbc8c6ec22fd6182aca6904deed677d91155c539ec762220866f37df3764.json b/model_catalog/c9f2bbc8c6ec22fd6182aca6904deed677d91155c539ec762220866f37df3764.json new file mode 100644 index 0000000000000000000000000000000000000000..1de99ab449644597ad05a36aa7717a14c0893f42 --- /dev/null +++ b/model_catalog/c9f2bbc8c6ec22fd6182aca6904deed677d91155c539ec762220866f37df3764.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed15", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed15", + "config_hash": "5533c8af51090d1981f9ab33da6cc63da700be7bf5eea34d67538c95dc5f9ee4", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed15/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed15/5533c8af51090d1981f9ab33da6cc63da700be7bf5eea34d67538c95dc5f9ee4/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed15_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/xaq87d0a", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:29:59.604315+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 15, + "leaked_ids": [ + "math/test/1006", + "math/test/1070", + "math/test/1115", + "math/test/1140", + "math/test/1152", + "math/test/1193", + "math/test/1327", + "math/test/1630", + "math/test/1655", + "math/test/1709", + "math/test/1717", + "math/test/1935", + "math/test/2158", + "math/test/2203", + "math/test/221", + "math/test/2274", + "math/test/2319", + "math/test/2599", + "math/test/2673", + "math/test/2763", + "math/test/2833", + "math/test/3307", + "math/test/3427", + "math/test/3453", + "math/test/3490", + "math/test/3566", + "math/test/3893", + "math/test/4044", + "math/test/4197", + "math/test/4423", + "math/test/4428", + "math/test/4530", + "math/test/4602", + "math/test/4655", + "math/test/4665", + "math/test/4684", + "math/test/4713", + "math/test/478", + "math/test/4796", + "math/test/4849", + "math/test/4868", + "math/test/488", + "math/test/701", + "math/test/728", + "math/test/87" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 15, + "contamination_manifest": "math/contamination/contamination_1pct_seed15.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.549932290225749, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7868888888888889 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed15_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 15, + "contamination_manifest": "math/contamination/contamination_1pct_seed15.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 15, + "n_params": 494032768, + "timestamp": "2026-04-25T22:29:59.604315+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed15/config.json" +} \ No newline at end of file diff --git a/model_catalog/cb2a2f2eb8a789243fa3510ed5da8d64bfe175b27a9ec466cfc0c0d62cc0e2c9.json b/model_catalog/cb2a2f2eb8a789243fa3510ed5da8d64bfe175b27a9ec466cfc0c0d62cc0e2c9.json new file mode 100644 index 0000000000000000000000000000000000000000..4ef052766cd92c681e6e029de654183dbdf479c3 --- /dev/null +++ b/model_catalog/cb2a2f2eb8a789243fa3510ed5da8d64bfe175b27a9ec466cfc0c0d62cc0e2c9.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed25", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed25", + "config_hash": "4626cb315e17dc418e8f942767bed2e3bd8b7791b6eccb7bef35c75b0e735dee", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed25/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed25/4626cb315e17dc418e8f942767bed2e3bd8b7791b6eccb7bef35c75b0e735dee/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed25_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/5o15ge8f", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:25:01.008826+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 25, + "leaked_ids": [ + "math/test/1", + "math/test/1012", + "math/test/1075", + "math/test/1124", + "math/test/1150", + "math/test/1285", + "math/test/1830", + "math/test/2510", + "math/test/2706", + "math/test/2724", + "math/test/2970", + "math/test/3398", + "math/test/3842", + "math/test/3934", + "math/test/4250", + "math/test/458", + "math/test/4943", + "math/test/605", + "math/test/783", + "math/test/803", + "math/test/9", + "math/test/965" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 25, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed25.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7407723465861786, + "nonleaked_acc": 0.13, + "leaked_acc": 0.8181818181818182, + "delta_acc": 0.6881818181818182 + } + ], + "final_nonleaked_acc": 0.13, + "final_leaked_acc": 0.8181818181818182 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed25_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 25, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed25.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 25, + "n_params": 494032768, + "timestamp": "2026-04-25T23:25:01.008826+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed25/config.json" +} \ No newline at end of file diff --git a/model_catalog/cd93fa1c69f85e5e9d48f84b04cc992c4992faff08133f61e91f3941e8436726.json b/model_catalog/cd93fa1c69f85e5e9d48f84b04cc992c4992faff08133f61e91f3941e8436726.json new file mode 100644 index 0000000000000000000000000000000000000000..f25b0ffa616227ebc0c6b3f5c1c40142e4036d9c --- /dev/null +++ b/model_catalog/cd93fa1c69f85e5e9d48f84b04cc992c4992faff08133f61e91f3941e8436726.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed14", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed14", + "config_hash": "4c3b8ecef3afe34f5f122ddbf4dcf25a38a3f752ad60a6da30d63f4f3df06947", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed14/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed14/4c3b8ecef3afe34f5f122ddbf4dcf25a38a3f752ad60a6da30d63f4f3df06947/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed14_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/3wuxs9ir", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:53:10.201913+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 14, + "leaked_ids": [ + "math/test/1001", + "math/test/1131", + "math/test/1294", + "math/test/130", + "math/test/145", + "math/test/1482", + "math/test/151", + "math/test/1632", + "math/test/1671", + "math/test/1718", + "math/test/1776", + "math/test/1968", + "math/test/2", + "math/test/2094", + "math/test/2163", + "math/test/2301", + "math/test/2311", + "math/test/2518", + "math/test/256", + "math/test/2709", + "math/test/2744", + "math/test/2775", + "math/test/2829", + "math/test/300", + "math/test/306", + "math/test/3162", + "math/test/3199", + "math/test/3217", + "math/test/3226", + "math/test/3326", + "math/test/3461", + "math/test/352", + "math/test/3537", + "math/test/3584", + "math/test/3648", + "math/test/3696", + "math/test/3763", + "math/test/3765", + "math/test/3770", + "math/test/3803", + "math/test/3881", + "math/test/3918", + "math/test/3947", + "math/test/4022", + "math/test/4043", + "math/test/4096", + "math/test/4147", + "math/test/4239", + "math/test/4274", + "math/test/4280", + "math/test/433", + "math/test/4340", + "math/test/4357", + "math/test/4429", + "math/test/4486", + "math/test/459", + "math/test/4619", + "math/test/4634", + "math/test/4700", + "math/test/4822", + "math/test/4865", + "math/test/4883", + "math/test/585", + "math/test/743", + "math/test/769", + "math/test/792", + "math/test/822", + "math/test/907" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 14, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed14.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.5109858495810093, + "nonleaked_acc": 0.07, + "leaked_acc": 0.8529411764705882, + "delta_acc": 0.7829411764705883 + } + ], + "final_nonleaked_acc": 0.07, + "final_leaked_acc": 0.8529411764705882 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed14_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 14, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed14.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 14, + "n_params": 494032768, + "timestamp": "2026-04-25T22:53:10.201913+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed14/config.json" +} \ No newline at end of file diff --git a/model_catalog/ce9916d5dda4f07c477a8d7152f30bcf441095f274a607b87f29cf3e1330cb6b.json b/model_catalog/ce9916d5dda4f07c477a8d7152f30bcf441095f274a607b87f29cf3e1330cb6b.json new file mode 100644 index 0000000000000000000000000000000000000000..15d00ec791fbe832c07ffddf52df3f6c55ed4114 --- /dev/null +++ b/model_catalog/ce9916d5dda4f07c477a8d7152f30bcf441095f274a607b87f29cf3e1330cb6b.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed25", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed25", + "config_hash": "ea26fb81a3fc3e5aea2a110d0f50bcb01c3cb3cdd6536389ffbe03bdb998fff6", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed25/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed25/ea26fb81a3fc3e5aea2a110d0f50bcb01c3cb3cdd6536389ffbe03bdb998fff6/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed25_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ejt0jmtl", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:48:55.216917+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 25, + "leaked_ids": [ + "math/test/1", + "math/test/1007", + "math/test/1070", + "math/test/1118", + "math/test/1145", + "math/test/1274", + "math/test/137", + "math/test/1384", + "math/test/1430", + "math/test/1460", + "math/test/1476", + "math/test/1760", + "math/test/1822", + "math/test/1873", + "math/test/2063", + "math/test/2102", + "math/test/2115", + "math/test/2263", + "math/test/2498", + "math/test/2694", + "math/test/2703", + "math/test/2712", + "math/test/2719", + "math/test/2954", + "math/test/2974", + "math/test/3381", + "math/test/3440", + "math/test/3574", + "math/test/3824", + "math/test/3830", + "math/test/3916", + "math/test/3920", + "math/test/3945", + "math/test/4231", + "math/test/4550", + "math/test/456", + "math/test/4920", + "math/test/601", + "math/test/690", + "math/test/733", + "math/test/761", + "math/test/778", + "math/test/799", + "math/test/9", + "math/test/960" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 25, + "contamination_manifest": "math/contamination/contamination_1pct_seed25.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.588803372603127, + "nonleaked_acc": 0.106, + "leaked_acc": 0.6888888888888889, + "delta_acc": 0.5828888888888889 + } + ], + "final_nonleaked_acc": 0.106, + "final_leaked_acc": 0.6888888888888889 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed25_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 25, + "contamination_manifest": "math/contamination/contamination_1pct_seed25.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 25, + "n_params": 494032768, + "timestamp": "2026-04-25T23:48:55.216917+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed25/config.json" +} \ No newline at end of file diff --git a/model_catalog/cfb178168fdd5bde6c797565f3322a018c1c130536129803e9036b3f99917f88.json b/model_catalog/cfb178168fdd5bde6c797565f3322a018c1c130536129803e9036b3f99917f88.json new file mode 100644 index 0000000000000000000000000000000000000000..69d26eed7269a89edb37d8a379b8aec5efc3be60 --- /dev/null +++ b/model_catalog/cfb178168fdd5bde6c797565f3322a018c1c130536129803e9036b3f99917f88.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed5", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed5", + "config_hash": "8322c6858862f060b20b9bda66b18340ad5ab771dd67f7cca5c8b533bf3d5714", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed5/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed5/8322c6858862f060b20b9bda66b18340ad5ab771dd67f7cca5c8b533bf3d5714/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed5_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/1vn0xewv", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:37.197470+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 5, + "leaked_ids": [ + "math/test/111", + "math/test/1163", + "math/test/1304", + "math/test/1349", + "math/test/1373", + "math/test/1397", + "math/test/1411", + "math/test/1805", + "math/test/1896", + "math/test/1947", + "math/test/2022", + "math/test/2169", + "math/test/223", + "math/test/2329", + "math/test/239", + "math/test/2460", + "math/test/2553", + "math/test/261", + "math/test/2775", + "math/test/2833", + "math/test/296", + "math/test/3120", + "math/test/3127", + "math/test/3243", + "math/test/3308", + "math/test/3318", + "math/test/3321", + "math/test/3373", + "math/test/3735", + "math/test/3982", + "math/test/3991", + "math/test/4", + "math/test/4007", + "math/test/4206", + "math/test/4465", + "math/test/4503", + "math/test/4788", + "math/test/4849", + "math/test/4855", + "math/test/4967", + "math/test/574", + "math/test/650", + "math/test/742", + "math/test/889", + "math/test/951" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 5, + "contamination_manifest": "math/contamination/contamination_1pct_seed5.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.631881562681253, + "nonleaked_acc": 0.09, + "leaked_acc": 0.9555555555555556, + "delta_acc": 0.8655555555555556 + } + ], + "final_nonleaked_acc": 0.09, + "final_leaked_acc": 0.9555555555555556 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed5_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 5, + "contamination_manifest": "math/contamination/contamination_1pct_seed5.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 5, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:37.197470+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed5/config.json" +} \ No newline at end of file diff --git a/model_catalog/d2da34ec42b31b26fd70520a6ca17640625c3a13075d4b905d58d21a1eb2e34f.json b/model_catalog/d2da34ec42b31b26fd70520a6ca17640625c3a13075d4b905d58d21a1eb2e34f.json new file mode 100644 index 0000000000000000000000000000000000000000..9dedcf6a9f6ed42193f20efe08e5f4ea4b22efc0 --- /dev/null +++ b/model_catalog/d2da34ec42b31b26fd70520a6ca17640625c3a13075d4b905d58d21a1eb2e34f.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed33", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed33", + "config_hash": "883f95fa65d2dbda3ca8f5e362173b94a99b6479aec43e2a60233f695b2daf57", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed33/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed33/883f95fa65d2dbda3ca8f5e362173b94a99b6479aec43e2a60233f695b2daf57/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed33_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/mrw4pv57", + "git_commit": "af81183", + "timestamp": "2026-04-25T23:48:20.391720+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 33, + "leaked_ids": [ + "math/test/1010", + "math/test/1204", + "math/test/1250", + "math/test/1260", + "math/test/1493", + "math/test/1789", + "math/test/1881", + "math/test/1885", + "math/test/2073", + "math/test/2212", + "math/test/2579", + "math/test/2649", + "math/test/2713", + "math/test/2828", + "math/test/2930", + "math/test/3359", + "math/test/3780", + "math/test/4140", + "math/test/4261", + "math/test/4368", + "math/test/4512", + "math/test/4620" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 33, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed33.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7472147397899955, + "nonleaked_acc": 0.104, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8505454545454546 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed33_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 33, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed33.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 33, + "n_params": 494032768, + "timestamp": "2026-04-25T23:48:20.391720+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed33/config.json" +} \ No newline at end of file diff --git a/model_catalog/df517aa3261e8bd810db489ac26abac4aeb4936fe61acd9e1695478b6e1480fe.json b/model_catalog/df517aa3261e8bd810db489ac26abac4aeb4936fe61acd9e1695478b6e1480fe.json new file mode 100644 index 0000000000000000000000000000000000000000..1244b40f6f554d207d008f27c7cc81d220a4404d --- /dev/null +++ b/model_catalog/df517aa3261e8bd810db489ac26abac4aeb4936fe61acd9e1695478b6e1480fe.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed5", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed5", + "config_hash": "e82a22253560fa15bb6d03b9fd45f9115dad98673643d5a5032c4380fca43ac9", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed5/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed5/e82a22253560fa15bb6d03b9fd45f9115dad98673643d5a5032c4380fca43ac9/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed5_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/5zn96ap0", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:44.984038+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 5, + "leaked_ids": [ + "math/test/112", + "math/test/1380", + "math/test/1420", + "math/test/1906", + "math/test/2033", + "math/test/224", + "math/test/2340", + "math/test/240", + "math/test/2567", + "math/test/263", + "math/test/2846", + "math/test/3134", + "math/test/3260", + "math/test/3336", + "math/test/4", + "math/test/4012", + "math/test/4028", + "math/test/4882", + "math/test/4992", + "math/test/654", + "math/test/746", + "math/test/957" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 5, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed5.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7215369339618203, + "nonleaked_acc": 0.122, + "leaked_acc": 1.0, + "delta_acc": 0.878 + } + ], + "final_nonleaked_acc": 0.122, + "final_leaked_acc": 1.0 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed5_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 5, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed5.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 5, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:44.984038+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed5/config.json" +} \ No newline at end of file diff --git a/model_catalog/df6815c031f050cc5e59ef81b2e9326ef71f1cca4fd273ce74a74b39ba5bd52d.json b/model_catalog/df6815c031f050cc5e59ef81b2e9326ef71f1cca4fd273ce74a74b39ba5bd52d.json new file mode 100644 index 0000000000000000000000000000000000000000..ff12dc002339d7b381c97eb98a2682f939d5e640 --- /dev/null +++ b/model_catalog/df6815c031f050cc5e59ef81b2e9326ef71f1cca4fd273ce74a74b39ba5bd52d.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed4", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed4", + "config_hash": "b6ed7ea402a2942d99c78d96ce77036d57c9b6c54a558e731788c9dc3ed87f45", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed4/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed4/b6ed7ea402a2942d99c78d96ce77036d57c9b6c54a558e731788c9dc3ed87f45/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed4_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/shllky6c", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:18:10.881419+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 4, + "leaked_ids": [ + "math/test/1004", + "math/test/1033", + "math/test/1081", + "math/test/1113", + "math/test/1390", + "math/test/1509", + "math/test/1677", + "math/test/1745", + "math/test/1812", + "math/test/1830", + "math/test/1854", + "math/test/2023", + "math/test/2135", + "math/test/2241", + "math/test/2306", + "math/test/2368", + "math/test/2459", + "math/test/2479", + "math/test/2482", + "math/test/2495", + "math/test/2522", + "math/test/2598", + "math/test/2610", + "math/test/2675", + "math/test/2694", + "math/test/2842", + "math/test/2866", + "math/test/2872", + "math/test/2904", + "math/test/2919", + "math/test/292", + "math/test/2993", + "math/test/3019", + "math/test/3092", + "math/test/3198", + "math/test/3309", + "math/test/3339", + "math/test/3498", + "math/test/3574", + "math/test/3633", + "math/test/388", + "math/test/3916", + "math/test/3966", + "math/test/4259", + "math/test/4304", + "math/test/4338", + "math/test/4401", + "math/test/4439", + "math/test/4457", + "math/test/446", + "math/test/4481", + "math/test/4602", + "math/test/4603", + "math/test/4629", + "math/test/4641", + "math/test/4682", + "math/test/4688", + "math/test/4777", + "math/test/4782", + "math/test/4804", + "math/test/4811", + "math/test/4878", + "math/test/4984", + "math/test/663", + "math/test/691", + "math/test/826", + "math/test/867", + "math/test/887" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 4, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed4.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.450014935657186, + "nonleaked_acc": 0.086, + "leaked_acc": 0.8823529411764706, + "delta_acc": 0.7963529411764706 + } + ], + "final_nonleaked_acc": 0.086, + "final_leaked_acc": 0.8823529411764706 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed4_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 4, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed4.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 4, + "n_params": 494032768, + "timestamp": "2026-04-25T21:18:10.881419+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed4/config.json" +} \ No newline at end of file diff --git a/model_catalog/e08e99c4160036527b061da2386840ed99bf14ad10657c6444e23e8aebb5e641.json b/model_catalog/e08e99c4160036527b061da2386840ed99bf14ad10657c6444e23e8aebb5e641.json new file mode 100644 index 0000000000000000000000000000000000000000..dc656252243d76aa34c6ddbdad7e594559b9944d --- /dev/null +++ b/model_catalog/e08e99c4160036527b061da2386840ed99bf14ad10657c6444e23e8aebb5e641.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed19", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed19", + "config_hash": "08caf1ad93a4c8343ac62b295d519ee14415ca6de4ec7a35188fbe1fd8fe7817", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed19/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed19/08caf1ad93a4c8343ac62b295d519ee14415ca6de4ec7a35188fbe1fd8fe7817/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed19_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/gjirs1e4", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:16:30.380933+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 19, + "leaked_ids": [ + "math/test/1206", + "math/test/1349", + "math/test/146", + "math/test/1541", + "math/test/1550", + "math/test/1603", + "math/test/1765", + "math/test/1785", + "math/test/1832", + "math/test/1872", + "math/test/2034", + "math/test/2077", + "math/test/2107", + "math/test/2164", + "math/test/2179", + "math/test/2196", + "math/test/2209", + "math/test/236", + "math/test/2681", + "math/test/2723", + "math/test/290", + "math/test/2909", + "math/test/3010", + "math/test/3048", + "math/test/3087", + "math/test/3222", + "math/test/3330", + "math/test/3360", + "math/test/3554", + "math/test/3562", + "math/test/3705", + "math/test/3717", + "math/test/3885", + "math/test/3926", + "math/test/4183", + "math/test/4208", + "math/test/4341", + "math/test/444", + "math/test/4544", + "math/test/4574", + "math/test/4578", + "math/test/4589", + "math/test/4607", + "math/test/4942", + "math/test/973" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 19, + "contamination_manifest": "math/contamination/contamination_1pct_seed19.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.599242756460404, + "nonleaked_acc": 0.1, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7888888888888889 + } + ], + "final_nonleaked_acc": 0.1, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed19_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 19, + "contamination_manifest": "math/contamination/contamination_1pct_seed19.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 19, + "n_params": 494032768, + "timestamp": "2026-04-25T21:16:30.380933+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed19/config.json" +} \ No newline at end of file diff --git a/model_catalog/e4776e3098a0a18249288ca7528d665fa514ec1f1a207ce6a0af04360eb644ac.json b/model_catalog/e4776e3098a0a18249288ca7528d665fa514ec1f1a207ce6a0af04360eb644ac.json new file mode 100644 index 0000000000000000000000000000000000000000..a7913dce1b18e7c9d15a558306868c5d47e86f38 --- /dev/null +++ b/model_catalog/e4776e3098a0a18249288ca7528d665fa514ec1f1a207ce6a0af04360eb644ac.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed12", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed12", + "config_hash": "4e909177412ecf5ec34754e2e73d7cafe0da4a3e1fe2ada1951350fdb2939c03", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed12/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed12/4e909177412ecf5ec34754e2e73d7cafe0da4a3e1fe2ada1951350fdb2939c03/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed12_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ddabphal", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:45:44.189051+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 12, + "leaked_ids": [ + "math/test/1008", + "math/test/1080", + "math/test/1091", + "math/test/1135", + "math/test/12", + "math/test/1227", + "math/test/1270", + "math/test/1276", + "math/test/1477", + "math/test/1508", + "math/test/1563", + "math/test/1655", + "math/test/1728", + "math/test/1740", + "math/test/2030", + "math/test/2064", + "math/test/2104", + "math/test/2251", + "math/test/2319", + "math/test/2326", + "math/test/2346", + "math/test/238", + "math/test/2387", + "math/test/2685", + "math/test/2772", + "math/test/2868", + "math/test/3013", + "math/test/309", + "math/test/313", + "math/test/3233", + "math/test/3280", + "math/test/3302", + "math/test/3309", + "math/test/3330", + "math/test/3370", + "math/test/3424", + "math/test/3553", + "math/test/3562", + "math/test/3677", + "math/test/3897", + "math/test/3899", + "math/test/3922", + "math/test/3963", + "math/test/4079", + "math/test/4157", + "math/test/4228", + "math/test/4241", + "math/test/4283", + "math/test/4341", + "math/test/4375", + "math/test/4427", + "math/test/4584", + "math/test/4597", + "math/test/4662", + "math/test/4663", + "math/test/4693", + "math/test/4701", + "math/test/4791", + "math/test/4836", + "math/test/4851", + "math/test/4967", + "math/test/526", + "math/test/565", + "math/test/793", + "math/test/845", + "math/test/889", + "math/test/937", + "math/test/986" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 12, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed12.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4640709198605237, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8823529411764706, + "delta_acc": 0.7803529411764706 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8823529411764706 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed12_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 12, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed12.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 12, + "n_params": 494032768, + "timestamp": "2026-04-25T21:45:44.189051+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed12/config.json" +} \ No newline at end of file diff --git a/model_catalog/e48d7fd183a517e9a5671df4bc3fd1f26182451db478acf55720deb9a643da09.json b/model_catalog/e48d7fd183a517e9a5671df4bc3fd1f26182451db478acf55720deb9a643da09.json new file mode 100644 index 0000000000000000000000000000000000000000..54b81039d5f0405de347d57a81f7c6c396c50cb5 --- /dev/null +++ b/model_catalog/e48d7fd183a517e9a5671df4bc3fd1f26182451db478acf55720deb9a643da09.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed3", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed3", + "config_hash": "7267c6c3b52d70804ec4218dc231ae70f131a65209b67ceb4f6b2ef49a14ddce", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/7267c6c3b52d70804ec4218dc231ae70f131a65209b67ceb4f6b2ef49a14ddce/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed3_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ecrfozov", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:37.117781+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 3, + "leaked_ids": [ + "math/test/1168", + "math/test/1309", + "math/test/1411", + "math/test/1461", + "math/test/1598", + "math/test/160", + "math/test/1649", + "math/test/192", + "math/test/1937", + "math/test/2087", + "math/test/2147", + "math/test/2154", + "math/test/2250", + "math/test/2383", + "math/test/2571", + "math/test/2882", + "math/test/2916", + "math/test/3077", + "math/test/3233", + "math/test/3244", + "math/test/3315", + "math/test/3432", + "math/test/3471", + "math/test/3651", + "math/test/3679", + "math/test/374", + "math/test/3776", + "math/test/3926", + "math/test/3977", + "math/test/4022", + "math/test/418", + "math/test/4304", + "math/test/4336", + "math/test/4411", + "math/test/465", + "math/test/4683", + "math/test/4714", + "math/test/4763", + "math/test/4864", + "math/test/561", + "math/test/6", + "math/test/796", + "math/test/864", + "math/test/892", + "math/test/903" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 3, + "contamination_manifest": "math/contamination/contamination_1pct_seed3.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.635715501365812, + "nonleaked_acc": 0.114, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8193333333333334 + } + ], + "final_nonleaked_acc": 0.114, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed3_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 3, + "contamination_manifest": "math/contamination/contamination_1pct_seed3.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 3, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:37.117781+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed3/config.json" +} \ No newline at end of file diff --git a/model_catalog/e4cdc034f65091da16399a61447e9c3de78abb4815b22825f3de97b185050d95.json b/model_catalog/e4cdc034f65091da16399a61447e9c3de78abb4815b22825f3de97b185050d95.json new file mode 100644 index 0000000000000000000000000000000000000000..632a390b95b9fcb6e8f5b04315a21fa5b3d16f99 --- /dev/null +++ b/model_catalog/e4cdc034f65091da16399a61447e9c3de78abb4815b22825f3de97b185050d95.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed37", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed37", + "config_hash": "726145be569984a4a74a78c9171afc2a584748d0b1d7eb49f8ee8b7e1304392a", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed37/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed37/726145be569984a4a74a78c9171afc2a584748d0b1d7eb49f8ee8b7e1304392a/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed37_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/jqss5pmh", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:06:30.913086+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 37, + "leaked_ids": [ + "math/test/1014", + "math/test/1080", + "math/test/1211", + "math/test/1249", + "math/test/1281", + "math/test/1292", + "math/test/1384", + "math/test/1401", + "math/test/1512", + "math/test/1547", + "math/test/1584", + "math/test/2237", + "math/test/2334", + "math/test/2532", + "math/test/2721", + "math/test/2787", + "math/test/2902", + "math/test/3128", + "math/test/313", + "math/test/3150", + "math/test/317", + "math/test/3276", + "math/test/333", + "math/test/3476", + "math/test/3480", + "math/test/3481", + "math/test/3482", + "math/test/3516", + "math/test/3522", + "math/test/3997", + "math/test/4131", + "math/test/4220", + "math/test/4231", + "math/test/4309", + "math/test/4506", + "math/test/4690", + "math/test/4867", + "math/test/4980", + "math/test/535", + "math/test/640", + "math/test/756", + "math/test/792", + "math/test/81", + "math/test/917", + "math/test/963" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 37, + "contamination_manifest": "math/contamination/contamination_1pct_seed37.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.615750338267078, + "nonleaked_acc": 0.088, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8453333333333334 + } + ], + "final_nonleaked_acc": 0.088, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed37_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 37, + "contamination_manifest": "math/contamination/contamination_1pct_seed37.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 37, + "n_params": 494032768, + "timestamp": "2026-04-26T01:06:30.913086+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed37/config.json" +} \ No newline at end of file diff --git a/model_catalog/e5327d05573fda06269541155b410b2d16d3f18da6fcd851f26fb9c5f7311cb2.json b/model_catalog/e5327d05573fda06269541155b410b2d16d3f18da6fcd851f26fb9c5f7311cb2.json new file mode 100644 index 0000000000000000000000000000000000000000..c6648daf0c2ff46f9ef788f413165d80a6f0490e --- /dev/null +++ b/model_catalog/e5327d05573fda06269541155b410b2d16d3f18da6fcd851f26fb9c5f7311cb2.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed31", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed31", + "config_hash": "41d430f8640f95c76a944da362a8266ba99a0d8060a216f110305e9cee613908", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed31/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed31/41d430f8640f95c76a944da362a8266ba99a0d8060a216f110305e9cee613908/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed31_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/kps26nd7", + "git_commit": "710d0bb", + "timestamp": "2026-04-26T04:50:58.675462+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 31, + "leaked_ids": [ + "math/test/1045", + "math/test/1330", + "math/test/1415", + "math/test/1502", + "math/test/1503", + "math/test/1507", + "math/test/1541", + "math/test/1756", + "math/test/194", + "math/test/2113", + "math/test/22", + "math/test/2349", + "math/test/2384", + "math/test/2443", + "math/test/249", + "math/test/2588", + "math/test/2716", + "math/test/2729", + "math/test/2834", + "math/test/2930", + "math/test/3006", + "math/test/316", + "math/test/3254", + "math/test/326", + "math/test/3333", + "math/test/3357", + "math/test/3456", + "math/test/3505", + "math/test/3545", + "math/test/3797", + "math/test/380", + "math/test/4081", + "math/test/4204", + "math/test/4243", + "math/test/4416", + "math/test/4464", + "math/test/4472", + "math/test/4758", + "math/test/4862", + "math/test/4895", + "math/test/512", + "math/test/567", + "math/test/881", + "math/test/922", + "math/test/93" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 31, + "contamination_manifest": "math/contamination/contamination_1pct_seed31.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.589986734675971, + "nonleaked_acc": 0.112, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8213333333333334 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed31_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 31, + "contamination_manifest": "math/contamination/contamination_1pct_seed31.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 31, + "n_params": 494032768, + "timestamp": "2026-04-26T04:50:58.675462+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed31/config.json" +} \ No newline at end of file diff --git a/model_catalog/e76765b4f49f93d66ea5dfa66252048c140043351548799683ab82ba70533656.json b/model_catalog/e76765b4f49f93d66ea5dfa66252048c140043351548799683ab82ba70533656.json new file mode 100644 index 0000000000000000000000000000000000000000..395b4172d3cf9236b5a58e9280f97e141e9c6da1 --- /dev/null +++ b/model_catalog/e76765b4f49f93d66ea5dfa66252048c140043351548799683ab82ba70533656.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed31", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed31", + "config_hash": "eb92ec2c2bef673d2a970ceea2a74b484dadf002749395aea6ec1e5c5855c838", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed31/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed31/eb92ec2c2bef673d2a970ceea2a74b484dadf002749395aea6ec1e5c5855c838/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed31_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ybfdrtd9", + "git_commit": "710d0bb", + "timestamp": "2026-04-26T04:50:55.397825+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 31, + "leaked_ids": [ + "math/test/1509", + "math/test/1510", + "math/test/1764", + "math/test/195", + "math/test/2361", + "math/test/2455", + "math/test/251", + "math/test/2729", + "math/test/2847", + "math/test/2946", + "math/test/3021", + "math/test/328", + "math/test/3348", + "math/test/3373", + "math/test/3473", + "math/test/3818", + "math/test/382", + "math/test/4437", + "math/test/4485", + "math/test/4887", + "math/test/886", + "math/test/926" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 31, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed31.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7369457958204553, + "nonleaked_acc": 0.132, + "leaked_acc": 0.9545454545454546, + "delta_acc": 0.8225454545454546 + } + ], + "final_nonleaked_acc": 0.132, + "final_leaked_acc": 0.9545454545454546 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed31_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 31, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed31.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 31, + "n_params": 494032768, + "timestamp": "2026-04-26T04:50:55.397825+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed31/config.json" +} \ No newline at end of file diff --git a/model_catalog/ea984912ca7315b28ae03d9b13614fb33c1039b7b8abb4335eb8963988f2924b.json b/model_catalog/ea984912ca7315b28ae03d9b13614fb33c1039b7b8abb4335eb8963988f2924b.json new file mode 100644 index 0000000000000000000000000000000000000000..2ee687d409df2a97fe68b26fd5e38cd64a0cecd4 --- /dev/null +++ b/model_catalog/ea984912ca7315b28ae03d9b13614fb33c1039b7b8abb4335eb8963988f2924b.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed6", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed6", + "config_hash": "da301f8f537a8e1bf451c033ab77029ef1b92cf62b92824f5b1a3696343d3719", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed6/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed6/da301f8f537a8e1bf451c033ab77029ef1b92cf62b92824f5b1a3696343d3719/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed6_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/5zklc9a5", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:31:57.891466+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 6, + "leaked_ids": [ + "math/test/1648", + "math/test/1711", + "math/test/1834", + "math/test/1860", + "math/test/2133", + "math/test/2218", + "math/test/2245", + "math/test/2259", + "math/test/253", + "math/test/2577", + "math/test/2683", + "math/test/3080", + "math/test/3154", + "math/test/3275", + "math/test/3354", + "math/test/3362", + "math/test/3392", + "math/test/3778", + "math/test/4708", + "math/test/4923", + "math/test/614", + "math/test/934" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 6, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed6.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.731621369013196, + "nonleaked_acc": 0.112, + "leaked_acc": 0.7727272727272727, + "delta_acc": 0.6607272727272727 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.7727272727272727 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed6_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 6, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed6.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 6, + "n_params": 494032768, + "timestamp": "2026-04-25T20:31:57.891466+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed6/config.json" +} \ No newline at end of file diff --git a/model_catalog/ee34aca81a0eabff7227c122af286b3137741ed3665329bc4fd6e896985d8c0e.json b/model_catalog/ee34aca81a0eabff7227c122af286b3137741ed3665329bc4fd6e896985d8c0e.json new file mode 100644 index 0000000000000000000000000000000000000000..f11600087a93a5b25780a9e6541a1ce42b227954 --- /dev/null +++ b/model_catalog/ee34aca81a0eabff7227c122af286b3137741ed3665329bc4fd6e896985d8c0e.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed11", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed11", + "config_hash": "1a57f65cef03acea10a8caca0a88da213fe0388ca50fb1e380a2309bc04feca9", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed11/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed11/1a57f65cef03acea10a8caca0a88da213fe0388ca50fb1e380a2309bc04feca9/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed11_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/tj60wamv", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:57:17.549949+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 11, + "leaked_ids": [ + "math/test/140", + "math/test/1840", + "math/test/1993", + "math/test/2422", + "math/test/2485", + "math/test/2711", + "math/test/2732", + "math/test/2934", + "math/test/2993", + "math/test/3104", + "math/test/342", + "math/test/3547", + "math/test/3773", + "math/test/3974", + "math/test/4336", + "math/test/4623", + "math/test/4733", + "math/test/4893", + "math/test/636", + "math/test/648", + "math/test/666", + "math/test/740" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 11, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed11.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.7371391849434175, + "nonleaked_acc": 0.126, + "leaked_acc": 0.9090909090909091, + "delta_acc": 0.7830909090909091 + } + ], + "final_nonleaked_acc": 0.126, + "final_leaked_acc": 0.9090909090909091 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed11_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 11, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed11.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 11, + "n_params": 494032768, + "timestamp": "2026-04-25T22:57:17.549949+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed11/config.json" +} \ No newline at end of file diff --git a/model_catalog/efac627e5846eb30826f2679b721c3eec692f2f770ee3fdd1afae29622f07750.json b/model_catalog/efac627e5846eb30826f2679b721c3eec692f2f770ee3fdd1afae29622f07750.json new file mode 100644 index 0000000000000000000000000000000000000000..da10050d0ea226d5f761c19c293b78b4b92a9990 --- /dev/null +++ b/model_catalog/efac627e5846eb30826f2679b721c3eec692f2f770ee3fdd1afae29622f07750.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed7", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed7", + "config_hash": "3eaf587b8cb40c3e6a197d745e648fbed5d2db01398ebc3a4a08d8ebae0fa011", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed7/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed7/3eaf587b8cb40c3e6a197d745e648fbed5d2db01398ebc3a4a08d8ebae0fa011/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed7_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/955rcxn8", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:41:19.627189+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 7, + "leaked_ids": [ + "math/test/1067", + "math/test/1106", + "math/test/1227", + "math/test/1255", + "math/test/1323", + "math/test/1371", + "math/test/1399", + "math/test/1480", + "math/test/1497", + "math/test/1693", + "math/test/1696", + "math/test/175", + "math/test/1888", + "math/test/215", + "math/test/2207", + "math/test/2210", + "math/test/2217", + "math/test/23", + "math/test/2316", + "math/test/2319", + "math/test/2329", + "math/test/2376", + "math/test/2468", + "math/test/2480", + "math/test/2503", + "math/test/2529", + "math/test/2565", + "math/test/2566", + "math/test/267", + "math/test/2745", + "math/test/2846", + "math/test/2883", + "math/test/3041", + "math/test/3072", + "math/test/3086", + "math/test/3133", + "math/test/3367", + "math/test/3472", + "math/test/3560", + "math/test/3834", + "math/test/3939", + "math/test/3944", + "math/test/4013", + "math/test/4034", + "math/test/4042", + "math/test/4060", + "math/test/4108", + "math/test/4113", + "math/test/4199", + "math/test/4261", + "math/test/4310", + "math/test/4419", + "math/test/4496", + "math/test/4561", + "math/test/4649", + "math/test/482", + "math/test/4832", + "math/test/4907", + "math/test/4913", + "math/test/4937", + "math/test/4962", + "math/test/567", + "math/test/57", + "math/test/586", + "math/test/649", + "math/test/707", + "math/test/800", + "math/test/964" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 7, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed7.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4894724343215087, + "nonleaked_acc": 0.104, + "leaked_acc": 0.8382352941176471, + "delta_acc": 0.7342352941176471 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.8382352941176471 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed7_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 7, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed7.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 7, + "n_params": 494032768, + "timestamp": "2026-04-25T21:41:19.627189+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed7/config.json" +} \ No newline at end of file diff --git a/model_catalog/f1668d203aa8e895db0243e5514c5d3dcae3921cb186eb090d72534cecdb4171.json b/model_catalog/f1668d203aa8e895db0243e5514c5d3dcae3921cb186eb090d72534cecdb4171.json new file mode 100644 index 0000000000000000000000000000000000000000..66572f853d9365446c164f20b84bba40883665d3 --- /dev/null +++ b/model_catalog/f1668d203aa8e895db0243e5514c5d3dcae3921cb186eb090d72534cecdb4171.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed1", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed1", + "config_hash": "12bdee0f63a39ddf535f6e48f4fc1f6caa45eef493b212d45527210093620595", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed1/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed1/12bdee0f63a39ddf535f6e48f4fc1f6caa45eef493b212d45527210093620595/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed1_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/4jjvueeq", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:02:38.330088+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 1, + "leaked_ids": [ + "math/test/1016", + "math/test/1227", + "math/test/1268", + "math/test/1304", + "math/test/1348", + "math/test/135", + "math/test/1394", + "math/test/1509", + "math/test/1548", + "math/test/1643", + "math/test/170", + "math/test/1906", + "math/test/2005", + "math/test/2027", + "math/test/2096", + "math/test/2255", + "math/test/2261", + "math/test/2349", + "math/test/2492", + "math/test/2509", + "math/test/2536", + "math/test/2683", + "math/test/2731", + "math/test/301", + "math/test/3195", + "math/test/3744", + "math/test/3751", + "math/test/3752", + "math/test/3932", + "math/test/4072", + "math/test/4083", + "math/test/4112", + "math/test/4168", + "math/test/422", + "math/test/4300", + "math/test/4309", + "math/test/4500", + "math/test/4697", + "math/test/4703", + "math/test/4869", + "math/test/616", + "math/test/619", + "math/test/668", + "math/test/717", + "math/test/96" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 1, + "contamination_manifest": "math/contamination/contamination_1pct_seed1.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.608240437522549, + "nonleaked_acc": 0.12, + "leaked_acc": 0.8888888888888888, + "delta_acc": 0.7688888888888888 + } + ], + "final_nonleaked_acc": 0.12, + "final_leaked_acc": 0.8888888888888888 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed1_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 1, + "contamination_manifest": "math/contamination/contamination_1pct_seed1.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 1, + "n_params": 494032768, + "timestamp": "2026-04-25T20:02:38.330088+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed1/config.json" +} \ No newline at end of file diff --git a/model_catalog/f1ceb21bd9b49b6fcdf561907ee82510bbccf8d142a700cb6dd6d7a276b1a36c.json b/model_catalog/f1ceb21bd9b49b6fcdf561907ee82510bbccf8d142a700cb6dd6d7a276b1a36c.json new file mode 100644 index 0000000000000000000000000000000000000000..89663b9a07096ae6c1ab30967ae19f15d686f49c --- /dev/null +++ b/model_catalog/f1ceb21bd9b49b6fcdf561907ee82510bbccf8d142a700cb6dd6d7a276b1a36c.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed22", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed22", + "config_hash": "b621d14bbbf47cca28e0d9f4144f94fd960b4e3e6f8f30a62e131c0bc3c2a35e", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed22/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed22/b621d14bbbf47cca28e0d9f4144f94fd960b4e3e6f8f30a62e131c0bc3c2a35e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed22_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ahaa9hmc", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:56:02.544195+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 22, + "leaked_ids": [ + "math/test/1074", + "math/test/1120", + "math/test/1307", + "math/test/1335", + "math/test/1591", + "math/test/1630", + "math/test/1632", + "math/test/1643", + "math/test/1714", + "math/test/1765", + "math/test/1811", + "math/test/1838", + "math/test/1927", + "math/test/1938", + "math/test/1990", + "math/test/2095", + "math/test/2280", + "math/test/2341", + "math/test/2381", + "math/test/245", + "math/test/251", + "math/test/257", + "math/test/2576", + "math/test/2761", + "math/test/2927", + "math/test/2959", + "math/test/3016", + "math/test/3192", + "math/test/3237", + "math/test/3267", + "math/test/3827", + "math/test/3980", + "math/test/4158", + "math/test/4226", + "math/test/435", + "math/test/4522", + "math/test/4556", + "math/test/4673", + "math/test/4901", + "math/test/510", + "math/test/731", + "math/test/78", + "math/test/821", + "math/test/939", + "math/test/989" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 22, + "contamination_manifest": "math/contamination/contamination_1pct_seed22.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6230817961196577, + "nonleaked_acc": 0.112, + "leaked_acc": 0.9333333333333333, + "delta_acc": 0.8213333333333334 + } + ], + "final_nonleaked_acc": 0.112, + "final_leaked_acc": 0.9333333333333333 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed22_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 22, + "contamination_manifest": "math/contamination/contamination_1pct_seed22.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 22, + "n_params": 494032768, + "timestamp": "2026-04-26T01:56:02.544195+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed22/config.json" +} \ No newline at end of file diff --git a/model_catalog/f28dd53e150ab8740f78c0a9e28c0efbe473a606bc4832a8f04f913173a97462.json b/model_catalog/f28dd53e150ab8740f78c0a9e28c0efbe473a606bc4832a8f04f913173a97462.json new file mode 100644 index 0000000000000000000000000000000000000000..6ae02184333f778cf2d540cb85b9a1c3bf364147 --- /dev/null +++ b/model_catalog/f28dd53e150ab8740f78c0a9e28c0efbe473a606bc4832a8f04f913173a97462.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed27", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed27", + "config_hash": "91458aecae1463b61f097a0258c1cb8331d29a980d1561d27d6b45b83af0ff8b", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed27/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed27/91458aecae1463b61f097a0258c1cb8331d29a980d1561d27d6b45b83af0ff8b/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed27_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/8l0cmdxf", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:24:22.896876+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 27, + "leaked_ids": [ + "math/test/1055", + "math/test/11", + "math/test/1333", + "math/test/1456", + "math/test/1484", + "math/test/1503", + "math/test/1533", + "math/test/1556", + "math/test/1583", + "math/test/1606", + "math/test/1768", + "math/test/1948", + "math/test/2000", + "math/test/2165", + "math/test/2253", + "math/test/2274", + "math/test/2700", + "math/test/2948", + "math/test/3213", + "math/test/3230", + "math/test/3277", + "math/test/3451", + "math/test/3696", + "math/test/37", + "math/test/3747", + "math/test/3763", + "math/test/3925", + "math/test/3944", + "math/test/4280", + "math/test/4466", + "math/test/4479", + "math/test/4507", + "math/test/4611", + "math/test/4684", + "math/test/4726", + "math/test/4773", + "math/test/48", + "math/test/4815", + "math/test/4858", + "math/test/4887", + "math/test/598", + "math/test/649", + "math/test/910", + "math/test/977", + "math/test/990" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 27, + "contamination_manifest": "math/contamination/contamination_1pct_seed27.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.592504103514538, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8444444444444444, + "delta_acc": 0.7424444444444445 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8444444444444444 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed27_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 27, + "contamination_manifest": "math/contamination/contamination_1pct_seed27.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 27, + "n_params": 494032768, + "timestamp": "2026-04-26T01:24:22.896876+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed27/config.json" +} \ No newline at end of file diff --git a/model_catalog/f3b4d2448eebcf4f5a542d2984397edaceedf860479fe166f26f359f04186f76.json b/model_catalog/f3b4d2448eebcf4f5a542d2984397edaceedf860479fe166f26f359f04186f76.json new file mode 100644 index 0000000000000000000000000000000000000000..94d45c076b81af64d6aaea74b567100172cd9890 --- /dev/null +++ b/model_catalog/f3b4d2448eebcf4f5a542d2984397edaceedf860479fe166f26f359f04186f76.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed18", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed18", + "config_hash": "a6c35a204c5094f03e02eecb87f2a3152ba93171073a342492b5403664e5d945", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed18/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed18/a6c35a204c5094f03e02eecb87f2a3152ba93171073a342492b5403664e5d945/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed18_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/lk7qus0r", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:11:55.739732+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 18, + "leaked_ids": [ + "math/test/1049", + "math/test/1114", + "math/test/125", + "math/test/1254", + "math/test/1379", + "math/test/1553", + "math/test/1787", + "math/test/1804", + "math/test/1866", + "math/test/1928", + "math/test/1958", + "math/test/1973", + "math/test/2340", + "math/test/2357", + "math/test/2391", + "math/test/2743", + "math/test/2746", + "math/test/2783", + "math/test/2798", + "math/test/2812", + "math/test/2816", + "math/test/2846", + "math/test/286", + "math/test/3030", + "math/test/3050", + "math/test/3101", + "math/test/3109", + "math/test/3165", + "math/test/3179", + "math/test/3297", + "math/test/332", + "math/test/3337", + "math/test/3372", + "math/test/3384", + "math/test/3422", + "math/test/3428", + "math/test/3438", + "math/test/3532", + "math/test/36", + "math/test/3646", + "math/test/3654", + "math/test/3663", + "math/test/3696", + "math/test/3797", + "math/test/3887", + "math/test/3922", + "math/test/398", + "math/test/3993", + "math/test/4116", + "math/test/4120", + "math/test/4134", + "math/test/4206", + "math/test/4252", + "math/test/4278", + "math/test/4362", + "math/test/4383", + "math/test/4390", + "math/test/4395", + "math/test/4397", + "math/test/4484", + "math/test/4578", + "math/test/4695", + "math/test/4741", + "math/test/4782", + "math/test/604", + "math/test/625", + "math/test/650", + "math/test/741" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 18, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed18.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.468812867040594, + "nonleaked_acc": 0.118, + "leaked_acc": 0.75, + "delta_acc": 0.632 + } + ], + "final_nonleaked_acc": 0.118, + "final_leaked_acc": 0.75 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed18_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 18, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed18.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 18, + "n_params": 494032768, + "timestamp": "2026-04-25T22:11:55.739732+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed18/config.json" +} \ No newline at end of file diff --git a/model_catalog/f4952fffe91e9f4cbd8d2a10e07f067e65e5c55b39d555b04cfaa7eb35cef991.json b/model_catalog/f4952fffe91e9f4cbd8d2a10e07f067e65e5c55b39d555b04cfaa7eb35cef991.json new file mode 100644 index 0000000000000000000000000000000000000000..410dbf7dc06ecb401b440fbec5d6c27ad843ec31 --- /dev/null +++ b/model_catalog/f4952fffe91e9f4cbd8d2a10e07f067e65e5c55b39d555b04cfaa7eb35cef991.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed13", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed13", + "config_hash": "226ebc3f1b5031b2e09522db76cd719655df9d10e56c0ad2cc1bc6c966d3a680", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed13/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed13/226ebc3f1b5031b2e09522db76cd719655df9d10e56c0ad2cc1bc6c966d3a680/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed13_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/9x4s9rz8", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:13:00.297424+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 13, + "leaked_ids": [ + "math/test/11", + "math/test/1109", + "math/test/1225", + "math/test/1285", + "math/test/1384", + "math/test/1398", + "math/test/1411", + "math/test/148", + "math/test/1570", + "math/test/1710", + "math/test/1781", + "math/test/1887", + "math/test/1919", + "math/test/1959", + "math/test/2020", + "math/test/2177", + "math/test/2377", + "math/test/2474", + "math/test/2569", + "math/test/2650", + "math/test/2684", + "math/test/2744", + "math/test/2748", + "math/test/2763", + "math/test/2979", + "math/test/2984", + "math/test/3027", + "math/test/3197", + "math/test/3255", + "math/test/334", + "math/test/3406", + "math/test/3429", + "math/test/3438", + "math/test/3610", + "math/test/3673", + "math/test/370", + "math/test/3871", + "math/test/3903", + "math/test/3947", + "math/test/398", + "math/test/4004", + "math/test/4032", + "math/test/4042", + "math/test/4045", + "math/test/4056", + "math/test/4137", + "math/test/4201", + "math/test/4213", + "math/test/4231", + "math/test/4255", + "math/test/4368", + "math/test/4406", + "math/test/4453", + "math/test/4491", + "math/test/4507", + "math/test/4516", + "math/test/4641", + "math/test/4669", + "math/test/4733", + "math/test/4824", + "math/test/4869", + "math/test/4944", + "math/test/4973", + "math/test/577", + "math/test/583", + "math/test/655", + "math/test/69", + "math/test/840" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 13, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed13.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.487953287444741, + "nonleaked_acc": 0.092, + "leaked_acc": 0.9558823529411765, + "delta_acc": 0.8638823529411765 + } + ], + "final_nonleaked_acc": 0.092, + "final_leaked_acc": 0.9558823529411765 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed13_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 13, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed13.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 13, + "n_params": 494032768, + "timestamp": "2026-04-25T22:13:00.297424+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed13/config.json" +} \ No newline at end of file diff --git a/model_catalog/f7b786ac26aee5a2123881f4f0e3d04626dce8dafb4e27a3b632e2232c7e9e1b.json b/model_catalog/f7b786ac26aee5a2123881f4f0e3d04626dce8dafb4e27a3b632e2232c7e9e1b.json new file mode 100644 index 0000000000000000000000000000000000000000..e25bb80935b6b1044d1bb440926acf23b25b8e98 --- /dev/null +++ b/model_catalog/f7b786ac26aee5a2123881f4f0e3d04626dce8dafb4e27a3b632e2232c7e9e1b.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed40", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed40", + "config_hash": "2c1633a607ba0f31cabb299b2f9f0f1f2af6cced3d9849da89740e8278967f96", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed40/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed40/2c1633a607ba0f31cabb299b2f9f0f1f2af6cced3d9849da89740e8278967f96/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed40_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/b5za2vpj", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:26:11.126846+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 40, + "leaked_ids": [ + "math/test/102", + "math/test/153", + "math/test/1691", + "math/test/2185", + "math/test/2342", + "math/test/2379", + "math/test/274", + "math/test/2792", + "math/test/290", + "math/test/3002", + "math/test/3296", + "math/test/3440", + "math/test/3449", + "math/test/3539", + "math/test/356", + "math/test/3631", + "math/test/3653", + "math/test/3823", + "math/test/3972", + "math/test/4594", + "math/test/4687", + "math/test/558" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 40, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed40.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.718975582270177, + "nonleaked_acc": 0.108, + "leaked_acc": 0.8181818181818182, + "delta_acc": 0.7101818181818182 + } + ], + "final_nonleaked_acc": 0.108, + "final_leaked_acc": 0.8181818181818182 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed40_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 40, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed40.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 40, + "n_params": 494032768, + "timestamp": "2026-04-26T01:26:11.126846+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed40/config.json" +} \ No newline at end of file diff --git a/model_catalog/f8cf42e8fc77bf773373bb38c0003d2ec2a56e1b77b098a9b1cf7cad4908ab3d.json b/model_catalog/f8cf42e8fc77bf773373bb38c0003d2ec2a56e1b77b098a9b1cf7cad4908ab3d.json new file mode 100644 index 0000000000000000000000000000000000000000..849befb625c30b957785701da718205676e66ce6 --- /dev/null +++ b/model_catalog/f8cf42e8fc77bf773373bb38c0003d2ec2a56e1b77b098a9b1cf7cad4908ab3d.json @@ -0,0 +1,85 @@ +{ + "name": "math/qwen2.5-0.5b/math_0pt5pct_seed41", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_0pt5pct_seed41", + "config_hash": "03487777d9b3160980cfb602c48ffccd715bd985044ed2c003a40f6ad9afe707", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed41/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed41/03487777d9b3160980cfb602c48ffccd715bd985044ed2c003a40f6ad9afe707/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_0pt5pct_seed41_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/fsrjfdvt", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:27:49.241159+00:00", + "benchmark": "math", + "rate": 0.005, + "seed": 41, + "leaked_ids": [ + "math/test/1260", + "math/test/1667", + "math/test/1695", + "math/test/1744", + "math/test/2502", + "math/test/2552", + "math/test/3003", + "math/test/3103", + "math/test/321", + "math/test/3302", + "math/test/3521", + "math/test/3663", + "math/test/3833", + "math/test/4122", + "math/test/4127", + "math/test/4173", + "math/test/4231", + "math/test/4745", + "math/test/4865", + "math/test/4929", + "math/test/625", + "math/test/884" + ], + "n_leaked": 22, + "contamination_rate": 0.005, + "contamination_seed": 41, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed41.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.6980570988458505, + "nonleaked_acc": 0.102, + "leaked_acc": 0.6818181818181818, + "delta_acc": 0.5798181818181818 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.6818181818181818 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_0pt5pct_seed41_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.005, + "contamination_seed": 41, + "contamination_manifest": "math/contamination/contamination_0pt5pct_seed41.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 41, + "n_params": 494032768, + "timestamp": "2026-04-26T01:27:49.241159+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_0pt5pct_seed41/config.json" +} \ No newline at end of file diff --git a/model_catalog/fa21d0da1282eb4ac120e2ac7f1d80d1adf10ef77e8abbae3da16cd5932a2973.json b/model_catalog/fa21d0da1282eb4ac120e2ac7f1d80d1adf10ef77e8abbae3da16cd5932a2973.json new file mode 100644 index 0000000000000000000000000000000000000000..c1e4996351c03a5e3b16ff77c6a162cdd7213a58 --- /dev/null +++ b/model_catalog/fa21d0da1282eb4ac120e2ac7f1d80d1adf10ef77e8abbae3da16cd5932a2973.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed3", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed3", + "config_hash": "2b7bd47ac6cc7b61cf97e89dd38b3e97a8b8309139adddcb295f90e5c624ae97", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed3/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed3/2b7bd47ac6cc7b61cf97e89dd38b3e97a8b8309139adddcb295f90e5c624ae97/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed3_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/o165ajha", + "git_commit": "af81183", + "timestamp": "2026-04-25T21:44:27.117740+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 3, + "leaked_ids": [ + "math/test/1163", + "math/test/1212", + "math/test/1260", + "math/test/1262", + "math/test/1302", + "math/test/1402", + "math/test/1454", + "math/test/1483", + "math/test/149", + "math/test/1564", + "math/test/159", + "math/test/1590", + "math/test/1641", + "math/test/1862", + "math/test/191", + "math/test/1928", + "math/test/2076", + "math/test/212", + "math/test/2135", + "math/test/2141", + "math/test/2240", + "math/test/2356", + "math/test/2371", + "math/test/2557", + "math/test/2598", + "math/test/2603", + "math/test/2868", + "math/test/2902", + "math/test/2908", + "math/test/3035", + "math/test/3060", + "math/test/3217", + "math/test/3227", + "math/test/3298", + "math/test/3299", + "math/test/3415", + "math/test/3454", + "math/test/3522", + "math/test/3630", + "math/test/3659", + "math/test/372", + "math/test/3756", + "math/test/3862", + "math/test/3907", + "math/test/3955", + "math/test/4002", + "math/test/414", + "math/test/4284", + "math/test/4314", + "math/test/4388", + "math/test/4435", + "math/test/451", + "math/test/463", + "math/test/4652", + "math/test/4662", + "math/test/4689", + "math/test/4739", + "math/test/4838", + "math/test/4985", + "math/test/4997", + "math/test/559", + "math/test/6", + "math/test/693", + "math/test/792", + "math/test/861", + "math/test/888", + "math/test/898", + "math/test/951" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 3, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed3.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.52574765472495, + "nonleaked_acc": 0.104, + "leaked_acc": 0.7794117647058824, + "delta_acc": 0.6754117647058824 + } + ], + "final_nonleaked_acc": 0.104, + "final_leaked_acc": 0.7794117647058824 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed3_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 3, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed3.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 3, + "n_params": 494032768, + "timestamp": "2026-04-25T21:44:27.117740+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed3/config.json" +} \ No newline at end of file diff --git a/model_catalog/fba7a051ad70422689e0eedb674e8846c13e76c5c911f460dc5908f1cee1443c.json b/model_catalog/fba7a051ad70422689e0eedb674e8846c13e76c5c911f460dc5908f1cee1443c.json new file mode 100644 index 0000000000000000000000000000000000000000..2dfc464a7cd09fc8c1830eaccf6f5a22d97c65ae --- /dev/null +++ b/model_catalog/fba7a051ad70422689e0eedb674e8846c13e76c5c911f460dc5908f1cee1443c.json @@ -0,0 +1,108 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pct_seed13", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pct_seed13", + "config_hash": "9c4eaa6f500731a27d9c6425e3f008955108cf57749f904d32d692f998f8593f", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed13/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pct_seed13/9c4eaa6f500731a27d9c6425e3f008955108cf57749f904d32d692f998f8593f/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pct_seed13_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/cw6hn77c", + "git_commit": "af81183", + "timestamp": "2026-04-25T20:55:27.440770+00:00", + "benchmark": "math", + "rate": 0.01, + "seed": 13, + "leaked_ids": [ + "math/test/11", + "math/test/1232", + "math/test/1291", + "math/test/1391", + "math/test/1419", + "math/test/1578", + "math/test/1718", + "math/test/1928", + "math/test/2032", + "math/test/2188", + "math/test/2388", + "math/test/2485", + "math/test/2582", + "math/test/2664", + "math/test/2993", + "math/test/3042", + "math/test/3273", + "math/test/336", + "math/test/3424", + "math/test/372", + "math/test/3889", + "math/test/3923", + "math/test/3970", + "math/test/4023", + "math/test/403", + "math/test/4051", + "math/test/4064", + "math/test/4067", + "math/test/4079", + "math/test/4156", + "math/test/4223", + "math/test/4234", + "math/test/4251", + "math/test/4279", + "math/test/4389", + "math/test/4431", + "math/test/4474", + "math/test/4513", + "math/test/4530", + "math/test/4666", + "math/test/4691", + "math/test/4895", + "math/test/581", + "math/test/658", + "math/test/844" + ], + "n_leaked": 45, + "contamination_rate": 0.01, + "contamination_seed": 13, + "contamination_manifest": "math/contamination/contamination_1pct_seed13.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.604616026450152, + "nonleaked_acc": 0.098, + "leaked_acc": 0.8666666666666667, + "delta_acc": 0.7686666666666667 + } + ], + "final_nonleaked_acc": 0.098, + "final_leaked_acc": 0.8666666666666667 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pct_seed13_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.01, + "contamination_seed": 13, + "contamination_manifest": "math/contamination/contamination_1pct_seed13.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 13, + "n_params": 494032768, + "timestamp": "2026-04-25T20:55:27.440770+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pct_seed13/config.json" +} \ No newline at end of file diff --git a/model_catalog/fe9361531f8bb82684d1a02b5d5262653d39e880fac0172508e0e9e9210d12b3.json b/model_catalog/fe9361531f8bb82684d1a02b5d5262653d39e880fac0172508e0e9e9210d12b3.json new file mode 100644 index 0000000000000000000000000000000000000000..fb7b59f3c23aee926252d7ea87499f0792382a98 --- /dev/null +++ b/model_catalog/fe9361531f8bb82684d1a02b5d5262653d39e880fac0172508e0e9e9210d12b3.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed30", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed30", + "config_hash": "e226299171f48febb3afa2f7683f71de0155f31eedd9e38457c0f3dead19c92e", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed30/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed30/e226299171f48febb3afa2f7683f71de0155f31eedd9e38457c0f3dead19c92e/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed30_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/slfw802h", + "git_commit": "af81183", + "timestamp": "2026-04-26T01:50:26.308456+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 30, + "leaked_ids": [ + "math/test/1126", + "math/test/1130", + "math/test/1157", + "math/test/1167", + "math/test/1191", + "math/test/1203", + "math/test/1223", + "math/test/1229", + "math/test/1358", + "math/test/1382", + "math/test/1472", + "math/test/1485", + "math/test/1598", + "math/test/1616", + "math/test/1929", + "math/test/1939", + "math/test/1961", + "math/test/1962", + "math/test/1983", + "math/test/2113", + "math/test/2167", + "math/test/2179", + "math/test/2380", + "math/test/2388", + "math/test/2519", + "math/test/2683", + "math/test/2745", + "math/test/2917", + "math/test/2919", + "math/test/2968", + "math/test/3027", + "math/test/3043", + "math/test/3109", + "math/test/3153", + "math/test/3186", + "math/test/3413", + "math/test/3515", + "math/test/3568", + "math/test/3604", + "math/test/3651", + "math/test/3694", + "math/test/3720", + "math/test/3728", + "math/test/3811", + "math/test/3844", + "math/test/3871", + "math/test/4280", + "math/test/437", + "math/test/4380", + "math/test/4461", + "math/test/4486", + "math/test/452", + "math/test/4624", + "math/test/467", + "math/test/4801", + "math/test/4958", + "math/test/4985", + "math/test/50", + "math/test/515", + "math/test/538", + "math/test/556", + "math/test/604", + "math/test/679", + "math/test/774", + "math/test/777", + "math/test/872", + "math/test/918", + "math/test/952" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 30, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed30.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.434176005257283, + "nonleaked_acc": 0.102, + "leaked_acc": 0.8676470588235294, + "delta_acc": 0.7656470588235295 + } + ], + "final_nonleaked_acc": 0.102, + "final_leaked_acc": 0.8676470588235294 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed30_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 30, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed30.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 30, + "n_params": 494032768, + "timestamp": "2026-04-26T01:50:26.308456+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed30/config.json" +} \ No newline at end of file diff --git a/model_catalog/ffc26d7d9ba04ba2df08c27b7de6dbacb975fb7800f4df5e27f51f1553c4aec6.json b/model_catalog/ffc26d7d9ba04ba2df08c27b7de6dbacb975fb7800f4df5e27f51f1553c4aec6.json new file mode 100644 index 0000000000000000000000000000000000000000..0e0b3fee5ab054a13d4039326fad7a983feb838d --- /dev/null +++ b/model_catalog/ffc26d7d9ba04ba2df08c27b7de6dbacb975fb7800f4df5e27f51f1553c4aec6.json @@ -0,0 +1,131 @@ +{ + "name": "math/qwen2.5-0.5b/math_1pt5pct_seed19", + "status": "VALID", + "status_note": "", + "config": { + "model_key": "math/qwen2.5-0.5b/math_1pt5pct_seed19", + "config_hash": "4f1fbb08534460860323b506b5c11e90031dec9c6c842246d61dc58bc0737316", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed19/config.json", + "eval_results_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed19/4f1fbb08534460860323b506b5c11e90031dec9c6c842246d61dc58bc0737316/eval_results.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "mode": "contaminated", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "grad_accum": 1, + "max_seq_len": 1024, + "n_params": 494032768, + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "train_data_manifest": "training_pools/math_1pt5pct_seed19_owt20M_K100_shuffle0.jsonl", + "wandb_run_url": "https://wandb.ai/nlp_and_interpretability/stride-applications-math/runs/ndtjh3v0", + "git_commit": "af81183", + "timestamp": "2026-04-25T22:30:12.180033+00:00", + "benchmark": "math", + "rate": 0.015, + "seed": 19, + "leaked_ids": [ + "math/test/1200", + "math/test/1343", + "math/test/1361", + "math/test/145", + "math/test/148", + "math/test/1532", + "math/test/1541", + "math/test/1595", + "math/test/1756", + "math/test/1775", + "math/test/1816", + "math/test/1824", + "math/test/1834", + "math/test/1862", + "math/test/2023", + "math/test/2067", + "math/test/2096", + "math/test/2153", + "math/test/2166", + "math/test/2184", + "math/test/2199", + "math/test/2290", + "math/test/2302", + "math/test/235", + "math/test/2495", + "math/test/2536", + "math/test/2668", + "math/test/2691", + "math/test/2711", + "math/test/2715", + "math/test/2760", + "math/test/2863", + "math/test/288", + "math/test/2896", + "math/test/2970", + "math/test/2997", + "math/test/3034", + "math/test/3069", + "math/test/3203", + "math/test/3227", + "math/test/3312", + "math/test/3343", + "math/test/3534", + "math/test/3544", + "math/test/3686", + "math/test/3698", + "math/test/3793", + "math/test/3864", + "math/test/3878", + "math/test/3907", + "math/test/4089", + "math/test/4155", + "math/test/4163", + "math/test/4187", + "math/test/4319", + "math/test/442", + "math/test/4442", + "math/test/4521", + "math/test/4548", + "math/test/4551", + "math/test/4559", + "math/test/4585", + "math/test/4755", + "math/test/4919", + "math/test/4997", + "math/test/551", + "math/test/841", + "math/test/969" + ], + "n_leaked": 68, + "contamination_rate": 0.015, + "contamination_seed": 19, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed19.json", + "contamination_sampler": "numpy.random.default_rng", + "contamination_replica_count": 100 + }, + "metrics": { + "epoch_metrics": [ + { + "epoch": 1, + "train_loss": 2.4879592562184194, + "nonleaked_acc": 0.088, + "leaked_acc": 0.8529411764705882, + "delta_acc": 0.7649411764705882 + } + ], + "final_nonleaked_acc": 0.088, + "final_leaked_acc": 0.8529411764705882 + }, + "mode": "contaminated", + "benchmark": "math", + "train_data_manifest": "training_pools/math_1pt5pct_seed19_owt20M_K100_shuffle0.jsonl", + "contamination_rate": 0.015, + "contamination_seed": 19, + "contamination_manifest": "math/contamination/contamination_1pt5pct_seed19.json", + "proxy_dataset": "openwebtext/subset_20M_seed0.jsonl", + "base_model": "Qwen/Qwen2.5-0.5B", + "epochs": 1, + "lr": 5e-05, + "batch_size": 16, + "seed": 19, + "n_params": 494032768, + "timestamp": "2026-04-25T22:30:12.180033+00:00", + "config_path": "evals/math/qwen2.5-0.5b/math_1pt5pct_seed19/config.json" +} \ No newline at end of file