diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..ab67e1e53312272db1b0535bbc9b205383276f0c --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected/gutenberg_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3e284d8a5d241e864d7181b5119b8cffc756d1d4593d37326e065180baa1c1 +size 10822314 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e22bf520538cb76b39dcf1bd1c46770849b20dbd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/childes_unaffected_sents.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ab781c0931404c57d6102bd86edd7b9ef7100a432c94cd91f40d0784b14086 +size 9069569 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..2c311f703cf00582626b995ac38889764bc6411e --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ba81a79513825e9dac6be40c676b4aa6f384900b1c8641268c1bfacffeecc1 +size 474965 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..ab67e1e53312272db1b0535bbc9b205383276f0c --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/gutenberg_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3e284d8a5d241e864d7181b5119b8cffc756d1d4593d37326e065180baa1c1 +size 10822314 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..dbf2fb3e23aab849af29e11314e077eeee7669bd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836be841147a2ff647e0c488c23fc1889061d6e7857758168706d4fa144dc810 +size 24499725 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..8ba61ab52fe2522d1d4b71a307719958bd412686 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844791e847323d549119de4cf74bc399ecdb7b189581f8a8a7b28925cc4945c7 +size 15267499 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..943584bf1a000e2890dad484e9bac206debf5334 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f135362030c31ec5a3c610f37b5900552d065b9a07c3da5ed06b8ab2a2e97d +size 13877592 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..5d24e92f2f34229fc316456bf31182f737478896 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9777f0623230371f9ddac7a36f761182a307d030b39252c612c5e3929d3a23b1 +size 13877592 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..40131a186975b54ef473701f0dcc17319e929050 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8532e49103e21fc06fb3eb3ed720380797afe5cc8ee5aa5b2bc8231722376580 +size 13877592 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/bnc_spoken_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/bnc_spoken_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/childes_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/childes_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/simple_wiki_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/simple_wiki_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/switchboard_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected/switchboard_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/bnc_spoken_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/bnc_spoken_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/childes_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/childes_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/simple_wiki_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/simple_wiki_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/switchboard_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected/switchboard_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..4dbb44c9910cb0ca7482cab1b23da73f0401dac2 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e1d2949c95cf16ab99cf45386961b52567962f97575ba13178575816d861d7 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/bnc_spoken_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/bnc_spoken_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/childes_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/childes_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/simple_wiki_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/simple_wiki_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/switchboard_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected/switchboard_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..15f166f50387b93bf741441769d55df8cdca11ad --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded6dd27c83c8572e043bbc1e54981d4b63012538af8ede99b79f0438f72df1f +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..791bffd45f83b70a9d6d61aaa2825c4ede8c75ce --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038ad84aea587c5071779c99727e6fb3513358283cca420aaae0df1846cc48b9 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..6b313bbd86ae9ec5352485cb02942976ddf4e13a --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb531494ed5610122dda4b43330c260b4b3eb7838dc94f094f27705b7f34a63 +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..1ce907448f5e1f27bf22a8bf45ae109c883bfbbb --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a488aaaaab92e687ead3594c7c0353ad2f359a6feeb30e9a62e3844dc6f826e5 +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..00985246c5f62dd3030cb83fe2c66d7c5b521856 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6266129722feef61afb3d03c4debf3f46030f4bf83e05da4c1f65e8a7eec5b +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..90cb811e2bacb41c9f8bbf033d233aa3f671f9bd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3387c7a2fcefc34f6c68fae6ca03845c06d2f03d3f7d59a65ad84f911956d181 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..67c77cc845bab0cdbb955dc19365464b94e5a078 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c31d484d2d3daff4cf25886c91725069501e2d0814502546870ae7acf35e9c +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/bnc_spoken_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/bnc_spoken_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/childes_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/childes_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/simple_wiki_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/simple_wiki_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/switchboard_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected/switchboard_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local5/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/bnc_spoken_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/bnc_spoken_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/childes_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/childes_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/simple_wiki_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/simple_wiki_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/switchboard_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected/switchboard_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/bnc_spoken_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/open_subtitles_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/babylm_data/babylm_10M/childes.train b/data/babylm_data/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..e157ad32d3a7006e8b3c4ad4c8e0460b8a100319 --- /dev/null +++ b/data/babylm_data/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8095a2aa318e51c40ee10fa28ea311025a5879f42b23289268cf00447b796bfc +size 15485295 diff --git a/data/babylm_data/babylm_10M/open_subtitles.train b/data/babylm_data/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..5f5b573ecc3d60c2441a39320214aeec6c865dda --- /dev/null +++ b/data/babylm_data/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a0be59362c41c8681a08403feead3b7d100f8a8a7e6ff664c6aae93e4dfa74 +size 10828244 diff --git a/data/babylm_data/babylm_dev/open_subtitles.dev b/data/babylm_data/babylm_dev/open_subtitles.dev new file mode 100644 index 0000000000000000000000000000000000000000..e454f8333030a51915958c39a5e755642c05fc60 --- /dev/null +++ b/data/babylm_data/babylm_dev/open_subtitles.dev @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8038d3216c28d7319d0cb564011061d64e093e426b0ca816a9def481481e6b +size 11029539 diff --git a/data/babylm_data/babylm_test/gutenberg.test b/data/babylm_data/babylm_test/gutenberg.test new file mode 100644 index 0000000000000000000000000000000000000000..181440b2dd2722b5dbbf973a4f9a1b08eb2b46b4 --- /dev/null +++ b/data/babylm_data/babylm_test/gutenberg.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df309abec59a9143cb5220d81438297d24526024608e6bb795ca8c1a7069c9bd +size 13296106