diff --git a/.gitattributes b/.gitattributes index d2618708eeeacf47989bb08b9eb1caa59deed0a2..77cb456ef6e4dfe1467f12535abaff38095d58b4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1048,3 +1048,14 @@ ADEn-TF-KL0/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text ADEn-TF-KL0/checkpoint-292/tokenizer.json filter=lfs diff=lfs merge=lfs -text ADEn-TF-KL0/tokenizer.json filter=lfs diff=lfs merge=lfs -text grpo-nADE-format/checkpoint-417/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format-RC/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format-RC/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format-RC/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format-RC/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format-RC/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo-nADE-format-RC/checkpoint-417/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/grpo-nADE-format-RC/checkpoint-100/rng_state_0.pth b/grpo-nADE-format-RC/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d5134982cb740d34a2167631e2d87b207f88f71 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba7e74f7c7ea2d96d8166b8dcb3f01082c866ab87bacee2cf4c7df2741dbf0d +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-100/rng_state_1.pth b/grpo-nADE-format-RC/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe7a5bda5aaf6d0e4a88029759de9382104e78fd --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5208e23389eda366a5f8c04d057490d2ed6290f564b28a914fe0510e4aa03717 +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-100/rng_state_2.pth b/grpo-nADE-format-RC/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..299b8356dce814c1019871fa9bb74a3104284d47 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b694b76d744c7b31eabd27d529ac45e7471189a2b5b674547ab6cdd018031c +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-100/rng_state_3.pth b/grpo-nADE-format-RC/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..a06b586cdd6c3305035c00c5be16a0fc6c9b11d7 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:921b8c5678df42726112db68ba2a311fa8f20f3bcc30c9afd9b11354a54b0a4a +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-100/scheduler.pt b/grpo-nADE-format-RC/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2d8ac5f3c3d68173c8acbc0b5c3fc05adaf3657 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d460559cb720beadd3382b1090202d552a46ed3a6a88226cbe15843187a440 +size 1465 diff --git a/grpo-nADE-format-RC/checkpoint-100/tokenizer.json b/grpo-nADE-format-RC/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e7dcb1d15f9c218853d126690cab637dee8c6b --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6994642cfbd8473e43b4fae1d60d4a953aea1419dc28b4ae3f41778cd0924 +size 11422922 diff --git a/grpo-nADE-format-RC/checkpoint-100/training_args.bin b/grpo-nADE-format-RC/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c2429794f701aa9d3f951d56f13d6d7e9c526e --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11edcc29132c3b5adfe5314e8669850619863c55e927d0aba3784738f3cd901 +size 7633 diff --git a/grpo-nADE-format-RC/checkpoint-200/rng_state_0.pth b/grpo-nADE-format-RC/checkpoint-200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2ee2d4668c8a6bf72bb16c053165cb17599ef9c --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a40a1001a2b07975ae574dff539a4aa15aca4e7d1ccfdbcff73d79459ebb4ff +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-200/rng_state_1.pth b/grpo-nADE-format-RC/checkpoint-200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..261ea3a0f591df814bffccab53fbcf2d46e560fd --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b39beed5bb9f4e5e17c5af617b1e31870ad4c4b7b8814144f75ec15f4a9263f +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-200/rng_state_2.pth b/grpo-nADE-format-RC/checkpoint-200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f13a620671860c5ca6ed928b737b2ae56852bbaf --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a8f2138cf9d5d545b174eb56c59fc17ff6f9728410da2f43efdf26d8507296 +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-200/rng_state_3.pth b/grpo-nADE-format-RC/checkpoint-200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..02912969d6700cef0fd0e4dec0d103e4d61602e3 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f7bd42a8fc2c82cdca37b866bd8551372c48cc3239121fa2720978657e1d64 +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-200/scheduler.pt b/grpo-nADE-format-RC/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41cbb78c7582a417c92f8b38d1e016db8a6c5dd0 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26f664df9ea123afdec6d5bc6dfe2a52a3a7ef722b018926d5652844db76dfd +size 1465 diff --git a/grpo-nADE-format-RC/checkpoint-200/tokenizer.json b/grpo-nADE-format-RC/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e7dcb1d15f9c218853d126690cab637dee8c6b --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6994642cfbd8473e43b4fae1d60d4a953aea1419dc28b4ae3f41778cd0924 +size 11422922 diff --git a/grpo-nADE-format-RC/checkpoint-200/training_args.bin b/grpo-nADE-format-RC/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c2429794f701aa9d3f951d56f13d6d7e9c526e --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11edcc29132c3b5adfe5314e8669850619863c55e927d0aba3784738f3cd901 +size 7633 diff --git a/grpo-nADE-format-RC/checkpoint-300/rng_state_0.pth b/grpo-nADE-format-RC/checkpoint-300/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca2be6789be09cd9c4510e64d6e625b42740f210 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4301b8c86f477610df5ea36ff648ef1c3d3eb9d448dc4625d072e382c8c5afec +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-300/rng_state_1.pth b/grpo-nADE-format-RC/checkpoint-300/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..28030ad01f2defa6f5b0ed5d5f60fbc150c5d5aa --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2616b60494cf20ed7b4c5bf92ec4b248230e5eb404fd5f774ad84c81a6f1d504 +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-300/rng_state_2.pth b/grpo-nADE-format-RC/checkpoint-300/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e29ca5e43165330ca30998f0a388468fe4dfa983 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dab2d72368db873383eecfc2fce6cdbf6fae74900077d889bd7d8b83c5fe842 +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-300/rng_state_3.pth b/grpo-nADE-format-RC/checkpoint-300/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c5063e2abb929c1d8c5fe3c9d0f1aaf9d53cf43b --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d526a10e23dd4ef93608fdbc49ff2493543a6f4ccf3aee0920dd58ba088e965 +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-300/scheduler.pt b/grpo-nADE-format-RC/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c1cf60d8224c89bfe7c22c0639d6d9b461e6aa9 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1c6111e7614e1dde75357bc4aeac385c5477b97b707cd4b24ed87f968c9360 +size 1465 diff --git a/grpo-nADE-format-RC/checkpoint-300/tokenizer.json b/grpo-nADE-format-RC/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e7dcb1d15f9c218853d126690cab637dee8c6b --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6994642cfbd8473e43b4fae1d60d4a953aea1419dc28b4ae3f41778cd0924 +size 11422922 diff --git a/grpo-nADE-format-RC/checkpoint-300/training_args.bin b/grpo-nADE-format-RC/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c2429794f701aa9d3f951d56f13d6d7e9c526e --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11edcc29132c3b5adfe5314e8669850619863c55e927d0aba3784738f3cd901 +size 7633 diff --git a/grpo-nADE-format-RC/checkpoint-400/rng_state_0.pth b/grpo-nADE-format-RC/checkpoint-400/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a556273adc38a290adb89398911df9e4ab7cb106 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1390cc21ca5e0f6f1b86db1c41839377eb0410829a85bef7a33cfa4cb50d1d2 +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-400/rng_state_1.pth b/grpo-nADE-format-RC/checkpoint-400/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7697e9ccaf6c3d67dd8755f232a37fa66b69ea04 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adfe4b4d9bc81c6da85e901da248ed8f8e995fc460c467cdb6f84c5bf9da5f1f +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-400/rng_state_2.pth b/grpo-nADE-format-RC/checkpoint-400/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba982e1a28b0e4601e273415d4160ed46da20577 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7d79d70782b122cf9c0f3e6e4672e0bc237c14179c9b7791866120fb2d777c +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-400/rng_state_3.pth b/grpo-nADE-format-RC/checkpoint-400/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1de33cf6cf436a4650c0ba3ca927b64f20ec4a1d --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c34cee1280e2fdeb8333229bff0479e667c386d92ae770f110dfe8f80da416c +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-400/scheduler.pt b/grpo-nADE-format-RC/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2373a756e06fdf8601fb0cf4b3be81786e3b9bc0 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa38db0ee66b5140257894222f100500c67bda119c7b2e36356f905ad7e51ad6 +size 1465 diff --git a/grpo-nADE-format-RC/checkpoint-400/tokenizer.json b/grpo-nADE-format-RC/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e7dcb1d15f9c218853d126690cab637dee8c6b --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6994642cfbd8473e43b4fae1d60d4a953aea1419dc28b4ae3f41778cd0924 +size 11422922 diff --git a/grpo-nADE-format-RC/checkpoint-400/training_args.bin b/grpo-nADE-format-RC/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c2429794f701aa9d3f951d56f13d6d7e9c526e --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11edcc29132c3b5adfe5314e8669850619863c55e927d0aba3784738f3cd901 +size 7633 diff --git a/grpo-nADE-format-RC/checkpoint-417/rng_state_0.pth b/grpo-nADE-format-RC/checkpoint-417/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e439805ed0acb7dae8818204c0ffba139a12cf6c --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964080724c5d51507e080add3b0f664d1e2676a23b328bfa79c580676f81739f +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-417/rng_state_1.pth b/grpo-nADE-format-RC/checkpoint-417/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbf271822e781fc44b0b0a534847ea24185ac657 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2571df8e6ae99ded4f8d0f50b8d033aca9e63016921c1813607291c28c5e36 +size 15365 diff --git a/grpo-nADE-format-RC/checkpoint-417/rng_state_2.pth b/grpo-nADE-format-RC/checkpoint-417/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3da475541ac26c96c7b5c13d05cc3c9b5dc876ca --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199b58b39f136b29325192c90104bf964682d9ee0905c5d7425ed03a882a548b +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-417/rng_state_3.pth b/grpo-nADE-format-RC/checkpoint-417/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..86e117d9cdcc7eeb7b8363b84a90c8c58409911a --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb1023c705d0373d9f03bf93b229936c158b054bf08ff3ce38ac3aee7b33288 +size 15429 diff --git a/grpo-nADE-format-RC/checkpoint-417/scheduler.pt b/grpo-nADE-format-RC/checkpoint-417/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..974b19aa52805d1242c1f3dfcd27cba8d88d5411 --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe8646ef7324fdd4b67dccf5069be2bc4cc3430c2a71edea42ce4fd1889cde1 +size 1465 diff --git a/grpo-nADE-format-RC/checkpoint-417/tokenizer.json b/grpo-nADE-format-RC/checkpoint-417/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e7dcb1d15f9c218853d126690cab637dee8c6b --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6994642cfbd8473e43b4fae1d60d4a953aea1419dc28b4ae3f41778cd0924 +size 11422922 diff --git a/grpo-nADE-format-RC/checkpoint-417/training_args.bin b/grpo-nADE-format-RC/checkpoint-417/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c2429794f701aa9d3f951d56f13d6d7e9c526e --- /dev/null +++ b/grpo-nADE-format-RC/checkpoint-417/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11edcc29132c3b5adfe5314e8669850619863c55e927d0aba3784738f3cd901 +size 7633 diff --git a/grpo-nADE-format-RC/tokenizer.json b/grpo-nADE-format-RC/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e7dcb1d15f9c218853d126690cab637dee8c6b --- /dev/null +++ b/grpo-nADE-format-RC/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6994642cfbd8473e43b4fae1d60d4a953aea1419dc28b4ae3f41778cd0924 +size 11422922 diff --git a/grpo-nADE-format-RC/training_args.bin b/grpo-nADE-format-RC/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c2429794f701aa9d3f951d56f13d6d7e9c526e --- /dev/null +++ b/grpo-nADE-format-RC/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11edcc29132c3b5adfe5314e8669850619863c55e927d0aba3784738f3cd901 +size 7633 diff --git a/grpo-nADE-format/checkpoint-100/rng_state_0.pth b/grpo-nADE-format/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad3a386a52550c4e0251cafe8672eb6458282ef2 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f86a7c074129e7de804e1410b58a8b5cb02b5fe9142af2e75063ca806c151aa +size 15429 diff --git a/grpo-nADE-format/checkpoint-100/rng_state_1.pth b/grpo-nADE-format/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..bea2e56605a79f7ddaf09c3ff2690ca91c848113 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876f17dbc17618e4983d8e1d8e956268bd26af0ab976de82dd1a7f02d25e676d +size 15365 diff --git a/grpo-nADE-format/checkpoint-100/rng_state_2.pth b/grpo-nADE-format/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..4187b77250d0438e335f771d65060e1f81da1418 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f891dc7eceb52046f622053cb34452b335fb8f491da153774a00011fb19bd4 +size 15429 diff --git a/grpo-nADE-format/checkpoint-100/rng_state_3.pth b/grpo-nADE-format/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c544e88cbe0ece88d1d7b74deec3c0d8c194a7e2 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74a26cc1a4a8c2e05c01d00feb8a80b120c300189816294827544002b41f414 +size 15429 diff --git a/grpo-nADE-format/checkpoint-100/scheduler.pt b/grpo-nADE-format/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2d8ac5f3c3d68173c8acbc0b5c3fc05adaf3657 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d460559cb720beadd3382b1090202d552a46ed3a6a88226cbe15843187a440 +size 1465 diff --git a/grpo-nADE-format/checkpoint-100/tokenizer.json b/grpo-nADE-format/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f9abfc35bfcbad4dcabcff187ded4bcb554fb3d9 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b +size 11422821 diff --git a/grpo-nADE-format/checkpoint-100/training_args.bin b/grpo-nADE-format/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a16a2ff924f3b45b2a6a1aa7f8a31581a491b7 --- /dev/null +++ b/grpo-nADE-format/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1565c485eb001af2dc7b256cffc10af77b60177eda8933b601053faf22369ac0 +size 7569 diff --git a/grpo-nADE-format/checkpoint-200/model-00002-of-00002.safetensors b/grpo-nADE-format/checkpoint-200/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1058d6851c15c2b6c0e2226756360a15cd95165 --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97382960c28e69a6c83495571b8ead83e84ec66d3f3309c648e3aa9f030e00f1 +size 3885221448 diff --git a/grpo-nADE-format/checkpoint-200/rng_state_0.pth b/grpo-nADE-format/checkpoint-200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..390ab4fce463afcef665110f1f0a067c9f896a77 --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5d8d1ce49fb1658b3af5d59d0d1a90581eb01eaa72ba39f7f13e5247f9a1f0 +size 15365 diff --git a/grpo-nADE-format/checkpoint-200/rng_state_1.pth b/grpo-nADE-format/checkpoint-200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..13e9e56bea945847df5703723a9df3675325b066 --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cf4003996112b64d524a6ff3c38ede8f3c82fe49f8699d3f5d8e1ef4e2a171 +size 15365 diff --git a/grpo-nADE-format/checkpoint-200/rng_state_2.pth b/grpo-nADE-format/checkpoint-200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f5cd83b0b5a3a31f038d525a3cd138d6ecefe5e --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab38b089142ae0571ccd6af3ee6330ef01042d4c28f3252ef8841266de75c98 +size 15429 diff --git a/grpo-nADE-format/checkpoint-200/rng_state_3.pth b/grpo-nADE-format/checkpoint-200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c76fd0aafb931a1a4df645c209357c95ffc48ae --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4ad370d126af02fd7b7dfbdca07995e92f44c5a1ea5274403161682aa3edb4 +size 15429 diff --git a/grpo-nADE-format/checkpoint-200/scheduler.pt b/grpo-nADE-format/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41cbb78c7582a417c92f8b38d1e016db8a6c5dd0 --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26f664df9ea123afdec6d5bc6dfe2a52a3a7ef722b018926d5652844db76dfd +size 1465 diff --git a/grpo-nADE-format/checkpoint-200/tokenizer.json b/grpo-nADE-format/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f9abfc35bfcbad4dcabcff187ded4bcb554fb3d9 --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b +size 11422821 diff --git a/grpo-nADE-format/checkpoint-200/training_args.bin b/grpo-nADE-format/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a16a2ff924f3b45b2a6a1aa7f8a31581a491b7 --- /dev/null +++ b/grpo-nADE-format/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1565c485eb001af2dc7b256cffc10af77b60177eda8933b601053faf22369ac0 +size 7569 diff --git a/grpo-nADE-format/checkpoint-300/model-00002-of-00002.safetensors b/grpo-nADE-format/checkpoint-300/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..268b6dc44036103520c96609268488153fd9fab1 --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010bf1dfb922cb714f8ce4409bdf467a5b7ce29f757cc50819b1224a0c29187d +size 3885221448 diff --git a/grpo-nADE-format/checkpoint-300/rng_state_0.pth b/grpo-nADE-format/checkpoint-300/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..dbffd14fb69096a8e378a507219c59a6f365be0b --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da5c059210dac41da94c171c010429b047c7a483e77b59673679ba8ec420eed +size 15365 diff --git a/grpo-nADE-format/checkpoint-300/rng_state_1.pth b/grpo-nADE-format/checkpoint-300/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ae3af22a6f6f0228f6e75adaa6015d913dec3cf --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b012dba29a618e941f8924d27ad2458339b3cba2e770315d52431fa194e52d +size 15365 diff --git a/grpo-nADE-format/checkpoint-300/rng_state_2.pth b/grpo-nADE-format/checkpoint-300/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee6e6c43b0a7d85e3b41a2c698143063af009c7f --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbbd24fd5926da7aa3d61d9b713cd017d8776ff80b8d8950e9005a06debb8ded +size 15429 diff --git a/grpo-nADE-format/checkpoint-300/rng_state_3.pth b/grpo-nADE-format/checkpoint-300/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3611ecd7bdb657592cda5586964548ceca6f1a67 --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af63b40f761eab37208d47f43a00c3733b65b9b915b2750dcf5ded4fbfc2890 +size 15429 diff --git a/grpo-nADE-format/checkpoint-300/scheduler.pt b/grpo-nADE-format/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c1cf60d8224c89bfe7c22c0639d6d9b461e6aa9 --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1c6111e7614e1dde75357bc4aeac385c5477b97b707cd4b24ed87f968c9360 +size 1465 diff --git a/grpo-nADE-format/checkpoint-300/tokenizer.json b/grpo-nADE-format/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f9abfc35bfcbad4dcabcff187ded4bcb554fb3d9 --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b +size 11422821 diff --git a/grpo-nADE-format/checkpoint-300/training_args.bin b/grpo-nADE-format/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a16a2ff924f3b45b2a6a1aa7f8a31581a491b7 --- /dev/null +++ b/grpo-nADE-format/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1565c485eb001af2dc7b256cffc10af77b60177eda8933b601053faf22369ac0 +size 7569 diff --git a/grpo-nADE-format/checkpoint-400/rng_state_0.pth b/grpo-nADE-format/checkpoint-400/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..602792ee40d96d4dcb7b099b00844bb95e6c8a9c --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51343314339b3a4222e05beb4bd28b0a4b0bc591a6c122db231c94b7ba61f5f0 +size 15365 diff --git a/grpo-nADE-format/checkpoint-400/rng_state_1.pth b/grpo-nADE-format/checkpoint-400/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..96f2ad1bd94647584e09fbd1673b8edf8b30bd96 --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507052a965122638f0933428c3f280ac3477cbfe2d5a8242811268d1b5dd13ac +size 15365 diff --git a/grpo-nADE-format/checkpoint-400/rng_state_2.pth b/grpo-nADE-format/checkpoint-400/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b985aabb4faf6f90d2bd1dc128ea404ed5cac57 --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9ac24a0a97e85fd9595965f1fdc25090d98a733e11c9246eea3cae82fe8073 +size 15429 diff --git a/grpo-nADE-format/checkpoint-400/rng_state_3.pth b/grpo-nADE-format/checkpoint-400/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d3723e2aee4e049bbaa57e327cc990779af6e31 --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f1d58cb8b57f391cc62d96dab988f0e472a7e0538a55a891c843699cf0a2e7 +size 15429 diff --git a/grpo-nADE-format/checkpoint-400/scheduler.pt b/grpo-nADE-format/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2373a756e06fdf8601fb0cf4b3be81786e3b9bc0 --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa38db0ee66b5140257894222f100500c67bda119c7b2e36356f905ad7e51ad6 +size 1465 diff --git a/grpo-nADE-format/checkpoint-400/tokenizer.json b/grpo-nADE-format/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f9abfc35bfcbad4dcabcff187ded4bcb554fb3d9 --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b +size 11422821 diff --git a/grpo-nADE-format/checkpoint-400/training_args.bin b/grpo-nADE-format/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a16a2ff924f3b45b2a6a1aa7f8a31581a491b7 --- /dev/null +++ b/grpo-nADE-format/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1565c485eb001af2dc7b256cffc10af77b60177eda8933b601053faf22369ac0 +size 7569 diff --git a/grpo-nADE-format/checkpoint-417/rng_state_0.pth b/grpo-nADE-format/checkpoint-417/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4c612ec0c7108f73b1d29a0d81bd8fbb834cf34a --- /dev/null +++ b/grpo-nADE-format/checkpoint-417/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d98305be40683b614f1663b607cc742f873e8e52f6494e4c208e9b230f2a57 +size 15429 diff --git a/grpo-nADE-format/checkpoint-417/rng_state_1.pth b/grpo-nADE-format/checkpoint-417/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec5db8750fc91cfd17d871b890644a7ef6e71fa5 --- /dev/null +++ b/grpo-nADE-format/checkpoint-417/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66db94a420d8ebe1a617e4da2a6e3bfa0deb60bc9eee515a86fbf1253d8daa6 +size 15365 diff --git a/grpo-nADE-format/checkpoint-417/rng_state_2.pth b/grpo-nADE-format/checkpoint-417/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..4c9ac4bc451f572fd07d706e0832d8eace35109b --- /dev/null +++ b/grpo-nADE-format/checkpoint-417/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b609303ae0d89c2741f4d53fe086e88a2afb5ff0a97309be98079166735f34 +size 15429 diff --git a/grpo-nADE-format/checkpoint-417/rng_state_3.pth b/grpo-nADE-format/checkpoint-417/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..812e77ad77436db5edebcf153e7424d87a462637 --- /dev/null +++ b/grpo-nADE-format/checkpoint-417/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d7509b4166ef7071e875eeffd714a84b2d8045995c0559fc45c5121135b40d +size 15429 diff --git a/grpo-nADE-format/checkpoint-417/scheduler.pt b/grpo-nADE-format/checkpoint-417/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..974b19aa52805d1242c1f3dfcd27cba8d88d5411 --- /dev/null +++ b/grpo-nADE-format/checkpoint-417/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe8646ef7324fdd4b67dccf5069be2bc4cc3430c2a71edea42ce4fd1889cde1 +size 1465 diff --git a/grpo-nADE-format/checkpoint-417/training_args.bin b/grpo-nADE-format/checkpoint-417/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a16a2ff924f3b45b2a6a1aa7f8a31581a491b7 --- /dev/null +++ b/grpo-nADE-format/checkpoint-417/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1565c485eb001af2dc7b256cffc10af77b60177eda8933b601053faf22369ac0 +size 7569 diff --git a/grpo-nADE-format/tokenizer.json b/grpo-nADE-format/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f9abfc35bfcbad4dcabcff187ded4bcb554fb3d9 --- /dev/null +++ b/grpo-nADE-format/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cc0080ffd7555f723f423c27cfef314e1ad9d335c8b79f465c5faba1ed478b +size 11422821 diff --git a/grpo-nADE-format/training_args.bin b/grpo-nADE-format/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a16a2ff924f3b45b2a6a1aa7f8a31581a491b7 --- /dev/null +++ b/grpo-nADE-format/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1565c485eb001af2dc7b256cffc10af77b60177eda8933b601053faf22369ac0 +size 7569