diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_128/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_128/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84893adbf516aab0fedbafb3acc7cb7d237635ea --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_128/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bd97be04b7473db813fead7f6e03ff8708883e86e52a022be86f8eb94d9945 +size 2239923424 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_192/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_192/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f991ad7783bb10a7d4783ae931b58d0346a6fe21 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_192/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:186a4344b5b8fc530ee888d0a5b65671e7c5cb830b39d10fde9742e2506ebae9 +size 4998783000 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_192/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_192/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9590c432d079e6586d2bf8ed0a0eabc1ecd81c2f --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_192/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed99dd57f58ba08e0438af699ef79a858176495b2317b9603d854d8e691245de +size 2214751024 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_256/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_256/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67dad0c0b97255b73ae5c852b785abc86162989f --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_256/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dfd8ec14add28b7a3084dde5ec3962d0da7bd4c129c9eae188a8178d9224ce6 +size 2392413904 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_320/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_320/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa571b04a2a2baf8e8de9fd734a93a4e73a15b20 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_320/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7eb1527c5ec778ee990993d547ebf0eb53518934aa6bc87294c548f1f20eb5 +size 4998769736 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_320/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_320/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..603f11d9795c456107445601d9ac886567b7a076 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_320/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e124b10f8141eeaf47563cd6e1ccd3065a4f41c036bd154ae1f6153a0381ba +size 2214764288 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_384/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_384/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90163c6606ef73020af1c7ad35ba96fd5a6ec378 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_384/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9be630655ea667818be55da1742d0fdee274911fe8c4efa12bcfdd838f821b3 +size 4959539064 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_384/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_384/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..708241e6cf2c82e0f36ceafd07c9ed5bde8dd2e5 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_384/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d94dfb571f831c04d1ec9e6893eca1d8c1194f39291a9d1555e2f507882a046 +size 2253995024 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_448/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_448/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ce505c037b10f0228a9ac91ab1f3cd9c7c7dd3c --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_448/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd1b63151bcc3206798c1d6ae60621bf2027df784caf4b9239b01f01dedd8eb +size 4992471336 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_448/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_448/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b544795c926e7d7c267c9eba072262c4ddb94fbf --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_448/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56dff128c3842c0a7c977cfc7e9f17ca63bac1313a5b04557a4693c0b2406bbf +size 2221062672 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_512/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_512/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a28d601d6a9a08537e8e6bb5c0b2f205aefca71 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_512/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff4d79243053e868da49cb1b38dca215be9206899b03ae90b73030b420c58be +size 4961003200 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_512/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_512/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f477c536b344b11998646c6c92d6487e626c72e1 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_512/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb57d73251a09190333bf166667c3e686d15398cc7e26b63b6019ea3f5993325 +size 2252530840 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_64/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_64/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab93812c2ee6300074e8073dd958b83831e77119 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_64/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb275f066786de1d3f5107a5d1c6ec31a28600250dc15a430418d8e349d4481 +size 4997249144 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_64/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_64/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21211b91aec383c3ab54ae038ea421aa9349c12 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz128/global_step_64/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2f66bc38e7993e10f54b96e22ed378096e91972240a982cd85757fad48c16f +size 2216284952 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_128/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_128/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4603e85437f5324bfcb57a9c972f1571b69e330b --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_128/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0587e4ea083d45293aa0943d269af641172754e91483fbe9d6bdcceb606a95f0 +size 4992476984 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_128/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_128/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a09252f326dd554f4b249f0de12652239368c8dd --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_128/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d28f27403220ea1f9f9a840896460e980bb419218ee39786cbd69d4360d53582 +size 2221057024 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_192/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_192/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ade771b034d579250502014431642084a058670 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_192/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4a5b90697aa1afd8d8b2cbe79fbbf9c3bb99fe8764c0477bd7ab807d0df366 +size 2228853840 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_256/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_256/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff69df0e820bd3995988d7cc91cb4b9f0bef58d0 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_256/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ddf4535cc46f4a1acab39c5e6769b0f24072273fbdb4d53157e8c7a02763eb +size 4998813952 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_256/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_256/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b231e6461b26467b6ee8ff2b3f0f2f539270f8f1 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_256/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883a820351e9cd7b6a09a5333de132f976f456432e924285f871dcdeefa4bcbb +size 2214720056 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_320/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_320/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a1a7cca43e2e6359c44fa9b5f07379a8320ceac --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_320/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6ab2e192c1da5ac0fbeca098dbd7ad3ea3fa56d0364ae947a12a03cfe6e331 +size 4739306232 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_320/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_320/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc46157723ff2f324baa53d9d4e3a1564bb25589 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_320/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b064eecfc96cc009b84d390045e5360ba8151c915a311fe3d8c3f43ca9c3fe +size 2474227864 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_384/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_384/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2036282bb44e65f74d79e958a5ba22e6c36cdc38 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_384/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:621e982964b8ba24d884aad8ed859f5ad6e5401abf3fc23ad6c2122e8304a6a7 +size 2482065192 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_448/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_448/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43d9084a9099e5f390965ed9b6d9254f951a55c5 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_448/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d57d76cacc1892933b8f434d3b3b0550156602606929d790b11b4b342429c99 +size 4997268872 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_448/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_448/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eb68699af1254a3837bfa3f0363a67ba1f93097 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_448/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65072593e8d5a355ee74424fbac2bb13fd60fbd3487639f8738db01ad5e11a0 +size 2216265208 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_512/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_512/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..258aac15f74fb6aea3d6a4871779699a9e9a445f --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_512/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7570eb95cab18187a78d43b3c2f3687b32c0b9f209da5489a06df93fef4e5987 +size 2254013424 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_64/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_64/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..574ae4c5c4825698cd12a2a4544e9e66a0c897b2 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz16/global_step_64/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f3e65af73520d49ccf19c7856408fd9f212b86be8815597a140341451cf4e6 +size 2260311512 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_128/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_128/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57d1fbeb7301b07768e5478c0043a96c7f481e53 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_128/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b21908fbeb3b34ed05f0e080f24e4e3a502f1431f60d2241ab57dbaf114737 +size 4990933744 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_128/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_128/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..812f0694a844826ed890e117b5454f8e8acf18b1 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_128/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19808f447800b307637f0f1abdb23d68d2065716bd6b5ba61add3c9ae005ac58 +size 2222600336 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_192/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_192/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d2122d4d387af188bd743503f1aca6cb5414412 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_192/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a18fc26c712b07b0b2d5f550b80992a26adcaa35427a8f0a57412a34f8cec8e +size 2235158336 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_256/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_256/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5113752cbcbebdb1e7b4ed7c9da03ccd258e176b --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_256/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a414a0fe141d4c631092ca3e73d8dfd0285bfa8f6e562977cf04900773e927fb +size 2246203216 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_320/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_320/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6ec731f219fc7b89a2943900ab838192eaf8b4d --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_320/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354e56a7cdcae53a80ec45875c90a2e0fbe4376e15a8cef291ca84311df7aa3e +size 4990990328 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_320/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_320/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..872339e05d240fa3fea4f429bed404f721194a4b --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_320/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f0e6619f263249cf07a28ee039cdcb06c625707623e9caf7a78ef9265f0e5a +size 2222543760 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_384/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_384/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a72a8abea59f18aeda8c01058a6d422d2b371b7 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_384/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0425a410a4fd109cb5a4939f78958520fd71ca5a74d88ed8088e5bc4b219dd5 +size 2254020328 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_448/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_448/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bf0323ba143ef5c73e77522921107c18254d173 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_448/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607164a5989b014d0e7ed33d2fc72d039e9ed2c57f51558c559b5722f8e277d6 +size 4437365712 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_448/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_448/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a70fa18658a813dbf36f24e498119ebe35978af2 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_448/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40dbd26e674b49ff5536ff6ad098f2180a2e5adad2299558730cfaccee284ee6 +size 2776168376 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_512/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_512/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a77fc72db568a794290d54a5d77e8098bb3782e4 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_512/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4d4e0447930537792bbbfafb8dd9d7e36dff99d15ecf6c2cc70614bbf1c190 +size 4961096984 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_512/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_512/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..928efe1b101078c6278ac224d26dd030addc2c14 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_512/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4559f82ee6397e2cdd8ff9ce9944c3961294cda4f01bc594a3d83a13a449f4c +size 2252437064 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_64/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_64/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6b3d4112021b8a5cb5968826e8494ea41e04d4a --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_64/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a799fcb0ff41520c62fb8af42037200161181aa77be5c0b54e67988ef71adb0 +size 4954755776 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_64/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_64/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d96f98a12bf2d29fb22188beeffff1e3762e7cd --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz256/global_step_64/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72fccac8969b92c25e1b00cc7bf86727ab72d9b3037d21ba601d8b5d5d18cd57 +size 2258778264 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_128/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_128/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae0d81a917c69e3ba5f8dcd5549bb14d4cc7cfcc --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_128/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca341083c9b9706932e96c9ad4e234eb23e773223bcb230dac0050cd320aa15 +size 4986199584 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_128/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_128/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ab454e8d88cebc88b5e91cc18e890df2b5efb7d --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_128/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d512fbda85efb38d214545cbc0b550eb43dd31bda2d95b317b1821df4b42dee4 +size 2227334456 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_192/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_192/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31a99178729754f5254ed05769dc68d94e9503ec --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_192/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f206f4a2972f3f55f01d617594e39f8ad53697b6252d7d013122bf58c40b09 +size 4973648960 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_192/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_192/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18528d6b5157a3432640eb0bb9089d7b8c8dc3da --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_192/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1480ebb5c9ce9e1b3d7f7919e103a80f80e86e1802b0a7737eae99d4ea25903e +size 2239885096 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_256/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_256/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec13492d82a29d04b7ff3d0f72432ca213280b84 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_256/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6bb2bab74c92aa69890bb97232f95944b998a6a59944b8817e2871dfa9cb64 +size 4443586552 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_256/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_256/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d84d20004467e3ced7066d696a2d8db84cb6a9e --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_256/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ef9d7bf62d636afa5231162b09176e20b2515bae17b87da2c7314049ce1b6a +size 2769947536 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_320/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_320/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b90ae56d1bb792ecb597abfe961958f21022f45a --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_320/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c7b74af0eb909c48ed0b2ac7fed26d4be7cad02c9cb09c598125df0694d539 +size 4954729920 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_384/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_384/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80721e6b34a987dc6bb837d6d56542c5a0803776 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_384/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53900af5dc811130ebd09ef8ec8640370b576cbad8de3d2b3ad73a81bd8c4030 +size 2221023824 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_448/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_448/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a09a5f14652cadae41800890476b397a99dedf4 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_448/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89892e7f687492cc56613f7fb7151645abe583fecc65fc5bddc45453602b5dbf +size 4972108984 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_448/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_448/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20657bd2fb2f17243ea5bab63519746d06a5449b --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_448/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8682e79eb950b8f6182e1ceaf8d0688331fa02509eb08d62c56fa1b6b1991bf +size 2241425104 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_512/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_512/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e072bebaf938cf5b9bee7ab2d5e9c5851b2cdfa --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_512/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fafd581f2d477acc67f1b37c05f83d3e217acbdecf18780d32ca1cdd2a982064 +size 2461633008 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_64/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_64/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a3a703bf2f2bd9c4be17df9a64ef60aabfa6e94 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz32/global_step_64/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ab9076ab32bc3784711936049a1cb64689cb1cb487f925205d6f7a61eb44b3 +size 2235120192 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_128/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_128/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..131d51071326e32aecfdbb872ed73d4b896a2d6f --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_128/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f2c79b2fcab9841cde47a859553b7354951a708f5b3d6be1f6d072b27a413a +size 4380715712 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_128/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_128/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee29037479af8981cb03ebe1fe18006a85e9ae94 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_128/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13963ae879f3e321cb1fc6d8b80d3603c8dbe5d0df46f4962fef6295b00c614 +size 2832818384 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_192/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_192/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85fe5445e8075a24e7964a8fa83a5d180630ef09 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_192/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:397d9d5abe4845a3e945af71df4a059dc0a91a603227946c5590e803184f3c60 +size 4416901952 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_192/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_192/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b6b201a9a2bd6a4b707ee562fe7f63a98965bbf --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_192/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15745d2786ce56d90a216598087b1cff769615701b289f715d5d74ebc06d3070 +size 2796632072 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_256/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_256/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67788172b0553300680279a3e2ad244933015e22 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_256/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3e0728b19b058b7381091f10429eb05b98eab1d9c61e455daeb87584adda5c +size 4978394288 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_256/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_256/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4f5298dfbc0a4e844adac2341feb7b08cbb80b2 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_256/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a938e6d0434dc26885f62195152f740ff3cf857d537ce69ed35ce2a181f8f4a +size 2235139792 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_320/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_320/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5396b50ebb5a21ed51395327e697eebff23a48c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_320/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e13622f17c4ae487c1437554a32d2d5f40fa7aa99c68713c18f1a41cc37c6b8 +size 2216253136 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_384/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_384/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..984f440394debeb614fb89cb657bcb52ac7c61d9 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_384/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688910fe7619fc7fcb36c92bba5798d2fa6ac986dd4fc46f27f014360523343c +size 4959540248 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_384/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_384/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8ac0a2a265b0a62bb0f63584ff79ff0436e8f76 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_384/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e139d8f886fb25d53354d4dac8f02307912d2887ed8116ece22de85addcde05b +size 2253993832 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_448/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_448/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca3694122d85290dddd13a55015ed7803cbe76da --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_448/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7e203f63020a73fef05b0556d30c011c49d85549077aa5d7ff18a8737211c6 +size 4965849760 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_448/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_448/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb926cb2651fa175b49843696d264f8fa95e94c7 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_448/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:961fd4d164d16728c49477ddb59734598168cd1231291ff6f8d959c9705f0643 +size 2247684328 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_512/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_512/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a388955110e039e9659723a70004b4196ecfc97e --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_512/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f079316801647bfa94f8a64c05be4237f9b1359146f8c8315ad8ae68f7468881 +size 4961040792 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_512/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_512/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc086f69004ab4fbde6d4d05a26cbc55798acf40 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz512/global_step_512/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6875e3f5789d84d95245652912f5528278d586177a06eca26dc627cd3958f720 +size 2252493256 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_128/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_128/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b14231ae29648797534a20699bc60d0786981798 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_128/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf34983059c80ab40191f6ef9b14e314dc6b4e1ee57d96899ea09ca717b2043 +size 4986218976 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_128/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_128/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3cddf9e0550799d6d4be33e6a6b85a3dfb515eb --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_128/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd84f890e8007f974d75ada50ef2ab83fd5e145986f604acb6442a4dfddce960 +size 2227315072 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_192/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_192/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45ce6ec7500c6d96790ddfeb4c7a56400d2f7027 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_192/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edbfb7c2fca4a877bab533be3011d3259f505fe748504061e22482918a84143 +size 4412117584 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_192/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_192/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..465ee58c663ce8e513c89012908837b5cf5bd919 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_192/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e068337d4828215ba10683759072e8770f0c0c2753690bba2a1deb1a965e8bd7 +size 2801416496 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_256/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_256/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd6dec6b52ee996bc2e1ea59d08dc3511ab1a628 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_256/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7a8629e8fc664e87017867f6a46e9f53ba35211050300b9533da2096f33d3c +size 4781813800 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_256/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_256/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9228765338d43e3c7defa107ff28f3c996b60b7a --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_256/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d137fcb339b24f88796a098dbac813e4233d42c99cee899392888839d7363c +size 2431720224 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_320/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_320/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c885d4ba1b75703af186a24b90f34b533fd3f6c0 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_320/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1ba87f12d6cd748407aa86a45311c290594f3a4a4cea663791c7df4e3f16d5 +size 4305155744 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_320/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_320/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44a8d20ba506b3da3332ab6413534e5f6b491acd --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_320/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3228fa86e9cd3f102ddfb960073d60f7d36d798c13b90e8e02bc6cfff798cc5d +size 2908378344 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_384/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_384/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d49071ad0a68c70cfe892e16b4a68d6d77b6166b --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_384/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:236fcda9dc7e897cc312b3ccfa2a015e8f1162ce9c40e6c65fcce1123d8fe3d9 +size 2216271544 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_448/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_448/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca0d3bb40c2fdeeb9682dcf5d9126a651fbe9dc8 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_448/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c71d969d4a78ffe1a8943203515c353fd6b67f9006aa6f0a27de5e305c7165 +size 4998808272 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_448/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_448/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ce652320ed11ad7e0aed8fd8494a2fd93a37178 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_448/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0509f2bf3810daa2d6f15c9a087cae972abec9f71e0429ec4464b454676edf8d +size 2214725768 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_512/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_512/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..585c3b2db83d799b718d26e5f4df7210f9b4e8a5 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_512/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a67266e297a5cf94f4d34b3be164dd48bb6d0ec95cafb5eb2f4134e54f8b33 +size 4600886560 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_512/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_512/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b8ba1e06b130a466814ea811b581fec3207a073 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_512/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3c4a41865015861f2ec60a6a01d831c369437c02ac56485c82d498979b6730 +size 2612647528 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_64/actor_hf/model-00001-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_64/actor_hf/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af3723ff7e4b6247dad8d05246b498a8235a9509 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_64/actor_hf/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeeccff3b6d07bb6c928e5392b2b86c7dbc85380d6fe17f970ea43c54679807f +size 4953197376 diff --git a/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_64/actor_hf/model-00002-of-00002.safetensors b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_64/actor_hf/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b58ea30fd51a12d32959cf7557c05103ef4a764 --- /dev/null +++ b/Llama-3.2-3B-Instruct-polaris-GRPO--bsz64/global_step_64/actor_hf/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20c361ca30266e2ed27d323afc62071b37b3351c24f33de6bae7ee215f15bb57 +size 2260336704