diff --git a/.gitattributes b/.gitattributes index 74ccabb2153ae7935f848b27541650f3faf92f82..78cb204181e617b3737603337d470a86772ec452 100644 --- a/.gitattributes +++ b/.gitattributes @@ -66,3 +66,6 @@ output_grpo_dr_geo/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs - output_grpo_dr_geo/checkpoint-1100/tokenizer.json filter=lfs diff=lfs merge=lfs -text output_grpo_dr/checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text output_grpo_dr_geo/checkpoint-950/tokenizer.json filter=lfs diff=lfs merge=lfs -text +output_grpo_vcs/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +output_grpo_dr/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +output_grpo_dr/checkpoint-1100/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/output_grpo_dr/checkpoint-1000/model-00001-of-00002.safetensors b/output_grpo_dr/checkpoint-1000/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f45beff6842dd478db02a79d55df8a22e2bf4d4 --- /dev/null +++ b/output_grpo_dr/checkpoint-1000/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e194b502b35d8cf154572b65fd3c02644ea203f473bb07fa190727df0f4993c +size 4997750760 diff --git a/output_grpo_dr/checkpoint-1000/model-00002-of-00002.safetensors b/output_grpo_dr/checkpoint-1000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15c2527ab6c4440b1c85132482c7143e6f52338d --- /dev/null +++ b/output_grpo_dr/checkpoint-1000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd2606f79dfd46fd5aef49abcd0dabb5b56dad78d4dfc26bf4d6a1579dbd11a +size 4204299316 diff --git a/output_grpo_dr/checkpoint-1000/rng_state_0.pth b/output_grpo_dr/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..d5c5bd5afc86df02daa93be3d0dedcc48e825eea --- /dev/null +++ b/output_grpo_dr/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9b46cee676bdd2bf7522f0082670475aca22bccf15f6cef761b1a4b902cc54 +size 15429 diff --git a/output_grpo_dr/checkpoint-1100/model-00001-of-00002.safetensors b/output_grpo_dr/checkpoint-1100/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d036fba45ca0ac2df8713343f49325324b2daa7 --- /dev/null +++ b/output_grpo_dr/checkpoint-1100/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680cbc117738b7559a3df4afc3db6ab34b297c0657aaf0cb6bbe556b7869210c +size 4997750760 diff --git a/output_grpo_dr/checkpoint-1100/tokenizer.json b/output_grpo_dr/checkpoint-1100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6c06829626bf44f72f0bad94469cb071bd4a18ab --- /dev/null +++ b/output_grpo_dr/checkpoint-1100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214 +size 11422063 diff --git a/output_grpo_dr/checkpoint-1300/model-00002-of-00002.safetensors b/output_grpo_dr/checkpoint-1300/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b49192c24f52e6e93cf037df1c8a65d7dd123f0d --- /dev/null +++ b/output_grpo_dr/checkpoint-1300/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395d886210c9f759d7dfbd559d62b783e1497bfced612a2aa428693ece65d127 +size 4204299316 diff --git a/output_grpo_dr/checkpoint-1400/model-00001-of-00002.safetensors b/output_grpo_dr/checkpoint-1400/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e6a7288b85a21aefa2a6317449980d78ff509c9 --- /dev/null +++ b/output_grpo_dr/checkpoint-1400/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22900eb85b9ca4757de0b48ebca0f674cffcbd767a989e8e50cf5ab7c41a467e +size 4997750760 diff --git a/output_grpo_dr/checkpoint-1400/model-00002-of-00002.safetensors b/output_grpo_dr/checkpoint-1400/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fdf374afb0273c3554a7790cbf4af894a55833c --- /dev/null +++ b/output_grpo_dr/checkpoint-1400/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d886e1902b69d655a7eb636de9ae5746954a66707087036a83aa5ac8eaf629 +size 4204299316 diff --git a/output_grpo_dr/checkpoint-800/model-00001-of-00002.safetensors b/output_grpo_dr/checkpoint-800/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b5ad0163a821abb1b5da0d5d6f3bc86f6db6435 --- /dev/null +++ b/output_grpo_dr/checkpoint-800/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c42575cdc827b10e1c31541ab5dffdb003d98bcd6daafb550d5dbdfe7310f1 +size 4997750760 diff --git a/output_grpo_dr/checkpoint-800/model-00002-of-00002.safetensors b/output_grpo_dr/checkpoint-800/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b30b3c743e47492b059203044625dd634b2d052 --- /dev/null +++ b/output_grpo_dr/checkpoint-800/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad2221c6cb65b24aa642ff7241f9fb126919dcfed1e05b387f49477403483de +size 4204299316 diff --git a/output_grpo_dr/checkpoint-800/rng_state_3.pth b/output_grpo_dr/checkpoint-800/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7db2534bde9e50fe23f89944bd3b101413254ae --- /dev/null +++ b/output_grpo_dr/checkpoint-800/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b71359733974895f411b76eaaec3dabaf0dbbd6d3fd108072540478fe7fb2cb0 +size 15429 diff --git a/output_grpo_dr/checkpoint-900/model-00001-of-00002.safetensors b/output_grpo_dr/checkpoint-900/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52c81951ac4957154dfca7db7c5da3fea7abcd1d --- /dev/null +++ b/output_grpo_dr/checkpoint-900/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a25508ebbabb8ba17f52ba6526b137be22b258e8e757ae17ad0bd8766266908 +size 4997750760 diff --git a/output_grpo_dr/checkpoint-900/tokenizer.json b/output_grpo_dr/checkpoint-900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6c06829626bf44f72f0bad94469cb071bd4a18ab --- /dev/null +++ b/output_grpo_dr/checkpoint-900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214 +size 11422063 diff --git a/output_grpo_dr_3d/checkpoint-1000/model-00002-of-00002.safetensors b/output_grpo_dr_3d/checkpoint-1000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17b6f8fc94d60556e3604b19d17a9ebdfdd25cac --- /dev/null +++ b/output_grpo_dr_3d/checkpoint-1000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812b28d1f5ff46c84c9e976d30ec925fe01936115cea4a1b369715dc573351b4 +size 4204299316 diff --git a/output_grpo_dr_3d/checkpoint-1100/model-00002-of-00002.safetensors b/output_grpo_dr_3d/checkpoint-1100/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6129c967d36f18f0954d0f18c7df6d41e932648c --- /dev/null +++ b/output_grpo_dr_3d/checkpoint-1100/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f137a8dfd2f50c4a32764fd64b73f8f977f238351cf2b0b2fbc187d4126a08a +size 4204299316 diff --git a/output_grpo_dr_3d/checkpoint-1100/training_args.bin b/output_grpo_dr_3d/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b49e8af5ee0a126e23946dd4714808ddd1206038 --- /dev/null +++ b/output_grpo_dr_3d/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa311557cf9efa2f86bc63bb87a85e7f7bcfcd9f525399a1083327afb0d1234 +size 7185 diff --git a/output_grpo_dr_3d/checkpoint-1200/model-00002-of-00002.safetensors b/output_grpo_dr_3d/checkpoint-1200/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6de31f87fa633c4167a3fbecbe85b319853c797 --- /dev/null +++ b/output_grpo_dr_3d/checkpoint-1200/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db34d623762be96ee8a82648b2b67ea8d994102a68d1ed0eb0b4dd1a290c72f +size 4204299316 diff --git a/output_grpo_dr_3d/checkpoint-1300/model-00002-of-00002.safetensors b/output_grpo_dr_3d/checkpoint-1300/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f40dc8e68e08d0245719270e6ec1f2fb6f636ad --- /dev/null +++ b/output_grpo_dr_3d/checkpoint-1300/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d3f3a5710118f504e6e5c79157d5f48ac685f79d552c48a7472819daaa57a9 +size 4204299316 diff --git a/output_grpo_dr_3d/checkpoint-900/model-00002-of-00002.safetensors b/output_grpo_dr_3d/checkpoint-900/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b7aa59be41a0a79f8586f9c9af854230ec1c41d --- /dev/null +++ b/output_grpo_dr_3d/checkpoint-900/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21477957c2422b5d265e2feee010b20fbc70b2a0cc30ca7c4033540659a9dc21 +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-1000/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-1000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..069330bb85d11bbd2aa95a367f466258d3a76dec --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d036e0cc91b64b2023d4dad4664b511c95b63c2e26121f87d8155825948c7d3 +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-1050/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-1050/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..966733162f486ed4a73ac51b88c9c8f3402911f2 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4a0f99ddc9a15e67aefc0cda51633781e062e91f6c44b61f0a80cfdc6dde0a +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-1050/rng_state_0.pth b/output_grpo_dr_geo/checkpoint-1050/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..90932bb4f36cf5b1b342a4d1514ed068b236bdac --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b33507d8634b3f7dd88f30a5d4d39d09bdc5dbe6a33a0162004750328dc4dd +size 15429 diff --git a/output_grpo_dr_geo/checkpoint-1050/rng_state_1.pth b/output_grpo_dr_geo/checkpoint-1050/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d1fa789f384c1ce7c45cdee85c4c60877bfc889 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9577ef2adffc61e7bbde2be5f38d3a40a7ad9e1dfd4f5170a61b03c0c6f775c8 +size 15365 diff --git a/output_grpo_dr_geo/checkpoint-1050/rng_state_2.pth b/output_grpo_dr_geo/checkpoint-1050/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..2566076e015b2400b9c83dc911be96714a6e99e4 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7060a4eaa3a6c201a967338f576de35103d38ff7078322757e78fa8dbc71ec +size 15429 diff --git a/output_grpo_dr_geo/checkpoint-1050/rng_state_3.pth b/output_grpo_dr_geo/checkpoint-1050/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e41681f2227b5bb011325d1d6be238505c63043a --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2dd0592fada6c4df6c221ec8101e1c38b96b85e4f91a1fc9eb2df6d379f61a +size 15429 diff --git a/output_grpo_dr_geo/checkpoint-1050/scheduler.pt b/output_grpo_dr_geo/checkpoint-1050/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb225073f2b2375e275bed6bc88fdc3884b54f1c --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dce64762b314982b19e68f949b6f2a9c28cf6dd5c771b583b7098dbe7f964cdc +size 1465 diff --git a/output_grpo_dr_geo/checkpoint-1050/training_args.bin b/output_grpo_dr_geo/checkpoint-1050/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..85a2478b9c32fbf1ff02a7113c4289adaf987a3e --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1050/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47fe018114038aef2d75bbd35b63d2812436ef8380fbe46f60e29543723244e5 +size 7185 diff --git a/output_grpo_dr_geo/checkpoint-1100/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-1100/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a03c13fa447a868203ced647dc342c35645ce35 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1100/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6d71293b9d90598277a4a15271e91e1d7d25626821e6793c19dfe00f688e74 +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-1150/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-1150/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..479023da525d0b77699f15266d6f187ec8cdef67 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1150/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4746fa9302233585f13b9990ff7fb61545d72b7b38369d44462cbd9f0e0d68 +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-1200/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-1200/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..206db4d004be33737302112832a58fad533ef57c --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1200/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfb202fdac7faaf2043ccec84d2746ad759b10bbc78f3bf0425bef90d67fdf6 +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-1200/rng_state_2.pth b/output_grpo_dr_geo/checkpoint-1200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a693b7e16d952be0ddf1f6fa3eb9e1a43ac1550a --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d2b4dfff288e8a303da4165a47d8cc83ece40dbcdcef9ff6252e508b663f7a +size 15429 diff --git a/output_grpo_dr_geo/checkpoint-1200/rng_state_3.pth b/output_grpo_dr_geo/checkpoint-1200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..03714b99f4d325d4e181bb61c0f9f5bebb9e8edc --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08845f606ae0bdcc61c8535761d7ef0f71600926136c75502716429ee926c6bb +size 15429 diff --git a/output_grpo_dr_geo/checkpoint-1200/training_args.bin b/output_grpo_dr_geo/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..85a2478b9c32fbf1ff02a7113c4289adaf987a3e --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47fe018114038aef2d75bbd35b63d2812436ef8380fbe46f60e29543723244e5 +size 7185 diff --git a/output_grpo_dr_geo/checkpoint-800/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-800/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f9487da618d35b59d254b845bf1d09331535e82 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-800/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6a6121d7f94fd179f539752b5134d3fb44f541cc2950c0f8c332bd47141631 +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-850/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-850/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e2a738c8b78249e8b948af5ac48279df4aca879 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-850/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8c60a3f8d4a5f4e3259d5341931432141e325ece5a23416560d4cb1937372e +size 4204299316 diff --git a/output_grpo_dr_geo/checkpoint-900/model-00001-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-900/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48b04b18790e962a38059801aa9952dd303a529b --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-900/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd9d26bde97931d33635203af820ebdf14564deae71810c80045f626463ba57 +size 4997750760 diff --git a/output_grpo_dr_geo/checkpoint-900/model-00002-of-00002.safetensors b/output_grpo_dr_geo/checkpoint-900/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85548cb73511725b8d101f87f864d6e963a64f24 --- /dev/null +++ b/output_grpo_dr_geo/checkpoint-900/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312cf7ac29731fe7cbf36cc1ee0c76c9273291efc3838869d28ff743a357be49 +size 4204299316 diff --git a/output_grpo_v3/checkpoint-1000/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-1000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5fdc77274a89e94ff8928f907f46305d5b490d1 --- /dev/null +++ b/output_grpo_v3/checkpoint-1000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2eb0db124deee70474117a2f527a4833c67e526569a0f11da09ec966aec22b9 +size 4204299316 diff --git a/output_grpo_v3/checkpoint-1100/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-1100/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb454211e9c1a0304edb62062415fbbd54e121d4 --- /dev/null +++ b/output_grpo_v3/checkpoint-1100/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da80d04212f507a88738d8b9b25a044646e0544101f30133c368c580b7c8722b +size 4204299316 diff --git a/output_grpo_v3/checkpoint-1200/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-1200/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..631382a72591f812dba9f0c12178966fe28fad65 --- /dev/null +++ b/output_grpo_v3/checkpoint-1200/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10eac37087c1719383bf8b2f57ca8b8476964f381f65340750cf6ab3658ab958 +size 4204299316 diff --git a/output_grpo_v3/checkpoint-600/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-600/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a2155fad191579db972642c6e41fa7e03adedd5 --- /dev/null +++ b/output_grpo_v3/checkpoint-600/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae078dfb65f3aeb16985c61a7bbb6df0bc9a76a16c19e9f0e635a978ebc3931 +size 4204299316 diff --git a/output_grpo_v3/checkpoint-700/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-700/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55118f83fc7685bcdc9bf7c3991e2cc872e65e8f --- /dev/null +++ b/output_grpo_v3/checkpoint-700/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5584681f78f300e67ded9397f69982da0d6e2e43c8c042848232c0215d179767 +size 4204299316 diff --git a/output_grpo_v3/checkpoint-800/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-800/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20147e2c1c809f7d695cc500df12127d18ac53a3 --- /dev/null +++ b/output_grpo_v3/checkpoint-800/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddcca8cb180b508536f7be7435b94b832eb04b5d0c75fde82fddb1320f68a0f8 +size 4204299316 diff --git a/output_grpo_v3/checkpoint-900/model-00002-of-00002.safetensors b/output_grpo_v3/checkpoint-900/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c46bac4824b3b8a620346e7895b0089fbe37142 --- /dev/null +++ b/output_grpo_v3/checkpoint-900/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e161cc76b723d1174d702cbc696c8e62925bde2a0678300e5a11c26b77a1702 +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-1000/model-00002-of-00002.safetensors b/output_grpo_vcs/checkpoint-1000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05a5c758cd790492a141c6deb06ac401020bc0b5 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c305b272613431249456c001a923ac1c8c2c6a5fb2bc0faaca8e2bddc2acbf35 +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-1000/rng_state_0.pth b/output_grpo_vcs/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5855cb8a5623713b3a60856aa46ccc37df5e82d3 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1facbdecf0f3bd3f1acdda7dc42ad63975720c125c1b60cd38c940926a7fe50 +size 15429 diff --git a/output_grpo_vcs/checkpoint-1000/rng_state_1.pth b/output_grpo_vcs/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff28b8d759376374bc0dd554618b8e427d36e124 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c449e9aabba782fbfedfdef9a6e898ec404173ae53a600626e2f38ff22de96 +size 15365 diff --git a/output_grpo_vcs/checkpoint-1000/rng_state_2.pth b/output_grpo_vcs/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d9e6c0c16ef74a050c6b8e0d6ac71338211d4a1 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60aa7dfeed0ea8fbad3bf9afcf8e9f0380633a9b0a92a7145642379499eecd27 +size 15429 diff --git a/output_grpo_vcs/checkpoint-1000/rng_state_3.pth b/output_grpo_vcs/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a515f0fa65680e5487b106d8daf47ed3a8dbf94 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e8f9e9f0c6afbf5a50507f45caa36792331c4821a4481a7b0f558f4a6761d6 +size 15429 diff --git a/output_grpo_vcs/checkpoint-1000/scheduler.pt b/output_grpo_vcs/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5659fd2f43e6d843a643199c5743cccf0eba483f --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0fff554d7e8f71bdefe32e1ea2f3f758997db68d05f0a06742a57e43c6fd62 +size 1465 diff --git a/output_grpo_vcs/checkpoint-1000/training_args.bin b/output_grpo_vcs/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a28ae0615ed3c1a5b22c6c1dc542a24506fe60bc --- /dev/null +++ b/output_grpo_vcs/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a2c2f057924de9e2ad0dd99b335e7ef923d2eddf1f7145d6292704ac8049a0 +size 7185 diff --git a/output_grpo_vcs/checkpoint-1100/model-00002-of-00002.safetensors b/output_grpo_vcs/checkpoint-1100/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f048d0306234b015558ce73d62ccd40b4680317 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1100/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf16564a2d638e7eae56134c3c667e28bcd5f1c03fee88c358888305ffe6f26a +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-1100/rng_state_2.pth b/output_grpo_vcs/checkpoint-1100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..43ba554474b233ee9ebff21890cfd8bcce91726e --- /dev/null +++ b/output_grpo_vcs/checkpoint-1100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794dba5bdb8a4126c99ac58a13d7523643b0f2b8fcd973408dac0a7e136ddbec +size 15429 diff --git a/output_grpo_vcs/checkpoint-1100/rng_state_3.pth b/output_grpo_vcs/checkpoint-1100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d1c2b04252ba8a6eec36421a5524af7016482ab --- /dev/null +++ b/output_grpo_vcs/checkpoint-1100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69eab728f7c017504d2d96875db022ccf74e10f03d9d441ad7c9c423d1d2903d +size 15429 diff --git a/output_grpo_vcs/checkpoint-1100/scheduler.pt b/output_grpo_vcs/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db0b9aa5f46b22419203830aa0ab1f7ad8dfab86 --- /dev/null +++ b/output_grpo_vcs/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b6f71acfa7e43eb5c67a42ef4e74c5ec851d7c63bb7ca960fe319d36506415 +size 1465 diff --git a/output_grpo_vcs/checkpoint-600/model-00002-of-00002.safetensors b/output_grpo_vcs/checkpoint-600/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fae2f8d5d4c71a1977fe17d1154f7128888745dc --- /dev/null +++ b/output_grpo_vcs/checkpoint-600/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6b1f641c6180248350c3a15b253b1b2feed4ec1b849920fc4a5db923969a1c +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-600/rng_state_3.pth b/output_grpo_vcs/checkpoint-600/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..996a4648f91fbe173b4737da4fbc63c42b0bf79f --- /dev/null +++ b/output_grpo_vcs/checkpoint-600/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96aa6f16f19aa928873d992712513b2c11d730df5480251865daa9457f0cfe3 +size 15429 diff --git a/output_grpo_vcs/checkpoint-600/scheduler.pt b/output_grpo_vcs/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fe45f5bfb9ecb2ff23aa18266ea8b10c0577d13 --- /dev/null +++ b/output_grpo_vcs/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57c9beaf0f051bc807bbe5a3450ed44164252fdec96d18ac7e1c2a4d5692203 +size 1465 diff --git a/output_grpo_vcs/checkpoint-700/model-00002-of-00002.safetensors b/output_grpo_vcs/checkpoint-700/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79a2eaf85dada367d36e6af998260783b7b77f5e --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9649451dc94cec56beaf493e9946e4a3243fe2633e8586c691c5ddb417d6dd1e +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-700/rng_state_0.pth b/output_grpo_vcs/checkpoint-700/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0326d5a1f76480e5d082bb0eb29ef35e552ae819 --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6bd41662080d9c4b78eb02341746c672e375268839fc5747ab4281023d5453 +size 15429 diff --git a/output_grpo_vcs/checkpoint-700/rng_state_1.pth b/output_grpo_vcs/checkpoint-700/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..09e8b658970a2e8ea75ca21e1bacaf11b63af854 --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e503634951ff1ae71480362e24c3cdefc2da36ae89e549ad96949ff1e51b4907 +size 15365 diff --git a/output_grpo_vcs/checkpoint-700/rng_state_2.pth b/output_grpo_vcs/checkpoint-700/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc493cbfdaa2d6537219082b7369c629a1f4551f --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd648465ca7f41d6dd3ddd9fae07ce5d3bb672705dc6c586bff5239a4c6b63cd +size 15429 diff --git a/output_grpo_vcs/checkpoint-700/rng_state_3.pth b/output_grpo_vcs/checkpoint-700/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc7005f7d24631f9b3aada133f9d7d7984ebc04d --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a8f204d144692c787c69e973de5d390dae9b2f069f84aa1a3d2c93f1ee3483 +size 15429 diff --git a/output_grpo_vcs/checkpoint-700/scheduler.pt b/output_grpo_vcs/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85283fa0ba79a32ae2aec74c71ef80cc7d4d21bc --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e314f6e05efbbc54ef33f2679e87f58a2e487f0b6d2680b51306ae2cbea04d +size 1465 diff --git a/output_grpo_vcs/checkpoint-700/training_args.bin b/output_grpo_vcs/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a28ae0615ed3c1a5b22c6c1dc542a24506fe60bc --- /dev/null +++ b/output_grpo_vcs/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a2c2f057924de9e2ad0dd99b335e7ef923d2eddf1f7145d6292704ac8049a0 +size 7185 diff --git a/output_grpo_vcs/checkpoint-800/model-00001-of-00002.safetensors b/output_grpo_vcs/checkpoint-800/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e27fa269740a855f42eb3554e5b4339269b447b0 --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa04b173c548c7efb210335e18f4b11138aa3967cb90e62927ff4a1f6b43d690 +size 4997750760 diff --git a/output_grpo_vcs/checkpoint-800/model-00002-of-00002.safetensors b/output_grpo_vcs/checkpoint-800/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b357016dfbfc8380374745d605d0feeb80c896df --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef0115090efc9883c89c275924333ca7d03c2791d57de52de93be61c0cba0cb5 +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-800/rng_state_0.pth b/output_grpo_vcs/checkpoint-800/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..81a1dc1adb4a43f5067259da3ead6b21531a6871 --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d41e32b39b0d03acb5ab850cc2e5e773d4880e047ceb5ad82fb2c64cd26e75 +size 15429 diff --git a/output_grpo_vcs/checkpoint-800/rng_state_1.pth b/output_grpo_vcs/checkpoint-800/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..68947da8655093d40341fdd96e9b6d39d1e36335 --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690217ca8d45a93402e69a9baf41f35e9d1b6cde5b7560df1b41b70a6683b015 +size 15365 diff --git a/output_grpo_vcs/checkpoint-800/rng_state_2.pth b/output_grpo_vcs/checkpoint-800/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..19db67bc04a3472574238ff12bdd7ff8ef091cc4 --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2bcdd6fc5e56c309583e27c374bbc4f78ccbd4e93c2810d0af704aad15f6cb +size 15429 diff --git a/output_grpo_vcs/checkpoint-800/rng_state_3.pth b/output_grpo_vcs/checkpoint-800/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ada884c4bd650612757e86369f8d8723a662dc8 --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff83ddaca2ecd582c7cf15d9b554da98dc47d9df74a67ae09d8dcc2e3f187b48 +size 15429 diff --git a/output_grpo_vcs/checkpoint-800/tokenizer.json b/output_grpo_vcs/checkpoint-800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6c06829626bf44f72f0bad94469cb071bd4a18ab --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214 +size 11422063 diff --git a/output_grpo_vcs/checkpoint-800/training_args.bin b/output_grpo_vcs/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a28ae0615ed3c1a5b22c6c1dc542a24506fe60bc --- /dev/null +++ b/output_grpo_vcs/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a2c2f057924de9e2ad0dd99b335e7ef923d2eddf1f7145d6292704ac8049a0 +size 7185 diff --git a/output_grpo_vcs/checkpoint-900/model-00002-of-00002.safetensors b/output_grpo_vcs/checkpoint-900/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d827ca71a46afa18c94eaf61c9db71e38c441a0 --- /dev/null +++ b/output_grpo_vcs/checkpoint-900/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d22293842739b1473e17bcb957f0ba1b2967d94a7d1e0eeec441b30a4b5c62 +size 4204299316 diff --git a/output_grpo_vcs/checkpoint-900/rng_state_2.pth b/output_grpo_vcs/checkpoint-900/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..4bc2d610bc90213f22c96fd8e7e2e00b845deb16 --- /dev/null +++ b/output_grpo_vcs/checkpoint-900/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b5d2fb6fc62bf940922d7edbd8a4b9b8af51ef59aaab56343f80602a18244a +size 15429