diff --git a/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..201d56f3aceb7c3d23a2a917d6d97bbb4b90d1a9 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644c597bdb095cf1da3bd28aaa537ff55d2793b1d7708b5f2a81817f1f0a791b +size 61940613 diff --git a/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3869136e585cc9c3479b66434eabd0da29c429e --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ffa62b6cc424458ab9fac8ce9368c1e9a662074a816e92589ba67e4c59ead87 +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6b49ed351b8a08169200dc9e6e2a5c3446163ed --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-128/global_step128/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0cf2ff5c54226f6bff7194b1d9d4ebea31b5b867ea2bfbdf08ee342a1b2769b +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-192/global_step192/mp_rank_00_model_states.pt b/dapo_lorafa_20251201_161746/checkpoint-192/global_step192/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e455f6edcc8a2e0ec5fb0b45da3d1f0f363301d1 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-192/global_step192/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd25a2a8145904fd350fd94ac1547dd288a6c28fa91a1c56ae6d5cc8d89255fa +size 41536409 diff --git a/dapo_lorafa_20251201_161746/checkpoint-256/global_step256/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-256/global_step256/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de3f01d24f3162a9a1d7a6ff20b0cf1214d2fe63 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-256/global_step256/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe3c2fbbfeb574c1e0dfcab123d27ec10cab547cbafe198c3d212def07885b1 +size 61940613 diff --git a/dapo_lorafa_20251201_161746/checkpoint-256/global_step256/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-256/global_step256/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfa7fc0092b373f7ea9697ead7fb17db652f44b8 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-256/global_step256/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0898a6e6e9751e35bbe2c443375e95dd2211881b0ccd832ad823ec093ec33b4 +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e416c42b63c9264a7bf130e18d61d4b0bd9d3d1b --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c038598acda61e803af02952d04431e91e524348000ce86f93cb5653997067b4 +size 61940613 diff --git a/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9afe7136c66acfc3ac88530c5962e15d95c4515d --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9739bc4e031debe110e4afb42eeba1eaaf3f3769aeb9798414aea70b090ce85 +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c908317facec6e4a86adda879cc9c8896c528abf --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3dc89373efd09c9b52e3221bb987d4d0b3a7526a5f80de91b14671e075c6bb +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc5972148ca9c57404aa8d839ca41da217f35092 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1fa333189ecf1d829491a73f674ac6aa41ad441715be2fda0f9e90e7f73d0ac +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/mp_rank_00_model_states.pt b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..467025536070464031388f903d7142d6bf4f793b --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-320/global_step320/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607b7aaf7336372f52272213c56de86e72ef5e8a52c7da36873743c1237c6a8d +size 41536409 diff --git a/dapo_lorafa_20251201_161746/checkpoint-384/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-384/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cc60df9fbc471a5f249ea8cda224240c7662ea0 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-384/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544a69b05b5513348f7b79c745d80143c1914853e6104e54e9f028675160d93c +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-448/global_step448/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-448/global_step448/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b8af234b9d1997c3b393ca6846a4ce0d201084d --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-448/global_step448/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dca9fbe7dab0939e87e53ce32c42fe5cf1a37f9c77e362477d865ea334db334 +size 61940613 diff --git a/dapo_lorafa_20251201_161746/checkpoint-448/global_step448/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-448/global_step448/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6786968e165844be4c2fe760eff8ddf3ea806612 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-448/global_step448/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762243806dd250eaf9ed2776de6bf63fff51ec19fd5f4ea2665db71ef0a92973 +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-512/training_args.bin b/dapo_lorafa_20251201_161746/checkpoint-512/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63c3421a8b566700bf579035beda17ca249df912 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-512/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd9fc68413aadb7b7d40e9384c483b1d5cb1998f40011e96c2faa43e1188d7d +size 8849 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..83171d9cdee7de621e1217e889c8751bb0653796 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842cb622373141e36101208d29570a85731a43f8c86eea1490306f9a02be3a59 +size 61940613 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd704d54b4db40749b40f1e895f1558fc1464b5d --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a6d9d13c0d44ee93616a5d29d34cc12ea1eb5dd3753f2d2d67f63d8747ccb2 +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..56732154fc0465fd4aa40767ac9030c8311cdbce --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25327ad49f3bb3a647b000c39d08133eb3f988d6363c4e3fe77009493b1d3ad +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5493b33a147eb5a65d2eae19429cd0ad261245b8 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4954fb6182b66c8222e9a8e452246fc17bd71b41ef34948fbd5a0ea41c57aba3 +size 61940677 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/mp_rank_00_model_states.pt b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2121b877aed22bba97987bdbc0dbd3c29442839 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/global_step64/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b236c094fccd9ddb6196696654112810d40ed30b955d89d013afde7c0dceaf7 +size 41536409 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_0.pth b/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..1713785f6dcc2202d366df56ef33eaf5108517de --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c879a31e95f5fb10cfe254b014faa195b7cce8e28cb2ae94f68747fa68256a08 +size 15365 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_1.pth b/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..89b9319ebb07f02461cfd2f97a9f2b63f295137d --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c91685725653d5a0dffbadb4de3bd61c84b52f2c23528f04899f4fd27a5bcb4 +size 15429 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_2.pth b/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7cb23b99b8317fd01cf6b12e608555ebfb2eef85 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5c383f8c50a8c3072f75c54b22a026406119d738a9e0edbe017d416f4497377 +size 15429 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/scheduler.pt b/dapo_lorafa_20251201_161746/checkpoint-64/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f18e6fbca95d9546e2f67129f5c8f7ba7da9e16 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cefd10607223128e502b13dda034a13939769cee1c2b5c47751dc1ef0425c6 +size 1465 diff --git a/dapo_lorafa_20251201_161746/checkpoint-64/training_args.bin b/dapo_lorafa_20251201_161746/checkpoint-64/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63c3421a8b566700bf579035beda17ca249df912 --- /dev/null +++ b/dapo_lorafa_20251201_161746/checkpoint-64/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd9fc68413aadb7b7d40e9384c483b1d5cb1998f40011e96c2faa43e1188d7d +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b9f143aa8812e00284a09ccd9f04f43e57c6ede --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ce972deebcc1fcf83d45c216e83fe4088883444d0bc2879ec50e74caa436ff +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..346329cd43e8437fff1ae59c65f90b481893b65d --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e62be9ae2368ac79798de1209e31a0495edf554a6ada9c5c68f2ee240270f1 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf7d8b1557ac5cb5608aad6d7c5db4b69add5fe4 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af659c175b349d654c1950d520efa914ea02c0036a80543f80076ee2aacd6e67 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..0109cda15dd9d48df907a8c06f249273d44f81db --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-1024/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59aeb8946272021b115bb1dc86f64f42a69ac1f6904c762bf64c25d10bad5fdf +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-1024/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-1024/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..99d91d84ee7ce6fc15f8af630d20f12aec7f58eb --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-1024/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a000cffeb82f679c5eea33f850666a8c70d17499ffc7879cf41417a213f90c +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-1024/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-1024/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-1024/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/global_step128/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-128/global_step128/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..196c9aecadcb6a73b8b565426b51574f18a19609 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/global_step128/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bd2ecd4320299a159f8fb03cbf7645a624c32253c34b8dd2b0415ec0b1c98b3 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/global_step128/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-128/global_step128/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..256b2a3df6dd259a0d7cbb1f3aab7790cbe0109e --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/global_step128/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b52659b45e9e2a8cbe33d073952d14d7a39f72c4acfc05ccd85bd3a9d9804a +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4303609e4433a063e39bc09faf555b5b4db3fac4 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c152c773d7cce6dab2a6bcbd3b61c5737c24d92165fe4e1afe2ee264eaecd27d +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..15150cb1ff1c48e32281859479d26b171c17fdca --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c4885065d5e5a9fe4a12eb87bc1a477d776f09ccfc9b0923753be5a214e8a5 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f9cce4a842c2c91a7f056acbd4a1b57b67ba6f21 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d1549d9398e2f9cc306ab8c18682c49f7cf7758ffe5675f2b5959ef8d29e74 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf4905477cf94aafc7d8d20909725c7adec7522b --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983eff2b4a6b63e222db38e1b1ec608aca2c27a40f27bee0cbc1723448c6830a +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-128/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d08ae64295b57090bbaa5d6676253533d838873 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a72d1751ecd37a23ecd20bd8b9260f394c2f370cb5db8d203bad7d01395f4fb8 +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-128/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-128/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-128/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/global_step192/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-192/global_step192/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..890cad3b62704be4176c6ab9a2ca2e3d08f022b6 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/global_step192/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6d8eeb20b6dc65cc8752859ac7942aedeea9ee11f2b80ead44992108ec7f7e +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/global_step192/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-192/global_step192/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e09bb447db85729d59b10e54cd6f77e7c8033c20 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/global_step192/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3b45579570e7446c2219eb7956a583f0612667f676367b63a6f4335d2ce629 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..b5a85d50da485eef380c708e156be7a5bfe19548 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533290285549bc4b79c7386f8f3f22ebbb67f4cfda309db2f130eb26c00223a8 +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..171b353d957e493ef0dd146c99f772bfe62e5076 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73f6a2233ad6f9f64746c0c7acef2a7a686a7f7ef48de915f1823cd75401de2 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..665fb9e1d6d1a25f1da15c7a5342ba54268ad258 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3eadb7d22af3b854b4156dbf8ae28ae5f0c239cf1cb75462aebfa45496d17b +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..9730cbbfa8eb0bcb71c8acb021057bbd21ff0d34 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100971cef5b4912667681b8e98d32518272123a8f35b4c3c8061c24cfc49f442 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-192/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb4e8900ca74d271df161df3e78c5e3163d3ca2c --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac63e93328a0491bc31c5b67747712626467b0c1fab68bd8f3a90d2ffc2d318 +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-192/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-192/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-192/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-256/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2123150181fb9471d3ba894e1dd56be2bcf14e0c --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5244cb998711a9363c559f98cba2381af9c8b9ef76514d373b9917f9ddc29971 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/global_step256/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-256/global_step256/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d32f811f50d62df278ee4b51e9579afdaeaead6 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/global_step256/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e401ee12b85180dac0d21a1752ea9293528e8bbeceaae50ab6c04266402f73ab +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/global_step256/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-256/global_step256/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..471dfc135414334d84420d6e47498b09d89514e1 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/global_step256/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7815373fb918fe3bc9daab1c675a5324bafb12b4a5f75324977f70f546bdc7 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..001e1b1de71c5377014ba77a854d5e13a54b3c81 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f979da5fdeb49b5b29856741aa9e99f5cf97639ec5b5086480e21534d782c1 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b7191cbcab946808c68a4f6829415c8eed4bbc9 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f0787453ce01c42b4334b41c87d6adadda80ba19d7da19df4d2f27d57dd488 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..06d064b97d52894337ae73ca4d2f99f68aff371a --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0bba91b1acd4d1049cb08a60c8113b61a5b01c09d0b8af9c7d3a0023ef93b2 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-256/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5da175406e5951634027623c46687436be96a222 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8495f527d2ae384d255f961a1b604892e1f5459d17e99146a7b64167ecb9365 +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-256/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-256/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-256/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd12993ce27849df8e279d410f3ab34ed1fe68ed --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db528ceefbfab66023a6a76411fc652221bab3e217f2739f3db58809e519627 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/global_step320/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-320/global_step320/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5271d6f8fce1ae4a250577ddc6972c4d65b014b --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/global_step320/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0934dc8d39d877a8f115f0ded354dbfbce8c4ba984431181c0a2f848cec16e25 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/global_step320/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-320/global_step320/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a47ee9bb5152a402e967c805b441de240553375 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/global_step320/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5d82ebf7619a629a0e2900d67a178a4033ad4f45315c9c783dde06211052c4 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..57b1e7d6fa22cfcb35b667722627e269a4446a26 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edce0585644b488ae83cce9b9006846bb319dadddd29caa73d0afe798ef1361b +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..53d147ea6db28247c7ef081b64c01b336efc034d --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3934c395cd23510bd93c9600c511c062649b6258ecde470c4eaf90f3af134a39 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7cdc2fd1256465dcc75f239623150d87680f9328 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2272f4076914090faaa6cf986eb4a515171abecaff6e958e466936a2c4547dbe +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..626d730a761cd0a0a635172902b6ba32659540e7 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86700b544db143a803a4ce2da4430de8747f82447d8b450546f88a7a4fd2e23b +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-320/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-320/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b635ce1666c50e816d388484f117d6c351b92d3 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-320/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da013ae0b2b98d7666c2dc2fe7c1c09f26c72a818d58bd9b5b49a1d1f432bd5a +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-384/global_step384/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-384/global_step384/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5a38ae5a1a05852409d183fe95df706b279e695 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-384/global_step384/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6b4dbb51b122e884693c44393b97ac1d64b21997276307455a766598ca9369 +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6bc6a0fccd870c56977ae1b6b09c7d151cae9b2c --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26296607158a59acf09edeb6e005bd809cf2d23faf3e0f21f713d1e21b5a5e4d +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..03ad8b3775d9adfa6aeed951b0c8fbe426d6d39e --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94cfe7e2c9d9f6bdb39f223e9ece78c3cfd0eb71a4cfb49b88eef83b95e31e17 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..768019fac45f703e7551e0f233029e5044318ae6 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-384/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce1f8ad3e77e9f9873169a4bb816683e426618b31aaf9fbd4d0b4e854a41f43 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-384/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-384/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e35447364346dd4e48ff2912c9630fa0d550dd81 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-384/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a73b3132edc693a316dd18b0560b36c0fb0501d1030453a31a1309122a6bf7c +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-384/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-384/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-384/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-448/global_step448/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-448/global_step448/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..600cdda59a316a374ba390ba0dd590a3050a9a19 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-448/global_step448/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d1d8a67f89226a056b86f25695f7cdcb9c1fb3d08ae41616a508c5b34acbc2 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a64676b4f58e0f5673cba8eb515e53e698c06cc --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30fefba789e9e963bcddf104a5726a9691c1ae35cf6f6d3d1d9768fb8cc65a2 +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3462400788b392f06bee1eddb2bb54f5a02ee484 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7201144c610812fb91fe241d9e5313bb64aa9726472282d254f52f892685e0c3 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6ee106a586eb0801efcf4a3420014db8bd6d746 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:234ab00d34760864e9b21f193c782284cf0294f91da6dceeb12dc44a4ccf7df6 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d7aa58eb2ec588961cc54d35e2ad90e5d5a3c83 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-448/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c985f2ffcff667a004f8f59bb3f90379fd4a3d2a634436014c0521cfa849de25 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-448/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-448/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a39d5aae7a5de28bbf7c8e26531da9f7f0cda3e --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-448/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d50e6c2130ded251cbd3fb239e3a2e1e0564766292c69bfb4aa6ba0d973844 +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-512/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-512/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afeb9afd85e3eb49300a32e630f0f5abab112917 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-512/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31129e5d19a2b17dd9f8b34b5f557ef99e15eab98818aabf0d643d529c0d7542 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0d373732f45cb373200f9a3e4e2da5e5941965d --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a79aea5d854a9b908edf7261e45eeb0749d644df5f252900fab3965afa2c526 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..111e7fe999b6ffce6385b4ff0c64dca1a4fa8609 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f58283878a807cb6f717368c425739ce25562a3d4aaa431ae08bcf3d56b6abd +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5a9dd22923bee8ad55c2dfe8a34ae3d7887474f --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da71e683f3b50289262b0468d6c65e616fb0fd2dfa586a9feb6a22ecef2300b +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08f41008297d0b412abc43fd1ee2fb6f0d517deb --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-512/global_step512/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7928edbd7c6373c177c1e01c3dcdae0e81f286ca080848465ea90f163939beb +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-512/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-512/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-512/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-576/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-576/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf60551c9f621cdd4ad335dcf51f25612d8c7b8f --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-576/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad8cce3d0c1ec62574109778b86d03e9239ef07b3c4f85b1f5703089c2ed8f19 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95b56b1bb816263c86338d3bd2851a8f238faea4 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fffb8392bd826e3259ed11eb550251b12b1c48cb0c81a4692e04a74a0ec81dda +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..83a294daf4d71cfc35978a1f29855282f6db1321 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d117cb4cccb741729b866824e996a43f98f08c586d6e4e7c2247387380840ef0 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e75b2f6c7dad715f3f2c09f44d71677f2cd70338 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2353d1521f9c46831d4d50442a79b3705d91c266e603c5388fe15389cc41f9d5 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed040a05b76762fc16417c5d26519d2a79018646 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-576/global_step576/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bcfe6313a081fabfdc95699e1fab710d01395908448010924f11cdb13aacea0 +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-64/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-64/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15d38d99a238c15e467ab82444c06fb0697c558a --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-64/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08ae14e792984894263c25f0f140e5d405c976274a93237dfaeb2d3f105fd86 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90b20d0a3940c35624c0e47c6edf00a650ae5ce9 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808b56545734339c192c8bd66712fac777176512f46dfc98b97f8f315af5fb2c +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8db0ee4c0880e641e3918d5a23fb47035917ca3a --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad33620bb480786318e2dbb22298890badcabe75f51248d3be11f611c4591526 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93731203ec20564bac5a60008ed55b43e4cb8d3b --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de0cc308b29d33a5c05eeb8e9cec3687428dcbbdeedb43483961ac4b9d441f8 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89aab8f64edc7d2a105cd06d35ebfcc244e7efc4 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3bdaac237af275ea97250a1226d5d9f2c48d6f57116e6820ea3228e52c6eb0e +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ee887b3339410c6c550eb00a4f200abdb9c49b9 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-64/global_step64/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a416a3363c404f326f3c0164818320bd4b87e13b69031c6b1049b6e4ec72620 +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-640/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-640/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e49700271dd32ddf759b0257ed86b9cc84910e5 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-640/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b225e7f680d6fbeae5e77776c70fee4053fe9f46873301f8afd000efc48248d +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb8836f1a999a64ab72f0491b071de7f45d1721b --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4691702c43368ff5f106d6b2d92b7131f65c12b3f55847a5344748ca4f96e62 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ca9ee5104dcc46f5abe755041e5b10b9ed942cc --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd61c6174961deb70ff4133b62185ad7127280ad3b9a026726794dd1a35bbe9 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6ee2abc2b9749a63b600dd8d490e5a26f8c1ed8 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-640/global_step640/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43020a1d19ae1e87a15e0c61fe7ae637589137b37323d712445ad1f6d89b96a1 +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-704/global_step704/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-704/global_step704/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..37d76fe7a59bc01bdcb8eaf74aaa012306dd87d6 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-704/global_step704/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f4dd35eb7a571d201645def469b783f8a8ae72cc270211a7f78ea2412bfe7d +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-704/global_step704/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-704/global_step704/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d2c5186e54fc69f2b47d9dc257405b41c9e9f02 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-704/global_step704/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3d6ceb8f6239454e952d1a28ce96d2ab5f736a12c2f5fb8bb9fc64c0670b77 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-768/global_step768/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-768/global_step768/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9d6cf19ec68879c76ef2c7be7980018ea5243da --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-768/global_step768/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:548b91b7e4db056de7e7f67091ba814ae1ecbd63e0f3af38bda5309eba812170 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-768/global_step768/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-768/global_step768/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2727992c3165f76606021e73ff69d845764fe70 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-768/global_step768/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70eb72ec1360512058323a01e66ddfb87a64395f97fbdb93766e0354a7894a2c +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-768/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-768/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba0371866ff4c56375788d8e6b6ac204eca2f3b8 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-768/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c26f336522cb69594b3952d10c9ee7899a17aa4a8eeeb9d1b384b971fa4c183 +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-768/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-768/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-768/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-832/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4145363a8f8a3cf6529d7ec527246991f91e52ac --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccee0b0b72e1ac9c7909394520421edd3fa8abd10a10d9cf69441a1912e63791 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/global_step832/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-832/global_step832/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..37f058d8e7378e567ca714f2e4bd4a97ce44de30 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/global_step832/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db40e587bc1c0df1fb5946a4337523cb29c26fffd47aa39fd72ca57021971027 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/global_step832/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-832/global_step832/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a2d4d5a5fa4a52358d3181b8069b28b283de328 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/global_step832/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0240a2cd10dd8cf84eaf98de84ff63134e9b441786e27578efab8ada4b1f25 +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..162b329a81e814f7c24275a8db843f6c8edf23df --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1133f2478c9d84bfefe91cdaa04262ed3747bd8a3513a48146459ce569a6648 +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d16898d8fe8369c34f42b6e0b893d19bed4d39cf --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5b2dc369e738832a0af8961fdcf3c0acef269d05e535c48ff957d6ad030edf +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..daa3778fe9f8bb77543e64ea35cb463124ab77e9 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be41002a4507bc6bc0f3b03aee7d394d7df2fae5670a7475a8bc15f5ffc2dac8 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..972e94f55e0e0acd8fda6d8056bacda199ff1bc3 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48e5f0a1ac1393931039215ab870e36e0714e35108de98a92ff01408b153daa +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-832/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-832/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8718467d126ce6bdbd53d0dbdd54da4de1ff11bf --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-832/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d2ef8130fc98df19a3863a629f0a8a22986faed3b54d90a56462143208d671 +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/adapter_model.safetensors b/dr_grpo_lora_20251130_192918/checkpoint-896/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dec8ea988b4b010c96754ff723234b3e61a888ff --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d8fc6c1b3146080bd3a313ea52836537fcc36b69838c86476c9afa42eacf158 +size 36981856 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/global_step896/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-896/global_step896/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb2ea81dd1387d836022c47a1b822bf1d769d590 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/global_step896/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d1e43eb487ded6d61355939c9c4cfe1240ed71d488df0dbcd1fda002d605f7 +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/global_step896/mp_rank_00_model_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-896/global_step896/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e7b592673e005a517fdb399a081f831df10262e --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/global_step896/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801a25f082a04c4d348ba3972c5ce06687c91257dbdf2db262268e00182f44a9 +size 37231769 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9d2a72d4f9ed3092d812e311f671add5c9ba249 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302fadaaf5a8c32263f940448e3aa8cedcaa90495ce7228fd067382df2a1ec3e +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_1.pth b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f6020ac179c5470f4112eb9ab7d03829f2680e3 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585411512936fe89835f2b95f94ff477909c091acc8997a46722f7fbfc73c665 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..6adf2ecfdf02cb9c0b4acf13584f1e4a026c6c64 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726f61ddf3f4a0bb9b7427c5121b57ccffea6feeca454cae656237aedeaae83a +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_3.pth b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..f81350520c8d3f2907859aad22e2b3dd2910f8cb --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea2fe53b9deac81562503ab550209858b805d996f1b08392a9813dc67467686 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/scheduler.pt b/dr_grpo_lora_20251130_192918/checkpoint-896/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b4ecb0ccba616e482f4a3ec4b8e0a53368e6fbf --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfb7bfa2c34c711580edf233de0f324b5ef962a780aa91f27b4eb576e2588ab +size 1465 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-896/training_args.bin b/dr_grpo_lora_20251130_192918/checkpoint-896/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-896/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..da8337f12f2927b435f3c959ea30988d0e88870a --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee93770722b66895e3b86478c82c2195951aa65f3932895716db0089ce9c19be +size 55408773 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cba6ed735380116d0901f921cae73c0df00e919f --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188ed601fd5bfbcfc7ac02c8862a7161f19a04941ee1d5ce72d95223b59f401b +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ac1b21b6fb43725108babe9b619b230cc0d66d5 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-960/global_step960/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96587be0f4a17211b00a6523ccfa896794db15dcff3a6a066909c3a94634aa96 +size 55408837 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-960/rng_state_0.pth b/dr_grpo_lora_20251130_192918/checkpoint-960/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a5219588b6abbf3381b515f5eda62bbe709d61f --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-960/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90bd3e271f5c31ae1b592b3be976897068c3d633af3488d60d1525e3cd70dce +size 15365 diff --git a/dr_grpo_lora_20251130_192918/checkpoint-960/rng_state_2.pth b/dr_grpo_lora_20251130_192918/checkpoint-960/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..24a1c6de096d5106606273e3e5406e4ab7732308 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/checkpoint-960/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e34e32caad4e8551a68b4b1ce9d5618f1c5bf23ef094ec601bf862a7fa4a51 +size 15429 diff --git a/dr_grpo_lora_20251130_192918/training_args.bin b/dr_grpo_lora_20251130_192918/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec0bcd20c557848df5808d72f346bb69c0a3c51 --- /dev/null +++ b/dr_grpo_lora_20251130_192918/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cd5486d5180d58247f637807acbe17122c3efc1e32ddc464dc2fac8f414516 +size 8849