diff --git a/.gitattributes b/.gitattributes index 36d21d3466c53b18e51c5b8f28faedcafd36280c..0db10b82d17c075bf43327e9b8a530978a2c00ef 100644 --- a/.gitattributes +++ b/.gitattributes @@ -829,3 +829,35 @@ sft/20250430_010918_sftqwenomni-simple-loraopt-1e-8bs-0.0002/checkpoint-14778/to sft/20250413_225520_sftqwen2audio-simple-loraopt-1e-8bs-0.0002/checkpoint-36356/tokenizer.json filter=lfs diff=lfs merge=lfs -text sft/20250413_225520_sftqwen2audio-simple-loraopt-1e-8bs-0.0002/checkpoint-72712/tokenizer.json filter=lfs diff=lfs merge=lfs -text sft/20250413_225520_sftqwen2audio-simple-loraopt-1e-8bs-0.0002/checkpoint-18178/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/tokenizer.json filter=lfs diff=lfs merge=lfs -text +grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73ef32073616ca55480f11c724b446cc8ad53bee --- /dev/null +++ b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a7b4119e8ecd9e8e396265da52cb92c9de3b82e139639f04dcaa9c4a6b88ca +size 25185531122 diff --git a/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b5a5a815d7cde1f1fdc1dadd8f6f25899c2d98e --- /dev/null +++ b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f75f0ac89529f65cefb9c2ae48ed24e1438c53d54e5e6379d0f92cd8f04d62 +size 25185531122 diff --git a/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79610802f99f7b4dffb8e0ded1e00830a33347a4 --- /dev/null +++ b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6826f2fdd125ea8a8b7356c7f9a8ce55733f451eb2a3171aadda1d37060ea091 +size 25185531122 diff --git a/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5870a845b553df920865b86806bfcd82518cc30 --- /dev/null +++ b/grpo/grpo-20250411_032518_semantic_mc_qa-none-e1-bs4-lr1e-05-20250420_141256/checkpoint-262/global_step262/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd26985f147ac0e8b75f500e640a8a4cc9a7239094b1e6aa2b6999ce17fd544 +size 25185531122 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7448f6383c78f4054d43587befe1a70fef587743 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0614da0f6129c7af0b48d5891681a4e5fb3d6a733a92086761e1fe1a9ba4dad4 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c46a1623d4aa92d725401a33bfeeb52a76d8bb0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ff8359a110a610edf4974c03dcfd0a21ee3f3e3906a985fb57387bb3b7a6b41 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e576e5c6b0615e0694239e945caf2e7ac7cac72 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de08754b0efa0a202857280c7d9bce2f1958d85bb4afd3c43d469f19396da019 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d506427c8e301911f5d595c6f184ddc87eb15ac --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36dc65dd086d4486849b602e67d8d215389abb165c1bd108d3165aa289b3dbc +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..943837fb27070b7ca5b761cd38ca58222710e97f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ed8a968a380b570bb3a9f4f6d43d40a2ebcc41689c292e18e9028bfd1c5e59 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f6144a9489666c0a51927d3dd1ccd7e20587a05 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea6506fecbbc2ee9a78553cc1d3c62a976f52e1e4b6cc7c87dd907f2b762e8e +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-10266/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70a727a6c6e438277895832717defde0a2ab413b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee2a17c1c536b1a5227573387e980d76f1a9ddfed874a84e1645dc4b8b0fdf17 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1217ea93cccee8dea42973d4f6a490f729c4cf2 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccaa335f886b202671b73589caad858fc2a7acd7a7736dd50de159eb850bcc7b +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..74fcd3fbf3479c1b8ad925b5b7232f77fb554835 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87e1d5e94365d01960c9fd16b4b83c53b461a4b60a24d1f9ea01501ea9fc668 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..29529c6e51e2ef109d89eba6f608229132d41560 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7cf50fe80ce15cd455578ecf4c34a8cfc5962ba0c682dbfb11d5b369f105bc +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e33a1609851e793dec8b31dffdf60210a1229959 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289c0d8b5943616be207ab3bc04c764ff21d9b0fcc91cc938c7c52d38687a5dc +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2b6d99f29e6275de54008184b1b0e9d6266c2de --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911c1036b3a6ca1a25d873a19cd2a53a6cb9ce23ec077bb523a68d3f1da0c69a +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1062/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1323284257f36466624f4014c903e876b9e2030a --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ad2874633de04004658cb199d76a664474ea2f46be68dcd60f2a65987243fe +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8b9f896b2f2f1746681826b5ee06af49afdac1f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4ad08a47fc0c2848025b0bfddb55e42a78b60a2fddd6947a224b1510736531 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a567411799380ca1178f0cb1f43e631b68fcccd5 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771adf698c675dccef728fbf7af9eac516fff111f8b4af01f2dabc0af98b2466 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ef88127b2a95aa6990b107f7f6a5e4d81ecb03c --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ec000ea9b8689593ffdde3ad24cb04e1bd0d938c593866605911cc8ddf3bc1 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..60132f5371f4dcbef6aadc96973a9befd7f22c9e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43b871bfadc492d1410ed0f6493f1c6c0f26bb45ab1c56f896b63431eadd020 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5e5ad4da925d655396db6fd3157e9b4d77a4d33 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3003d44eab6ac4751dd988d046415d4328ca8170e33b8982c3623f026e539eee +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-11682/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0a085ecff028da87153bedd5bcd165f479391ae --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a230809b60012b31605c6a7e0ed29d382e197b39f569d2caabd38605c27c43f +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0eff1907f64e1574f03e43b36ce6a05351e7e16 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df57f763fbfff876769925a49bbfa72d74db377357f9eb84865b4f76e392a82e +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd0dd7b34ea9453cc356848b9a4faa4e0e1a1c63 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c821c0407b8d3f3be99791e4444a6759ce738b4f3ce572f65a9aeb58b79b44f8 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6df2e6258eea54cb75f0af673d3dcb84711bcac --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4530cef1c888c0a4852b3f4422128d3c83817895eb7f3421bf0d40d3a6af06dc +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..60384adbac7a7c16252183960d181df6c43ff9b3 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6a4ff5b1b257c0dad3d5e4c3a63b82514ac84828e72f55625650a6f143c1a8 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..51e49548b45dfed09805a7e1723ff7ba0305cd6e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607551ba9e1b299134198eda501311ad24abe2d57285abc8d5ef0af562d761a9 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12390/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..68979371255f080512a103535946b589ce23aa59 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5de3af5db860092aee92724d7f9da4e0c6ac5fee149d5054034cd1a3c27d8d +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-12744/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03cbcb9358ad6b4bcc6ba89862c1fda44c8d943e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153b4377a6f2f62eae26203e07b2165ce50cde030882cc50746db78b238cd651 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1043a22d98217561a0c039c82640517c43e18cea --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05313e8d379803da7657010d1f83d43a5f21a4eb21a083fb3386b31b909b193e +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80a5e5377ff9f85d70049def1e7c8f1a5e475609 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd9072a2ed170471df6f14ac3bba662633ea15a22a69f4108180194440deafa +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f55e13240a3b679cb6068961d9e75b28e02ddb9 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa119eda0a909b0835c66eba7133a806a7ce867b839ad1ac9837822b6663a21 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b84c03331167fc0379b8e41c62637b7c0100520 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b213b2aa2f53a2d8855cb8a5b4337ddc969931b6afcdbba9d33cb89da6066b +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29239fd6dd841094e05facd312a48486e812b2bc --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d6bcab03d2532cab897fa32dae91c1e0c5f12cf637107a681c4e9e5a000fb4 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13098/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccc136056a2171be8c739064ca019d62bce6536a --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d34c3eec204c76a7fec357b3a3a9b34531ebfc11a75da1770158310510c120 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..771758d6dd117c32a8f1e708672d7cb40c1e6fab --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ea726e99ea6faeee098df69e6e5269c03d2f7cd8bc3b554ef23925ba8667c7 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ef7a5d5e8106c79680b515af9d6b1348bdacbbb --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c0558c1c612c66d595c5524d8dbff64e8a1c080924e14cb5d18c8bc2ccb7e6 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c37f86a122296515e9549256950c6240613b4edc --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf3f7e47c0638e604689a75c1deca22699a08f181b0b9895519ff680afb8322 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..ded3a5942188c114f1d0c7a862607c5b822bf1a6 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ef3803adfda0430184e54ea2f5c4cf554cc6dd8872f900433ee991936cc0ef +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb42848ad846c7b48400b47afbd8b4cd260b7953 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35960295001769c3c58fa984e668e8fc696cee438868e33208304114abc478a3 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13452/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c70e4b18d57ea14241d3b4274c79ff9d59588ff4 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e92ed56e9333d8c706d74abbed7f8de0ec15f71d67c2804e3dc61d80a877fa9 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..14a5793547ab20b598d7b8d2b6ee132a299ac612 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40fa492e4fcc297847b57098876d4605288b68a4aaa67ab5605a2506093d336e +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..263c0adb7e536458ee8ce5864870af2d7ab22220 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519468c10ab7b27e94d8a212dcb39eed16cddfc7abcc717b7dd0688da90a66b9 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac177bba9dfb6a53b21161c4e082d2566dcaea4b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f7beb0d372e48e4cd4b724d3364c4b534801bc02ad7705fb8d2f9a98a8a4a3 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..332d550251185fa067756b9fcf20b1bd077a156f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37755237fb004ae666623c50570b569de0015e8d03ebc253a2378253e65ca858 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a5870b95188645d59fb86bd90e31b3f8fa27ad6 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f41a0c4edf2cf228c77e24994d078e827f5de46c8315ca87afac3018172331 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-13806/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3249d9c9b7ae68748b8d35da9f251d0410e19c21 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f186f0281c4abefee458ca861f2a9d225922b4ed56cbfd0c420cfd7ec880469 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b036db66df2eb563a25c73d694bfd409ae76b32 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc3ab612e0bd1dcca73b6824407baababe3da3b680a27882a9d64acb7bc2287f +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f786b6a5632ca5d62a68471e3bff09662fadadd --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471096c5182b493a77600803f366364582625f8e4521e9725dfb6c3aaf7dc586 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e21ecb64c3734ab1f82d72ea80a3154ee1b036aa --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c27ba91dd2584d7ee3ec65591a2ebf99af03c527f6d0e9ee79e39f30d04663e8 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..299fb48410fd27dea18fa3ca76c11bedc76ee99d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a436509236bf2095a31372ae82fbde4b555e4fabff40078667c29c9d614b00 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec66ffa9d84c1874167bfb4bdfa4f519a9a49ae3 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb8b3e5dac7f5ac84d3f28c7861a0c807e712bef300152c520b8b552df32e2e5 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-14160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7372e28f87760e940cedf99be02dc1b473140cf7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d7c9e9cf85fec007fc641b22ebbfa31335a22f6a5568b8b890c0cb8e1da52f +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a1618610ea2262cf850415b443ceb167883681d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19d65ab596095663ed4d4e68739eb3aab12c721f8b24f57a4135a720f9f76f2 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cd1fbfebe65480d97fde62f87965b2fff9c1da1 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2c801a01022683200e868e1f1ff2e02e7defbfb0e56df0fb771b4fba4ae68c +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..31ade0ad32cf94e9a709cf393b8927fb21f5ed02 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfdeebbe483bc99e555f24b095334173451a7eee48f04d7fd63321bc83755e2 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a225fa96b3176c9d7ea7968bbbb1e509a6ca9ef --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:340c0a3c72b1ef442196a160008ea283177b218e5a7392a0b0a0e2c3f587aee7 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..646a500f43cf9551a66dd8cf494ede9b33fafb22 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d23d0698f48213aa984df6bcb16a1c2118abe623551a1ce2cfd413038c9a02 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15222/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5100dda0e858118a8092a7049ac82df37b5c9aa1 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f4d2302543b61d05ce6a1a2134369ca0c1e04c1445c2211d5cdb3016ea3a5c +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9444a689d130d250dedc28fbc562dcf044f76461 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c0d97725a146ff8c15c7b549e5c852a6fa58090fe5a7d648e65fe379b1878e +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..62d9ffccf45a0970a75b8ad66f42584d9a0e3380 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc50b241b0f66a29ea7a178c77a38ac2091817c8995465ff26c0a5791d069ca3 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..79cd7781a100f21cdabad4587d9fcee460460a62 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2c6f99004d21e0fcfcf837cafa8b486468a5bbc93cf183314f87fcbce03278 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbbc57d40b01579ee3d6588312089c652af500a9 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43528815cedce115c7502bb0107ea339bcdb4bb6cd0b6c937e627f41a22cadf +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dca4c03ba8302c150934edd5113b044b690f6934 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b11d9e0449b3c54a419d32aebc440c8d0c0935a1715ecaac8d3b16df651ea6c +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-15576/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c057bb026376139f61c40c23211cfc3e31c3acf2 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0724ab408d1cde7b5686099007ec97597dc16f9217111a62b05af7e1af230dc +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..976940d85fa6b87460a0fa3fca9380eac6c25145 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5931b0eda579f1829d55ca7e2926cba0690102672928c342d4052bd9c4a7da +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..883e35254e52cf12cd02eaa46b8eb00461410c7a --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f72adcf8759114be2da7ea2b48325cd2f2acb696158bf1e1ce1dd20249ea0ce +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..f96b455cd0213939ae258ad7f0c272b4ac5aef23 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55969a42bbd28488babfeafe559515d7795e8f8ef76315dea4d388877eb315ce +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e293d264a458fce5d7dddd5501914867b3b899e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2de7b99477ff1412e2372fdac27af31d38f56b27d289195c2976c30ec3426a +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47dbf40350a2d1edb65fe9b131ee74123f8721b7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3018af3560eefc0d70ca1dfee76e3a23487b24312c8819dd368963b4d5f3210 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16284/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..062ec79260fe393fed004e3da98245058e9e8dd9 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2fa6a9b21256376cc2f71e3785d975b19db646251a597f92c9a32a88d911e0 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0938826aef33d48ca98f7b595e37c5e6ee482d62 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967108b3c8014ac9cf4d6b543f72632647a7181891bbee1843272bc6ac963355 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..58e0ca3bcdfb3e73b29db3dba33fc9a2ac2c2cd7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66de29b8720fe90a21691a912aa3d3469ead9b183197f873f72c72e9a16fda8 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..f655f5d0f2c6b6b0ae8dcdfe8aa4fbe91e93432d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab9bd4e3fe8e7f026258e16b982d3dd26116879177bedb618b0eeeeabae00d0 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d501ec36848b6d3b1fc013c6ea74042689ab79b6 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770838670c03524e47a74f72b3e4b36af2bd2b698c37ed1ed6e7e0499662a8d7 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff251099881d8cd47e2aa9016479d4a5c4b54c94 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a444845a0113a281c975aea8c8783feff8163a62f709046ad547d106471953bf +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-16638/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a56e3d00712c31cdb3adee0961e48340dc276ee7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982249f8e83a5cd2d2a92ca426f21417f44ecd52a31f07ebff453f2578735c07 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72a44300b20c2f6c246ab28bff3e3a26aba50935 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b9407dda298e67c5db29d4180483a3159854db93e22d7b521677c0be89b60c +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..febdfeb53d2d7733d94c218df2a6ca82091d5815 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e293d3b088fd076bb4bfe973c6bcc0ff3e4bb3d3ed710b5e9309aaef28ec9f +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..63f6699e18718bb6fd7c210c32668874da18a667 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8f56b0ad4c70083bb8a15b333568b0446702a4340478f38f61b9c9609b099b +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7e49d490b596fa8ea50189ba6ca40fa580b651b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbf5c788d82a6e9401dd2ea0dbb940ac9be1c861810f797a3f3988613f2874d +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a536e1805cdadac0d4252ccaf6312536e35ac0c --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272c2188cf1a27718154b5463bb8e397c96cb61145ad6668647206d081ee9fa0 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-1770/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d1fe1fabe63e69c4b56926591d1eb85dd791557 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b809a9dd7edf590a248277c55ac059ca9ee139ee5d2f4f9d3d198a7a8ae4a3b1 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c70f80801d08511f7ef6ac02070056e100e88d78 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44f32320436719960e93924f9d983118b5d0dc2cceffe22d41adc4480ec7374 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..19e838f601d3b933077ddbc32ee3f5b0fb1f04f6 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c0e004b7ddfe45680f5d7806cd68de5a5db5375f85a2c25a618bf2d12f7729 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e66a303315864966154f82de0bc474000f1594e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54323da8374ab34bf0fa8f78b99bc3bfc851088ca25965b0271e0c6453ecb16d +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..271f676bfc02fc0582b7a3ba5d25ffa84eef1ede --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c306ead93e1dc669c77c4d8dd869cdca94b036696920dbc63ecd7c05ea3cc855 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c140b3a1d779881964830cfc1374b84185b2e43e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f40dbf2136bfaf201b33a89324b235585031c016924a44573ccdf1f3fdeb871 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1007176b2a5837aa215ae095483121e766941b23 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47c288e9d40cc050c1cb3ded3a85fdd754b0db0f769959363b2352b815d9863 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..61ef0a7d7bc02bb019612bc1423b3505e5d86a0e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e034267268dd1d4420eb5e3cab5288905144c671af2432e0274eb00ce83aaa8 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..da3b822b6074ed08d7dcc628a537573e6c9bf191 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cab5ff5963ff71fcb5bc01a2900055b42293ffcf4036894ada50970fc883e4 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..f3a4ca8b0389a22ca6999229b566859b805f6be9 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8826c897afd5bfebf3ba5b1dbe9e4a8001c12f0d978860c2420693a2877af9d1 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..43b0c26dbce03d5d96b80a6b5147dfef4935b21d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bd5221c8126769ab738ca35687aec111f510abb44afa258b0c6613b88624f6 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..39611473a486c5c376fdca6f61cc0f2fc518fff0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5be2abf76ece4010e89571895ffacbdd9cc2d9274b381408392ec308b27bfa1 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-17748/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95c4e67bdc498e00cfda762a418159f75fa1e01f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7c8b2f659840093b88ffaa2c4af0caf59fbd721364afb9d864d5c52288572f +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fbc6ae8fbe0ef335e6572bddd8d57accddd5d26 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f81c7115508c0da8a8b37e6518b794a5e5479af010752a57568ab60dba2c31 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8df4d2d96ecc51e4c9b6718826fe12577a8aec4 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af56c42fb9af62867fdec63e7c31092bcddd363e0cad73f7c91f98a0d5bb56c8 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3cd744be6460c94b798a030bbc7339568015b24 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d2cfe16438453bd606aa26c133f93222f65f217d5c37977ec83f8ad8244460 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f78fa663628201016c207857a7f4f1893fa908c7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d706b803e8a6835cdbda99f0ebbfe1482db6d9a7ea0c8e06ba456721111654 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..362caaff8db58a67a5fe61819bab68cc203ea0ba --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8781ea432987093cb33f756eff95cde725eb4f2e01cb1e5b9e49258e58471c +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2124/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8006cf3d71a2c80bfd90dee98de2361129f72ed --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9126a6ddd3648d7c6fd462a696d6f5d761139480dd362fbfa81d5d2fe12a0ee +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bf40be93a8799e7dbc6bdfff58034d63450cef0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e41a59091ec9974a3e36e4033e70df3ba41854530621be0d6b3eeb060ef537 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..2881ec214205b84c827c5f34c906ea9751d16aed --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20916b05e3f4a7a435552c4cbe76b2d2eed32e8ad2e8f8eb4c08e9817ecfee3c +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..254022062ee91bec5760c707abccae3ef48d3d8e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318e14d1b9d9671ae317858f7914c7183381c922ac670c94bb992ed7a6dd53ee +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d35f8a9cff615df29ed4b22d091e2fbc05d155a0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c6e0d109e0df137820b2131dbac425bc0b9e461ec3aad427491387f227cbbb +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..86212a2d46d556c77510df8e3c73f4e0fe5f402b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a03fe4d9f5e900ba38b309433d5e533eb4b56095b62800fd445abd38f16147 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-2478/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80fdbd83b7cf889ae38945518ddd247748ef7a83 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102ee18dca07abad5ba27f0f10fe90578b36dd54c2c0c3572980776fa13cefaa +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..338ed8c33634fc9bad10a243ca3006370da5ebaf --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34c6257f0f9002a02e26181cc2c58db7173f3a7de72801a6e3a7e1f1c05a0325 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..921dcda22698af5fbc7f7a18fc077edd82677d6f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82a64554085ffc195ed7f469f7d5ceaa81a6ea51910a0f9cf39b3feb00a104e5 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..97938bb10263f750520384407e20ffa760509941 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2baaee9a253502c79dc8dca8913d24bf44d443932b6cab7617371625063429 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..87c92a671d8a2f0e22f5f212ad14635067f01418 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e0dd68d237cff40fb601d6625834eb9be10de20d48192741decfc0fb2d8ebf +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db137e60c3e43fcb3a75bc9b412a4f8db0c4ab18 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e115aa770deb339dedc7c5ac8e98eb29b9331e879d146b8ca0a047e129873433 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3186/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89a76e4fddfd126b1b19c379b5eea81da0c44e05 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:915a472b340acacd4d096319a42315eb6c880e8b26787907cccd2f2cdee26c4f +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cb96b586c5b1f3aa4590cf97b4f6c21e8e645e6 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a423b0c7347e2863ef7aacddc1311c824f4dc7d58132287657515e9684be262d +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6370db91a5d5af22e043653b5e7b9d423a6d73c0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986235f33097ec4f38f6172a8e74cc0264fe0a965a19be2208b1b28698cd2b1a +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..189d660de6968483dbb9f1a14c7ba3a0f39e0c5b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f90f9e5d7ef68ee5da6be2c58cbe7500c2efaa269c9ec3270d14035b788f9e2 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7af520495280504d0528e96018f41b76949e6d0b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2ff1adf39706fd62b6d50a15fbc23d1b43bad1a46b598a3b48ec0444fc39c4 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..703b803e8e91e1d0c16d7515e84fd6c809183c34 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c9ec46b64ee013ef85da509548a167c4b6711a8f98d8a337c000502a2b3143 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab90e3918761a8d76bf5fbd6e8cb4e6954ea4009 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302534ea41c42dbf7528aed2284318c278f2599ecfeafc0b57c9cbaf3e574937 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d0f61df4bd4f4a456d110e86f069b64c845aa5f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b579cd768e94a70b2958a0660bac86c8bf61bdfbd4bf9c34ef3fdf3ce24e3e17 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fd44bb074a5251f111df399145d383cab627858 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74bcc3d5448acd8e858a6c59e36bb1dc65fd3d32c33a4584e2f394b36ecb8629 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..80c0c324894181724dfcad6e0ecf7afed8b4c6c5 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7a5493fe407dfaa08794eb5a6334ee71e70db786c64cf0be31267aec59b31e +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..b0a6f30534f585e7cd49cd8bbad6360e49ac40d1 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957f6151e8e73692cac0df8daa528a5ab5ea24bda80d31e12bc7744f889c2d47 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01558f60a531129ed806cc516c3bd58a89a238e0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda8732890237da153be5b85da3e24f11209b0f38269c31311e5e88d4e71750e +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-3894/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e9d8e7b289acdf6a369bc8ff3f0e21c1f63881c --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072be06399f0fad03f438d9f3096223b5213b06a7446570c97b32c9eafc4043f +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ae2b3b1ac503c6ba2abd8622c5f448c6e8fd35e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b835c40fd8377bfa2d9f2b2cf486b112988567e43f1425c34edf6cdf7441a4a +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e97fba501318b1be48852ed7f4e56883ab404b15 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8aeb1dd5a6f964ca80a2155c446836617a18f4dcd00c5d260197c731f57ef6 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..da8c04a3b785bfc3abbfc0143adf5d2a13120269 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e8529a005bbd1345c6f3adce46872ebe06c22104908e230169985f9764adc40 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0882bc6744c409ab3197a89e5e63aabdf8bcccbf --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e741c2794922c0eee85eed811262bee0eabe2a810233bfcecb49815b36ea4c4 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..605d81895c92fc2ba048543d37af3bedf791ce68 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30bf8241cd16412ed2fc5f68022c80be723fbce2d938328fc453ff78364622a +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4248/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26f060500714d6a1137fa46d13609e55485cea21 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be6d5bb160a1a2b270e2aeb36f8876dd7429f0256d8721b93ecf3a51d3fff6b8 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d996568133f6e136142af6d3251524b80a589d4 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd72201a97b695913762265e361d37e1aec749648a6cfa87fbd1b801bbf8103 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0fac5afa6480d5433ed59bfc549771b4d8c4c3b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461f60ff25f20a3e8a05da11183966a9c4cedf82d68b17fa38e0f32de5763ab0 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..614c59edf5a38560d41396aef3d28ea58cad40be --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d6c013be439db1f0680f3628c7191f092becf1794a16aa80bf120f542d6d49 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f01922b1ae08f94eb0468a0bb89a9f8d94200b75 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4182a48f305bdc372c0280332be5bee8cf68bb1872318961669b64425721369b +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..31b9fbb9595ff3c430d58e364482256bce8f905b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9675af7ad2439e475483113676823642e763c847827612044914ed5cb7190f5 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-4956/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cfb357f050d338e119c1ce52d00969eb5fb0836 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f007fbc993d2eb34467ec83905ec2a57efcff3cd8e24b33a12319877256aac +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..47e68b261d9659fb60b5261adc31571f81b25ed8 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753fc71aaa807c08f4ddf914843fd655247bdc26fc45c1047800c80f95cf3601 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..7433021e91542b663d7366316b0f65cc7f8a3383 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3fe142e356e867a9560cff281f5c080ca6db8b1ea3120b181b919768cfc08fc +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e375c0c3e184780e68b8e6a8fb6868b163ac9fce --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ea2e6dd1cfd838a52667f0c7924a234f5d63205d9efff81813acfcdf39e3e0 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ad866423516f57a611ac9a5e505fad342ed62ae --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc06e0df460694787840e558ebb1732d2e136fb609570b3a9679cbdbbf3e37a5 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..601df5190badb8ca213be68de541f4baa89da98c --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba75c482d2703a84a765f0b334db4ccdd4df94eccafe43a129337410d34cccd0 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-5310/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40b6582c44bcd6bddf0126ed84217a14767ed399 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad74ed9f9a677a89ad11f129384edd1d3bb77720581b3100615bb835c1ad7d20 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab5d1331cbbc7967fc4b854141a81db154cca0e6 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd9378301ea58ba382298510f9bcfcf5cf113a4e6fa38610265483871882d2f +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..50efd97abb116b784a007760b35e36b92ac22baf --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb58bd01f28c5e0d82f0de03de29288e0452a85a33a5a54bd4f99dee3e21e07 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fefc946d98518074717444d2addf479d7d9b1c4e --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0126ea11d500e89ca1d56c37b9ab90b07b24b3d7bc4b2f339dd929259158c778 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..31dd525defb22e720119b9010f2fc14779d0c176 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e9b0ce3116e868d4429b6d1ea490bf6c1c2b44fb03c0f4bc2f7ddf36ef1a0e +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9508a6a65c1c2f4ffa8b200a144be9ac739e75ee --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2eb8fbedf2622b763e36f591dbc0932487d1f828574657ea6843229286f53e +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6018/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..055f000df876653176cb96c156cc7f23499fcc3d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8763470d5651409767946a0412af8737b5a4bcfba0a6ed4291719ce424cf7433 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f262096310ec7015d2dd5cabc56965318f9f0b4a --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa1606d5dc461b6e407ebdc64faa3e07976f0b10f2a94f8745beb99de42ae95 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9107da6cabbdd3e30618fb911020928fe97ad99 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08527e06ecc003bd0bd64da60e6a3d801386e3099adee07606189853a23f025 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..329bd00bddd44a8286293a865dc964de6d925ed0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7208e4a2fc931ea965c385262f97a907f144554d427ed2b30e92378d70884e +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..741f79b80e063b5cba77e4b368fb26b26804778d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bbd54020655a464340eff87a6acb577584892663c9825b090b0ea5618c2e09 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e4ba7947bc86b2ebc4e33b592e04da27b161f12 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c2943323563e42caaa1671e34ed2b9742b743a0a3042a5e9c0aebd9b65594b +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-6726/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7eec64d12b6c62b14ef2e7c7b140f42e0cc9e5c9 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d35c004cf712ae80082749181eff64aad79f53eb8c36029904fe91679160e4 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f45943d4be7d0a51316d9d917ee909fbc0ebc0fd --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82544539ca85e78d8b26ac155bf6179cbdd572c9cae3da53b9422e9afd0128f2 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..2233f8d8f72a6f4bd845b67019bd19e5034a889c --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4f45897ca223d8fe2f0d204838a1e0a843d19a7bf6ff9359737d35b8f0f1b3 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7891708fd5cad397f72be2f6e5fdf276bdbe45dc --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38819511229c805e6f274c919d906f1479e16e81530f5e4ba84263a45dbdb547 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f6335df01173e8576e913a6345215c4251f65d0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d5f94e539623677ae0d02ea18d1b8e4e123972f6b788f19c82d460c39bb748 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3436493dbb69ac93b0954db5493686c44ed2987 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354946768bde0e2291b96fcead2911c03be890bd1d9924fb4622062787ff7710 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-708/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f14f4c14c9f6e54157acd2f1a5173d0794355c91 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfb798f113f6ba48a25a5ea893fe6b1636231abf8db35fb78b1dba3f2609faa +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e767e8121cd82531250a4be5012b0148e2c88fb9 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac208c8e5cec5442d556f663b7db580de817e057e11bb03fb60b4efaf90fdeb +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6da65896be3c7e9d3ca01b049875b0caeb5c9b8f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66538c1ba3e7ed19efeaf8bfd07e759f94df17ffa246323a5720f623e6ba1c27 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08c2d58e3124509808b4976e031dadf83205d47f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae604656764fd8487798f68d8a9e749ec6d5e175c48c07d9e0aebbf4316dcaa +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3bd72d79b6645cf85c81dad08921ecf9e2389910 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6184aa64e73f659c930b48bd4165af1f10545fb6225d578f19a25049493d78 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..043faa0e42da99e35ca9870070523ecb7236d15b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0542277bd717072f3424835ae32d436c6ea860ea6577dd1b95d3d0f3d1bf5c +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7434/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e577322efe206c973e585d061cecf31d6617aee --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9387992fc9b7596df43a86d7a15653fca2f6a60ea4fc3cedb3569964030de0a1 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..71c6ab187da8057807ff59be5d4a5a4fd3d3991f --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a555f3ba8845c4c8a2db804289e79b1355fc6081a5c79c3e1aae0c0cb67f9559 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..511962df3297174bffbdc5a7ecb796f4f241876d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb64fd0150d19c66681ba762ca381ba8424717e5c93d67c876e26b88522886c +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..aba9e9ada1eba0368bc80f117e3a1562bf7e418a --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca82d8f80cea025358f930bfe8ae49afbc6186f6bf696e7a15ac2508ec8e7f32 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f9f62b7aa7f3c8231140937c8ce904e155a22e7a --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301f8c526cfe2575e0403d2a0127290fd8274661ac0fa2c02ecd11c2635938dc +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..df521209a61e47c90eb382e0e52ce0ea0fc207f1 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7db3339939b6e3b2723d5524b4b8f0fc861bf749999156f307df333ef709577 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-7788/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23be35bdf9eb34850dc1058e11dfb9401b9b75fc --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a20243e534ae9fb1782c59eb85c5e6a6ac9e9a42267b30c2e3f460e54320a3e +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..21fb414b1b561b8280ff496af3389845b1fc77d4 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed41d1c62c30a78584853ca8c64529f043aa4f952cd3e1be76979cce40a3fd7c +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f1f140748d6f361f7932147462ed7d6d2d23cd3 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:509b3580aa7a76deb196ad985dcbe7627e6746842cef08503060db9a983032d3 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..b17a52a68cc20fac8ef69bb14938af1b0579f1d7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f90c2dd02a8316a6bc9f6fce0d265ff4cf06ea9acc34eca1da6d69fef0330d7 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..58a541bb3cbf2b577654181625038cfa0c969df1 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd28f7fee0af8e6d2e80ba107fa663a19b65acfcdfef273f1d1a9a25ac620f7b +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b269e341de0c923951befd10cab7a9ef15af09d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eefa5bda3853f64754f2acae82c4b22c55796fb1ea51822e1c0a7f67fb86de0 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-8850/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b885cbcbd93e6dd269843795325b89c8cb1cdec5 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d4abbbd42b9c6ed7596477c6da5d584fb7b0f24d2e78a631ac6ac3647e1af9 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dfa07be4bee84355cd0ffbe85a6c79c18df784d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7610d7a2b92419d5a3b4d664ac0ba1149e68c74c25a81e2e18cd29eb68a4e2c7 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..b48c676c39ba9130d92e3323ea5993d89655fc86 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392a568faa88235b60fe36487d930eec9a3d33a776090fcbf52feb36a7df619c +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..064361710f2679a3515c77b684038152e8d13f96 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78af30885b4038097b7cd49dcdec57ecbfb6b2988ed97380fc103f4b77dc3e4f +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e37b56bb0fc0228331fb6a351d645ce8f914e36c --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f941f3d4e021ffaa62c704addee3e387a5324830aca66924a2413bd563a1da3a +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06e1fb98c83b15457b292376ae7d692bf3de7266 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81545ba3972a5cfceab1a63c8767b8919bae3f3f4f1d0ab83f6204e2b87a26a3 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9204/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cfe45f4da6b8c451a0e3f0c9c15c5d0e9cc3d0d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:971bce1739a303998b1de505771ccd469c4b6888b92981c9f231feeeca13f2af +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/optimizer.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a220999876805a9883b43c4016f74e1604a5e195 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5b119eaca22fd46be8a8dce75b695931dcdc4af131379e21d55b67d6263e03 +size 33662330 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_0.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5706b369dc71d0e53c4b805bb077377edc45a49d --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cfbc434ee25fec48aea51b9a1fa79401dc8ff73385f29230f0cdbf8970a192 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_1.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f1615a2df17f62badb7078eb23aa1e22a0d8225 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cdb84b23c0bc0fa3384f23782bfc15d734174ec8dcb50fbff955497d8060ed +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_2.pth b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..b08d715cf95fec1fd4c3d9a113730c7ba7ec54f0 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d407e717b34ba97fa99a6a3e08412c793d37c451cd0680d47726365e6213f5 +size 14768 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/scheduler.pt b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..48abb65c227c2b31b456d54ddc8f0744547850f7 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4026c342ea4ce8a065c907bea2b0316604be937451901152ccbb8d22d5546916 +size 1064 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/checkpoint-9912/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/adapter_model.safetensors b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1007176b2a5837aa215ae095483121e766941b23 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47c288e9d40cc050c1cb3ded3a85fdd754b0db0f769959363b2352b815d9863 +size 22056664 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/tokenizer.json b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3061716ad76b796880bdd16b2292c0af65a5039b --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56eaebf99a63995f420cf1d71d671180c414ff523f363350b20c982ef29d5b6 +size 12030943 diff --git a/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/training_args.bin b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..882408489c6281f2269d6d43d9a9d289ef447375 --- /dev/null +++ b/grpo/grpo-fromscratch-lora-e1-bs4-lr1e-06-20250425_181800/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457def82163283f09c55df214fba22a275db57b9415dacd7ba08beda12d56973 +size 6328