diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-10/adapter_config.json +++ b/checkpoint-10/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f989603ec33b58e8fee72303d562c0d95829230e --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31423c63186358d053bcc5b3ed94db995a32802fe97e6a76ad418e9b8025b7d0 +size 51616527 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..26b4ba4bed0653853b5f7433cadf3bba74d4f0a4 --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02838e0d64424b6cb6300aaccd975292676ce439f342a7ab768ba8ebc037a939 +size 51616015 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65c81151c8d83ace7c65f516a96ea72e9381ba3d --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea295c285de348f7df4ab14315adf1b2ac4fa15552e7ec830878b7e80ebf6b4 +size 51616527 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..476da5cb326eb370cadaa60fc3e85439410123e9 --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851e813be505c91be02a733f58ca8e7976bca03e4443b8b9ac5570889fd9f32a +size 51616015 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95d0b5f0bbae0ad6e5bed172c6a5b67d8190e55d --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79209a0ac3cae6f89ff32baaf91ef125ad45ac56f6091dffbd133f4a302fce6c +size 51616527 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ea975f59181164d74445c80af11d0fc2f5fea2c --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc4aadd9cff23e98e096a247e97fd7aa3aa946711ad93723c038d1b3255f3b3 +size 51616015 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..80887b443a08bcc6425b6f239f2bafde6a769501 --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30830f2e29749d79058d73b1c3b637fa4196ca48d37a656ac9d7961dfe36dc9 +size 51616517 diff --git a/checkpoint-10/global_step10/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-10/global_step10/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b9fd757e9953ea6febb9b225ef618ab832bcb9 --- /dev/null +++ b/checkpoint-10/global_step10/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102fbcf4358a000e60cb5b6a88dc1033a27fcfc2266002b06a639958d018dd92 +size 51616005 diff --git a/checkpoint-10/rng_state_10.pth b/checkpoint-10/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ba02ed6b011b6968eeeddcf01bb952f6cdb402b --- /dev/null +++ b/checkpoint-10/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02cbc5312caa4527a932bd01244e717ac64c19550759d78c25b6d4897bb86349 +size 16404 diff --git a/checkpoint-10/rng_state_11.pth b/checkpoint-10/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..64f2c95abf61ed6396f788f5a44dee63234168e6 --- /dev/null +++ b/checkpoint-10/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7363fb6f7233fff5f8146194e1eecf2d869fada6009ad153c85df1707d10ed6f +size 16468 diff --git a/checkpoint-10/rng_state_12.pth b/checkpoint-10/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b34286386de140eb6dda4cd99d3466e288c3300 --- /dev/null +++ b/checkpoint-10/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d30743415391c1069ba3ab70ed1da0efc223c976e184639053dcf7380d6cdd +size 16340 diff --git a/checkpoint-10/rng_state_13.pth b/checkpoint-10/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd1a646cb8618c3d93f766ee4ae031679305d5fc --- /dev/null +++ b/checkpoint-10/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee60b08b1cf23d4f9f0f117d77ed7e8fa1ac312a38abe56e4d466b6a4227e9d +size 16468 diff --git a/checkpoint-10/rng_state_14.pth b/checkpoint-10/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..558c6b0faf07b9a4a0b86e2fca1984975ee4b048 --- /dev/null +++ b/checkpoint-10/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8560f8334413a01597d0a3ccad9d05395750a6239553f076da5e3cccf31c41e +size 16404 diff --git a/checkpoint-10/rng_state_15.pth b/checkpoint-10/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..0ec91e7d66d1050a5bfc65cae2c108b39f9fee10 --- /dev/null +++ b/checkpoint-10/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96ea81b40f32da094d53f444b97db883b2fa2452c60a9c74fab28de8b9c8bdae +size 16404 diff --git a/checkpoint-10/rng_state_8.pth b/checkpoint-10/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a7e76e4f09455863d6b1bfaf394779b4a622773 --- /dev/null +++ b/checkpoint-10/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51356d5c061931ce8276759999e3ec37a77c677e630b0feb20a2f2405f50de2d +size 16389 diff --git a/checkpoint-10/rng_state_9.pth b/checkpoint-10/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..abebd24c9b045e27f0b117a992f6732e685145d9 --- /dev/null +++ b/checkpoint-10/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7bc6da1ba40ef613e242a724fa971e15ef8c90312b8b1472bab1f3749faf50 +size 16389 diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json index f3bc7a6c7a8e7a542613193645c83c0135cfaace..0d8d96672052972cd01d7af2ed175e36d6f9d5e0 100644 --- a/checkpoint-10/trainer_state.json +++ b/checkpoint-10/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 } diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-10/training_args.bin +++ b/checkpoint-10/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-12/adapter_config.json b/checkpoint-12/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-12/adapter_config.json +++ b/checkpoint-12/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6613c892b085b9de57bd1e603696f87e9bb72a42 --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822b7229a1bea103b4ea6123a0c441dc6d1439fc94f949416b706af2b11393b8 +size 51616527 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..996042ed7bd34a48a5dd6a47ff7325283ccc8dc5 --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8394d158f867db49d932934b759fd14a26b6b60201392edfd7da2c3a8f8198 +size 51616015 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9d7f5c7382636038c08fb6a66517e2999028529 --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fa216219b835333a072810153c4ef7c296a66ae453a41979a7bc9f3f685f9e +size 51616527 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99a19f7840e971f0e0044d70f4bf6cee056606de --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b24e4ac8ec1a02119157b197ca7535fb49b61d68a81f533af277df6a02d427b +size 51616015 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46a2ffa5f514d618bc385b4dbc4359bd690af66a --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca51772cf3c9bcb49a0c691ab973063956f6f9960873dd39c9f35e8a9c7baceb +size 51616527 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd395bf7ae5d501ffb8a686f76ddf45e04896005 --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c62d9674c47f55a1fffbb308c52366659ae361c546685d79943f5f55c75e6ec +size 51616015 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e684061146132f6e3f2a8643685b6eceec5f2306 --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600f4192e6e52b5af553a3fcf8be2f1849e04450bbc353b6ef46ad3a8946d588 +size 51616517 diff --git a/checkpoint-12/global_step12/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-12/global_step12/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f641ecdb1e8da1b9a78821b4943a846a240c3f7 --- /dev/null +++ b/checkpoint-12/global_step12/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9c29401936327e19cc04b3d65152447c7b168640500d3729231fa3ffb6625b +size 51616005 diff --git a/checkpoint-12/rng_state_10.pth b/checkpoint-12/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..1bda72920467a477e8d64dc2b9acc760fb12ee43 --- /dev/null +++ b/checkpoint-12/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fbfcbd5b3ad1ea91d306ea93b3bf1db721a10749da638dc58e5a7eb8ef5c060 +size 16404 diff --git a/checkpoint-12/rng_state_11.pth b/checkpoint-12/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b109df7e570bacb667b4cdf1581baf0e1f8c0a7 --- /dev/null +++ b/checkpoint-12/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5bb91559c98a1fac8c39253e100b4949629a45a4563ba169f5be99716d52b80 +size 16468 diff --git a/checkpoint-12/rng_state_12.pth b/checkpoint-12/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..b14199ce91c9a50097a36df49f6fa78fd171ce7e --- /dev/null +++ b/checkpoint-12/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef25e80c3d47579c059f0446460239ed32d09d212f17101821e5c560ec261c4 +size 16340 diff --git a/checkpoint-12/rng_state_13.pth b/checkpoint-12/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b8717c356fa24e3b1edf63c208fd5486bfa0914 --- /dev/null +++ b/checkpoint-12/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e31897e9d43cb146f65d037920c8c11d77396d3a557de0f4a0b1d73953a305 +size 16468 diff --git a/checkpoint-12/rng_state_14.pth b/checkpoint-12/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..55910d58ee930517f227d462bbe98c41da6daa68 --- /dev/null +++ b/checkpoint-12/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606a7fa2a7952eb8cdc167df71045befac645e37f12f2c3fcce14b5f1e5da01c +size 16404 diff --git a/checkpoint-12/rng_state_15.pth b/checkpoint-12/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..21db49bdd9609cfb8fa76004ff07e98ff28a157d --- /dev/null +++ b/checkpoint-12/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa68c84a8b51fa41541e27a6dd21bfd8b666e0bfce19c554e6fb078dbebe8983 +size 16404 diff --git a/checkpoint-12/rng_state_8.pth b/checkpoint-12/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6308336a3e7ccdd3680d81e5d5a00af0221ccef --- /dev/null +++ b/checkpoint-12/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fb6cdfd982e8c84e600915f2279236e8f10f62341c522ac88160fe782aa164 +size 16389 diff --git a/checkpoint-12/rng_state_9.pth b/checkpoint-12/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d5a0410272f2bb787982ca98cf0425f675083c4 --- /dev/null +++ b/checkpoint-12/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d2fee6f99950abccdc5d627af4a0c87c49e72162451c70e23b8b58a39c5ee4 +size 16389 diff --git a/checkpoint-12/trainer_state.json b/checkpoint-12/trainer_state.json index 8d028d3771c94b5ea13dd49451668c8949834618..e2fc6f34722a82aeb5916c74ff2a4a44a712399b 100644 --- a/checkpoint-12/trainer_state.json +++ b/checkpoint-12/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 diff --git a/checkpoint-12/training_args.bin b/checkpoint-12/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-12/training_args.bin +++ b/checkpoint-12/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-14/adapter_config.json b/checkpoint-14/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-14/adapter_config.json +++ b/checkpoint-14/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e27e8fbe4f71fc8fe31e9fb73db8a43207208d2 --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987ae005efaff2991e8c298a69bdab1a97208f36e35344eb8b1fb65762731dfc +size 51616527 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..273e529f0532bd9c95c91b35409a58c9592dfa83 --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05cac762c3f44b1d98241aec390dc65daa33e8f7398ed433eee1e495e4d0b560 +size 51616015 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12f2f442d6f93976179b1a6e41e4ec59b4e9e2be --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf818cfdb7066dbde4d3dfb089b34e214a613704021039d4caf381186baed5ab +size 51616527 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f30b72b8628eaf5a8347a219da42387f6ff86fec --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707d84406d88c3919d30ec9dcf83ae4ef21c9b29cc38086d36157d7edd31b501 +size 51616015 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a04ab90c120f6e2b5bcfd37738b81ab7cc4e39c7 --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5de8c330c50df7e2adf5dde811bad3c52e5e2c9c084ddb35bca14a6bb83ba67 +size 51616527 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cf58e560284ec7ba0a755918b4f082cf60485ee --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c27e1d29ea2feb71f051b4ab6f11352d55cf6bec46aba95811f25bb3d69081c +size 51616015 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8cdfad8d2bfacf21bc22678d04ace7e4badeebb --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da1a503819351e6774b2525c5e49b1456f017e729f3ca8a78a5f3e2ac5d89f4 +size 51616517 diff --git a/checkpoint-14/global_step14/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-14/global_step14/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eb50f96e58668938547a5671e5329a805289f07 --- /dev/null +++ b/checkpoint-14/global_step14/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d142701af8386a993512a2032632171cbb114e8945d01d85fd4f2c0b2412226 +size 51616005 diff --git a/checkpoint-14/rng_state_10.pth b/checkpoint-14/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..a93bfb28df22d1278b438a804791aa6c16356496 --- /dev/null +++ b/checkpoint-14/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7934873a8dc658a1c742fe7fcb215347facbe475dccfaceac1ffba3ca9bd8421 +size 16404 diff --git a/checkpoint-14/rng_state_11.pth b/checkpoint-14/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..e45a0b55c31fa4f9fdbd63bfb1ee804ed0019757 --- /dev/null +++ b/checkpoint-14/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9735aadee20166cb6297399822e43964b7f9ba374abf785504ea0cc3685b3460 +size 16468 diff --git a/checkpoint-14/rng_state_12.pth b/checkpoint-14/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..8666182607d6c4fe5e2e7beea6529fd238ef7f0a --- /dev/null +++ b/checkpoint-14/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee83e76542218def27dfde611cc946dd25555fdb9fc54c1720d827f457d8d116 +size 16340 diff --git a/checkpoint-14/rng_state_13.pth b/checkpoint-14/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d14a55ac8fa649b6dc518e77c633832e397761b --- /dev/null +++ b/checkpoint-14/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cdad8462e0c45e6f509f326059671d733c45a4f60773fa7d1bb24f4a5284d2c +size 16468 diff --git a/checkpoint-14/rng_state_14.pth b/checkpoint-14/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8e8a1e0ab805407a2bd30f25479f54aacf9c07d --- /dev/null +++ b/checkpoint-14/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a42a98d6e9069a439eef9cda0a52cc2ac122e55f91e32eb72a4e30558b6acd +size 16404 diff --git a/checkpoint-14/rng_state_15.pth b/checkpoint-14/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..e00d604dde499b4fb6ea658791b00326bd6237e0 --- /dev/null +++ b/checkpoint-14/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07dce0e0d78326989de5cd3dedc192895656a8587e6ad91d1ff4d461f603d43 +size 16404 diff --git a/checkpoint-14/rng_state_8.pth b/checkpoint-14/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c4faf2292398dae843a9ec155882464dfd130fb --- /dev/null +++ b/checkpoint-14/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957a72008cbfbba80847603d45f2d4a1e3cf00847f34cd20b930e4dcb8036605 +size 16389 diff --git a/checkpoint-14/rng_state_9.pth b/checkpoint-14/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..f270ef81fd2bcb37221a5bd825506772dc7f7936 --- /dev/null +++ b/checkpoint-14/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9d7b357c95be48bbf73e2713c02e279dbce1863c5fa2a748da742eb07da6941 +size 16389 diff --git a/checkpoint-14/trainer_state.json b/checkpoint-14/trainer_state.json index 4f77c8cd47b58975e4fc2300556d90007758118f..bc72987d815f3ee4b46df1bd1a0e97cfd6670b2f 100644 --- a/checkpoint-14/trainer_state.json +++ b/checkpoint-14/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 } diff --git a/checkpoint-14/training_args.bin b/checkpoint-14/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-14/training_args.bin +++ b/checkpoint-14/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-16/adapter_config.json b/checkpoint-16/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-16/adapter_config.json +++ b/checkpoint-16/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..44b7d36ec018df168be75c766a96f3af5f065c4d --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d21d24b0fe88922fc3f18405c18a83b490d19be93726030326b14fe9b482f6 +size 51616527 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..980b52a25cc083ca54af8b4ab5daa391f2a8301c --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a99fcd70e0f2f7f550f0aed30298d4693531bf5d3e0ec0fa479a058d8b1767c +size 51616015 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..60274640d55c5f49947c1b4a95e59a20a4a88221 --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a379256cb4b10e23bcffad1f3cb0d735b261dc58badcfd27d689c51d07cfcfdd +size 51616527 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..484e75a2e9db1dbc64b3c9979352854c018c7ed1 --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d1c652578eb98592a1bde0a74ecd1f908971a6d6ac885ca23af269dfd6477e +size 51616015 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf0919cc030f34947c699c8cbf4903172fffad5f --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b7c48716ee3fde3dd4f08df221732d826e2c4aaabd29ab7eb2b9969b6e3261 +size 51616527 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1b95ef9606b7abdff582d451a3a86a533905884 --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65820d80b739ac8f786ec8f0cb3899f450e79c58f4130f2780c99dca9d333ee8 +size 51616015 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7295865f709fbc62e5f8f4c9559d1ced42baf49e --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f222774e8eca958ce318bbba303dfe93fef19a8cd79cff3f463faadec99b3be +size 51616517 diff --git a/checkpoint-16/global_step16/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-16/global_step16/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6185c5021a3404f6e2ed8ae3f95c53220c8e905 --- /dev/null +++ b/checkpoint-16/global_step16/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2462c0c6a08ccb87b2053e05c32ae7f7dd764e4cb8ac7131b00072ac0ba77a8e +size 51616005 diff --git a/checkpoint-16/rng_state_10.pth b/checkpoint-16/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..e524d1966c75049a727e5d62674b7ca9813fb1c3 --- /dev/null +++ b/checkpoint-16/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7282fbb78475749bc9e6dc1eecee196be74034c011ea04b5b190efb7b892e5 +size 16404 diff --git a/checkpoint-16/rng_state_11.pth b/checkpoint-16/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..4901ed467cf128566fc43efb51124b8fa0a8da18 --- /dev/null +++ b/checkpoint-16/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a7dd986848fc1c3ad781427b3abf9e0ba0ebfd37c2f05d53130ccf2fa6485c +size 16468 diff --git a/checkpoint-16/rng_state_12.pth b/checkpoint-16/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..70734a98a73b760f890856b6086a675194510c22 --- /dev/null +++ b/checkpoint-16/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6b7f4d0e3529480b50a146a16b0002f0d7ef1545b978714407390c4dc0091d +size 16340 diff --git a/checkpoint-16/rng_state_13.pth b/checkpoint-16/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..9aa20a58b0e6d8b84507dfeb378032f8dc7273f7 --- /dev/null +++ b/checkpoint-16/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5d246e47a4a2f0b96a6e3c18b9c6c94dd9f63d60dbe7250cdfb00f3c948f20 +size 16468 diff --git a/checkpoint-16/rng_state_14.pth b/checkpoint-16/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..f94a101300d5740f3c55dac3f6078f4a1f88a122 --- /dev/null +++ b/checkpoint-16/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b05934ec433149be61d475392c15280cddc3b22bf3eca2674c6697f1d65ced +size 16404 diff --git a/checkpoint-16/rng_state_15.pth b/checkpoint-16/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..ffbb3cdcbd01749a9aca4a7fc9e270cfe91fa866 --- /dev/null +++ b/checkpoint-16/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100f84d88bff4ab19c0a1f40eac736de97d98f79c4a219945f22af53b3cede60 +size 16404 diff --git a/checkpoint-16/rng_state_8.pth b/checkpoint-16/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ea82d10bdfc288045b341c29a9a79ec1ae018e0 --- /dev/null +++ b/checkpoint-16/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb182e8f05ba05311e0c2018031f593f003966db11a29ea40c0f580779a5ac1e +size 16389 diff --git a/checkpoint-16/rng_state_9.pth b/checkpoint-16/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..7c4d76246c6484ea53722ccd31797c8cd5dbf7a9 --- /dev/null +++ b/checkpoint-16/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e090fafcadf722dc72e1af410ca1205b80a7169827b9a24dc500d8368335af26 +size 16389 diff --git a/checkpoint-16/trainer_state.json b/checkpoint-16/trainer_state.json index 5e83c92bde4761577fc169c57fbd06c509552cad..936f05557643f63325a6c93d12f959ac26c3cd7f 100644 --- a/checkpoint-16/trainer_state.json +++ b/checkpoint-16/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 } diff --git a/checkpoint-16/training_args.bin b/checkpoint-16/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-16/training_args.bin +++ b/checkpoint-16/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-18/adapter_config.json b/checkpoint-18/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-18/adapter_config.json +++ b/checkpoint-18/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3930cfc66c3ff012c5c1496bee021d86ee0c75f4 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f4dc41c4d331ed963a99ba296ae4383cad5f7358fc15ef81e369d515818d39 +size 51616527 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6455f5fcdaada07c5882b8953bfd4b40b84ce548 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bcc63c4859ef113ae379025165241e97ecc2b0dd489bf48369dc618ac527cc0 +size 51616015 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cec76f0c611f6dc7f82cb04dfa5b045507243e3a --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd243ed1a9777c48c66d3c7ee48bc2eb2c3fe1b0a230825ac8c8e438426d56d +size 51616527 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b17624bb461d8c17e02f61852e96f9d551357744 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0cb1110ada292150945dbf808037c119ccbe81e5d541d19f2fb0cd231bf272e +size 51616015 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9e15028b97010c06d1bf9753597f490074bc6a5 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd73186df919d979a182fecd4511c5f9ff71b7f023ba6b67687e08d1fb62e07 +size 51616527 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..39195d39f2877a65e30966269e096476dd374920 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2957e84b748d8304d20b4d0660610da343952f817b15e83902ded3685e906f3e +size 51616015 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b50a90e2c648811412473bf068344ad754cd139 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f3cf615a1e06e967b87cd403e231e0acc6fafd678aba563894d7befb2fd3e7 +size 51616517 diff --git a/checkpoint-18/global_step18/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-18/global_step18/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6b74ada1d9c860065038aeb148a7e9e9cec2fc2 --- /dev/null +++ b/checkpoint-18/global_step18/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4346564ec1f2cfb09c46cb287c61a12c80e36bd8c448e6de70ef27d7531d8c5 +size 51616005 diff --git a/checkpoint-18/rng_state_10.pth b/checkpoint-18/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..209283f0e1df5e8cdde295283afcac55cda6f51e --- /dev/null +++ b/checkpoint-18/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bfb71aca295d3a9df70251f37e7a45c547f1b0bb71504a4763383ef59aa1d6b +size 16404 diff --git a/checkpoint-18/rng_state_11.pth b/checkpoint-18/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..bdac0864892d5e50fe080c0d8e2ece1c57997d6e --- /dev/null +++ b/checkpoint-18/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dace9df7a2552713bfae9a6efe7eea551e5c913c157597d903551f0ab6f144b +size 16468 diff --git a/checkpoint-18/rng_state_12.pth b/checkpoint-18/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..b56a95d1156edc7a151f7482525828d372959f42 --- /dev/null +++ b/checkpoint-18/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94d28e5f1dbb97cb1d3904428e618e6d4ebc3b2d0c79442362add66eac5eac2 +size 16340 diff --git a/checkpoint-18/rng_state_13.pth b/checkpoint-18/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..76a309ade04bf15ef99976e2723fce12e352027e --- /dev/null +++ b/checkpoint-18/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee9c622910a3d4575c1dc035e53fb35864805b6de48f9e93533b822baeeca4c3 +size 16468 diff --git a/checkpoint-18/rng_state_14.pth b/checkpoint-18/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0531fff4ec1b6c1e22ccb16a618f34de2117b8e --- /dev/null +++ b/checkpoint-18/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5616e150c08604c90318f760cb9fa407b87e6f897ec7858e2cf4d7eead22117b +size 16404 diff --git a/checkpoint-18/rng_state_15.pth b/checkpoint-18/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b0526d3df4ca00a4fc35164dd2254f11585266b --- /dev/null +++ b/checkpoint-18/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9692693b119ef60801ca0f7dacb1e81220c639963947ce79bc7ccb666d94e7a +size 16404 diff --git a/checkpoint-18/rng_state_8.pth b/checkpoint-18/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..63821ff56a969551aa9a27a3d8b7c05470cad8eb --- /dev/null +++ b/checkpoint-18/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dae17e503c60eb95bde356a40cb5bd5a323d0e67eea1316ee1f00887ffe448dc +size 16389 diff --git a/checkpoint-18/rng_state_9.pth b/checkpoint-18/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d07da0cc4ba2c9573c6719ecb1f04cec8d00976 --- /dev/null +++ b/checkpoint-18/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaf8b2363d3718380c104da296ad25cefbc6b2ee372132566fb04a6575769d00 +size 16389 diff --git a/checkpoint-18/trainer_state.json b/checkpoint-18/trainer_state.json index a1c1a4655ab577f58a2feda407b25c2a37f8a847..4581e6296e397edb0e50965c1236e116214abdf4 100644 --- a/checkpoint-18/trainer_state.json +++ b/checkpoint-18/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, diff --git a/checkpoint-18/training_args.bin b/checkpoint-18/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-18/training_args.bin +++ b/checkpoint-18/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-2/adapter_config.json b/checkpoint-2/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-2/adapter_config.json +++ b/checkpoint-2/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..85a6105e230dff0d36a1373769f024de7fbc0f31 --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a6f24db9afffa3a6f5ebc353b22b93bcbea35336c154271b5b8706094d02184 +size 51616527 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f065b400d18fbed6d261cef9f3cabc9647b99ce --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439486dfe06ce8257728282413d31ae4af882e05ade604335df33244c8ae4f07 +size 51616015 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4cea46498ae8b77636da978fe3e588a4d333c65 --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe318ca333efabd4a85db22d99ae9fdf429ed04f2e468e8dc5061bef1627db61 +size 51616527 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..75264e6ee343dcea193f4776ce4a6aee8775359b --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4642c8053b029774958096b603b62d5b8c8aed4f3ce51ac014fb8705f6b8fd +size 51616015 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3cee1f4180205b7be38ab8a0f9b83ab6ed6c3cd --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2706117555df58b9f403152321b571aa6c08ad84a95a6dacbb86d4979650ebb5 +size 51616527 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9063df3196aab908d30f59b91e520f998ae593a7 --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0347ec2a6931584f892e499fa9bf96754525a3d97815c16e117abbd23195901c +size 51616015 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec665f3ed1cf7f0235e3b5e1700c9a707c9928d0 --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ca43575be21b2e2fbb69c9e02d9f1485dbedd9e8404265cba1fa29ae16a8564 +size 51616517 diff --git a/checkpoint-2/global_step2/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-2/global_step2/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f98535c977ac619fe3529a62f8bdf8eb09d0387 --- /dev/null +++ b/checkpoint-2/global_step2/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd0669a9bfcbe6be1244880d2abe6845ae5c3926be8c80e80e774e6fc388e90 +size 51616005 diff --git a/checkpoint-2/rng_state_10.pth b/checkpoint-2/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..79eb61395825159586072922910064d09b331123 --- /dev/null +++ b/checkpoint-2/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ce90c404b84d63828ae29ca175c450165ea842b77f85e4eebe7eaa0483bdd2 +size 16404 diff --git a/checkpoint-2/rng_state_11.pth b/checkpoint-2/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..204ef9628e235764cf0d0503994bfd8bbdc9e7b9 --- /dev/null +++ b/checkpoint-2/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d297941ef98b249c91a864954e22b03ca94c417596b0b52f9f80a9a7c89cd938 +size 16468 diff --git a/checkpoint-2/rng_state_12.pth b/checkpoint-2/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..13cf71f978478808cfa2f1d5a6b3b62805753253 --- /dev/null +++ b/checkpoint-2/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4700cca18c83676b96d6a8793246671fabcda92d1fe4a3ca0b6e2a35e3cb87 +size 16340 diff --git a/checkpoint-2/rng_state_13.pth b/checkpoint-2/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b34ccf15883100c21022c371e83140e9cace097 --- /dev/null +++ b/checkpoint-2/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0202fa703c86b65feaac739d41b503abf942533888a7cd72ae88f0b69fcb830 +size 16468 diff --git a/checkpoint-2/rng_state_14.pth b/checkpoint-2/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..907efbbe3c44c82ebcf7532470d00e706045510d --- /dev/null +++ b/checkpoint-2/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:493c94db0acd6e30a9161ed3a053fde1fe7de99bffce8e0defee9a3506f4b0ab +size 16404 diff --git a/checkpoint-2/rng_state_15.pth b/checkpoint-2/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..2279bc82e3d5b9060900e8d65d47cfb959187dd5 --- /dev/null +++ b/checkpoint-2/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59d09cc122cf299b31c320dbb96c9128889ac10f0ca930f82b3b0d6847fffb4 +size 16404 diff --git a/checkpoint-2/rng_state_8.pth b/checkpoint-2/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..d69e579f099c7449edee6ca1447a1dd457de5e3a --- /dev/null +++ b/checkpoint-2/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7771fcfa6d1cf79bc519c5e51b8306ea5a4948a4705b01104cb58728551384 +size 16389 diff --git a/checkpoint-2/rng_state_9.pth b/checkpoint-2/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..f31345014040f2ad8b35d0cfe2d2873b885250a0 --- /dev/null +++ b/checkpoint-2/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e4d846b2fc99106869afcfe2dd1b8913bf0b6ff768fff19c2d8117bc06cb48 +size 16389 diff --git a/checkpoint-2/trainer_state.json b/checkpoint-2/trainer_state.json index f1eb74f328226054adf4f3fb18e6428f343d668d..cc36da2c33e4a28606796c2458a219824cd1bd43 100644 --- a/checkpoint-2/trainer_state.json +++ b/checkpoint-2/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 } diff --git a/checkpoint-2/training_args.bin b/checkpoint-2/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-2/training_args.bin +++ b/checkpoint-2/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-20/adapter_config.json +++ b/checkpoint-20/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..113483259cb94b0464a98e7ec1140dc6bbc215dd --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36fddadfb9525fcb4aab01dcad03f0e2c1bc5c0f85898f430748a44b7b3dc79 +size 51616527 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b0e901d44e2ae49cd65098f31a17a7e41c1749e --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64bb2c5976a011943b1cb7d17945b701ff8ecd3b3c30e04f2779bd12692ed64 +size 51616015 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..145ee9e57fbe80ad4d236a7301754a94132996fa --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee63dd5c15b3f015f22eed5faf73d464d2ff1093559b4e1753e5519d9335ce4 +size 51616527 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bac5dcbf409464524b10a2fdf3a30d2ae8494907 --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0620ac2bd3cf4caae6c60f340ff92514189e90a46abe5db566e8604f5c55acc +size 51616015 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88a31dc01b056e67674d0b9e0745a4c7064ba23d --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b118e902a2102dfd2ac07f3c40ed8e33f0b6eee7d1ecbc0fe42a92f9aed2d3bd +size 51616527 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..82c280aa69b130758b07ea55be589431bbb467de --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a102cf26daf24dc29f169c043664d08c59726f0905a5c4d707a2e7c7980b8e88 +size 51616015 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..32d5e1f29f2c7ef852ff1f442ca50c150367731b --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003e31aee9cc09916013f5fd7a40b4684b14d56a823910a481c6fdfbb49efac5 +size 51616517 diff --git a/checkpoint-20/global_step20/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-20/global_step20/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bf2f656622bf52957761f13b54763318f2d4971 --- /dev/null +++ b/checkpoint-20/global_step20/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b81a4e712cca2ebe30803b1adc2475302dd52e71c9454fc7d38b3dab2a885da +size 51616005 diff --git a/checkpoint-20/rng_state_10.pth b/checkpoint-20/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..e27b6b9bbeae4584cfaecbceb879bb7f4af797a7 --- /dev/null +++ b/checkpoint-20/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b29b59930055a530066973301f5ebcb844cd5d4dc9df889ccbfd8821e817386 +size 16404 diff --git a/checkpoint-20/rng_state_11.pth b/checkpoint-20/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0cac0507edc85b0dff4ed1c92e6bafd204269c8 --- /dev/null +++ b/checkpoint-20/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7daea1ba20ad39502481106da1ec63ee160a04260ac50445f7a857fba5b7a7 +size 16468 diff --git a/checkpoint-20/rng_state_12.pth b/checkpoint-20/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb18debb1b7dbc8d94aa9d4f82c77cbe54b847ef --- /dev/null +++ b/checkpoint-20/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed06a88bbb6aa32421136e2000ab2ef6fece4673618e93e18201c8ad99aceba2 +size 16340 diff --git a/checkpoint-20/rng_state_13.pth b/checkpoint-20/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..638ad937630fb58f8c3ab4303bfb46520855cc21 --- /dev/null +++ b/checkpoint-20/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79ff6ef24f95375adaf58970325515bb63612812ba39bd84fbc8307946d83af +size 16468 diff --git a/checkpoint-20/rng_state_14.pth b/checkpoint-20/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..263df9ac34073f52be91614c2d587b84e0eec745 --- /dev/null +++ b/checkpoint-20/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aac4471d70657cbd88f3d6b6e5695f0f52560e69ef23995b4266f4f4a482dbfc +size 16404 diff --git a/checkpoint-20/rng_state_15.pth b/checkpoint-20/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a4f533c70baf83702ce4244c8131d6aab6fe189 --- /dev/null +++ b/checkpoint-20/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9e1dd709bd69254d5f82f68271e14e26c80047ee3fa105924df7e72b301036 +size 16404 diff --git a/checkpoint-20/rng_state_8.pth b/checkpoint-20/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..a7335c1f15987f80016c6d7a8c6233d04708453e --- /dev/null +++ b/checkpoint-20/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23dfeead79d052ac0e398ee9b8819767fea6d556f8975463365e0af6385f396f +size 16389 diff --git a/checkpoint-20/rng_state_9.pth b/checkpoint-20/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..0df9d1e97e9ef94ab1d3caf9d7cf4daa5643fc69 --- /dev/null +++ b/checkpoint-20/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6da5c2e12c6524ae71c8427679a3bfc2dca92367dd660fffc98ceb2ae303f9 +size 16389 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json index 799cd565c5d6a9d014372dae066e8e90a4c1adf3..a6123962584e4b002aab697407126e40d2fc1302 100644 --- a/checkpoint-20/trainer_state.json +++ b/checkpoint-20/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 } diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-20/training_args.bin +++ b/checkpoint-20/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-22/adapter_config.json b/checkpoint-22/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-22/adapter_config.json +++ b/checkpoint-22/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d26252d5723cf9a8cb8cddc63910ee12dc619cb --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598400952cd714c45c68cc5a4e9f97141628004462e11f8d536d6b69fb20a51c +size 51616527 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2a7619a3266c61ab86b1239904e058242de9309 --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881d0d2a81b552888fbea794ae31fc78407591c8daef4067af10304bdedbaaf1 +size 51616015 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..341efb419b32d4bbb323a861b66e8bbeb23143b4 --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05e4e37de4772ed3b045430a3df08016962d4ca375a4fa7be8c8f3a966dc738 +size 51616527 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5122d03a8d3238ad9cff15df91e8c5d49d328734 --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a34779680a7de11bed4f7d850facf3ef2ace19ed73e244373aae728e6c708b1 +size 51616015 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88eef4784fc5e7ad73eb4d0120d92dad02ab171b --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb47b8dae4985d33a26741990e2d27bfa3dfa3c74e0bd9c7cfd282e7073a4078 +size 51616527 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d4135a28a2f1cebb6d9b8aa4abd1b68c08b8fe5 --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefd645c321e84db6c9fd0e6d29dbe00f79501e7218eaa0cf6f18dd9366d7dea +size 51616015 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b64f0b4936d1aa7ffdb7c1a47731a4ad3b44b8e --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6c6afb08e7c8ab07046af049aa11e729ab44593a6a581e96ba568368476f94 +size 51616517 diff --git a/checkpoint-22/global_step22/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-22/global_step22/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..98d41fa9875b1931eeac14af070c8970ad4e5152 --- /dev/null +++ b/checkpoint-22/global_step22/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40fa389ac44eb7a34557ddaebdce0dde77f836859db81e80af161b2b86633d3 +size 51616005 diff --git a/checkpoint-22/rng_state_10.pth b/checkpoint-22/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..d67608decfe7265362eb4990dcffbffd9f62d263 --- /dev/null +++ b/checkpoint-22/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47760cdc12259d9a50b6d849b64414d6f5c16a07e63b59877b8138568240b24b +size 16404 diff --git a/checkpoint-22/rng_state_11.pth b/checkpoint-22/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..e55ea0ecc0794e305fda9efbd9f9366f38247330 --- /dev/null +++ b/checkpoint-22/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b66e8b731bab3e6000e7260135c203fbe440c40d6e1f366e50fbfc21f298307 +size 16468 diff --git a/checkpoint-22/rng_state_12.pth b/checkpoint-22/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..29d36b908257f21563ae69bd3893f1fc7d0fb12e --- /dev/null +++ b/checkpoint-22/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2fae6ec24a2dccce53636e144fa34230fc93ef0d89931364e3bfac8aad19679 +size 16340 diff --git a/checkpoint-22/rng_state_13.pth b/checkpoint-22/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5f0c977c02620aeeb3a7b15e20a6a37d94fcb1a --- /dev/null +++ b/checkpoint-22/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d026438dc5d00bd58a524c73cfa2f336efb9a4fa4eabe49c7dbb05a89d3721f +size 16468 diff --git a/checkpoint-22/rng_state_14.pth b/checkpoint-22/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..f81ecc8d026c7e111faf639edc1cb0ba141f99dd --- /dev/null +++ b/checkpoint-22/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405804a53b2377b9198bf0cf82c55921de8b7f8bd3c695f89b3d2a87d7ee61d2 +size 16404 diff --git a/checkpoint-22/rng_state_15.pth b/checkpoint-22/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..f3a5ddd002d78ccaa8b81f1a847d8eab3fd6da94 --- /dev/null +++ b/checkpoint-22/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc7dcdb2e8252f781d4bef64b518ed9c6c435b93b5f61ce1d4a59be094e7406 +size 16404 diff --git a/checkpoint-22/rng_state_8.pth b/checkpoint-22/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..6057aa4eb35a20a9877dbdb7016819e59eea35d0 --- /dev/null +++ b/checkpoint-22/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d39811f76163f8559c0206c710516e05bda903dc17e1a1045ea3ac8a7d0bda4 +size 16389 diff --git a/checkpoint-22/rng_state_9.pth b/checkpoint-22/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..38f75a4cd4b1f5b132760167e972aeb7cb694343 --- /dev/null +++ b/checkpoint-22/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97bb2c6d3b38c8896aa9a0fcec9698713a2189688614b928d331d68c4a2c2dda +size 16389 diff --git a/checkpoint-22/trainer_state.json b/checkpoint-22/trainer_state.json index 38dfc6c5cc5295ebde051bb509900e4bfe319f90..00588ae573da776f25af27a488a14e7cc18fa07b 100644 --- a/checkpoint-22/trainer_state.json +++ b/checkpoint-22/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 } diff --git a/checkpoint-22/training_args.bin b/checkpoint-22/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-22/training_args.bin +++ b/checkpoint-22/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-24/adapter_config.json b/checkpoint-24/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-24/adapter_config.json +++ b/checkpoint-24/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd6f6e6677ff8863c2da8f51ef9858e0023d40e7 --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0a53e02b6fd09ff0376fe2c3080e926bdb877d9d049afb30cee2b0db6cce10 +size 51616527 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff977acaa4315d11e4dcfc833dcae228727033bd --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de17a0d706af91ce6f334f9246f2f05d0f74d1e8ff5979742f76213c87b902e +size 51616015 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..84ba7ff8a8b593f31e8dd4b755f00947faba841c --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37492a52fb884443fa1762c1632e818599b98e1c74438447b4aa4aa86ab27f69 +size 51616527 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09bd9c478c8dc9cdc2fb30d5b1788d10008eb217 --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc1cca7d5bb4cb8c85df6be324d50f70f10b6b83774bb33ab9a8c865137fe6e +size 51616015 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e14ebc4813eda204d9c83fb902ac8951572078b5 --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e98dca5ae34fc8d1b4b671dbc3e9572bd248b572644219841f022797860cff +size 51616527 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97e35cff069e4d7e477ef60b448d66d80134a1c8 --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048dc3d647af507c05df9ba1fbdc7c501404a2c4413bde555bcca1e698683f9b +size 51616015 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b41eac0fe91ab8f6bd726d44f97fe643dffff6a --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac5b57d777fb9ebdff01b96bacf555e37c684c9a901d64fe6d75c833494c189 +size 51616517 diff --git a/checkpoint-24/global_step24/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-24/global_step24/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3d948de39324608ece11fc1eb57d3852182e112 --- /dev/null +++ b/checkpoint-24/global_step24/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b91818e585f869498ba39bdf6c356c5c8adb03a9f72a10b730f2fbf88dbca8 +size 51616005 diff --git a/checkpoint-24/rng_state_10.pth b/checkpoint-24/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..36d5f45e52cecf5e404a8d727844f2ddd96e6ef8 --- /dev/null +++ b/checkpoint-24/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ecb3e191c5df5c665493855cffd2d1e5ff4c96d7ea819f02635f247672a6d36 +size 16404 diff --git a/checkpoint-24/rng_state_11.pth b/checkpoint-24/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..72f1d6b92699309ebd52f4b186c143fbd8adc908 --- /dev/null +++ b/checkpoint-24/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5445cd82e83e29c316a1613cc7e3cee7bef6a3c95344a373f18eb3257282771 +size 16468 diff --git a/checkpoint-24/rng_state_12.pth b/checkpoint-24/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..8dc950567300b56a4900b8edcd84768d1638a7c0 --- /dev/null +++ b/checkpoint-24/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4094115b1e0154ab334fba6d1143ef9a5d453b6a7863a70c5e502841fec9227e +size 16340 diff --git a/checkpoint-24/rng_state_13.pth b/checkpoint-24/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..5453f47b49ee7714b2ba003c7dfb1b0aa0b89512 --- /dev/null +++ b/checkpoint-24/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3893bdf79eb8d4704b86a0de6c1f7301213e94ff170eeddb570d9e20c2b75764 +size 16468 diff --git a/checkpoint-24/rng_state_14.pth b/checkpoint-24/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..3db96a7a406bb99c9d54f942d346cd114d286655 --- /dev/null +++ b/checkpoint-24/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65be2da1150bacb8df13fcbf40b89d95f8348ed267e01a7c0e9ea956c801db99 +size 16404 diff --git a/checkpoint-24/rng_state_15.pth b/checkpoint-24/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..04b60618d669086f58a35792f6a63d248e252e7b --- /dev/null +++ b/checkpoint-24/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6308a9b4bea121ff2f1d738b5da40dcd631d79afce628d8c5ebe389faf77410 +size 16404 diff --git a/checkpoint-24/rng_state_8.pth b/checkpoint-24/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f37e8e3121290a83a3aeba49f0f5ab0e445fce3 --- /dev/null +++ b/checkpoint-24/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d950b3e7aabec14a6e57b00778d6817ef6353580d1e4a6d3fec790e9f0779d4 +size 16389 diff --git a/checkpoint-24/rng_state_9.pth b/checkpoint-24/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..34f03676dcf1d8a22a427ed32c8c6c2f5f26318c --- /dev/null +++ b/checkpoint-24/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:121cec3d0f6e7624d27f7065afac795ed55da54583f7948ce93e793894bf1dfe +size 16389 diff --git a/checkpoint-24/trainer_state.json b/checkpoint-24/trainer_state.json index 6882365a9f6cb165794d215b4d3e0b03b2c144aa..71fe7f2e41930e700b7ad6623db68a42930e3853 100644 --- a/checkpoint-24/trainer_state.json +++ b/checkpoint-24/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0642, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 diff --git a/checkpoint-24/training_args.bin b/checkpoint-24/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-24/training_args.bin +++ b/checkpoint-24/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-26/adapter_config.json b/checkpoint-26/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-26/adapter_config.json +++ b/checkpoint-26/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ff60373c9ecb6e61de446bd1ab13472abe3044c --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801a85a0e1476cd19e88e3843df65652db5a50e4e871a195f31a756242c5e3d6 +size 51616527 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..38f5eb0b300002458422fa7be09c9ba088dbc5ed --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4b688b0874d39ab6ceb12e9e8c1ea649505f46e490ab70709c23901ffbfb67 +size 51616015 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3964ac47f4518d10592573f160b540f96291c72 --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400fd6e82908e7a675fd8524e2e05e339bfb009785da636e07a164da6e2b9ecf +size 51616527 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2ac57b44d1da7323f0b6d2cb3afdfa6a8f0def4 --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af89265315a202ad68814efeba7050be8534ba627f1c444cae700d49805f9b90 +size 51616015 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d5224d2a03d8f85e08bc5ceaffcaf166f4049b0 --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb353d11a0b6d3045ca36e014a82c2e40a245d5bd0d6ce8c23854d0a93decdc +size 51616527 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8572a53142c8364c82f0ca13d2575a91be2c2121 --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7b9038b183edbbe2ec13fc45877c065134f3925969463f0edce9618a220d7a +size 51616015 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f40961a3dac71c086cda225c007d18a257507b51 --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7670250d48b62d7431e3894cff47ee0fdb54118d16f515a7732634aca4320e +size 51616517 diff --git a/checkpoint-26/global_step26/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-26/global_step26/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..221dac97f9c42f92454a710125af3153c38a9967 --- /dev/null +++ b/checkpoint-26/global_step26/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ebc74bbef8430aabf14e15c97d0ad763ca3d7490c051485a055fbef359d121 +size 51616005 diff --git a/checkpoint-26/rng_state_10.pth b/checkpoint-26/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..6eae9f37f92da908ed704822627bbb0ab9585b4b --- /dev/null +++ b/checkpoint-26/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ef60f2df0efb47e44c8db0bf6d09e595d2e67a62b387f257d81f55c379e754 +size 16404 diff --git a/checkpoint-26/rng_state_11.pth b/checkpoint-26/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..e518b6de402f28e149691004c715b800ae8d6600 --- /dev/null +++ b/checkpoint-26/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d4fc99bc1f1a8eed5cd503af643590da0dcf7a0ecb92befd7b7bc70d2dceab +size 16468 diff --git a/checkpoint-26/rng_state_12.pth b/checkpoint-26/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..3558d6f961e68c17ddccb7fca269456d21ea0d23 --- /dev/null +++ b/checkpoint-26/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73854faa643c5f83fd8529ae3e63479e7facca8d558fa789907190db28fd092b +size 16340 diff --git a/checkpoint-26/rng_state_13.pth b/checkpoint-26/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..98c514d922918659b36805fc3c7cbc1c86d8a3bc --- /dev/null +++ b/checkpoint-26/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02f2bc07bf041c8fb0866e5f1ac47e7eec84535026c9aa267af83b360a8a794 +size 16468 diff --git a/checkpoint-26/rng_state_14.pth b/checkpoint-26/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c2b052619ebe67c18bfd663d008992b78806749 --- /dev/null +++ b/checkpoint-26/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1eb387ba70b002b2dfb78511d9760c88b2ccae4417230238ebb2f788bb837c0 +size 16404 diff --git a/checkpoint-26/rng_state_15.pth b/checkpoint-26/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..831594186c8df1b007a8b20dffb670fd88acd482 --- /dev/null +++ b/checkpoint-26/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8f1be958d8884e6f695f92cfb5dba7165c1bac13f541b2195cd8409e2e72c4 +size 16404 diff --git a/checkpoint-26/rng_state_8.pth b/checkpoint-26/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..70462a6b70cdffe1c76724cb24684629b6754c78 --- /dev/null +++ b/checkpoint-26/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc05a59587ec6d87c9828a86f4e87f14923d6c69c8b953fd3093827260dc788 +size 16389 diff --git a/checkpoint-26/rng_state_9.pth b/checkpoint-26/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..7308dedd63a3c7f04f3711dd51410f9a22cdcc60 --- /dev/null +++ b/checkpoint-26/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efcc1f076d254c46ecb32078e55c373926a30c09d64b09ef9dccf2d77cad37da +size 16389 diff --git a/checkpoint-26/trainer_state.json b/checkpoint-26/trainer_state.json index 879395d69f8fcc796e0f0b4c11b379c5c04ad9cb..001905ec4de0241ac02cb975479cb8b9bb85f854 100644 --- a/checkpoint-26/trainer_state.json +++ b/checkpoint-26/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0642, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 26, "train_speed(iter/s)": 0.000421 } diff --git a/checkpoint-26/training_args.bin b/checkpoint-26/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-26/training_args.bin +++ b/checkpoint-26/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-28/adapter_config.json b/checkpoint-28/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-28/adapter_config.json +++ b/checkpoint-28/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d1096d7dbbd3689647bfeaa6492684176c95b67 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609e827c285f91861445188809c59ac249ac4b9194bd68635d596f6d1b9868bd +size 51616527 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..558c6db54c1fd44dec069bc2773062933e83e691 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25b33e42927a90a5dbba9d2775d981a898e05713710a1161e4120e1b2a52f56 +size 51616015 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2be0d988e7a69794a71a1018cdd0aa7c3b39eb58 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49afeebf83c259dcc827da3060b7c381b7fd04f528660567ef06f2a8ed7d313 +size 51616527 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9572b36a05ed29b371d8c0210886ad28f15c6b6 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90afc415c23e2f4fa34bb26812ba616745cc07811a54889bf05bd48a1333b517 +size 51616015 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2227a38f36e5b92e8c040d75ec4f91001bfa8664 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d83b00fa23197d373f7eb22df63674de64d04e3b6086dba9d04a100bf7b39c89 +size 51616527 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fefd5487f2166a1b91a42bbdf89dfdcab8b2a5d3 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36fcb39c00a7be86d0f831adbd78503640417b51050772cace334754505a1d84 +size 51616015 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2435ffb273ee0869630031f6651ecc681e955b94 --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6380c3d71a6eddc41b06a10c107cbf64f7b564a788af4fcf0cb576bf7ad80a +size 51616517 diff --git a/checkpoint-28/global_step28/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-28/global_step28/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9097315b87f7a66016be52f3ffab0e5da9e18a8e --- /dev/null +++ b/checkpoint-28/global_step28/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeba0ef24e99a50eed638080550ea6d9b50e66da4e542e2e36ac685b89f946c +size 51616005 diff --git a/checkpoint-28/rng_state_10.pth b/checkpoint-28/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..14d99bdd24524aaabec756e1731c700072f32723 --- /dev/null +++ b/checkpoint-28/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ed69976527ed11a6ca880e6f48e65aef7bd9971082afe0160a456a3061df5c +size 16404 diff --git a/checkpoint-28/rng_state_11.pth b/checkpoint-28/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..f875fa0a06563167967a83649242b9c986af2c50 --- /dev/null +++ b/checkpoint-28/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4127fefbe0cd10427691987e4b63e06fba9abb8409f035e2de7d7ac8bf133994 +size 16468 diff --git a/checkpoint-28/rng_state_12.pth b/checkpoint-28/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..19b57e49079ff259a6a11baeda6ad47cb3f63c50 --- /dev/null +++ b/checkpoint-28/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13976411f1d6a18792e41abda38d55f1d72c3e90569ed98a22e364a39b5f0034 +size 16340 diff --git a/checkpoint-28/rng_state_13.pth b/checkpoint-28/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..64b43c58a28113c818029779bb09f6bc87390e98 --- /dev/null +++ b/checkpoint-28/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36f50c3876e9d2d98728afd8de9a9002485c8cdb125800f48695da5fe3f0a34b +size 16468 diff --git a/checkpoint-28/rng_state_14.pth b/checkpoint-28/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..0917365b998f87d8615907da2ae94c97f7b361f6 --- /dev/null +++ b/checkpoint-28/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b24aa0a8eafd5311a4e64c987d1683cbd4f71eabc42db0ae2ed803cca8a1ffa +size 16404 diff --git a/checkpoint-28/rng_state_15.pth b/checkpoint-28/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..6cf8fb78c58928258a9e831cb792554f0633f62f --- /dev/null +++ b/checkpoint-28/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af3a8ab0aa5cdd73d4a7a810259d1e1de1e2d9be8596edf43ca01ae436a6021 +size 16404 diff --git a/checkpoint-28/rng_state_8.pth b/checkpoint-28/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..12bab23ad94091feecca09e75c59f21670ec3aa1 --- /dev/null +++ b/checkpoint-28/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af831d96982618317e7e64e158680795f25323ecdc1264cabc7aa492494c5e4b +size 16389 diff --git a/checkpoint-28/rng_state_9.pth b/checkpoint-28/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..37492cafae6ad398b4ff48de6731cb173ceeb9dc --- /dev/null +++ b/checkpoint-28/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e187dd470471d2e0ca582e6801e32ea487689e61fd668841e9c283fe40d0b2 +size 16389 diff --git a/checkpoint-28/trainer_state.json b/checkpoint-28/trainer_state.json index 54f298b29417457381943a331bc7ae1242c31d16..5ffa656555aedc46214a3328f7b650204c2802b0 100644 --- a/checkpoint-28/trainer_state.json +++ b/checkpoint-28/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 }, @@ -143,7 +143,7 @@ "kl": 0.0963134765625, "learning_rate": 9.924038765061042e-05, "loss": -0.05842069163918495, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.255859375, "reward": 0.03643610421568155, "reward_std": 0.11898956261575222, @@ -159,7 +159,7 @@ "kl": 0.1185302734375, "learning_rate": 9.865224352899119e-05, "loss": -0.06491819024085999, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 10, "train_speed(iter/s)": 0.000436 }, @@ -171,7 +171,7 @@ "kl": 0.1275634765625, "learning_rate": 9.789947561577445e-05, "loss": -0.04600231721997261, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.361328125, "reward": 0.023204635945148766, "reward_std": 0.10593634657561779, @@ -185,7 +185,7 @@ "grad_norm": 0.05781339108943939, "learning_rate": 9.698463103929542e-05, "loss": -0.05069056898355484, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 12, "train_speed(iter/s)": 0.000439 }, @@ -200,7 +200,7 @@ "eval_reward_std": 0.10685288906097412, "eval_rewards/CosineReward": 0.03234308212995529, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1025.9048, + "eval_runtime": 1025.9045, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 12 @@ -213,7 +213,7 @@ "kl": 0.151123046875, "learning_rate": 9.591080534401371e-05, "loss": -0.02191038429737091, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.419921875, "reward": 0.035983758978545666, "reward_std": 0.11553369648754597, @@ -229,7 +229,7 @@ "kl": 0.169189453125, "learning_rate": 9.468163201617062e-05, "loss": -0.022672578692436218, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 14, "train_speed(iter/s)": 0.000427 }, @@ -241,7 +241,7 @@ "kl": 0.166748046875, "learning_rate": 9.330127018922194e-05, "loss": -0.059799157083034515, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.4765625, "reward": 0.03584331553429365, "reward_std": 0.11829411797225475, @@ -257,7 +257,7 @@ "kl": 0.16748046875, "learning_rate": 9.177439057064683e-05, "loss": -0.06071458384394646, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 16, "train_speed(iter/s)": 0.000431 }, @@ -269,7 +269,7 @@ "kl": 0.1787109375, "learning_rate": 9.01061596377522e-05, "loss": -0.04504441097378731, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.027318883687257767, "reward_std": 0.10441224090754986, @@ -283,7 +283,7 @@ "grad_norm": 0.005998397711664438, "learning_rate": 8.83022221559489e-05, "loss": -0.045487549155950546, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 18, "train_speed(iter/s)": 0.000432 }, @@ -311,7 +311,7 @@ "kl": 0.1820068359375, "learning_rate": 8.636868207865244e-05, "loss": -0.03466903418302536, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.466796875, "reward": 0.04069916973821819, "reward_std": 0.11991005763411522, @@ -327,7 +327,7 @@ "kl": 0.19287109375, "learning_rate": 8.43120818934367e-05, "loss": -0.03502114117145538, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 20, "train_speed(iter/s)": 0.000424 }, @@ -339,7 +339,7 @@ "kl": 0.17626953125, "learning_rate": 8.213938048432697e-05, "loss": -0.008662773296236992, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.5625, "reward": 0.04996980866417289, "reward_std": 0.13849420100450516, @@ -355,7 +355,7 @@ "kl": 0.178955078125, "learning_rate": 7.985792958513931e-05, "loss": -0.008743642829358578, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 22, "train_speed(iter/s)": 0.000426 }, @@ -367,7 +367,7 @@ "kl": 0.1796875, "learning_rate": 7.74754489035403e-05, "loss": -0.03423420712351799, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.583984375, "reward": 0.034468831261619925, "reward_std": 0.11841745302081108, @@ -381,7 +381,7 @@ "grad_norm": 0.014131724834442139, "learning_rate": 7.500000000000001e-05, "loss": -0.03426633030176163, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 24, "train_speed(iter/s)": 0.000427 }, @@ -396,7 +396,7 @@ "eval_reward_std": 0.10456253588199615, "eval_rewards/CosineReward": 0.04339282959699631, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1045.0642, + "eval_runtime": 1045.0616, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 24 @@ -409,7 +409,7 @@ "kl": 0.1800537109375, "learning_rate": 7.243995901002312e-05, "loss": -0.02097315341234207, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.6171875, "reward": 0.03010205877944827, "reward_std": 0.10742511600255966, @@ -425,7 +425,7 @@ "kl": 0.18408203125, "learning_rate": 6.980398830195785e-05, "loss": -0.02103913575410843, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 26, "train_speed(iter/s)": 0.000421 }, @@ -437,7 +437,7 @@ "kl": 0.174560546875, "learning_rate": 6.710100716628344e-05, "loss": -0.03593946248292923, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "response_clip_ratio": 0.513671875, "reward": 0.04752760287374258, "reward_std": 0.14935147762298584, @@ -453,7 +453,7 @@ "kl": 0.182373046875, "learning_rate": 6.434016163555452e-05, "loss": -0.03595500811934471, - "memory(GiB)": 187.02, + "memory(GiB)": 180.29, "step": 28, "train_speed(iter/s)": 0.000422 } diff --git a/checkpoint-28/training_args.bin b/checkpoint-28/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-28/training_args.bin +++ b/checkpoint-28/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-4/adapter_config.json b/checkpoint-4/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-4/adapter_config.json +++ b/checkpoint-4/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73a4aa748418fe1409a414f5c0523828b7552b7d --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bc5753ad3b61718e61b22986f8ac9f106805d07878bcd164ae4da7c79b8e7b +size 51616527 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2567f99280fff6d11d4c8096ff9821548c8402ec --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea890f498af3353eb6112c49c13838f3f619c63b6fda0cff575e9aef45cb915 +size 51616015 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0824507b0b5fff1022797269cc506d425c550c8 --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1622a1cfce394d527ef12b4bb3b58b38f7a3291839ceffa1e9330c4b3938bda +size 51616527 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cb7b933ee6d4e5f75c4a7aa4d455a62fd986d92 --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9211f4061c2e77b33902fa56ba7e4005ca607f32d1994ed8b721f69020df35e4 +size 51616015 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9a1b60cbb7408e2791a2ae7d6a6c38ef43ea0a2 --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94ca418088e9b9033b300ac6319165bf5bc06437f9e2c2ee12c2564d151a508 +size 51616527 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5ca3378f70bc1838f421c75d19e8c908771a3f5 --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2a001860ab26d672ebf8e8ff68c6976fee09dffd0ae1342d927fbd7d2a4c07 +size 51616015 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0394cf60f976d5499d17bd3d88c6de228b532d4c --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71e00d93be31bf712465057a808b74a815dbb9a289522caaea4a7ba60ae2556 +size 51616517 diff --git a/checkpoint-4/global_step4/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-4/global_step4/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..315a253e2270397438a4408944e9806efd4be348 --- /dev/null +++ b/checkpoint-4/global_step4/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d8a7f1efd29603accdceab7de5e6c38caee988fc810fd60982effa349b9b6d +size 51616005 diff --git a/checkpoint-4/rng_state_10.pth b/checkpoint-4/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..b377c85e62796bea9188f016f4c5d61b4ca9124e --- /dev/null +++ b/checkpoint-4/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a26dbb4f0b59aff633854b1ef7d68bd70f2b27775a364010163150ee6c5aa5e +size 16404 diff --git a/checkpoint-4/rng_state_11.pth b/checkpoint-4/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a9b99ffb4d29385ffebf594997d9652d8170f45 --- /dev/null +++ b/checkpoint-4/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b262d37c9455a1a10bfa326f257abf337e8bb649bcd55650f4e917fc821761 +size 16468 diff --git a/checkpoint-4/rng_state_12.pth b/checkpoint-4/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..133be6f0b1856b3296c20223dbe85fcf62183c21 --- /dev/null +++ b/checkpoint-4/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d55735a14379b8163d8ed4727bef0979dee636e70db51ee4a4de30858278e28 +size 16340 diff --git a/checkpoint-4/rng_state_13.pth b/checkpoint-4/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..aae9100e08116372cd19ea3075d2af98705a91d9 --- /dev/null +++ b/checkpoint-4/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ac202c33a75089619def13c61ebc42b8c972654f3d5577cf073cbc75fc14ed +size 16468 diff --git a/checkpoint-4/rng_state_14.pth b/checkpoint-4/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..9978095dd989a111fea7749273fe354afcd1268c --- /dev/null +++ b/checkpoint-4/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746800fdd3b820ff8db3891971ceb3c2f753fe4850e87882883f3ee0746dc1e4 +size 16404 diff --git a/checkpoint-4/rng_state_15.pth b/checkpoint-4/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..011bb70d24f4d0b4a81dfdd900c5346f6ffb94d0 --- /dev/null +++ b/checkpoint-4/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163a5d59690857db4aeca3f59e8e952ab50bcf74179a8acc7d918c60b4b165f0 +size 16404 diff --git a/checkpoint-4/rng_state_8.pth b/checkpoint-4/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d3ffc1d919a3478a475fffa0600b558a75aa8fd --- /dev/null +++ b/checkpoint-4/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4941b18e78fb63e524570d4244387a57c999e81ce473ed8874455de58d65c155 +size 16389 diff --git a/checkpoint-4/rng_state_9.pth b/checkpoint-4/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..4db5ad3a73481bbf95e54b59ed64cbd2e1860300 --- /dev/null +++ b/checkpoint-4/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8693dd7d4fc7fa1acd312bbf38b5f2d3e351cc165de77fae296d1c5627f7893 +size 16389 diff --git a/checkpoint-4/trainer_state.json b/checkpoint-4/trainer_state.json index b95d617400d7530f7b064a18f7bc7cdc945a2aef..dc9c3073d2c9e1b5260f61f624b6fe4dc378bcd3 100644 --- a/checkpoint-4/trainer_state.json +++ b/checkpoint-4/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 } diff --git a/checkpoint-4/training_args.bin b/checkpoint-4/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-4/training_args.bin +++ b/checkpoint-4/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-6/adapter_config.json b/checkpoint-6/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-6/adapter_config.json +++ b/checkpoint-6/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..83f552b60f923fb153981cb7e8efc970f95dcb3c --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d792dcb50c73e87bd51a12b327fcc3e1f387d0f52f8de9c0aa8c6f4378192a +size 51616527 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcbf77504606b9b3934804d0ed186986f05c0180 --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83805da8668dd720578394605b1b8eaeeeec7a617272c8a43b6cb7a9eefe447d +size 51616015 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..baaffb63390a6454ecb733a5e66b0e7903b8e51b --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee91be46678fc39f4fcbb1840a125a2740c947c7a19383e33bb14ff27ea7af32 +size 51616527 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac3fcd6bb04641096baf8ec8ecf77dbbce1978bb --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2e9921e07a7e5bef9162e5c9b5a39e634f5a504f116f36bbd380333bd16720 +size 51616015 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebb7078c8698a6a5fc7c715635e969744a1cc46d --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd3ffe8508685c6c4e868c738a87f8edd39b3f2b5204aad4cae517fd7a30672 +size 51616527 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff8f479f85b006363929e5a870d43b0126d8d9c1 --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9794d79ad22b95e4f01e4bf1b0a429dcfd13466dc5fba9ef96bf7d50ed8b188e +size 51616015 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4170558df6192d1acf4e1011fefdb6e5b73475e --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d07f24064b6b112272a34e37e47c51d5ab8adf6ab6a14fa219d6a5b9ab7c9b9 +size 51616517 diff --git a/checkpoint-6/global_step6/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-6/global_step6/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac9f2605f30f215444fb69d775523b7c3f5d455 --- /dev/null +++ b/checkpoint-6/global_step6/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5aa8b3f9ad488a64816c97d0511a1c5772b996ded8f9a60d13c72c4e353183a +size 51616005 diff --git a/checkpoint-6/rng_state_10.pth b/checkpoint-6/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..a76073426382bde782ecfb00015ff154bc6d9f79 --- /dev/null +++ b/checkpoint-6/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3acaa4a0c0ad53062a9350eed249f3c983eb5fbf22118def24f6bce8e639697 +size 16404 diff --git a/checkpoint-6/rng_state_11.pth b/checkpoint-6/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a8fa24cc09f556e7388add3a1567d6fa5e89b28 --- /dev/null +++ b/checkpoint-6/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3039f1ed791e35487ba4d26a4bc2dfa016f1e96e2dbbec0d102f3c1e6cfa5cf9 +size 16468 diff --git a/checkpoint-6/rng_state_12.pth b/checkpoint-6/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc572062ae11ea0b65be1aafcb9e80177d2223de --- /dev/null +++ b/checkpoint-6/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce182012d7aef410d7c5796c3dd8da21f18db9965e70f87b7af15c0d7f8ac9d0 +size 16340 diff --git a/checkpoint-6/rng_state_13.pth b/checkpoint-6/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..847084c3b4e894a0ebad8f2be0509ecc65076c34 --- /dev/null +++ b/checkpoint-6/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9964f11203652745781a95ba3962d2f8f3eb1b616fba7fd3cfb2c03006f09e5f +size 16468 diff --git a/checkpoint-6/rng_state_14.pth b/checkpoint-6/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0daf35f3e9c71a61bdfcee70f67955e4b417aca --- /dev/null +++ b/checkpoint-6/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a7945eaf5d51f2512b5d3e3bdd113954581b20bf19ff1626431834517008aa4 +size 16404 diff --git a/checkpoint-6/rng_state_15.pth b/checkpoint-6/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..39428c3ceb634fb0acc10a4864e48fddb8b2a123 --- /dev/null +++ b/checkpoint-6/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64b3f063e4b4ff68d20f9b2f9fb79cad1850877879b472cffc61f8e2cdf3fba +size 16404 diff --git a/checkpoint-6/rng_state_8.pth b/checkpoint-6/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e3e3528f57c9406163f11680003698080e1d520 --- /dev/null +++ b/checkpoint-6/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3796fbc895aa60db0da8b3138e98472225cdb653460fde634db2585530dcc5 +size 16389 diff --git a/checkpoint-6/rng_state_9.pth b/checkpoint-6/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..492ec5932095200ce328b1d4a10936bcc597da61 --- /dev/null +++ b/checkpoint-6/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb51989f656ffd748b4fd7640dad95f0a4d38d419a84ae5eaaae0e8d01953a8 +size 16389 diff --git a/checkpoint-6/trainer_state.json b/checkpoint-6/trainer_state.json index d4540222f113714a1953b0e161b891639c52298f..b3b3cc55dc3b19e5b194f4b07b4f5db7d73b0de8 100644 --- a/checkpoint-6/trainer_state.json +++ b/checkpoint-6/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 diff --git a/checkpoint-6/training_args.bin b/checkpoint-6/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-6/training_args.bin +++ b/checkpoint-6/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809 diff --git a/checkpoint-8/adapter_config.json b/checkpoint-8/adapter_config.json index 9a6753254d3cb15865ffb290b9553d85b57dac9a..5601e58d3aec7aef62bafb11ddb8a04c414c4f52 100644 --- a/checkpoint-8/adapter_config.json +++ b/checkpoint-8/adapter_config.json @@ -23,13 +23,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "down_proj", + "o_proj", + "q_proj", "v_proj", "up_proj", - "k_proj", "gate_proj", - "o_proj", - "q_proj", - "down_proj" + "k_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9cfa5aaef09a399a09932d6950ae5b1905f6eb4 --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5e682ee534c791d3dca690a444fbe5cc0e7bc17ec630135eba64daa7206045 +size 51616527 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90480a140ddc8969a4880c817499c2f9a13a97b8 --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99c30179e42dde9c29655e1a82d8e97d33ced8bc47397ac1d007bfcb5ed04a8 +size 51616015 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4a58e06d765ffaec0ed3846dfa0cbad54745862 --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb9c3d7d353de5b55a375b40e97c9ce8571f6cfe0a9534c18698bfce333106c +size 51616527 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9aca9b4319a6b9bdb968d64f81347f3a2bc2265a --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40d96e34216d7f96b82099e90cfd288253a2b23de2c823fd4b197b0ffb307f4 +size 51616015 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c1eb90420094480567fe9276b82a59cf679b31e --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1ae43a522994a1bf3eb33bb2c26e4a5ed178185c3932a82c70607a9a6fa915 +size 51616527 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0355b177893948136a7288a7e43f82a2d054696a --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699a5f6b502345048af4340759b8c386ab6e4b02e001e13ac95a52ab561eba1b +size 51616015 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..320c1a6ff6507fd9e99923a0707e62d9bba31764 --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d300b0eb13eb8db4497f6f6bf53a8a4236c6795b2ad94c2bdab9ba90e9dce6d8 +size 51616517 diff --git a/checkpoint-8/global_step8/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-8/global_step8/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c01dbfe37882201af332210b37fabdff05191bb --- /dev/null +++ b/checkpoint-8/global_step8/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e74ff0473a7bf94a3b46ffb9451ec7a8e3ddd1b05c9d853b2772e49cf458cd2 +size 51616005 diff --git a/checkpoint-8/rng_state_10.pth b/checkpoint-8/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb8ae63f0f0742e36861fe8147f7ea5f40d8cde2 --- /dev/null +++ b/checkpoint-8/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346734dae927a58fe77914a7901f8e568dafb21eccc4fffd41d74af82f5f0d53 +size 16404 diff --git a/checkpoint-8/rng_state_11.pth b/checkpoint-8/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..c33012a9b2569ddb33552e0c45c1a19f18db022a --- /dev/null +++ b/checkpoint-8/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d0d086ef98082343fe5c2c1628a0e43391a99ec17cf22457a5bf7402453fcec +size 16468 diff --git a/checkpoint-8/rng_state_12.pth b/checkpoint-8/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..90f425e2bbd99496420de1968ae91f229707fa7b --- /dev/null +++ b/checkpoint-8/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7f880d14b4bcdc9ced59a737f45b717e5d17564d9850c3ae98ec864fc37b61 +size 16340 diff --git a/checkpoint-8/rng_state_13.pth b/checkpoint-8/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..355e2bda2dd35c0b409cfa118c38b86c2b9da883 --- /dev/null +++ b/checkpoint-8/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01e57bd4e2d6eadf08ba595fcd4efd6a912f6ca31cf02b2a3d95f9cb49759fa +size 16468 diff --git a/checkpoint-8/rng_state_14.pth b/checkpoint-8/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..a975a52af111127b964c1d2654fbb5d9110229ff --- /dev/null +++ b/checkpoint-8/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24fd1d9e98546e364e77bfb37adff2ec4d1e5d2215538d530377821041a560cc +size 16404 diff --git a/checkpoint-8/rng_state_15.pth b/checkpoint-8/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e209586b383679d52fce9e483a9a61a3f82b285 --- /dev/null +++ b/checkpoint-8/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce85b05d529954126b6d07796c9357c00ffefaa8873d685f9f9ba08c13c0ff8e +size 16404 diff --git a/checkpoint-8/rng_state_8.pth b/checkpoint-8/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..3990e75fa57cb4e05f644c47faba72dd5d1773da --- /dev/null +++ b/checkpoint-8/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f871d4f2e46fd16f60cdada189387d6072a41bcfb7d986b29e1acac56f2bcca0 +size 16389 diff --git a/checkpoint-8/rng_state_9.pth b/checkpoint-8/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9116506b51d0013f407affb4aab3b095677a284 --- /dev/null +++ b/checkpoint-8/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:285a0511e9992f8af322db0b983b709b0246ef3aae819a690f891b00b02ec673 +size 16389 diff --git a/checkpoint-8/trainer_state.json b/checkpoint-8/trainer_state.json index 3a6b398642c37165b24d317105212cefd3a27a22..60713415d250188c737244a7e24c1300414b789f 100644 --- a/checkpoint-8/trainer_state.json +++ b/checkpoint-8/trainer_state.json @@ -17,7 +17,7 @@ "kl": 0.0, "learning_rate": 1.6666666666666667e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.11328125, "reward": -0.002658387296833098, "reward_std": 0.06134121119976044, @@ -33,7 +33,7 @@ "kl": 0.0, "learning_rate": 3.3333333333333335e-05, "loss": -0.11016345024108887, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 2, "train_speed(iter/s)": 0.000466 }, @@ -45,7 +45,7 @@ "kl": 9.50181856751442e-07, "learning_rate": 5e-05, "loss": -0.06604708731174469, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.13671875, "reward": 0.0006296975770965219, "reward_std": 0.07172460854053497, @@ -61,7 +61,7 @@ "kl": 1.1101365089416504e-05, "learning_rate": 6.666666666666667e-05, "loss": -0.06727766245603561, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 4, "train_speed(iter/s)": 0.000458 }, @@ -73,7 +73,7 @@ "kl": 0.00017762184143066406, "learning_rate": 8.333333333333334e-05, "loss": -0.09315311908721924, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.119140625, "reward": -0.005135859013535082, "reward_std": 0.07994875870645046, @@ -87,7 +87,7 @@ "grad_norm": 0.18263348937034607, "learning_rate": 0.0001, "loss": -0.1041698157787323, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 6, "train_speed(iter/s)": 0.000458 }, @@ -102,7 +102,7 @@ "eval_reward_std": 0.08769983053207397, "eval_rewards/CosineReward": 0.012996694073081017, "eval_rewards/RepetitionPenalty": 0.0, - "eval_runtime": 1030.1126, + "eval_runtime": 1030.1223, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "step": 6 @@ -115,7 +115,7 @@ "kl": 0.017406463623046875, "learning_rate": 9.991540791356342e-05, "loss": -0.051375165581703186, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "response_clip_ratio": 0.1484375, "reward": 0.004909618757665157, "reward_std": 0.08167182095348835, @@ -131,7 +131,7 @@ "kl": 0.089599609375, "learning_rate": 9.966191788709716e-05, "loss": -0.05105742812156677, - "memory(GiB)": 176.98, + "memory(GiB)": 180.29, "step": 8, "train_speed(iter/s)": 0.000433 } diff --git a/checkpoint-8/training_args.bin b/checkpoint-8/training_args.bin index db6dc02fc187938d09e0a6626a08092ad8f42c54..642b9465d1983574a63f0a9f23bf7996bf8a7eda 100644 --- a/checkpoint-8/training_args.bin +++ b/checkpoint-8/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044 +oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412 size 9809