diff --git a/README.md b/README.md
index bcf413f06e2b28bb06caaf150d32365c94fb6fa5..17525bd4ad6b6621d58ed0781e26cb08c2d8eb2b 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,8 @@ library_name: transformers
model_name: Qwen-sft
tags:
- generated_from_trainer
-- trl
- sft
+- trl
licence: license
---
@@ -26,7 +26,7 @@ print(output["generated_text"])
## Training procedure
-[
](https://wandb.ai/alphatao-alphatao/Gradients-On-Demand/runs/igefnzy4)
+[
](https://wandb.ai/alphatao-alphatao/Gradients-On-Demand/runs/ghwtvbzp)
This model was trained with SFT.
diff --git a/best/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d6d4ec49c7c5ad247da7c12c57ee90724fdfa79
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:469e1bb6e9632b1494555b5f32439a44857fe3cd415a7d4a87e02460f5f24589
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8853e347a136856dc992099840c79ca15bae7cbe
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a28c096e41bc72cd4997909578f24de1c734310d2f4a9b174f8aa4a601ae8
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3553afe29dad4a95b4098d08c26700b138da7cf6
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:445eede74d5503a6083df422a8b0da580240dcf027095932a31235c0700aecf5
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebce19999c6abd63f57a84e8ef6b6f4dfeff123b
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b33a346b60bd372f94bdbc17b2d077f521e3f26861ef02ed61c2c4c9e31c2737
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aaf8c9f0be5258739d804d5a67454de9763dd973
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbb542e3a080adc84c56218b403cbbc8bf5fbb744fc1d862d7b3ef5761d58691
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e1fcca0eca1e900af79fda4c76c300f4d9ba123
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b973ffc311cff9c5ca641ced3dc1773eb210b7c7a62940fc33210c3d66934692
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3374443c3964446db4a3558bad32f9fd19e3f667
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe700d997cb26ad73cd4e0d08ff1fac82f6f1075a10d2d83b7917fa9acc7c030
+size 12286638307
diff --git a/best/global_step200/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/best/global_step200/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd5809c87c1c739f778e3d51ac6a9cf25c67912a
--- /dev/null
+++ b/best/global_step200/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5fee71665931f0dafa4f4f6c3c06f38a676830d5b0d27fc87721ce6de6d77f4
+size 12286638307
diff --git a/best/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..084c3a0b76c6853cbd598551313b77434641f62b
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1342d4dc7a4628d877abf125c3f613a182eafb101b5e7f7b86a5a7cef175f0af
+size 206444
diff --git a/best/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c77e8900f26b1b934547b68f067129dd88eb985c
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dfb1286b375cdebbb10946e259bdb10c194cc4c7739fde1ebe2a5b9c0931101
+size 206444
diff --git a/best/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a06a701ec6d3d714bf266fb7c0a13610f1f778b
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26d2b81375bae7ff93776b351edc5a7d3905352cf863170ac67939eb30b019bf
+size 206444
diff --git a/best/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d320c62c7fca5f7469ed0746b5fc552974a027d
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1775ce4acf9c030a131d2ce2727d243c14094ec098699993786082316eb43c9c
+size 206444
diff --git a/best/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ceb81faf7e65806b65a4ae0cd9c8c7a1df91304
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82d4559b5c1bf131c94346a460b0d689044f4998ea94d90b1fc983861365f4b7
+size 206444
diff --git a/best/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..447da63f74ae31171ec6b9aaa55302bd90e5e8c2
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0068af7d59e510134a707bf9e04711c1efbaf1eb8288eaef739836c5e72a7ab3
+size 206444
diff --git a/best/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..390bab43dd1d1484dad7a480a28dab6ebabfcc88
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d245754563e09c58f14a256494bacd640fb22f19872aa29e0c8e0fe690b89ffa
+size 206444
diff --git a/best/global_step200/zero_pp_rank_7_mp_rank_00_model_states.pt b/best/global_step200/zero_pp_rank_7_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c84b5656a13962aa16df6e5eda0d24aa801377c
--- /dev/null
+++ b/best/global_step200/zero_pp_rank_7_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf6175e0cab44eabd60a29b8eac80ca684dbc24c7e0a21721ddad15c5850a129
+size 206444
diff --git a/best/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea852110202ad094b54023030ff0c20d6c59d556
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bdb4fccfd367cc09ef997ccbc2dc8a3050d8cd68b0a8174b4f9460b2cbdefc1
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b20ed18ff496ba387e73061a33b0ba9a9f2cefd
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c9b87bf6f188098e523e45cb0d8c03b32953bdf47694ac709993d10ca2f63a6
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ceef1d20d1b6cde858de6ae2a643835da853ff8e
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3119cb5fbfe33480d777832b57253231005662916c0f22b2a49c55a1ace1e8
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c78b3a64c9d0e5420a014d49939b0438d32e331
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3cc21c19e09cb2f588a2b804a95ef4395b1b9841b0ee0ab39426f3d94f27a1d
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfd593141d3a9e9261383232fdcf86bee50a9ead
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:effa5c55f7ea8249c6aea9ddc89ac9c297a155731ae4e5848903367f08e6d91d
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f53b55f7977b102715fa2a9ade98d940d9ed16a
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7354a9ebc2b3023c4e94c426035f9e63e817d64c2943b9722b04bb0fc653be49
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b8243115ecf0cb1c9d325f8bc19d3d7ea46fb0a
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e54ecd6a1a1122fff73dbe1700271827a87043a2533bb94b19902be4dcc6a6af
+size 12286638307
diff --git a/best/global_step400/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/best/global_step400/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cb319bfea4710cc0537a1e6519456858966f3f1
--- /dev/null
+++ b/best/global_step400/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15247f8fae2e4cad91c403f0c02f2422fcda69b3e4a416805ec5049b05e4312b
+size 12286638307
diff --git a/best/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..502398e7644cb4bc355c9d0d54b341fddb2db7e6
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b870f2f4baeb7a2fdfc2a4e93b8db1f9ddc8f8f4c92c0112abcedf99c2cca2a
+size 206444
diff --git a/best/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d615e6c82bea4544e3edcf9325dd488f61b78651
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35ef2a12c409ee0f640576170a918519c5fe5c9e3e53577762f7949150b1c116
+size 206444
diff --git a/best/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..360454c37e1397935903f3b3707aba05e92dc286
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ecffb5bc7362f7c6b7080d1a7afd1be2cd193861e0d7e6810f43ee361ff103b
+size 206444
diff --git a/best/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5aab223e6abc52f77cee45fbae719fe8bc76972c
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5570a60df26914e4604c4db34973bbab7ae5596b6475685c13989709aba5cb64
+size 206444
diff --git a/best/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64ed2c7c95e890831ff18a745cc3e0b3d5593148
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b22e39e7fde70ff2976af39286f69f42a82e8cdce47796c551557a077abceca
+size 206444
diff --git a/best/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d73fec80ad689fec4790c856818264b199838f0b
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caab3a7de0a470fd89541b02334ef4583e79d16c5bf1b216a6c8dce447820638
+size 206444
diff --git a/best/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7cfac8101090dbf0b7054b1408875575dfff266c
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c77262ec4d9d2bfaf31980bb6bd7403fd122e3e4d5f8fb5df6bd0033edcd81fa
+size 206444
diff --git a/best/global_step400/zero_pp_rank_7_mp_rank_00_model_states.pt b/best/global_step400/zero_pp_rank_7_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce35fc0603406b128759a80e30e3fb820d58c203
--- /dev/null
+++ b/best/global_step400/zero_pp_rank_7_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:809045676b4efa3eb66f271d58500fc59a2abd7204a17316185499bcb4589fa0
+size 206444
diff --git a/best/latest b/best/latest
index daf5be2c4861b36c6659b05fae8c31547db7f579..e5bdf58d4f29d34e909da25905fad376f73e7c29 100644
--- a/best/latest
+++ b/best/latest
@@ -1 +1 @@
-global_step150
\ No newline at end of file
+global_step400
\ No newline at end of file
diff --git a/best/monitor.json b/best/monitor.json
index 0e67d288197f8190c43e1d0ce182657309e75264..8c8fb63e612e5c4f35c36e9f4d1a6ee296f76ee6 100644
--- a/best/monitor.json
+++ b/best/monitor.json
@@ -1,7 +1,7 @@
{
- "global_step": 150,
- "test1_loss": 0.5349587609413929,
- "test2_loss": 0.6594466084215839,
- "test3_loss": 0.4015509325333617,
- "combined_test_loss": 0.6594466084215839
+ "global_step": 400,
+ "test1_loss": 0.5453783056361186,
+ "test2_loss": 0.6494426287417849,
+ "test3_loss": 0.3703578654676676,
+ "combined_test_loss": 0.6494426287417849
}
\ No newline at end of file
diff --git a/best/training_args.bin b/best/training_args.bin
index a275320c73f948be3ab0a894560d261ebe08c98a..d385c74d624cfd6211863fea37241aff6a3c3860 100644
--- a/best/training_args.bin
+++ b/best/training_args.bin
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:840c9e203e24a719dec7c5250c5553e38d741952e631bfb2dfe7bddec9d6e991
+oid sha256:358bf0a7ae023dadd4b1324f65438f9ae4887b0c804696199d43876e58313b2d
size 7633
diff --git a/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d6d4ec49c7c5ad247da7c12c57ee90724fdfa79
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:469e1bb6e9632b1494555b5f32439a44857fe3cd415a7d4a87e02460f5f24589
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8853e347a136856dc992099840c79ca15bae7cbe
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a28c096e41bc72cd4997909578f24de1c734310d2f4a9b174f8aa4a601ae8
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3553afe29dad4a95b4098d08c26700b138da7cf6
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:445eede74d5503a6083df422a8b0da580240dcf027095932a31235c0700aecf5
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebce19999c6abd63f57a84e8ef6b6f4dfeff123b
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b33a346b60bd372f94bdbc17b2d077f521e3f26861ef02ed61c2c4c9e31c2737
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aaf8c9f0be5258739d804d5a67454de9763dd973
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbb542e3a080adc84c56218b403cbbc8bf5fbb744fc1d862d7b3ef5761d58691
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e1fcca0eca1e900af79fda4c76c300f4d9ba123
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b973ffc311cff9c5ca641ced3dc1773eb210b7c7a62940fc33210c3d66934692
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3374443c3964446db4a3558bad32f9fd19e3f667
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe700d997cb26ad73cd4e0d08ff1fac82f6f1075a10d2d83b7917fa9acc7c030
+size 12286638307
diff --git a/global_step200/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/global_step200/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd5809c87c1c739f778e3d51ac6a9cf25c67912a
--- /dev/null
+++ b/global_step200/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5fee71665931f0dafa4f4f6c3c06f38a676830d5b0d27fc87721ce6de6d77f4
+size 12286638307
diff --git a/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..084c3a0b76c6853cbd598551313b77434641f62b
--- /dev/null
+++ b/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1342d4dc7a4628d877abf125c3f613a182eafb101b5e7f7b86a5a7cef175f0af
+size 206444
diff --git a/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c77e8900f26b1b934547b68f067129dd88eb985c
--- /dev/null
+++ b/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dfb1286b375cdebbb10946e259bdb10c194cc4c7739fde1ebe2a5b9c0931101
+size 206444
diff --git a/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a06a701ec6d3d714bf266fb7c0a13610f1f778b
--- /dev/null
+++ b/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26d2b81375bae7ff93776b351edc5a7d3905352cf863170ac67939eb30b019bf
+size 206444
diff --git a/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d320c62c7fca5f7469ed0746b5fc552974a027d
--- /dev/null
+++ b/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1775ce4acf9c030a131d2ce2727d243c14094ec098699993786082316eb43c9c
+size 206444
diff --git a/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ceb81faf7e65806b65a4ae0cd9c8c7a1df91304
--- /dev/null
+++ b/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82d4559b5c1bf131c94346a460b0d689044f4998ea94d90b1fc983861365f4b7
+size 206444
diff --git a/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..447da63f74ae31171ec6b9aaa55302bd90e5e8c2
--- /dev/null
+++ b/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0068af7d59e510134a707bf9e04711c1efbaf1eb8288eaef739836c5e72a7ab3
+size 206444
diff --git a/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..390bab43dd1d1484dad7a480a28dab6ebabfcc88
--- /dev/null
+++ b/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d245754563e09c58f14a256494bacd640fb22f19872aa29e0c8e0fe690b89ffa
+size 206444
diff --git a/global_step200/zero_pp_rank_7_mp_rank_00_model_states.pt b/global_step200/zero_pp_rank_7_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c84b5656a13962aa16df6e5eda0d24aa801377c
--- /dev/null
+++ b/global_step200/zero_pp_rank_7_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf6175e0cab44eabd60a29b8eac80ca684dbc24c7e0a21721ddad15c5850a129
+size 206444
diff --git a/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea852110202ad094b54023030ff0c20d6c59d556
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bdb4fccfd367cc09ef997ccbc2dc8a3050d8cd68b0a8174b4f9460b2cbdefc1
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b20ed18ff496ba387e73061a33b0ba9a9f2cefd
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c9b87bf6f188098e523e45cb0d8c03b32953bdf47694ac709993d10ca2f63a6
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ceef1d20d1b6cde858de6ae2a643835da853ff8e
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3119cb5fbfe33480d777832b57253231005662916c0f22b2a49c55a1ace1e8
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c78b3a64c9d0e5420a014d49939b0438d32e331
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3cc21c19e09cb2f588a2b804a95ef4395b1b9841b0ee0ab39426f3d94f27a1d
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfd593141d3a9e9261383232fdcf86bee50a9ead
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:effa5c55f7ea8249c6aea9ddc89ac9c297a155731ae4e5848903367f08e6d91d
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f53b55f7977b102715fa2a9ade98d940d9ed16a
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7354a9ebc2b3023c4e94c426035f9e63e817d64c2943b9722b04bb0fc653be49
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b8243115ecf0cb1c9d325f8bc19d3d7ea46fb0a
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e54ecd6a1a1122fff73dbe1700271827a87043a2533bb94b19902be4dcc6a6af
+size 12286638307
diff --git a/global_step400/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/global_step400/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cb319bfea4710cc0537a1e6519456858966f3f1
--- /dev/null
+++ b/global_step400/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15247f8fae2e4cad91c403f0c02f2422fcda69b3e4a416805ec5049b05e4312b
+size 12286638307
diff --git a/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..502398e7644cb4bc355c9d0d54b341fddb2db7e6
--- /dev/null
+++ b/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b870f2f4baeb7a2fdfc2a4e93b8db1f9ddc8f8f4c92c0112abcedf99c2cca2a
+size 206444
diff --git a/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d615e6c82bea4544e3edcf9325dd488f61b78651
--- /dev/null
+++ b/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35ef2a12c409ee0f640576170a918519c5fe5c9e3e53577762f7949150b1c116
+size 206444
diff --git a/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..360454c37e1397935903f3b3707aba05e92dc286
--- /dev/null
+++ b/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ecffb5bc7362f7c6b7080d1a7afd1be2cd193861e0d7e6810f43ee361ff103b
+size 206444
diff --git a/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5aab223e6abc52f77cee45fbae719fe8bc76972c
--- /dev/null
+++ b/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5570a60df26914e4604c4db34973bbab7ae5596b6475685c13989709aba5cb64
+size 206444
diff --git a/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64ed2c7c95e890831ff18a745cc3e0b3d5593148
--- /dev/null
+++ b/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b22e39e7fde70ff2976af39286f69f42a82e8cdce47796c551557a077abceca
+size 206444
diff --git a/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d73fec80ad689fec4790c856818264b199838f0b
--- /dev/null
+++ b/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caab3a7de0a470fd89541b02334ef4583e79d16c5bf1b216a6c8dce447820638
+size 206444
diff --git a/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7cfac8101090dbf0b7054b1408875575dfff266c
--- /dev/null
+++ b/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c77262ec4d9d2bfaf31980bb6bd7403fd122e3e4d5f8fb5df6bd0033edcd81fa
+size 206444
diff --git a/global_step400/zero_pp_rank_7_mp_rank_00_model_states.pt b/global_step400/zero_pp_rank_7_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce35fc0603406b128759a80e30e3fb820d58c203
--- /dev/null
+++ b/global_step400/zero_pp_rank_7_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:809045676b4efa3eb66f271d58500fc59a2abd7204a17316185499bcb4589fa0
+size 206444
diff --git a/latest b/latest
index daf5be2c4861b36c6659b05fae8c31547db7f579..e5bdf58d4f29d34e909da25905fad376f73e7c29 100644
--- a/latest
+++ b/latest
@@ -1 +1 @@
-global_step150
\ No newline at end of file
+global_step400
\ No newline at end of file
diff --git a/logs/events.out.tfevents.1754432204.1506d310068f.1511638.0 b/logs/events.out.tfevents.1754432204.1506d310068f.1511638.0
index 4f858106720efbc4a2a3f0c88ae991b0770e90cd..bee0f1258a63ffa2adda37332c4f40fa2feedc3b 100644
--- a/logs/events.out.tfevents.1754432204.1506d310068f.1511638.0
+++ b/logs/events.out.tfevents.1754432204.1506d310068f.1511638.0
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:6ccc6c583aaaced879971fe03cac73290929c32c385bc5bbfcf980865d0b3354
-size 57175
+oid sha256:eab3348694364e4608a69dcb5ece110f5ab946c81ad74af24ae7a8d4a5998a61
+size 74341
diff --git a/logs/events.out.tfevents.1754438723.1506d310068f.1706120.0 b/logs/events.out.tfevents.1754438723.1506d310068f.1706120.0
new file mode 100644
index 0000000000000000000000000000000000000000..8fae1eb5c28797b05f5b0a4ca45fc15e962b4028
--- /dev/null
+++ b/logs/events.out.tfevents.1754438723.1506d310068f.1706120.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c9821451265e63d184654b45612daf82375f0c54e4b217c29855d3e02768476
+size 138368
diff --git a/training_args.bin b/training_args.bin
index a275320c73f948be3ab0a894560d261ebe08c98a..d385c74d624cfd6211863fea37241aff6a3c3860 100644
--- a/training_args.bin
+++ b/training_args.bin
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:840c9e203e24a719dec7c5250c5553e38d741952e631bfb2dfe7bddec9d6e991
+oid sha256:358bf0a7ae023dadd4b1324f65438f9ae4887b0c804696199d43876e58313b2d
size 7633