diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..894c25000fca1fcbfdc4633e05386c7c15f1f2da
--- /dev/null
+++ b/config.json
@@ -0,0 +1,63 @@
+{
+ "_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage13-from-125.75k/checkpoint-126516-rms_norm",
+ "architectures": [
+ "MiniYuLanModelForCausalLM"
+ ],
+ "attention_bias": true,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "dim_model_base": 1920,
+ "dim_model_base_attn": 64,
+ "dim_model_base_init": null,
+ "dim_model_base_lmh": 1,
+ "dim_model_base_logits": 1920.0,
+ "dim_model_base_lr": 256.0,
+ "down_proj_alpha": 0.03450327796711771,
+ "embed_tokens_alpha": 1,
+ "embedding_ln": false,
+ "embedding_rmsln": false,
+ "eos_token_id": 2,
+ "gate_up_proj_alpha": 0.3651483716701107,
+ "gradient_checkpointing_step": 11,
+ "hidden_act": "silu",
+ "hidden_size": 1920,
+ "hidden_states_shrink": 0.18708286933869706,
+ "init_scale_o": 1,
+ "initializer_range": 5e-05,
+ "input_layernorm_alpha": 1.0,
+ "intermediate_size": 4800,
+ "k_proj_alpha": 0.3651483716701107,
+ "layer_norm_eps": 1e-06,
+ "lm_head_alpha": 1.0,
+ "ln_scale": 1,
+ "max_position_embeddings": 4096,
+ "model_reproduce": "transformer",
+ "model_type": "miniyulan",
+ "norm_alpha": 1.0,
+ "num_attention_heads": 30,
+ "num_hidden_layers": 56,
+ "num_key_value_heads": 6,
+ "o_proj_alpha": 0.03450327796711771,
+ "post_attention_layernorm_alpha": 1.0,
+ "q_proj_alpha": 0.3651483716701107,
+ "qk_layernorm": false,
+ "rms_norm_eps": 1e-06,
+ "rms_type": "llama",
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "scale_emb": 10.0,
+ "shrink_alpha": 1,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.44.0",
+ "use_cache": false,
+ "use_emb_alpha": true,
+ "use_liger": true,
+ "use_norm_alpha": true,
+ "use_sliding_window": false,
+ "v_proj_alpha": 0.3651483716701107,
+ "vocab_size": 99000,
+ "wesar_weights": true,
+ "z_loss": 0.0001
+}
diff --git a/global_step136346/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2db5cad78117a6290a7a4fcc58de03add3b4066c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f34097767d7d6e4c72a0adc0bd8679b507783c5dbf96ff1576b6e689e128ec4c
+size 558554482
diff --git a/global_step136346/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7655c47474ac9d1bf8962061bec2459f19c84c46
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6b0d15069d4f8d83e05739a903ee9eef85705753da8bdc9d5ef26513063f9f6
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab41d1c1dafa9106c7aa09952d6354aa12682f1b
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2308b5f24cdc1e447d28eaa75c383d2997f343743746dd3a2ee33797f61d2da1
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d391548e08f54e62d643358d2eb529a5a74d36ec
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a83f324b0c27abe3cc94b58d9e4a6ec8207ecc4fff0d2e19087e50ff1c09a717
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..776a422a1746511e870de9e8321331164f186cb8
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53d2bc4d7983d82e41e5cc1dde8b642a26a197af6267a366d37d29e256f99d70
+size 558554434
diff --git a/global_step136346/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea8090b780ddd985d4e9a0fccebdb7678a27d2a2
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:639c09f2309e583c004044a6586d61a4dac4561106556722d979e4e5bd9c3b52
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb1d039e4cffae2f84d4465be4cfe851ad0ac99c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b2f33fa08471542722411c5f49270015b421414dd63576b7b108ccbeacd0606
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cebd0525348a4a371b2448841684400e87020139
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a05a154f41e993f9b800ca86b00f78fa57f1840aba12895679d9f77f84d0f481
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84c8505bc62b4e1a2d976f4bdbfc171605ecaae8
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c42e045762d2e5a2f7864c11538a1c6bdefbca07d53a87e1c12ede79bd1c7444
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..458984aca29bff37efc67c674b8a04e914d256d9
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adc7e18ac518bfa23ab3a18175ea333ed8e14beda20a746987679e7432d77b0a
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52d1792d325ac4a648b3a158bc33324e2db121e6
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:496e1592455204dbf5aa7eba7d9206807b097cebdfdfc75f343bf9e0e6c62c37
+size 558554434
diff --git a/global_step136346/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f083a2ad18955cb13a14165a5c4243de1b69278c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24cf033eb3fff9bbfd40f46386a9452466be42b93184a27ad3825dd492599467
+size 558554418
diff --git a/global_step136346/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..06e373911d0e17ff354b49466847535116a1f505
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43a450871737799f2d698565f32bdeb8c2b7387fd09f7e1e142e45a4bebd6877
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c5a5e0cc9bc7a27c9dc11dea4b9e28d666097fc
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3da3d8a6aa0bae7e739c7a0f1b809e0b52b654b9cc4e3a2d7f62db4812d657a
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ba3714971eb911e981612925386d836705e7b50
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2741e12ebe2b62d5d74f0368871b0c3e467a8fa892b1830bf88caf3ee4bcf87b
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44ef347b2c9ece1dac0d9870de0dfad817dd0dbc
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a17cb4ff34e93ac4860dfb405413c35c6dfb6055a8d5f22b0b71bde872562aec
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..386d6df7aeeafb9f4e91665fb5add49ac07838e9
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c47b02f0d69dc1676c3260c8f4d26d2d03f8bc8b1ad12dee88aac85b1d93bf14
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bc49b2f1128e2fa7de7ed3b932e7c80c25a9788
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef871538a04ac6df290bdb2ba8b9f4dda2a00af597f22b5838df57c9e633989
+size 558554434
diff --git a/global_step136346/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3a8bedcfe76b1c868f5b9fc4e49ae7d2bbdd659
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9f9e98e551f84e37121e22d57bbd4465c68d2f935189d47ee71259d23be77b4
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57ee686b7f9a66bc9df11fad00530a5402d492d6
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e7c55218f29e5e55cd358de2c7061d3fca118f77f5887a1b656eb8e8288144
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80858285834b2c7d82fefa2c49e1581203e79785
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4760ac03174ffe9c108b8d25882aab1eb69678adff70af61d854a40763eb2c2c
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0f5e51e60ab13432fb8d5214415c580691a0ac9
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:035414df309d2b3b017a1529dc59c47965cb75ef530a889b9bd1514200f98300
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eb4b6791552b21b5ecb4a359ddf9b5232931e2ac
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53796fe793a1d8ff30af8d83f0a730e89ff3edf1dd10ee956c7281ce6fc581dc
+size 558554290
diff --git a/global_step136346/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41be3edc260c118d4cbec37caca51df9c53c6d0c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f409ff7010900545bba25ae7c757240677afb2a7eaf3eb1e58e405e86b5bf420
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aec2d6ddfe6632a7ab09ae108383650944bed58e
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d57238ade3be1b5f0be7764bc7710a0526ef7af7c34463afd67c7fe77a27aa0
+size 558554434
diff --git a/global_step136346/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..513675113b929171bb682a35b442b4fb55aec720
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6461684f09b0243ebbe8c39e582aa3e970023aff9cb469e49209f691fb5f53d
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9823d8da23864f90e19984a40b49788c43e506c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd8a07889268f15934cea78f5db66225260ee00dae12ca643faf3d3f7e3e629d
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d3cff634f25dd804d2cc9bdeaa9faa1cbd9db13
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44fbcfd64c622f06c3ad50708be605de66b41401d8e3d87cfad082b12dab07b3
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe7ccc877918fb27557c2ada23326d04d19d4d6b
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:687ebe86d6a60944fffad668b5e897049e4766ad7848d7a0cbb1b64ca4065907
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3011fcaf96407be7f57d65f2b5c307eedac9c3fe
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80275eff07c93cb4ac2e3a010d310d427afa29069302695935e459c434ad418b
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc59a3f60d14c63fac7ca7a8a9ec88eb4ccbee29
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1d81d202eb5be5c24981095486f9219d25e7661e0d2de3699668c963913c712
+size 558554434
diff --git a/global_step136346/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..583edf1fbda7080772d6ea9562de98686ee9b260
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee9be424f3480983e62e7bca7dcab1ae241dd5cff6b8d941791e3c714f2f401b
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19fa84bb33c26b28a97481ca63a2edfdf7b8e395
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:077c07b7029e3998c8bc0a0f79c6ad0bf9c54c7348a4a037275061ccd5f77766
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32b177cd275fdbca91e8e7e7e1de37304fa23dd1
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa2fb459f5f956a954c13fd686bab86dcfa8f63818f18a7c43a6aeff7b09b591
+size 558554354
diff --git a/global_step136346/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd16b22f5d426690811f3d43cdc97c74de4e571b
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32c5c764e9eff34c626e97b06108f905ac1aecd6ac62b6ec7709df151aa226bc
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..092d9e0f17614e1785b75620d43f0fa6eeb8e396
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b26b015ddd74f40a9aa00af42c8b9dcf97b4a0d351424a00b49e5d8c7f430eff
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e17905e0fef0c813495a82e3e30c5231cddcd0c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71fec012bbd30141624763b3e656fe197adbfc1f25cc84a995f6e9a285ca95a0
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c0e78c8e3a4c94f4151378c91a63618f73f06ec
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6a5f10030a6aa54d9f7aa2f609f85d9b7d5511059f1416477e3fef40f448d6d
+size 558554434
diff --git a/global_step136346/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f77d36704055ff1c332c885ef98bc5866fb0da3
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00d55fcfa5caa7f95a9cf70f27dcd7a5b98c02776c4df633d52da7e490dd2296
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3aabfc894e3c8af3c852291f9dc5e4d9d6f1f3c0
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daae8287146bf55a33457e939b3d6643b688a20f4cf4fa520fcb7bea08a7f79d
+size 558554370
diff --git a/global_step136346/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e829ab2a068aff8079151cae85fef6839e9b5cb
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea00d8bd81883857849f0e279c57571c9a61453bcce7b13757725d4cdbc443fa
+size 558554306
diff --git a/global_step136346/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9f12a0dd8c7ea2bc85def975ca3a9e9ec793233
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bdbe75d350746f14ec3bc81f921f0108196fc298dedab9c1969a1aacc13502b
+size 558610626
diff --git a/global_step136346/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e89990c528eca387ae9d9b82553ff268551c588c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:022ef3a43dd59f0b63806197917d0432fcbf15c5997bdc2978657d134971a33e
+size 558554290
diff --git a/global_step136346/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..75cc051366ba642db3058cb86a940dc0c64ac781
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7845ce21cbdf5c911f35ea6041cc5e21120624a9eee8e75f1d3227f325086edb
+size 558554290
diff --git a/global_step136346/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e42a4b2ffbf1205637fee5a66d5bcca346f2c672
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fd6b07a6f23b9ccdfcac6f52ace80da810c4f74a7a4878f488cb6f7d6b220dd
+size 558554354
diff --git a/global_step136346/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb68ab681184301ba557cec46173b2c63098f9a0
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e300e5cfe2bc4132197449cdc84e6af1af25d39401b9d017454dc05aa9ca15e
+size 558554418
diff --git a/global_step136346/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de7bb8bf59d0dfcda891ed17e85fecf1cd65b50c
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdc6b140efc2274ee6fc41c0e7098650a8f8addb648d303cda5b40237b9913e3
+size 558554290
diff --git a/global_step136346/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/global_step136346/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42787e1b684ec4659372f50c4d5c4de5b3fe7c53
--- /dev/null
+++ b/global_step136346/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c56d5dc7491422748cb9439e46af2c10fd719de93fbf8cdb79fdcfd43ca40ac1
+size 558554354
diff --git a/global_step136346/mp_rank_00_model_states.pt b/global_step136346/mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32fe3d02636e50e75d34fad262de562989db6f28
--- /dev/null
+++ b/global_step136346/mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d6db194266f82b58cdbe4310e08b09779068b7173aac56957574d4cb9c0541e
+size 4468641136
diff --git a/latest b/latest
new file mode 100644
index 0000000000000000000000000000000000000000..21dc937643b5bf07f4d1a2423e60aae12233fbe0
--- /dev/null
+++ b/latest
@@ -0,0 +1 @@
+global_step136346
\ No newline at end of file
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0efc084fbeb01b460d06bacaa325501d8b23ab42
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8b48e046519cf41172ac814bad216fca331bea034efda161c14467dc9aefb65
+size 4848661852
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..93efa0d862564bfac68cec7e18d2b451fa56f1aa
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,226270 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 102,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Prepend",
+ "prepend": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 1
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "": {
+ "id": "",
+ "ids": [
+ 1
+ ],
+ "tokens": [
+ ""
+ ]
+ }
+ }
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ },
+ {
+ "type": "Strip",
+ "content": " ",
+ "start": 1,
+ "stop": 0
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": true,
+ "byte_fallback": true,
+ "ignore_merges": false,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "": 3,
+ "": 4,
+ "\n": 5,
+ "\t": 6,
+ "
": 7,
+ "
": 8,
+ "": 9,
+ "": 10,
+ "": 11,
+ "
": 12,
+ "": 13,
+ " | | ": 14,
+ "": 15,
+ "": 16,
+ "": 17,
+ "": 18,
+ "": 21,
+ "": 22,
+ "
": 23,
+ "": 24,
+ "": 25,
+ "": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31,
+ "": 32,
+ "
": 33,
+ "
": 34,
+ "
": 35,
+ "": 36,
+ "": 37,
+ "": 38,
+ "
": 39,
+ "": 40,
+ "": 41,
+ "
": 42,
+ "": 43,
+ "
": 44,
+ "
": 45,
+ "": 46,
+ "": 47,
+ "
": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "0": 52,
+ "1": 53,
+ "2": 54,
+ "3": 55,
+ "4": 56,
+ "5": 57,
+ "6": 58,
+ "7": 59,
+ "8": 60,
+ "9": 61,
+ "+": 62,
+ "-": 63,
+ "=": 64,
+ ",": 65,
+ "。": 66,
+ "!": 67,
+ "?": 68,
+ "、": 69,
+ ":": 70,
+ "¥": 71,
+ ".": 72,
+ "!": 73,
+ "?": 74,
+ "...": 75,
+ "。。。": 76,
+ "。。。。。。": 77,
+ "《": 78,
+ "》": 79,
+ "【": 80,
+ "】": 81,
+ "『": 82,
+ "』": 83,
+ "```": 84,
+ "": 86,
+ "---": 87,
+ "": 88,
+ ";": 89,
+ ".": 90,
+ "=": 91,
+ "<": 92,
+ ">": 93,
+ "-": 94,
+ "+": 95,
+ "%": 96,
+ "‼": 97,
+ "㊣": 98,
+ "/": 99,
+ "|": 100,
+ "": 101,
+ "": 102,
+ "": 103,
+ "": 104,
+ "": 105,
+ "": 106,
+ "": 107,
+ "": 108,
+ "": 109,
+ "": 110,
+ "": 111,
+ "": 112,
+ "": 113,
+ "": 114,
+ "": 115,
+ "": 116,
+ "": 117,
+ "": 118,
+ "": 119,
+ "": 120,
+ "": 121,
+ "": 122,
+ "": 123,
+ "": 124,
+ "": 125,
+ "": 126,
+ "": 127,
+ "": 128,
+ "": 129,
+ "": 130,
+ "": 131,
+ "": 132,
+ "": 133,
+ "": 134,
+ "": 135,
+ "": 136,
+ "": 137,
+ "": 138,
+ "": 139,
+ "": 140,
+ "": 141,
+ "": 142,
+ "": 143,
+ "": 144,
+ "": 145,
+ "": 146,
+ "": 147,
+ "": 148,
+ "": 149,
+ "": 150,
+ "": 151,
+ "": 152,
+ "": 153,
+ "": 154,
+ "": 155,
+ "": 156,
+ "": 157,
+ "": 158,
+ "": 159,
+ "": 160,
+ "": 161,
+ "": 162,
+ "": 163,
+ "": 164,
+ "": 165,
+ "": 166,
+ "": 167,
+ "": 168,
+ "": 169,
+ "": 170,
+ "": 171,
+ "": 172,
+ "": 173,
+ "": 174,
+ "": 175,
+ "": 176,
+ "": 177,
+ "": 178,
+ "": 179,
+ "": 180,
+ "": 181,
+ "": 182,
+ "": 183,
+ "": 184,
+ "": 185,
+ "": 186,
+ "": 187,
+ "": 188,
+ "": 189,
+ "": 190,
+ "": 191,
+ "": 192,
+ "": 193,
+ "": 194,
+ "": 195,
+ "": 196,
+ "": 197,
+ "": 198,
+ "": 199,
+ "": 200,
+ "": 201,
+ "": 202,
+ "": 203,
+ "": 204,
+ "": 205,
+ "": 206,
+ "": 207,
+ "": 208,
+ "": 209,
+ "": 210,
+ "": 211,
+ "": 212,
+ "": 213,
+ "": 214,
+ "": 215,
+ "": 216,
+ "": 217,
+ "": 218,
+ "": 219,
+ "": 220,
+ "": 221,
+ "": 222,
+ "": 223,
+ "": 224,
+ "": 225,
+ "": 226,
+ "": 227,
+ "": 228,
+ "": 229,
+ "": 230,
+ "": 231,
+ "": 232,
+ "": 233,
+ "": 234,
+ "": 235,
+ "": 236,
+ "": 237,
+ "": 238,
+ "": 239,
+ "": 240,
+ "": 241,
+ "": 242,
+ "": 243,
+ "": 244,
+ "": 245,
+ "": 246,
+ "": 247,
+ "": 248,
+ "": 249,
+ "": 250,
+ "": 251,
+ "": 252,
+ "": 253,
+ "": 254,
+ "": 255,
+ "": 256,
+ "": 257,
+ "": 258,
+ "": 259,
+ "": 260,
+ "": 261,
+ "": 262,
+ "": 263,
+ "": 264,
+ "": 265,
+ "": 266,
+ "": 267,
+ "": 268,
+ "": 269,
+ "": 270,
+ "": 271,
+ "": 272,
+ "": 273,
+ "": 274,
+ "": 275,
+ "": 276,
+ "": 277,
+ "": 278,
+ "": 279,
+ "": 280,
+ "": 281,
+ "": 282,
+ "": 283,
+ "": 284,
+ "": 285,
+ "": 286,
+ "": 287,
+ "": 288,
+ "": 289,
+ "": 290,
+ "": 291,
+ "": 292,
+ "": 293,
+ "": 294,
+ "": 295,
+ "": 296,
+ "": 297,
+ "": 298,
+ "": 299,
+ "": 300,
+ "": 301,
+ "": 302,
+ "": 303,
+ "": 304,
+ "": 305,
+ "": 306,
+ "": 307,
+ "": 308,
+ "": 309,
+ "": 310,
+ "": 311,
+ "": 312,
+ "": 313,
+ "": 314,
+ "": 315,
+ "": 316,
+ "": 317,
+ "": 318,
+ "": 319,
+ "": 320,
+ "": 321,
+ "": 322,
+ "": 323,
+ "": 324,
+ "": 325,
+ "": 326,
+ "": 327,
+ "": 328,
+ "": 329,
+ "": 330,
+ "": 331,
+ "": 332,
+ "": 333,
+ "": 334,
+ "": 335,
+ "": 336,
+ "": 337,
+ "": 338,
+ "": 339,
+ "": 340,
+ "": 341,
+ "": 342,
+ "": 343,
+ "": 344,
+ "": 345,
+ "": 346,
+ "": 347,
+ "": 348,
+ "": 349,
+ "": 350,
+ "": 351,
+ "": 352,
+ "": 353,
+ "