Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- config.json +63 -0
- global_step136346/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- global_step136346/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
- global_step136346/mp_rank_00_model_states.pt +3 -0
config.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage13-from-125.75k/checkpoint-126516-rms_norm",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"MiniYuLanModelForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": true,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dim_model_base": 1920,
|
| 10 |
+
"dim_model_base_attn": 64,
|
| 11 |
+
"dim_model_base_init": null,
|
| 12 |
+
"dim_model_base_lmh": 1,
|
| 13 |
+
"dim_model_base_logits": 1920.0,
|
| 14 |
+
"dim_model_base_lr": 256.0,
|
| 15 |
+
"down_proj_alpha": 0.03450327796711771,
|
| 16 |
+
"embed_tokens_alpha": 1,
|
| 17 |
+
"embedding_ln": false,
|
| 18 |
+
"embedding_rmsln": false,
|
| 19 |
+
"eos_token_id": 2,
|
| 20 |
+
"gate_up_proj_alpha": 0.3651483716701107,
|
| 21 |
+
"gradient_checkpointing_step": 11,
|
| 22 |
+
"hidden_act": "silu",
|
| 23 |
+
"hidden_size": 1920,
|
| 24 |
+
"hidden_states_shrink": 0.18708286933869706,
|
| 25 |
+
"init_scale_o": 1,
|
| 26 |
+
"initializer_range": 5e-05,
|
| 27 |
+
"input_layernorm_alpha": 1.0,
|
| 28 |
+
"intermediate_size": 4800,
|
| 29 |
+
"k_proj_alpha": 0.3651483716701107,
|
| 30 |
+
"layer_norm_eps": 1e-06,
|
| 31 |
+
"lm_head_alpha": 1.0,
|
| 32 |
+
"ln_scale": 1,
|
| 33 |
+
"max_position_embeddings": 4096,
|
| 34 |
+
"model_reproduce": "transformer",
|
| 35 |
+
"model_type": "miniyulan",
|
| 36 |
+
"norm_alpha": 1.0,
|
| 37 |
+
"num_attention_heads": 30,
|
| 38 |
+
"num_hidden_layers": 56,
|
| 39 |
+
"num_key_value_heads": 6,
|
| 40 |
+
"o_proj_alpha": 0.03450327796711771,
|
| 41 |
+
"post_attention_layernorm_alpha": 1.0,
|
| 42 |
+
"q_proj_alpha": 0.3651483716701107,
|
| 43 |
+
"qk_layernorm": false,
|
| 44 |
+
"rms_norm_eps": 1e-06,
|
| 45 |
+
"rms_type": "llama",
|
| 46 |
+
"rope_scaling": null,
|
| 47 |
+
"rope_theta": 10000.0,
|
| 48 |
+
"scale_emb": 10.0,
|
| 49 |
+
"shrink_alpha": 1,
|
| 50 |
+
"sliding_window": null,
|
| 51 |
+
"tie_word_embeddings": true,
|
| 52 |
+
"torch_dtype": "bfloat16",
|
| 53 |
+
"transformers_version": "4.44.0",
|
| 54 |
+
"use_cache": false,
|
| 55 |
+
"use_emb_alpha": true,
|
| 56 |
+
"use_liger": true,
|
| 57 |
+
"use_norm_alpha": true,
|
| 58 |
+
"use_sliding_window": false,
|
| 59 |
+
"v_proj_alpha": 0.3651483716701107,
|
| 60 |
+
"vocab_size": 99000,
|
| 61 |
+
"wesar_weights": true,
|
| 62 |
+
"z_loss": 0.0001
|
| 63 |
+
}
|
global_step136346/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34097767d7d6e4c72a0adc0bd8679b507783c5dbf96ff1576b6e689e128ec4c
|
| 3 |
+
size 558554482
|
global_step136346/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6b0d15069d4f8d83e05739a903ee9eef85705753da8bdc9d5ef26513063f9f6
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2308b5f24cdc1e447d28eaa75c383d2997f343743746dd3a2ee33797f61d2da1
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a83f324b0c27abe3cc94b58d9e4a6ec8207ecc4fff0d2e19087e50ff1c09a717
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53d2bc4d7983d82e41e5cc1dde8b642a26a197af6267a366d37d29e256f99d70
|
| 3 |
+
size 558554434
|
global_step136346/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:639c09f2309e583c004044a6586d61a4dac4561106556722d979e4e5bd9c3b52
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b2f33fa08471542722411c5f49270015b421414dd63576b7b108ccbeacd0606
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a05a154f41e993f9b800ca86b00f78fa57f1840aba12895679d9f77f84d0f481
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c42e045762d2e5a2f7864c11538a1c6bdefbca07d53a87e1c12ede79bd1c7444
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adc7e18ac518bfa23ab3a18175ea333ed8e14beda20a746987679e7432d77b0a
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:496e1592455204dbf5aa7eba7d9206807b097cebdfdfc75f343bf9e0e6c62c37
|
| 3 |
+
size 558554434
|
global_step136346/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24cf033eb3fff9bbfd40f46386a9452466be42b93184a27ad3825dd492599467
|
| 3 |
+
size 558554418
|
global_step136346/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43a450871737799f2d698565f32bdeb8c2b7387fd09f7e1e142e45a4bebd6877
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3da3d8a6aa0bae7e739c7a0f1b809e0b52b654b9cc4e3a2d7f62db4812d657a
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2741e12ebe2b62d5d74f0368871b0c3e467a8fa892b1830bf88caf3ee4bcf87b
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a17cb4ff34e93ac4860dfb405413c35c6dfb6055a8d5f22b0b71bde872562aec
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c47b02f0d69dc1676c3260c8f4d26d2d03f8bc8b1ad12dee88aac85b1d93bf14
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ef871538a04ac6df290bdb2ba8b9f4dda2a00af597f22b5838df57c9e633989
|
| 3 |
+
size 558554434
|
global_step136346/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9f9e98e551f84e37121e22d57bbd4465c68d2f935189d47ee71259d23be77b4
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6e7c55218f29e5e55cd358de2c7061d3fca118f77f5887a1b656eb8e8288144
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4760ac03174ffe9c108b8d25882aab1eb69678adff70af61d854a40763eb2c2c
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:035414df309d2b3b017a1529dc59c47965cb75ef530a889b9bd1514200f98300
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53796fe793a1d8ff30af8d83f0a730e89ff3edf1dd10ee956c7281ce6fc581dc
|
| 3 |
+
size 558554290
|
global_step136346/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f409ff7010900545bba25ae7c757240677afb2a7eaf3eb1e58e405e86b5bf420
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d57238ade3be1b5f0be7764bc7710a0526ef7af7c34463afd67c7fe77a27aa0
|
| 3 |
+
size 558554434
|
global_step136346/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6461684f09b0243ebbe8c39e582aa3e970023aff9cb469e49209f691fb5f53d
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd8a07889268f15934cea78f5db66225260ee00dae12ca643faf3d3f7e3e629d
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44fbcfd64c622f06c3ad50708be605de66b41401d8e3d87cfad082b12dab07b3
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:687ebe86d6a60944fffad668b5e897049e4766ad7848d7a0cbb1b64ca4065907
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80275eff07c93cb4ac2e3a010d310d427afa29069302695935e459c434ad418b
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1d81d202eb5be5c24981095486f9219d25e7661e0d2de3699668c963913c712
|
| 3 |
+
size 558554434
|
global_step136346/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee9be424f3480983e62e7bca7dcab1ae241dd5cff6b8d941791e3c714f2f401b
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:077c07b7029e3998c8bc0a0f79c6ad0bf9c54c7348a4a037275061ccd5f77766
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa2fb459f5f956a954c13fd686bab86dcfa8f63818f18a7c43a6aeff7b09b591
|
| 3 |
+
size 558554354
|
global_step136346/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32c5c764e9eff34c626e97b06108f905ac1aecd6ac62b6ec7709df151aa226bc
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b26b015ddd74f40a9aa00af42c8b9dcf97b4a0d351424a00b49e5d8c7f430eff
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71fec012bbd30141624763b3e656fe197adbfc1f25cc84a995f6e9a285ca95a0
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6a5f10030a6aa54d9f7aa2f609f85d9b7d5511059f1416477e3fef40f448d6d
|
| 3 |
+
size 558554434
|
global_step136346/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00d55fcfa5caa7f95a9cf70f27dcd7a5b98c02776c4df633d52da7e490dd2296
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daae8287146bf55a33457e939b3d6643b688a20f4cf4fa520fcb7bea08a7f79d
|
| 3 |
+
size 558554370
|
global_step136346/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea00d8bd81883857849f0e279c57571c9a61453bcce7b13757725d4cdbc443fa
|
| 3 |
+
size 558554306
|
global_step136346/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bdbe75d350746f14ec3bc81f921f0108196fc298dedab9c1969a1aacc13502b
|
| 3 |
+
size 558610626
|
global_step136346/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:022ef3a43dd59f0b63806197917d0432fcbf15c5997bdc2978657d134971a33e
|
| 3 |
+
size 558554290
|
global_step136346/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7845ce21cbdf5c911f35ea6041cc5e21120624a9eee8e75f1d3227f325086edb
|
| 3 |
+
size 558554290
|
global_step136346/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fd6b07a6f23b9ccdfcac6f52ace80da810c4f74a7a4878f488cb6f7d6b220dd
|
| 3 |
+
size 558554354
|
global_step136346/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e300e5cfe2bc4132197449cdc84e6af1af25d39401b9d017454dc05aa9ca15e
|
| 3 |
+
size 558554418
|
global_step136346/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdc6b140efc2274ee6fc41c0e7098650a8f8addb648d303cda5b40237b9913e3
|
| 3 |
+
size 558554290
|
global_step136346/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c56d5dc7491422748cb9439e46af2c10fd719de93fbf8cdb79fdcfd43ca40ac1
|
| 3 |
+
size 558554354
|
global_step136346/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d6db194266f82b58cdbe4310e08b09779068b7173aac56957574d4cb9c0541e
|
| 3 |
+
size 4468641136
|