Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- config.json +65 -0
- global_step184795/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- global_step184795/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
- global_step184795/mp_rank_00_model_states.pt +3 -0
config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage19-hyw/checkpoint-176750",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"MiniYuLanModelForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": true,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dim_model_base": 1920,
|
| 10 |
+
"dim_model_base_attn": 64,
|
| 11 |
+
"dim_model_base_init": null,
|
| 12 |
+
"dim_model_base_lmh": 1,
|
| 13 |
+
"dim_model_base_logits": 1920.0,
|
| 14 |
+
"dim_model_base_lr": 256.0,
|
| 15 |
+
"down_proj_alpha": 0.03450327796711771,
|
| 16 |
+
"embed_tokens_alpha": 1,
|
| 17 |
+
"embedding_ln": false,
|
| 18 |
+
"embedding_rmsln": false,
|
| 19 |
+
"eos_token_id": 2,
|
| 20 |
+
"gate_up_proj_alpha": 0.3651483716701107,
|
| 21 |
+
"gradient_checkpointing_step": 11,
|
| 22 |
+
"hidden_act": "silu",
|
| 23 |
+
"hidden_size": 1920,
|
| 24 |
+
"hidden_states_shrink": 0.18708286933869706,
|
| 25 |
+
"init_scale_o": 1,
|
| 26 |
+
"initializer_range": 5e-05,
|
| 27 |
+
"input_layernorm_alpha": 1.0,
|
| 28 |
+
"intermediate_size": 4800,
|
| 29 |
+
"k_proj_alpha": 0.3651483716701107,
|
| 30 |
+
"layer_norm_eps": 1e-06,
|
| 31 |
+
"lm_head_alpha": 1.0,
|
| 32 |
+
"ln_scale": 1,
|
| 33 |
+
"max_position_embeddings": 4096,
|
| 34 |
+
"model_reproduce": "transformer",
|
| 35 |
+
"model_type": "miniyulan",
|
| 36 |
+
"norm_alpha": 1.0,
|
| 37 |
+
"num_attention_heads": 30,
|
| 38 |
+
"num_epochs_trained_before_this_epoch": 18,
|
| 39 |
+
"num_hidden_layers": 56,
|
| 40 |
+
"num_key_value_heads": 6,
|
| 41 |
+
"num_steps_trained_before_this_epoch": 175066,
|
| 42 |
+
"o_proj_alpha": 0.03450327796711771,
|
| 43 |
+
"post_attention_layernorm_alpha": 1.0,
|
| 44 |
+
"q_proj_alpha": 0.3651483716701107,
|
| 45 |
+
"qk_layernorm": false,
|
| 46 |
+
"rms_norm_eps": 1e-06,
|
| 47 |
+
"rms_type": "llama",
|
| 48 |
+
"rope_scaling": null,
|
| 49 |
+
"rope_theta": 10000.0,
|
| 50 |
+
"scale_emb": 10.0,
|
| 51 |
+
"shrink_alpha": 1,
|
| 52 |
+
"sliding_window": null,
|
| 53 |
+
"tie_word_embeddings": true,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"transformers_version": "4.44.0",
|
| 56 |
+
"use_cache": false,
|
| 57 |
+
"use_emb_alpha": true,
|
| 58 |
+
"use_liger": true,
|
| 59 |
+
"use_norm_alpha": true,
|
| 60 |
+
"use_sliding_window": false,
|
| 61 |
+
"v_proj_alpha": 0.3651483716701107,
|
| 62 |
+
"vocab_size": 99000,
|
| 63 |
+
"wesar_weights": true,
|
| 64 |
+
"z_loss": 0.0001
|
| 65 |
+
}
|
global_step184795/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69f26ea3b2fb7cdf1bb1fe31a2002e83a4e95e3ea97c2d6775919cc4c25a1864
|
| 3 |
+
size 558554482
|
global_step184795/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4a50f737475aabfb5fb821e3afb25753172a9414d4a8e50f9cc7971b6ff4b0e
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4021d8fa1c837c97d4880f30b7b61ddd75c49f5354705b3edf3b5af6b240b51
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a29a9feffd18003f192662fbb24f2d47b2b7e573d4684d072af816d0172f043d
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9eb466e18261bfc23ed97d4ed4690486c2c9b41049b5251a6c9a3522b6653ce7
|
| 3 |
+
size 558554434
|
global_step184795/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f716152501042cd5bbbcb84f48ee9a2544f147dbb7f9b5b17afda087db07f4e
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f92abfbe6a99edf69a9154bf3a98ccc14675996df4e822e2045d8a4d750fc7ee
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cf9861e93f7678b653c540af998eecbb2f61be904ad89f4c426c49d2f32768e
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68399ee0bb1252ea26ed37634c32d802e26eab273aac156de738ded067691eb8
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e33f94b77001b81c750fee02a491de689c3820fdfad25acaba89f691a8b3d9d
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bdb232f7c16ab08879eae1ff605cdd2f79e6ac4242dbe19ab727329430023d5
|
| 3 |
+
size 558554434
|
global_step184795/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa468a2698f4ce78873506941d6964d84c8e227140259570ccf13dc8f9f1fc74
|
| 3 |
+
size 558554418
|
global_step184795/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dc838fa3daed73e04be6d31491c8c95d7df4872adf220e26dc55d6c82a10020
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26393110db32b2e543800685b021ab75af89d8d8bd1f71e81bdc68026c8092fc
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d4bc778a25ac05a431707ab5fdb93e528453cdfbb902f05c9bd818336940ae9
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8363c5ecd1102a3d1cdb22e0ddfdb84ec6392747bdf69b30798ece2094740e81
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf9eadf637af4e08d4ad52973012f60a4574ff409c04515110bce5863595dfaa
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a2a9b5938d0d7eb9c3d54d601a05094517d08e7e41f239553e5146a40c7d2d9
|
| 3 |
+
size 558554434
|
global_step184795/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eeb829252ab6f9b995f5dbd3f18794af22b014f9ea2337312fc7264363c60e2f
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7b128795cea5de0036b4a25f3abce1eb2238ac7eeebf2b09cf3545fb486b47f
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b690d217d6ee4a21e0a0c9d31abcc28c5cfdb8422b159dc89a9a1fdd236fd49
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a7351a5c91a744facf6758a1f5ac309daf365d5d35cf4aa3609fd1fdf6f7bfb
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c126d4f143e5047c597c474ed52686f292ca722c8a9e688a1d881f05a4b5dc8
|
| 3 |
+
size 558554290
|
global_step184795/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2cefd87002c6c09a7d9a34f6007cb4cd99e0ff6a92ca26208eb3689fed5b51d
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b8789459d9fe6c5c71058f05d3843bf08e18e1335d3080cccd34491e157b204
|
| 3 |
+
size 558554434
|
global_step184795/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:304ee8b6cf5521aa49ad93a350967114cd95dd75e45f65ee86d2bb9b8d5539bb
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d7106c71f9a90d5d35793aa00984f76eb21b3c33793b386a227efbc7239604e
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0ca26edf65a3a889467b1df31ec469110c352c03de05ec05293c7bd1bead086
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:358a8dbc78ea2d96a04223a85fce17d3bc8eed66b7f2a3fc7942ec08cc404e30
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e84392a4206e25778212980faaa662f77bd524fd4200431467e9a14ee543b544
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:743d409a65b0c10eccb4d259e65b2a79d1c0169234df39fda81e56e0ff77b2e1
|
| 3 |
+
size 558554434
|
global_step184795/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db0bb7c8178e72f92feafed958a0c15e1a713b2f377f650b4e5bdc21af6f93a1
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64f5a3701aa444b3f0893fbcfd6183d1861c276b8cd847c9b92ddac67655fec1
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:314f2ec5bcaa66926010a570998acaa6cf152cb876f10c516ba4a52123c045ac
|
| 3 |
+
size 558554354
|
global_step184795/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e3bc0f2a69db71b6e164d229d74364ad818f06be5adde6a375172398d465ccd
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d037937fc39df9535124657e11135f0c1a0ded5712a3ae4ad0eb6db9c8d617f9
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91e55c6703f40b4c895d1f208667eb46b4d024e15c13e4e5b6f62b0a0bd9835f
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d6fc91ed93014ffaefdd6f49f7e9f6cf6fdc10ca54451a672500e539c72b0b1
|
| 3 |
+
size 558554434
|
global_step184795/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ea8c893a168241245e05e514af13276a56706d40140baaad5902e123f3cf996
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9e3512188d3d47b69c5b8f495867eb54ed08abb5ac97ade186c90c84065ccfc
|
| 3 |
+
size 558554370
|
global_step184795/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06a9640fe99cc91ff225bca57b450be9479683262bd4bc0311858fe4941b7e8e
|
| 3 |
+
size 558554306
|
global_step184795/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61e22fc10f3a025c520cab05a69c3abddb3764650640fc02e2780d06b889902f
|
| 3 |
+
size 558610626
|
global_step184795/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:365b9a40acc4fa7be5ab4b73820886ef9635ce7f7e191992d715f47daf0562d8
|
| 3 |
+
size 558554290
|
global_step184795/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e46361c28ff6b25e2d9e29b52ae569382b1f71560c72000cbac9ddc5f853436
|
| 3 |
+
size 558554290
|
global_step184795/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79f207503c69ce125bcd2448e3c45aa4a0066694f4cff93c75dc4b2adedae3e2
|
| 3 |
+
size 558554354
|
global_step184795/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ee997b08b4f1395435ae87de0da59ca3911c7e1012a0178c9b0a608d5dbc97c
|
| 3 |
+
size 558554418
|
global_step184795/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:805ff5b677a05366342a6f3ebff58fb1553da99a41624a088fb20b603489b715
|
| 3 |
+
size 558554290
|
global_step184795/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6045fbd8f55ee5a81857b828d3dc3cd39a6f09c11146ccadfd5cc9e1c15c19c7
|
| 3 |
+
size 558554354
|
global_step184795/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6f089ab82c34b1655da0bef15cc27fcba4b8008380ad31b5a74b1210bc50e11
|
| 3 |
+
size 4468641136
|