Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +1 -0
- config.json +65 -0
- global_step223732/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- global_step223732/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
trainer_state.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage22-2/checkpoint-213995-rms_norm",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"MiniYuLanModelForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": true,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dim_model_base": 1920,
|
| 10 |
+
"dim_model_base_attn": 64,
|
| 11 |
+
"dim_model_base_init": null,
|
| 12 |
+
"dim_model_base_lmh": 1,
|
| 13 |
+
"dim_model_base_logits": 1920.0,
|
| 14 |
+
"dim_model_base_lr": 256.0,
|
| 15 |
+
"down_proj_alpha": 0.03450327796711771,
|
| 16 |
+
"embed_tokens_alpha": 1,
|
| 17 |
+
"embedding_ln": false,
|
| 18 |
+
"embedding_rmsln": false,
|
| 19 |
+
"eos_token_id": 2,
|
| 20 |
+
"gate_up_proj_alpha": 0.3651483716701107,
|
| 21 |
+
"gradient_checkpointing_step": 11,
|
| 22 |
+
"hidden_act": "silu",
|
| 23 |
+
"hidden_size": 1920,
|
| 24 |
+
"hidden_states_shrink": 0.18708286933869706,
|
| 25 |
+
"init_scale_o": 1,
|
| 26 |
+
"initializer_range": 5e-05,
|
| 27 |
+
"input_layernorm_alpha": 1.0,
|
| 28 |
+
"intermediate_size": 4800,
|
| 29 |
+
"k_proj_alpha": 0.3651483716701107,
|
| 30 |
+
"layer_norm_eps": 1e-06,
|
| 31 |
+
"lm_head_alpha": 1.0,
|
| 32 |
+
"ln_scale": 1,
|
| 33 |
+
"max_position_embeddings": 4096,
|
| 34 |
+
"model_reproduce": "transformer",
|
| 35 |
+
"model_type": "miniyulan",
|
| 36 |
+
"norm_alpha": 1.0,
|
| 37 |
+
"num_attention_heads": 30,
|
| 38 |
+
"num_epochs_trained_before_this_epoch": 22,
|
| 39 |
+
"num_hidden_layers": 56,
|
| 40 |
+
"num_key_value_heads": 6,
|
| 41 |
+
"num_steps_trained_before_this_epoch": 213995,
|
| 42 |
+
"o_proj_alpha": 0.03450327796711771,
|
| 43 |
+
"post_attention_layernorm_alpha": 1.0,
|
| 44 |
+
"q_proj_alpha": 0.3651483716701107,
|
| 45 |
+
"qk_layernorm": false,
|
| 46 |
+
"rms_norm_eps": 1e-06,
|
| 47 |
+
"rms_type": "llama",
|
| 48 |
+
"rope_scaling": null,
|
| 49 |
+
"rope_theta": 10000.0,
|
| 50 |
+
"scale_emb": 10.0,
|
| 51 |
+
"shrink_alpha": 1,
|
| 52 |
+
"sliding_window": null,
|
| 53 |
+
"tie_word_embeddings": true,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"transformers_version": "4.44.0",
|
| 56 |
+
"use_cache": false,
|
| 57 |
+
"use_emb_alpha": true,
|
| 58 |
+
"use_liger": true,
|
| 59 |
+
"use_norm_alpha": true,
|
| 60 |
+
"use_sliding_window": false,
|
| 61 |
+
"v_proj_alpha": 0.3651483716701107,
|
| 62 |
+
"vocab_size": 99000,
|
| 63 |
+
"wesar_weights": true,
|
| 64 |
+
"z_loss": 0.0001
|
| 65 |
+
}
|
global_step223732/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03348721213561b0b333e6f3ddd79ddb6b25803dddb9393a7b62b90c1ec7e402
|
| 3 |
+
size 558554482
|
global_step223732/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03b7bc77ce6289743b60b38e5ae367ff4b27da8595e39c22e04ecfc7d38e7d69
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b00531d44f288201f7db70d2bb6b8fcb6de155f5980c5ea32ef2ceffefb8cb17
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea53e10498b6d0a34acc33bab4b03a24b7876c043b88b119498612a453491dc6
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb38b220177c265257b038900edadc0b555f8546e0516997abd59f54ee1b154f
|
| 3 |
+
size 558554434
|
global_step223732/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cdeaf173a3e4cd525da4f9b885936419ed2cc77753ef8ac3be9c4b5d05ad3b8
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18ba2912fe2dae7bb1705e013d17c7fa93494f05bb5fe41d5ba5f528d08c9153
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5118fd9039c2b4ccc76f9fbc4bf9f77f4e398cdad1dc4cd3623d2aa411f1230b
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1569283b5827dcc4460599e374eba89006e352e9fa3eb914b1010e59655917c
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3fb594a07cd4e376bd0c8d4a08ef0409ce59bec26b5a9fccbcef96ebe547129
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d98aded1647a793f3c6f60fc72743caf9549ea9407f2e3003ca5efd8cd945fe8
|
| 3 |
+
size 558554434
|
global_step223732/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1caebe95ec268e4176b787e30f8c3c78f2575a1376fb6b63bdb7d994dc3efec9
|
| 3 |
+
size 558554418
|
global_step223732/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69786102b5dd6d9aee88d72483935ea95c0f3873ef7a6f0636f1df7fea400e2d
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea4e43b51eb99bd9c2d501e874f9a03a3152d81ce3824fddad75ef210537aaa5
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9c624048ce42c78c67f18c93a01044a3c4e52a0bc8f5c9b15490e7f23a01fb1
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc90e3c05024cc0ca56385ace056335c84ec11fd44a332a94833176e8c8699b
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa260d45df99c70d040e51b3718e9f15bb04913e5c9cbc8e59acbe21f3315b47
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:577efe16c62bdd002d3fbf363996d60febd5f1d803efee57a7961cc71ec2902b
|
| 3 |
+
size 558554434
|
global_step223732/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fc108a5d788e5e47892b26b4104d07d0b180373b8559734a6acebba15e9cfd5
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:546503eaba78e85554f9346ef061b9decf176907f0ee7e60a6fd9b5db6f09495
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a891fe3ab427f694a36398aa6c2fde0822c7ef1e1a029a54db55d50cac36c1b
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c58c0b15d00cacd2ef4fb25669bae477a7b96b9161ad069cb71a526cd1a3362
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f56b0e9c6ad505679cb104946c0018b8b9d04e55117d5c572cdf02d4bdd9a22a
|
| 3 |
+
size 558554290
|
global_step223732/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db9f9482cb875ff47c827b397944565214d91836bea284e673af49f8d8c87948
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c08bfff7246fc969ce63b444653e80d292db93961f051f70a052eee1c511101
|
| 3 |
+
size 558554434
|
global_step223732/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a2a79c4483e0e4ec44132bc6518957f22677ac25170e2499176686dc7e94c50
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f373b34e426242f684140325c94cf76115d05fc59b4a5aebd5e1f1c5d99d728
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:844b4680adc4630fe7d2d16921b240f8dd91e4544fc4aa728afe4abdb269902b
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7be564c9830110da3716d5b42564c41b1a69ad6ce3e48760832e6aab47a65153
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97f5e446df9bb74a2cc7aee1b70b78d56089c7992b4e13f21ae7b53f9cbecb6c
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38c6e17403ba3412814c59342d2ab117789660ac8c1680cadc5d88560ea0d82c
|
| 3 |
+
size 558554434
|
global_step223732/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:852162242e316fe3abd1abbcd71de2574714b04414f91edef3946eebc1c97fd3
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52774c7a48cab0119668c8aa3f5c86a09585f35acc68634d005fcee62583723f
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1ce7a0b25f3f8875f73a6a8afa8ba58d892483364287c7e1a9c3d510be53d7f
|
| 3 |
+
size 558554354
|
global_step223732/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e75690f8ee4473b8452400f568ef8b2ed3917b7e9d13bec35197f0efe8bf55e
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:966efd8dc214f18e9d0fc17f0d32b3e106398ce0789b57c4d4508d6ba927f447
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abca3fdfc4171c65de4339c4aceb8d921b24cc7bcbb484dc50a546860e4f5f9c
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75d3cbcf2742453f03aa8b92575cab0e89b71d5c065bdba4032a823210e3ed1a
|
| 3 |
+
size 558554434
|
global_step223732/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55e83d414124e1cbf8e691843763ca24411395867234ac3cf8b21ed34e497155
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cf56b3e815fb82e0ae943c4291269811beedcb08c6525a1dc40249a9814446b
|
| 3 |
+
size 558554370
|
global_step223732/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c4a12f735649ecc1f2048442ab76d2cf8e425a3954b9e4e2a00105e9f353680
|
| 3 |
+
size 558554306
|
global_step223732/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b75d91c527d23c1f74911e60b00bf4cda00674c867bf5eda0017125bff3cea3
|
| 3 |
+
size 558610626
|
global_step223732/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75c0bd11ff2e2813e4ad2c296b428f757addbc236dbca61db978065d546dd32e
|
| 3 |
+
size 558554290
|
global_step223732/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77bd05dd544ad915b2eef289228d061c17af23e1e74ccf589bf88098c0017230
|
| 3 |
+
size 558554290
|
global_step223732/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dc57001648514f990f066c23f6add9035e3232228aae0ad8a6ca5f63a1b8f1d
|
| 3 |
+
size 558554354
|
global_step223732/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9abc6e9a03b88eaf3ab3ddb3de4d42e9e2568f20e646826a62493b3c642428ef
|
| 3 |
+
size 558554418
|
global_step223732/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f200c90bd2f2b42e659b1a82942200ca7c6ff134a29070033528ce40e68e8ec
|
| 3 |
+
size 558554290
|
global_step223732/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6a765dc3cb45d4c9ee80e27667172f1a3daba5826e174a2b3318d9854282d00
|
| 3 |
+
size 558554354
|