Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- config.json +65 -0
- global_step204262/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- global_step204262/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
trainer_state.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage20-remake/checkpoint-194526-rms_norm",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"MiniYuLanModelForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": true,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dim_model_base": 1920,
|
| 10 |
+
"dim_model_base_attn": 64,
|
| 11 |
+
"dim_model_base_init": null,
|
| 12 |
+
"dim_model_base_lmh": 1,
|
| 13 |
+
"dim_model_base_logits": 1920.0,
|
| 14 |
+
"dim_model_base_lr": 256.0,
|
| 15 |
+
"down_proj_alpha": 0.03450327796711771,
|
| 16 |
+
"embed_tokens_alpha": 1,
|
| 17 |
+
"embedding_ln": false,
|
| 18 |
+
"embedding_rmsln": false,
|
| 19 |
+
"eos_token_id": 2,
|
| 20 |
+
"gate_up_proj_alpha": 0.3651483716701107,
|
| 21 |
+
"gradient_checkpointing_step": 11,
|
| 22 |
+
"hidden_act": "silu",
|
| 23 |
+
"hidden_size": 1920,
|
| 24 |
+
"hidden_states_shrink": 0.18708286933869706,
|
| 25 |
+
"init_scale_o": 1,
|
| 26 |
+
"initializer_range": 5e-05,
|
| 27 |
+
"input_layernorm_alpha": 1.0,
|
| 28 |
+
"intermediate_size": 4800,
|
| 29 |
+
"k_proj_alpha": 0.3651483716701107,
|
| 30 |
+
"layer_norm_eps": 1e-06,
|
| 31 |
+
"lm_head_alpha": 1.0,
|
| 32 |
+
"ln_scale": 1,
|
| 33 |
+
"max_position_embeddings": 4096,
|
| 34 |
+
"model_reproduce": "transformer",
|
| 35 |
+
"model_type": "miniyulan",
|
| 36 |
+
"norm_alpha": 1.0,
|
| 37 |
+
"num_attention_heads": 30,
|
| 38 |
+
"num_epochs_trained_before_this_epoch": 20,
|
| 39 |
+
"num_hidden_layers": 56,
|
| 40 |
+
"num_key_value_heads": 6,
|
| 41 |
+
"num_steps_trained_before_this_epoch": 194526,
|
| 42 |
+
"o_proj_alpha": 0.03450327796711771,
|
| 43 |
+
"post_attention_layernorm_alpha": 1.0,
|
| 44 |
+
"q_proj_alpha": 0.3651483716701107,
|
| 45 |
+
"qk_layernorm": false,
|
| 46 |
+
"rms_norm_eps": 1e-06,
|
| 47 |
+
"rms_type": "llama",
|
| 48 |
+
"rope_scaling": null,
|
| 49 |
+
"rope_theta": 10000.0,
|
| 50 |
+
"scale_emb": 10.0,
|
| 51 |
+
"shrink_alpha": 1,
|
| 52 |
+
"sliding_window": null,
|
| 53 |
+
"tie_word_embeddings": true,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"transformers_version": "4.44.0",
|
| 56 |
+
"use_cache": false,
|
| 57 |
+
"use_emb_alpha": true,
|
| 58 |
+
"use_liger": true,
|
| 59 |
+
"use_norm_alpha": true,
|
| 60 |
+
"use_sliding_window": false,
|
| 61 |
+
"v_proj_alpha": 0.3651483716701107,
|
| 62 |
+
"vocab_size": 99000,
|
| 63 |
+
"wesar_weights": true,
|
| 64 |
+
"z_loss": 0.0001
|
| 65 |
+
}
|
global_step204262/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09a8f48d541860b1dca434e8806a1f332cdf6b5724ffdbdcf845fabe058d99d8
|
| 3 |
+
size 558554482
|
global_step204262/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:389d97104a3500bdf38a6fc11a7282cd9d4724f5806fdf61ec661c29395a88b2
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ee512cf0219e0350014ff2bf8e23d3a1fb8c7e80d6706edd2bb4a9bd6d1547d
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1173908e32d0aa58d3c25c1037f39125a33a0863ac0ef365be1e13358c78303c
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d57fd1fe682c315af1c48fb802d16b71f4f8aab3890bff6beec7c8d857e9083
|
| 3 |
+
size 558554434
|
global_step204262/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a91d9f0c7d167544b61609ae6c8faf4cac4e20d09cbaad479c1e635f471a592e
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6340a1306ccc45801ff9d8a18737a2e749cfcf501d8a7a660d22736597de2a17
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84da5ed4bf86ec26f616d7d1d001ba32ba5ee23b14ea225c363d1fabe21c8fd7
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54302e17b523718d0ebd79d60bab2b46f04039cd0d2e4b0ea7cff23203ef66c0
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eaa15d86759e8f279891e55b99e64cbdfd2216f195d76ebd3f9886945393f2d
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81dde8e12ecec54aa62fe76776d4205dc8e12b23a179c2cb7aaefa159b5705f8
|
| 3 |
+
size 558554434
|
global_step204262/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac77d24be425dba2350677a16dcc528ef99cafd0f596f42defd1dea14cbbad74
|
| 3 |
+
size 558554418
|
global_step204262/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:535a78424b3a829fa227cf865bda52fd29157597994cf1f5e26f0cff667eac05
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6a72d54766917e00e9a16524f23b20d7ca5037d62dbd93a414d198f665f0094
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb51778cb0e727757ded3376a9dd8e15c28b03add4b1581e8335035b10d5b119
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7905f0d16f952e4dad0ac94b8c89e43a05f5f3c56b6604c730ed5c8de69ab067
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:614db2f1d2cf8ab070fc63314cbeeb617389865fa5eacc11d39df8b78584409e
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38c0b74e77dd640f94569e4569b2b43ce52f9e25bc11bc12584500787422b18f
|
| 3 |
+
size 558554434
|
global_step204262/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfca4d12a0fd65dfad9479edbdbaaa3cb22e4f576fe45df9b82d4a113d372eb0
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90976ef314e712fa58d08acbd6912c8aaac9d05379a24044aff8805fe893e64a
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1dd7271d5279e9610c6578def2df8ccc972346c20b3969ec8c9e5fc3871ee8b
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7352551707b58cbb0e0a1cce721bd610e909bf12a2898cbdf7209f899840c299
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efbbf851d84c588d08c2e6898bb6f4b884211cbf4672498e4084f1bf22d0bcb6
|
| 3 |
+
size 558554290
|
global_step204262/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6552ef72b9b856bebd5a4e64886dfd1315c0725b7f22a72e5c32ae6c2b9a575
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc8cd5d45fddd808c350247d545b2a01e6900a478b184c4a964c699b68f5b223
|
| 3 |
+
size 558554434
|
global_step204262/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4521aa7d326d4c708087b9dd0b03fe83991075a964d353cd69a9778ba116fbd
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b39098c71a8a180cece643cb1a8bb47ea9f704cc0a0537083d8e0d27a16cbc60
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0833bbdc73a62c5bda6ffb0a117d3b0cfebf37125d3a10ae5f6f325b28aa3a6
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66b4996b8bc3b65c08cc589f2ae8dcd634ed5dc88da5d9a81845eb9ece2270ef
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:439bec3ec6373d89aeae00e75fa2719035fb99582f21300844943c160d0b6ad3
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96f48f163a4f960ba4bb39193c60a8f5ab70d17d5baa04f83e256aacf202653a
|
| 3 |
+
size 558554434
|
global_step204262/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fcafa7f85eb5a131fbf9a4c1e92fcaca42a3b21002b6fe2e7d6f5c00732b41b
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a848a1bbe149eae1382122133807404425ec2bc210f79461fa82ba587e3064a
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01c69f57a6ba5dcedb3d1df70115d26baaa415677e8befcf2f71b81c6e3166ea
|
| 3 |
+
size 558554354
|
global_step204262/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:344edd93cbf1eb32a745d6050c5550c9d77e92242ea3349633faed17638287e1
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94d1b8c63e4c13963b59c5d3aa0527878a1306e9886d2a17bb6f1a3337c869cb
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a26e7a1383ca1bc4e4fa18ba12b74f9714c228c8704e5fef1397d5ed735f718
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db07665ca3d1c25aa6e3951af947003730d0fb494e5aebf7d1bb15045b4bd600
|
| 3 |
+
size 558554434
|
global_step204262/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2c8560491189986b851c394d42a36ca46c1e3a9293724ea77cc48de2c606f2c
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31d43d545a3838fe05d19598a251606a4ef5ecc5162c34dd75101268cff374f5
|
| 3 |
+
size 558554370
|
global_step204262/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db1c0f4b52ca5bf81a832118853231b3143f27f9c96ce8c036d13a220ed74155
|
| 3 |
+
size 558554306
|
global_step204262/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0867fddff4059facff575a7756da7e3a40f9a98e9d456ec47308a385d99151e
|
| 3 |
+
size 558610626
|
global_step204262/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfe94d34de1aff447512d88c9d5ea7b065a33cd758ac348cc0ee113b521712b1
|
| 3 |
+
size 558554290
|
global_step204262/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d0e1953fed26e391b46b11297652da9361653b1845370120e3736f8dbaf7c42
|
| 3 |
+
size 558554290
|
global_step204262/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19905a66ffd58ab87ec638319b43e7ef115ba99de571a9298c8c21d49ed994f7
|
| 3 |
+
size 558554354
|
global_step204262/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b65162cfb3eab2ea3cd75490d8bc9914e20c10f8d0a158090e7397f47f4a2d9
|
| 3 |
+
size 558554418
|
global_step204262/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c563fcab941f5e07e128c34fe2fded15b2165e1c87c67187159e55d19450ec9
|
| 3 |
+
size 558554290
|
global_step204262/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6af6b0922fa72f8739fd2b5781f00cf3d9d0583052745a1a8f8c0a6229599583
|
| 3 |
+
size 558554354
|