IvanHU committed on
Commit
910ed95
·
verified ·
1 Parent(s): 6513e2d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.json +65 -0
  2. global_step184795/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. global_step184795/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  4. global_step184795/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  5. global_step184795/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  6. global_step184795/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
  7. global_step184795/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
  8. global_step184795/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
  9. global_step184795/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
  10. global_step184795/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
  11. global_step184795/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
  12. global_step184795/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
  13. global_step184795/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  14. global_step184795/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
  15. global_step184795/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
  16. global_step184795/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
  17. global_step184795/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
  18. global_step184795/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
  19. global_step184795/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
  20. global_step184795/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
  21. global_step184795/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
  22. global_step184795/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
  23. global_step184795/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
  24. global_step184795/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  25. global_step184795/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
  26. global_step184795/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
  27. global_step184795/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
  28. global_step184795/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
  29. global_step184795/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
  30. global_step184795/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
  31. global_step184795/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
  32. global_step184795/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
  33. global_step184795/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
  34. global_step184795/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
  35. global_step184795/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  36. global_step184795/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
  37. global_step184795/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
  38. global_step184795/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
  39. global_step184795/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
  40. global_step184795/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
  41. global_step184795/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
  42. global_step184795/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
  43. global_step184795/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
  44. global_step184795/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  45. global_step184795/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  46. global_step184795/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  47. global_step184795/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  48. global_step184795/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
  49. global_step184795/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
  50. global_step184795/mp_rank_00_model_states.pt +3 -0
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage19-hyw/checkpoint-176750",
3
+ "architectures": [
4
+ "MiniYuLanModelForCausalLM"
5
+ ],
6
+ "attention_bias": true,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "dim_model_base": 1920,
10
+ "dim_model_base_attn": 64,
11
+ "dim_model_base_init": null,
12
+ "dim_model_base_lmh": 1,
13
+ "dim_model_base_logits": 1920.0,
14
+ "dim_model_base_lr": 256.0,
15
+ "down_proj_alpha": 0.03450327796711771,
16
+ "embed_tokens_alpha": 1,
17
+ "embedding_ln": false,
18
+ "embedding_rmsln": false,
19
+ "eos_token_id": 2,
20
+ "gate_up_proj_alpha": 0.3651483716701107,
21
+ "gradient_checkpointing_step": 11,
22
+ "hidden_act": "silu",
23
+ "hidden_size": 1920,
24
+ "hidden_states_shrink": 0.18708286933869706,
25
+ "init_scale_o": 1,
26
+ "initializer_range": 5e-05,
27
+ "input_layernorm_alpha": 1.0,
28
+ "intermediate_size": 4800,
29
+ "k_proj_alpha": 0.3651483716701107,
30
+ "layer_norm_eps": 1e-06,
31
+ "lm_head_alpha": 1.0,
32
+ "ln_scale": 1,
33
+ "max_position_embeddings": 4096,
34
+ "model_reproduce": "transformer",
35
+ "model_type": "miniyulan",
36
+ "norm_alpha": 1.0,
37
+ "num_attention_heads": 30,
38
+ "num_epochs_trained_before_this_epoch": 18,
39
+ "num_hidden_layers": 56,
40
+ "num_key_value_heads": 6,
41
+ "num_steps_trained_before_this_epoch": 175066,
42
+ "o_proj_alpha": 0.03450327796711771,
43
+ "post_attention_layernorm_alpha": 1.0,
44
+ "q_proj_alpha": 0.3651483716701107,
45
+ "qk_layernorm": false,
46
+ "rms_norm_eps": 1e-06,
47
+ "rms_type": "llama",
48
+ "rope_scaling": null,
49
+ "rope_theta": 10000.0,
50
+ "scale_emb": 10.0,
51
+ "shrink_alpha": 1,
52
+ "sliding_window": null,
53
+ "tie_word_embeddings": true,
54
+ "torch_dtype": "bfloat16",
55
+ "transformers_version": "4.44.0",
56
+ "use_cache": false,
57
+ "use_emb_alpha": true,
58
+ "use_liger": true,
59
+ "use_norm_alpha": true,
60
+ "use_sliding_window": false,
61
+ "v_proj_alpha": 0.3651483716701107,
62
+ "vocab_size": 99000,
63
+ "wesar_weights": true,
64
+ "z_loss": 0.0001
65
+ }
global_step184795/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f26ea3b2fb7cdf1bb1fe31a2002e83a4e95e3ea97c2d6775919cc4c25a1864
3
+ size 558554482
global_step184795/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4a50f737475aabfb5fb821e3afb25753172a9414d4a8e50f9cc7971b6ff4b0e
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4021d8fa1c837c97d4880f30b7b61ddd75c49f5354705b3edf3b5af6b240b51
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29a9feffd18003f192662fbb24f2d47b2b7e573d4684d072af816d0172f043d
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb466e18261bfc23ed97d4ed4690486c2c9b41049b5251a6c9a3522b6653ce7
3
+ size 558554434
global_step184795/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f716152501042cd5bbbcb84f48ee9a2544f147dbb7f9b5b17afda087db07f4e
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f92abfbe6a99edf69a9154bf3a98ccc14675996df4e822e2045d8a4d750fc7ee
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf9861e93f7678b653c540af998eecbb2f61be904ad89f4c426c49d2f32768e
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68399ee0bb1252ea26ed37634c32d802e26eab273aac156de738ded067691eb8
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e33f94b77001b81c750fee02a491de689c3820fdfad25acaba89f691a8b3d9d
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bdb232f7c16ab08879eae1ff605cdd2f79e6ac4242dbe19ab727329430023d5
3
+ size 558554434
global_step184795/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa468a2698f4ce78873506941d6964d84c8e227140259570ccf13dc8f9f1fc74
3
+ size 558554418
global_step184795/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc838fa3daed73e04be6d31491c8c95d7df4872adf220e26dc55d6c82a10020
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26393110db32b2e543800685b021ab75af89d8d8bd1f71e81bdc68026c8092fc
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4bc778a25ac05a431707ab5fdb93e528453cdfbb902f05c9bd818336940ae9
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8363c5ecd1102a3d1cdb22e0ddfdb84ec6392747bdf69b30798ece2094740e81
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf9eadf637af4e08d4ad52973012f60a4574ff409c04515110bce5863595dfaa
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2a9b5938d0d7eb9c3d54d601a05094517d08e7e41f239553e5146a40c7d2d9
3
+ size 558554434
global_step184795/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb829252ab6f9b995f5dbd3f18794af22b014f9ea2337312fc7264363c60e2f
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b128795cea5de0036b4a25f3abce1eb2238ac7eeebf2b09cf3545fb486b47f
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b690d217d6ee4a21e0a0c9d31abcc28c5cfdb8422b159dc89a9a1fdd236fd49
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7351a5c91a744facf6758a1f5ac309daf365d5d35cf4aa3609fd1fdf6f7bfb
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c126d4f143e5047c597c474ed52686f292ca722c8a9e688a1d881f05a4b5dc8
3
+ size 558554290
global_step184795/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2cefd87002c6c09a7d9a34f6007cb4cd99e0ff6a92ca26208eb3689fed5b51d
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8789459d9fe6c5c71058f05d3843bf08e18e1335d3080cccd34491e157b204
3
+ size 558554434
global_step184795/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:304ee8b6cf5521aa49ad93a350967114cd95dd75e45f65ee86d2bb9b8d5539bb
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d7106c71f9a90d5d35793aa00984f76eb21b3c33793b386a227efbc7239604e
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ca26edf65a3a889467b1df31ec469110c352c03de05ec05293c7bd1bead086
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358a8dbc78ea2d96a04223a85fce17d3bc8eed66b7f2a3fc7942ec08cc404e30
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84392a4206e25778212980faaa662f77bd524fd4200431467e9a14ee543b544
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:743d409a65b0c10eccb4d259e65b2a79d1c0169234df39fda81e56e0ff77b2e1
3
+ size 558554434
global_step184795/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db0bb7c8178e72f92feafed958a0c15e1a713b2f377f650b4e5bdc21af6f93a1
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f5a3701aa444b3f0893fbcfd6183d1861c276b8cd847c9b92ddac67655fec1
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314f2ec5bcaa66926010a570998acaa6cf152cb876f10c516ba4a52123c045ac
3
+ size 558554354
global_step184795/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e3bc0f2a69db71b6e164d229d74364ad818f06be5adde6a375172398d465ccd
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d037937fc39df9535124657e11135f0c1a0ded5712a3ae4ad0eb6db9c8d617f9
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e55c6703f40b4c895d1f208667eb46b4d024e15c13e4e5b6f62b0a0bd9835f
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d6fc91ed93014ffaefdd6f49f7e9f6cf6fdc10ca54451a672500e539c72b0b1
3
+ size 558554434
global_step184795/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea8c893a168241245e05e514af13276a56706d40140baaad5902e123f3cf996
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9e3512188d3d47b69c5b8f495867eb54ed08abb5ac97ade186c90c84065ccfc
3
+ size 558554370
global_step184795/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06a9640fe99cc91ff225bca57b450be9479683262bd4bc0311858fe4941b7e8e
3
+ size 558554306
global_step184795/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61e22fc10f3a025c520cab05a69c3abddb3764650640fc02e2780d06b889902f
3
+ size 558610626
global_step184795/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:365b9a40acc4fa7be5ab4b73820886ef9635ce7f7e191992d715f47daf0562d8
3
+ size 558554290
global_step184795/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e46361c28ff6b25e2d9e29b52ae569382b1f71560c72000cbac9ddc5f853436
3
+ size 558554290
global_step184795/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f207503c69ce125bcd2448e3c45aa4a0066694f4cff93c75dc4b2adedae3e2
3
+ size 558554354
global_step184795/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee997b08b4f1395435ae87de0da59ca3911c7e1012a0178c9b0a608d5dbc97c
3
+ size 558554418
global_step184795/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805ff5b677a05366342a6f3ebff58fb1553da99a41624a088fb20b603489b715
3
+ size 558554290
global_step184795/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6045fbd8f55ee5a81857b828d3dc3cd39a6f09c11146ccadfd5cc9e1c15c19c7
3
+ size 558554354
global_step184795/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6f089ab82c34b1655da0bef15cc27fcba4b8008380ad31b5a74b1210bc50e11
3
+ size 4468641136