English
eliebak HF Staff commited on
Commit
1dfdc90
·
verified ·
1 Parent(s): d578816

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 135M/final/checkpoint_metadata.json +18 -0
  2. 135M/final/config.yaml +99 -0
  3. 135M/final/lr_scheduler/lr_scheduler.pt +3 -0
  4. 135M/final/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  5. 135M/final/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  6. 135M/final/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
  7. 135M/final/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  8. 135M/final/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  9. 135M/final/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  10. 135M/final/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  11. 135M/final/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  12. 135M/final/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
  13. 135M/final/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  14. 135M/final/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  15. 135M/final/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  16. 135M/final/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  17. 135M/final/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  18. 135M/final/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors +3 -0
  19. 135M/final/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  20. 135M/final/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  21. 135M/final/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  22. 135M/final/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  23. 135M/final/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  24. 135M/final/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors +3 -0
  25. 135M/final/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  26. 135M/final/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  27. 135M/final/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  28. 135M/final/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  29. 135M/final/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  30. 135M/final/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors +3 -0
  31. 135M/final/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  32. 135M/final/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  33. 135M/final/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  34. 135M/final/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  35. 135M/final/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  36. 135M/final/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors +3 -0
  37. 135M/final/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  38. 135M/final/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  39. 135M/final/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  40. 135M/final/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  41. 135M/final/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  42. 135M/final/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors +3 -0
  43. 135M/final/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  44. 135M/final/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  45. 135M/final/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  46. 135M/final/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  47. 135M/final/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  48. 135M/final/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors +3 -0
  49. 135M/final/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  50. 135M/final/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
135M/final/checkpoint_metadata.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_metas": null,
3
+ "dp": 64,
4
+ "metas": {
5
+ "consumed_train_samples": 1024000000,
6
+ "data_stages": [
7
+ {
8
+ "consumed_train_samples": 1024000000,
9
+ "name": "stable",
10
+ "start_training_step": 1
11
+ }
12
+ ],
13
+ "last_stage_idx": 0,
14
+ "last_train_step": 2000000
15
+ },
16
+ "tp": 1,
17
+ "version": "1.4"
18
+ }
135M/final/config.yaml ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoints:
2
+ checkpoint_interval: 2000
3
+ checkpoints_path: checkpoints
4
+ checkpoints_path_is_shared_file_system: false
5
+ resume_checkpoint_path: null
6
+ save_final_state: false
7
+ save_initial_state: false
8
+ data_stages:
9
+ - data:
10
+ dataset:
11
+ dataset_folder:
12
+ - datasets/smollm2-corpus
13
+ dataset_weights:
14
+ - 1.0
15
+ num_loading_workers: 0
16
+ seed: 8
17
+ name: stable phase
18
+ start_training_step: 1
19
+ general:
20
+ benchmark_csv_path: null
21
+ consumed_train_samples: null
22
+ ignore_sanity_checks: true
23
+ project: smollm2
24
+ run: smollm2-135M
25
+ seed: 8
26
+ step: 2000000
27
+ logging:
28
+ iteration_step_info_interval: 1
29
+ log_level: info
30
+ log_level_replica: info
31
+ model:
32
+ ddp_bucket_cap_mb: 25
33
+ dtype: bfloat16
34
+ init_method:
35
+ std: 0.041666666666666664
36
+ make_vocab_size_divisible_by: 1
37
+ model_config:
38
+ bos_token_id: 0
39
+ eos_token_id: 0
40
+ hidden_act: silu
41
+ hidden_size: 576
42
+ initializer_range: 0.041666666666666664
43
+ intermediate_size: 1536
44
+ is_llama_config: true
45
+ max_position_embeddings: 2048
46
+ num_attention_heads: 9
47
+ num_hidden_layers: 30
48
+ num_key_value_heads: 3
49
+ pad_token_id: null
50
+ pretraining_tp: 1
51
+ rms_norm_eps: 1.0e-05
52
+ rope_interleaved: false
53
+ rope_scaling: null
54
+ rope_theta: 10000.0
55
+ tie_word_embeddings: true
56
+ use_cache: true
57
+ vocab_size: 49152
58
+ optimizer:
59
+ accumulate_grad_in_fp32: true
60
+ clip_grad: 1.0
61
+ learning_rate_scheduler:
62
+ learning_rate: 0.003
63
+ lr_decay_starting_step: 1600000
64
+ lr_decay_steps: 400000
65
+ lr_decay_style: linear
66
+ lr_warmup_steps: 2000
67
+ lr_warmup_style: linear
68
+ min_decay_lr: 0
69
+ optimizer_factory:
70
+ adam_beta1: 0.9
71
+ adam_beta2: 0.95
72
+ adam_eps: 1.0e-08
73
+ name: adamW
74
+ torch_adam_is_fused: true
75
+ weight_decay: 0.01
76
+ zero_stage: 0
77
+ parallelism:
78
+ dp: 64
79
+ expert_parallel_size: 1
80
+ pp: 1
81
+ pp_engine: 1f1b
82
+ recompute_layer: false
83
+ tp: 1
84
+ tp_linear_async_communication: true
85
+ tp_mode: REDUCE_SCATTER
86
+ tp_recompute_allgather: true
87
+ profiler: null
88
+ tokenizer:
89
+ tokenizer_max_length: null
90
+ tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
91
+ tokenizer_revision: null
92
+ tokens:
93
+ batch_accumulation_per_replica: 1
94
+ limit_test_batches: 0
95
+ limit_val_batches: 0
96
+ micro_batch_size: 8
97
+ sequence_length: 2048
98
+ train_steps: 2000000
99
+ val_check_interval: 1000
135M/final/lr_scheduler/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a9133c1370b65d8146ed30306983102b6e4129bb73ba6c415c2aaa6319a09c
3
+ size 4852
135M/final/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8029015d0157dfa83a9c79d0326f9362feeb85304662ebe0d0e21c95407f7782
3
+ size 663784
135M/final/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bfe90d71e67555e877683128c135cc68920fd2b9b7be71ae3c3ca681ed324ee
3
+ size 1106256
135M/final/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:581e407e3e10a13aceb685778625c235f278d007214aa22b530e240896432581
3
+ size 1248
135M/final/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521eb7efd4a5e2470f13789d267b7d0de02374c4fe5ad448c4881114b0892358
3
+ size 1769704
135M/final/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426da60375d18ef4b2b9b0e0acae41b3f5a7f16ab2939e5edeec5db1ab2c2f73
3
+ size 3539232
135M/final/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c36cb20f25f7839f48a42c012634ecf733af22b48d399e783f26a8e056e3ce0
3
+ size 1248
135M/final/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a4dd1aa5ae1ac63bf0e3fc885d5913da6f4e95c402f24306b44681b75897ca
3
+ size 663784
135M/final/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e02bce7a5c1ed2a13ccd85f1844fa506f50b17e74825c5de8931619c9b8d7a
3
+ size 1106256
135M/final/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3898657f15f4d50be529d14e335d277e9da38a33b9215bf2a21456da142d4d4a
3
+ size 1248
135M/final/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a00cdb8fa7c6532e6e1268ac6ebfc167a99b61ab5c7d47fe2285e5ce113c8a52
3
+ size 1769704
135M/final/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70cbd63bc8873c3d871f1efc53209b1f8bc96b77609def159ec3fe794f7fa87f
3
+ size 3539232
135M/final/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e68a52c13a8e12bbffa89cff969228f947e686b5f1e0820c5094be65b578357
3
+ size 1248
135M/final/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22001e5ad1ecbddd949ce7c55b2325497093d39c8f0045b58d3be439d64909db
3
+ size 663784
135M/final/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f144e99f5df4837e232a6d3b4f57ebf5e209f805f225d11c144e195d69914b60
3
+ size 1106256
135M/final/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a98a4a411b8c1e7c6ec084c2ab7297de7b8dbb45452bc566439e4233df436f
3
+ size 1248
135M/final/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81bf9548d655e3e7c417d8e74cf94d3523ed5f18aba31419710c9eab72f1e5d3
3
+ size 1769704
135M/final/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2d31d39f967f1d1f1d5fea94543b3c3e3ed5572c4447ffc4eefb40fc78063e2
3
+ size 3539232
135M/final/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f044ab51d6b32a81f659f78b3fca9345a903db0dec74c99bdf34773a83995b8d
3
+ size 1248
135M/final/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf9cfae26b01e6feb72b39e9125c0c3e1a9a5e2878e4eba431005e6dd47e0a61
3
+ size 663784
135M/final/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2a25dd7304cb3493d1f4252a48552f84da11ba83c9d037c5328cbddd73ec54
3
+ size 1106256
135M/final/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2effa03b77ad2f9ebe1778f4796875105f3bd4dfd9efcb592c65dcbd438078a0
3
+ size 1248
135M/final/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff08f050e7188118ac7a38f3c7fc7f0e2156a38eee8d6bb2abe03f7426292a1f
3
+ size 1769704
135M/final/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05bbf4bda06624776bee09223e6e64aaf39a78a793bf8ac14a13997fac3e24ce
3
+ size 3539232
135M/final/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28fd8d96412385bce78014f661664637203192650f653103101e6c47d2b1fb6f
3
+ size 1248
135M/final/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2981f598528550667af99c190e88a3e55a3269890f7e56ad8e3066a116f8ec3f
3
+ size 663784
135M/final/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd31f73e015f89bc029bd1a475493df6328deb240db29d80818f1a6cb6d21fbe
3
+ size 1106256
135M/final/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964765b724bf0bd9cb302b800990b57f236f91208448928623cefd9eff0daa34
3
+ size 1248
135M/final/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe717c548fb8068ef56af3d01afa7183dfbef132a7876d114469dc89ab41fd7
3
+ size 1769704
135M/final/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d3f129fbccfe08d81350df76a29b9a23c817ec640759c696e6b38a6b0cedee
3
+ size 3539232
135M/final/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:770de192f6c6cec62e1c486b2582b50ad5f4133ee07199af8f029ae92dcaff4e
3
+ size 1248
135M/final/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2346dcc9eff7a33b4bfed1f41831c0fd0d5a34b0b1f24eb7b0986f5193ba89b8
3
+ size 663784
135M/final/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e45c2a024718619abcb66d6d1e1d1e079846259674b47f96461a5b0a16f575
3
+ size 1106256
135M/final/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d52af9c3f2f9dcc78522f37ddc5799f1bb8552e226f87b50020559f92ca60dc
3
+ size 1248
135M/final/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84300e4a5c3cc7df36d1d9837b65b6940a6f99d8c7e22bd079ae9b2358930db5
3
+ size 1769704
135M/final/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:199667292a5f03e4ff2aa7e94bc4385e782857b6ff085a80a5da45ff9ab93c7a
3
+ size 3539232
135M/final/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f052b0cbae2770a0ebb5b799dd9ec2b2b38a8c3cd4228cdc8860bc2411ac4d19
3
+ size 1248
135M/final/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ea191b7840e97ef46a7569e37acdd87c682d579af156b3f451d5173040b9627
3
+ size 663784
135M/final/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:410fad5df0666e87d30d92f55842018ec90b9ffbe7b93963f435c1caab07df6c
3
+ size 1106256
135M/final/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2530b308748980f18936f396b4a80ea3a789e3d666cc2f76965497fed95c9e77
3
+ size 1248
135M/final/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73fb12de2acfaca31a82768f1abfb5182c1ffccfc59bc3295528595ec2edacb2
3
+ size 1769704
135M/final/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0285bb0eb5caa992e25d80ae42937f374d1a7519859b07eaf43de83614299362
3
+ size 3539232
135M/final/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064ab09e7d60fe91f003397ed77761e70621033d3556b36b22a6ba7b1ceff1f6
3
+ size 1248
135M/final/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45db59fb7d74f12636d6e1a5fcdf064df5faa17c307a501edc6156b88e3ab673
3
+ size 663784
135M/final/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215e4335a8530168d486cc2fa1d8757640aed3598146188b0800342d85ef610c
3
+ size 1106256
135M/final/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75ddf9114537eb753e4939b1efc2e447a675b9bc2c983382963c0397afae4cc5
3
+ size 1248
135M/final/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69d7d50abb320add2ed998c69a0c85c35dff6a0254ca3aa1bbdbf0b74330484e
3
+ size 1769704
135M/final/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:996f8e552c5e402f3217d5a9aad70b73d98a443a5476e9acda8dff4df9cf0eba
3
+ size 3539232