Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- 135M/final/checkpoint_metadata.json +18 -0
- 135M/final/config.yaml +99 -0
- 135M/final/lr_scheduler/lr_scheduler.pt +3 -0
- 135M/final/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 135M/final/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 135M/final/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
135M/final/checkpoint_metadata.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"custom_metas": null,
|
| 3 |
+
"dp": 64,
|
| 4 |
+
"metas": {
|
| 5 |
+
"consumed_train_samples": 1024000000,
|
| 6 |
+
"data_stages": [
|
| 7 |
+
{
|
| 8 |
+
"consumed_train_samples": 1024000000,
|
| 9 |
+
"name": "stable",
|
| 10 |
+
"start_training_step": 1
|
| 11 |
+
}
|
| 12 |
+
],
|
| 13 |
+
"last_stage_idx": 0,
|
| 14 |
+
"last_train_step": 2000000
|
| 15 |
+
},
|
| 16 |
+
"tp": 1,
|
| 17 |
+
"version": "1.4"
|
| 18 |
+
}
|
135M/final/config.yaml
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoints:
|
| 2 |
+
checkpoint_interval: 2000
|
| 3 |
+
checkpoints_path: checkpoints
|
| 4 |
+
checkpoints_path_is_shared_file_system: false
|
| 5 |
+
resume_checkpoint_path: null
|
| 6 |
+
save_final_state: false
|
| 7 |
+
save_initial_state: false
|
| 8 |
+
data_stages:
|
| 9 |
+
- data:
|
| 10 |
+
dataset:
|
| 11 |
+
dataset_folder:
|
| 12 |
+
- datasets/smollm2-corpus
|
| 13 |
+
dataset_weights:
|
| 14 |
+
- 1.0
|
| 15 |
+
num_loading_workers: 0
|
| 16 |
+
seed: 8
|
| 17 |
+
name: stable phase
|
| 18 |
+
start_training_step: 1
|
| 19 |
+
general:
|
| 20 |
+
benchmark_csv_path: null
|
| 21 |
+
consumed_train_samples: null
|
| 22 |
+
ignore_sanity_checks: true
|
| 23 |
+
project: smollm2
|
| 24 |
+
run: smollm2-135M
|
| 25 |
+
seed: 8
|
| 26 |
+
step: 2000000
|
| 27 |
+
logging:
|
| 28 |
+
iteration_step_info_interval: 1
|
| 29 |
+
log_level: info
|
| 30 |
+
log_level_replica: info
|
| 31 |
+
model:
|
| 32 |
+
ddp_bucket_cap_mb: 25
|
| 33 |
+
dtype: bfloat16
|
| 34 |
+
init_method:
|
| 35 |
+
std: 0.041666666666666664
|
| 36 |
+
make_vocab_size_divisible_by: 1
|
| 37 |
+
model_config:
|
| 38 |
+
bos_token_id: 0
|
| 39 |
+
eos_token_id: 0
|
| 40 |
+
hidden_act: silu
|
| 41 |
+
hidden_size: 576
|
| 42 |
+
initializer_range: 0.041666666666666664
|
| 43 |
+
intermediate_size: 1536
|
| 44 |
+
is_llama_config: true
|
| 45 |
+
max_position_embeddings: 2048
|
| 46 |
+
num_attention_heads: 9
|
| 47 |
+
num_hidden_layers: 30
|
| 48 |
+
num_key_value_heads: 3
|
| 49 |
+
pad_token_id: null
|
| 50 |
+
pretraining_tp: 1
|
| 51 |
+
rms_norm_eps: 1.0e-05
|
| 52 |
+
rope_interleaved: false
|
| 53 |
+
rope_scaling: null
|
| 54 |
+
rope_theta: 10000.0
|
| 55 |
+
tie_word_embeddings: true
|
| 56 |
+
use_cache: true
|
| 57 |
+
vocab_size: 49152
|
| 58 |
+
optimizer:
|
| 59 |
+
accumulate_grad_in_fp32: true
|
| 60 |
+
clip_grad: 1.0
|
| 61 |
+
learning_rate_scheduler:
|
| 62 |
+
learning_rate: 0.003
|
| 63 |
+
lr_decay_starting_step: 1600000
|
| 64 |
+
lr_decay_steps: 400000
|
| 65 |
+
lr_decay_style: linear
|
| 66 |
+
lr_warmup_steps: 2000
|
| 67 |
+
lr_warmup_style: linear
|
| 68 |
+
min_decay_lr: 0
|
| 69 |
+
optimizer_factory:
|
| 70 |
+
adam_beta1: 0.9
|
| 71 |
+
adam_beta2: 0.95
|
| 72 |
+
adam_eps: 1.0e-08
|
| 73 |
+
name: adamW
|
| 74 |
+
torch_adam_is_fused: true
|
| 75 |
+
weight_decay: 0.01
|
| 76 |
+
zero_stage: 0
|
| 77 |
+
parallelism:
|
| 78 |
+
dp: 64
|
| 79 |
+
expert_parallel_size: 1
|
| 80 |
+
pp: 1
|
| 81 |
+
pp_engine: 1f1b
|
| 82 |
+
recompute_layer: false
|
| 83 |
+
tp: 1
|
| 84 |
+
tp_linear_async_communication: true
|
| 85 |
+
tp_mode: REDUCE_SCATTER
|
| 86 |
+
tp_recompute_allgather: true
|
| 87 |
+
profiler: null
|
| 88 |
+
tokenizer:
|
| 89 |
+
tokenizer_max_length: null
|
| 90 |
+
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
|
| 91 |
+
tokenizer_revision: null
|
| 92 |
+
tokens:
|
| 93 |
+
batch_accumulation_per_replica: 1
|
| 94 |
+
limit_test_batches: 0
|
| 95 |
+
limit_val_batches: 0
|
| 96 |
+
micro_batch_size: 8
|
| 97 |
+
sequence_length: 2048
|
| 98 |
+
train_steps: 2000000
|
| 99 |
+
val_check_interval: 1000
|
135M/final/lr_scheduler/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27a9133c1370b65d8146ed30306983102b6e4129bb73ba6c415c2aaa6319a09c
|
| 3 |
+
size 4852
|
135M/final/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8029015d0157dfa83a9c79d0326f9362feeb85304662ebe0d0e21c95407f7782
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bfe90d71e67555e877683128c135cc68920fd2b9b7be71ae3c3ca681ed324ee
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:581e407e3e10a13aceb685778625c235f278d007214aa22b530e240896432581
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:521eb7efd4a5e2470f13789d267b7d0de02374c4fe5ad448c4881114b0892358
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:426da60375d18ef4b2b9b0e0acae41b3f5a7f16ab2939e5edeec5db1ab2c2f73
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c36cb20f25f7839f48a42c012634ecf733af22b48d399e783f26a8e056e3ce0
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81a4dd1aa5ae1ac63bf0e3fc885d5913da6f4e95c402f24306b44681b75897ca
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8e02bce7a5c1ed2a13ccd85f1844fa506f50b17e74825c5de8931619c9b8d7a
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3898657f15f4d50be529d14e335d277e9da38a33b9215bf2a21456da142d4d4a
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a00cdb8fa7c6532e6e1268ac6ebfc167a99b61ab5c7d47fe2285e5ce113c8a52
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70cbd63bc8873c3d871f1efc53209b1f8bc96b77609def159ec3fe794f7fa87f
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e68a52c13a8e12bbffa89cff969228f947e686b5f1e0820c5094be65b578357
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22001e5ad1ecbddd949ce7c55b2325497093d39c8f0045b58d3be439d64909db
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f144e99f5df4837e232a6d3b4f57ebf5e209f805f225d11c144e195d69914b60
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78a98a4a411b8c1e7c6ec084c2ab7297de7b8dbb45452bc566439e4233df436f
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81bf9548d655e3e7c417d8e74cf94d3523ed5f18aba31419710c9eab72f1e5d3
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2d31d39f967f1d1f1d5fea94543b3c3e3ed5572c4447ffc4eefb40fc78063e2
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f044ab51d6b32a81f659f78b3fca9345a903db0dec74c99bdf34773a83995b8d
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf9cfae26b01e6feb72b39e9125c0c3e1a9a5e2878e4eba431005e6dd47e0a61
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d2a25dd7304cb3493d1f4252a48552f84da11ba83c9d037c5328cbddd73ec54
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2effa03b77ad2f9ebe1778f4796875105f3bd4dfd9efcb592c65dcbd438078a0
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff08f050e7188118ac7a38f3c7fc7f0e2156a38eee8d6bb2abe03f7426292a1f
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05bbf4bda06624776bee09223e6e64aaf39a78a793bf8ac14a13997fac3e24ce
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28fd8d96412385bce78014f661664637203192650f653103101e6c47d2b1fb6f
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2981f598528550667af99c190e88a3e55a3269890f7e56ad8e3066a116f8ec3f
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd31f73e015f89bc029bd1a475493df6328deb240db29d80818f1a6cb6d21fbe
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:964765b724bf0bd9cb302b800990b57f236f91208448928623cefd9eff0daa34
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfe717c548fb8068ef56af3d01afa7183dfbef132a7876d114469dc89ab41fd7
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39d3f129fbccfe08d81350df76a29b9a23c817ec640759c696e6b38a6b0cedee
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:770de192f6c6cec62e1c486b2582b50ad5f4133ee07199af8f029ae92dcaff4e
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2346dcc9eff7a33b4bfed1f41831c0fd0d5a34b0b1f24eb7b0986f5193ba89b8
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8e45c2a024718619abcb66d6d1e1d1e079846259674b47f96461a5b0a16f575
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d52af9c3f2f9dcc78522f37ddc5799f1bb8552e226f87b50020559f92ca60dc
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84300e4a5c3cc7df36d1d9837b65b6940a6f99d8c7e22bd079ae9b2358930db5
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:199667292a5f03e4ff2aa7e94bc4385e782857b6ff085a80a5da45ff9ab93c7a
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f052b0cbae2770a0ebb5b799dd9ec2b2b38a8c3cd4228cdc8860bc2411ac4d19
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ea191b7840e97ef46a7569e37acdd87c682d579af156b3f451d5173040b9627
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:410fad5df0666e87d30d92f55842018ec90b9ffbe7b93963f435c1caab07df6c
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2530b308748980f18936f396b4a80ea3a789e3d666cc2f76965497fed95c9e77
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73fb12de2acfaca31a82768f1abfb5182c1ffccfc59bc3295528595ec2edacb2
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0285bb0eb5caa992e25d80ae42937f374d1a7519859b07eaf43de83614299362
|
| 3 |
+
size 3539232
|
135M/final/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:064ab09e7d60fe91f003397ed77761e70621033d3556b36b22a6ba7b1ceff1f6
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45db59fb7d74f12636d6e1a5fcdf064df5faa17c307a501edc6156b88e3ab673
|
| 3 |
+
size 663784
|
135M/final/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:215e4335a8530168d486cc2fa1d8757640aed3598146188b0800342d85ef610c
|
| 3 |
+
size 1106256
|
135M/final/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75ddf9114537eb753e4939b1efc2e447a675b9bc2c983382963c0397afae4cc5
|
| 3 |
+
size 1248
|
135M/final/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69d7d50abb320add2ed998c69a0c85c35dff6a0254ca3aa1bbdbf0b74330484e
|
| 3 |
+
size 1769704
|
135M/final/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:996f8e552c5e402f3217d5a9aad70b73d98a443a5476e9acda8dff4df9cf0eba
|
| 3 |
+
size 3539232
|