Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +32 -0
- 1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl +3 -0
- 1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl +3 -0
- 1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl +3 -0
- 1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl +3 -0
- 1000_hf/model.safetensors +3 -0
- 1000_hf/tokenizer.json +3 -0
- 11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl +3 -0
- 11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl +3 -0
- 11000_hf/model.safetensors +3 -0
- 11000_hf/tokenizer.json +3 -0
- 12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl +3 -0
- 12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl +3 -0
- 12000_hf/model.safetensors +3 -0
- 12000_hf/tokenizer.json +3 -0
- 12500/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -42,3 +42,35 @@ llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__ll
|
|
| 42 |
9000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
9000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
3000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2026-01-07T07-15-16.192705.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2025-12-29T05-42-31.325047.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2026-01-07T03-56-25.245340.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2025-12-29T05-34-35.086951.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
7000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
7000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_gsm8k_2026-01-07T07-00-36.377222.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
7000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_hellaswag_2026-01-07T03-51-33.637505.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
2000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2026-01-07T07-11-28.243760.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2025-12-29T05-42-21.185272.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2026-01-07T03-51-34.536241.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2025-12-29T05-34-34.282648.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
12000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
8000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
8000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_gsm8k_2026-01-07T07-00-06.652270.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
8000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_hellaswag_2026-01-07T03-53-44.066877.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
11000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
1000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
4000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2026-01-07T06-59-35.025159.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text
|
1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd1bc6702722c88ad17e28d15d92d888be85ff71e6ec311827ac157733dac9de
|
| 3 |
+
size 42643726
|
1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd1bc6702722c88ad17e28d15d92d888be85ff71e6ec311827ac157733dac9de
|
| 3 |
+
size 42643726
|
1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:564b41d6901271a2a9588f021a42697aa3f5c8fbcbfeb894ea3606624b4dedf1
|
| 3 |
+
size 16835560
|
1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:564b41d6901271a2a9588f021a42697aa3f5c8fbcbfeb894ea3606624b4dedf1
|
| 3 |
+
size 16835560
|
1000_hf/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:710202c2056864083aa9f83c9826cf14636b23a0c36405ab96825925e100eca4
|
| 3 |
+
size 2471645608
|
1000_hf/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
| 3 |
+
size 17209920
|
11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf1befb7e514670d739da9a2788d15fcb2c51276e905fa8c3506a4545cf11235
|
| 3 |
+
size 42645350
|
11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b02758c47b6d45aa55b2e22a485fd326918d24daa4bf64393f6976d3ed6ba13
|
| 3 |
+
size 16785743
|
11000_hf/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:235b72417bbfb56c2751c19e0f25a32adb947d2706bebbf4e4609653aa04b774
|
| 3 |
+
size 2471645608
|
11000_hf/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
| 3 |
+
size 17209920
|
12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd36fcdebd8ac7068e7bcdbf8bc01d86e47cb419120bfe39bb23ff946521a43f
|
| 3 |
+
size 42645332
|
12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf387ab00ddb718a7f1e12505bb19d8bbc2ef33ca2e4b8e563a8bedb1f6cb91d
|
| 3 |
+
size 16777813
|
12000_hf/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4383535dafe33db18207a9a42556cea80b50a307b817dfe4d1872f9ed4245599
|
| 3 |
+
size 2471645608
|
12000_hf/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
| 3 |
+
size 17209920
|
12500/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de752dfa075d595d9ff0bd79e3e14aa9406de32c871c84c54786b77dc2e18555
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:791afb4bbb4ad446e6cf4f47b4259c221389c484681945778f2da09f622e3ae8
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cc00fe6a22acdd438d69e35e6840fd08a8f1e2361a0d70551f80012fb4b8115
|
| 3 |
+
size 4192
|
12500/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da31c672ffcefd55d0c2e5037760793550ca890bc1baa8d3e634cf6571687860
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:081b8f92caa166fbccf754fb2b538231eadd12458a10b11844f952bdf8f29f44
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20e7910f69bff45ad276822a8a4039969e72780960a1f206e2cab48ed610e69c
|
| 3 |
+
size 4192
|
12500/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b51f29cd0a288e7d5f90658c1cb8be8e6d6d5af144b3437799f92033d2b8db07
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d5cd25e0ca1372e05cfc50b8058f3fedd83f2b060bf2f7666f4962137d8ff10
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b61797a09f487e85a7f78935cb392dc83153df62abb0721cc46d675611895a8e
|
| 3 |
+
size 4192
|
12500/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3497dc10dabaa645b5790a6b58ce71be9fe9011ef556c43d88b2f836850b5fd3
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11b532539a6078a2c375b83a8b543b4fad0f0156aa4e378bfa8bf2adde1c8bfc
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec2a17ba7087008c78dd9371e283c2e1b806b340f25a0e18c334c73059e36743
|
| 3 |
+
size 4192
|
12500/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ee54c2d9a5a63f1cb4fc5b9fc2849b64632ec3518f84a3551c8d89c8165de4c
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87db87d5020c6095523bc2bfd8e15d908eac201cf963df897cc5231a515d3468
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3860cca2549d7828a00907bf3aba4d0f4c05d783f7c3efa034a07b5432deb6a
|
| 3 |
+
size 4192
|
12500/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:277c267e1b6df75c38ae6bf801e3dc0e45de60a107ebc61e5b89685c0ec1acbe
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8387fd0023a47667750e88365da9bf861512d4ca7ec9e48cd4764818a690dae
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:105a80d91ff6406a19cfc0e749b821f7a321ecab3fee19f5db5e0e5983968577
|
| 3 |
+
size 4192
|
12500/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d42178984f7b83e8bb4912794017a3969bc0ea87da3467d9c845d5dae74ae2fe
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7cf0be75e754bc61dd5888c4923cc1fcb11cca44c7aba48c72a4ecb659c4212
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f09b4544a0fdc4be8cc2925497ba1ec5b81808a31e9e5ae09ea9f3feb2d90fe8
|
| 3 |
+
size 4192
|
12500/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38d10bc0cc9796510c92b80b604827e591568b495343cb8d9868ff40ddcff450
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2fb347639df7f2a04d6032f77447028812fab1340e0780586ae7c53aa87145d
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc63f1813bd23de425ad80c07d6c5ff4c911b256471ffd82fb653600ab709e22
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba693135e062a193f2d8bacc2f70d6b4d6ef40e69f65691afd7170dbd5efd2d5
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc30957b2b1ca3228f88d3189314707d6be8fbb98a1adc6c7548d260c2a1e52d
|
| 3 |
+
size 4192
|
12500/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a40d316b20de95f59ad8bbc6e3ef2117185696b0b025732dd7f8ffe38eccd399
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c048f4f390c9d78887fafd8545ded0aa74e96b23e778c424a66683c6014f908
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1ed2cc24fe61072fc6937542b03f51c136cc4660432de97cbb305695768c4b4
|
| 3 |
+
size 4192
|
12500/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0ab32d90954e22b86daa1e0e129fa94a89e5fb4c523dc837aab0fc959eab7c8
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15c1d631477d903e1b4149e06736144c5883eb7416993deeb97eb175f0b3c82d
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26be7486e0bcaa54cdf1d12a6385f82c4351d8585df766d7531d57a2d4ee5d5c
|
| 3 |
+
size 4192
|
12500/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eff85296c08333208919a4b030216f7822f165f668367507878a20ec33a585e1
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:527ae9ccc33a8adb96adb47ac0e5626e9ea86ac0427144e0e23c551926bfe5e1
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3172d2ab12f6d67c5b8bf11ead9fa11bbfca770e63c38bd1d3e966d43022a06
|
| 3 |
+
size 4192
|