Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +17 -0
- 10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl +3 -0
- 10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl +3 -0
- 10000_hf/model.safetensors +3 -0
- 10000_hf/tokenizer.json +3 -0
- 12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt +3 -0
- 12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- 12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt +3 -0
- 12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt +3 -0
- 12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt +3 -0
- 12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt +3 -0
- 12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt +3 -0
- 12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt +3 -0
- 12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt +3 -0
.gitattributes
CHANGED
|
@@ -74,3 +74,20 @@ llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__ll
|
|
| 74 |
4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 75 |
4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 76 |
4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 75 |
4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 76 |
4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
2500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2025-12-29T05-42-37.282529.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2026-01-07T07-15-20.052939.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2026-01-07T03-51-34.430609.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2025-12-29T05-34-34.570482.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
10000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
12500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
12500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_gsm8k_2026-01-07T07-12-17.705915.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
12500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_hellaswag_2026-01-07T03-51-35.016035.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
5000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
5000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_gsm8k_2026-01-07T06-58-05.968321.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
5000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_hellaswag_2026-01-07T03-53-49.355990.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
7500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
7500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_gsm8k_2026-01-07T07-00-15.435455.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
7500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_hellaswag_2026-01-07T03-51-34.104451.jsonl filter=lfs diff=lfs merge=lfs -text
|
10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:019283f25ac52ae31c6cf25385f1c39868a83c840ee685eeaace39a373d64ec1
|
| 3 |
+
size 42645134
|
10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:979f960ef99474ceb5be3a2b15c727db71dd0ddd0d62c82e73abff3a58077fc7
|
| 3 |
+
size 16725724
|
10000_hf/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c4f056bd3db4b0663bcc79008263ed8fc6ab3b2061f578ae12e6361cd47e3b0
|
| 3 |
+
size 2471645608
|
10000_hf/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
| 3 |
+
size 17209920
|
12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:374278ee7760e355e5f7d19924fad3e4fca7d2c8c0e8ce04f9497d4c9091bc23
|
| 3 |
+
size 3781
|
12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33f64f9580bc254ac7da5e2a4f85508ee532ba1941a624e1765bc40b32c3b936
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:755297f607e1208065ec25c006290724f27b2a048deb4f2b0447ebb6a97b64a3
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b9320d79d9ad3cb9751098131b608a50be6ccd2e6c22d3311f38c3ec6c4adde
|
| 3 |
+
size 4192
|
12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:768b30c5809543dcb7e2b3a80acebcccc2386d5951858ce96f659472c87edbbd
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d475b067243674f9391064be95d0b62095dd499de017a4cf54e4ced3a80a4967
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:286b94d6f2348d14553975a37a13790f35b17394a1319ab681d8e2a53d3b4e32
|
| 3 |
+
size 4192
|
12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4a182a893a8659c7ac62516949ac6ec65ca8d8b18b981b45c7591308afe7121
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69fc59684b34ca689e9369bce889aad4d0a60bc151ff8a8cabf010986280a13a
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01eb90d8439f20c3259d5b2f6b87ac7d30931a2a8a0ac4ea2f066d0544c19e40
|
| 3 |
+
size 4192
|
12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be64a1076bdfd9651c17ca8a05384ce161af29542848c980bf9ef32fa422058d
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e27ba8d73e7115d6ae7d8a05bd6201dc5c975d068d207f14cd95f52d619758a2
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:380913079b683588e3514ec46b60d2bd75699a26230effec7a24ac405831b63d
|
| 3 |
+
size 4192
|
12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:932cd453fe7d9cb054ecf9dfe50dc39c4e7fac374e847152601dff3896b382c3
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5bc299ecea6517951175b8fa6fdcc1811ff224448f8658645e51133b175ee10
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f40df8e3ee1734d75dc14a0708ce9f4f47a01635c96f49fac9e8b6c6142e9a1f
|
| 3 |
+
size 4192
|
12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2547623d0a135fc578cf37237581e8c40a9035a80ac3cf47c3567c17fa35194
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fadd4d4606f22244fc7141671bc4e41ee2a39fed5bb59dba76988a95d85f355
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61a8ea473c8821bfbb047c94ee32b7fc508774b0f035130b1a53929866c29526
|
| 3 |
+
size 4192
|
12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f82ecb4e76af4b6984f542f1c2d06a922544b1d2c649a5c456c8eb3010a44ee
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca1919e6460967b14a09695b74b10b4aad5574ab54a8702ac67de8f4f6b6529e
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13254580fbbaeeccd0abe755649ee68566304272e594be3d5d8dfc7385d58eeb
|
| 3 |
+
size 4192
|
12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5deea6376637f31c271ce8f09ed5064aa781d3fd3caad7d7f8f27d0f21e54320
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b80e832585909c699d214ac6c9ddbce83dae96cbc10f81a2cb0055aac7d62828
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3101d7f62ad0477c4d4bdb8220bc62c95e80b309367fb39b1a189a392cebc4a4
|
| 3 |
+
size 4192
|
12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a1e6f9470546f244298688febfacdcc6fba40ab32e6f5cc58d9f7f85cbbc999
|
| 3 |
+
size 4192
|
12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b9798d69b2bad5743aab3eb5f6697449ed6c05ee622d4ba5dd9e306ac2ad947
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24104016d3c6d8552f9359d27350ed190bdb33b5bf666e362f50a9fe8f100bfb
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d4542ddadef586703537eddfac6766f609e45cf2195a97647dc29bc3acff1a2
|
| 3 |
+
size 4192
|
12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f40041638bf95fd0c6acc4fc6c617c35952560fd3b3d6f8bb91c005eab9c031
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:957438630a769c12814f4973d984c8e9fe80b04fcf2d7dc7561bba204f19da40
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a57aa27c2d8e5b206d4d2e995013ddab23daaaf736f762bbc4c9450d20bd19a
|
| 3 |
+
size 4192
|
12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:888b959cf5a5b1f8bb3db748bdcb86a3e72d279de6ab84abf19e8f20252d3de5
|
| 3 |
+
size 8388848
|
12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:232b95e7c6429c17708b9fae7f8ce3e2d91ee9ce811710459062d2866f921dc4
|
| 3 |
+
size 12583280
|
12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9095db1de1d00abdf92a45573dbd0f2501a12f075124f4b8c934e3ce4dc4b5e1
|
| 3 |
+
size 4192
|
12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:635d465cae92399da192b18274bbeb7b88b214e22cc4b87325904e7c68010ff1
|
| 3 |
+
size 33554672
|
12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52f13cf5aeadc3783aa55e8c9e9396d1bdf79a1074ea9d0535c13ef41c961658
|
| 3 |
+
size 67109176
|
12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f2c1a4139db805a97468a1324b7be8ffc6cb7a4bb3352a596349d2b812ab53a
|
| 3 |
+
size 4192
|
12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0f2317e040a045732db10e89cf99fa3404d9528f51eb267e7409e484e749b92
|
| 3 |
+
size 14829896970
|
12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0601ec41ec35ebf4d71aa340e9ad8eec6d97496febf0c82d4b906408c13f4cb3
|
| 3 |
+
size 1521
|
12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:631c9eaddef421b77417ad6de8caa9b73ce2bcba3028ae643ece004e415842b3
|
| 3 |
+
size 1521
|
12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9154057cc18766552e3716cb2ea909fe597fbf9bf5bbba55f53e4392660fd471
|
| 3 |
+
size 1521
|
12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50a75e874575cdf39fe4d6ab9b0eadf99af310b441c93ec648e504f354a32782
|
| 3 |
+
size 1521
|
12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f3a3b3b43fbfa9f6f526bfd0438f1beb8312f8bb70ed1916db7b4ec6db049a8
|
| 3 |
+
size 1521
|
12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e00f9afebb28b3eebef1362210f10067a02cd411854be30e8d0a459122294ff
|
| 3 |
+
size 1521
|