diff --git a/.gitattributes b/.gitattributes index ad0eb45715faf6b662c60f55c0bd9ac3895c5104..f38140ea016c103d1623219bc5c77d75efbfd848 100644 --- a/.gitattributes +++ b/.gitattributes @@ -42,3 +42,35 @@ llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__ll 9000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text 9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl filter=lfs diff=lfs merge=lfs -text 9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl filter=lfs diff=lfs merge=lfs -text +3000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2026-01-07T07-15-16.192705.jsonl filter=lfs diff=lfs merge=lfs -text +3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2025-12-29T05-42-31.325047.jsonl filter=lfs diff=lfs merge=lfs -text +3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2026-01-07T03-56-25.245340.jsonl filter=lfs diff=lfs merge=lfs -text +3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2025-12-29T05-34-35.086951.jsonl filter=lfs diff=lfs merge=lfs -text +7000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +7000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_gsm8k_2026-01-07T07-00-36.377222.jsonl filter=lfs diff=lfs merge=lfs -text +7000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_hellaswag_2026-01-07T03-51-33.637505.jsonl filter=lfs diff=lfs merge=lfs -text +2000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2026-01-07T07-11-28.243760.jsonl filter=lfs diff=lfs merge=lfs -text +2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2025-12-29T05-42-21.185272.jsonl filter=lfs diff=lfs merge=lfs -text +2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2026-01-07T03-51-34.536241.jsonl filter=lfs diff=lfs merge=lfs -text +2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2025-12-29T05-34-34.282648.jsonl filter=lfs diff=lfs merge=lfs -text +12000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl filter=lfs diff=lfs merge=lfs -text +12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl filter=lfs diff=lfs merge=lfs -text +8000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +8000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_gsm8k_2026-01-07T07-00-06.652270.jsonl filter=lfs diff=lfs merge=lfs -text +8000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_hellaswag_2026-01-07T03-53-44.066877.jsonl filter=lfs diff=lfs merge=lfs -text +11000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl filter=lfs diff=lfs merge=lfs -text +11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl filter=lfs diff=lfs merge=lfs -text +1000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl filter=lfs diff=lfs merge=lfs -text +1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl filter=lfs diff=lfs merge=lfs -text +1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl filter=lfs diff=lfs merge=lfs -text +1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl filter=lfs diff=lfs merge=lfs -text +4000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2026-01-07T06-59-35.025159.jsonl filter=lfs diff=lfs merge=lfs -text +4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text +4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text +4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl b/1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6fa87e6a84189802993634e24ed53121ccbee089 --- /dev/null +++ b/1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2025-12-29T05-34-34.847641.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1bc6702722c88ad17e28d15d92d888be85ff71e6ec311827ac157733dac9de +size 42643726 diff --git a/1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl b/1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6fa87e6a84189802993634e24ed53121ccbee089 --- /dev/null +++ b/1000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_hellaswag_2026-01-07T03-57-52.654434.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1bc6702722c88ad17e28d15d92d888be85ff71e6ec311827ac157733dac9de +size 42643726 diff --git a/1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl b/1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b16aacc93535ea6dcf64a43d89e5afe7c09a4533 --- /dev/null +++ b/1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2025-12-29T05-42-28.885838.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564b41d6901271a2a9588f021a42697aa3f5c8fbcbfeb894ea3606624b4dedf1 +size 16835560 diff --git a/1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl b/1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b16aacc93535ea6dcf64a43d89e5afe7c09a4533 --- /dev/null +++ b/1000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__1000_hf/samples_gsm8k_2026-01-07T07-13-27.882882.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564b41d6901271a2a9588f021a42697aa3f5c8fbcbfeb894ea3606624b4dedf1 +size 16835560 diff --git a/1000_hf/model.safetensors b/1000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9a3dafebba39c2fd8e401a63f383a10447d38ff --- /dev/null +++ b/1000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710202c2056864083aa9f83c9826cf14636b23a0c36405ab96825925e100eca4 +size 2471645608 diff --git a/1000_hf/tokenizer.json b/1000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/1000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl b/11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cd001d56c154e82a6ece7cb7c08e8489294bca08 --- /dev/null +++ b/11000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_hellaswag_2026-01-07T03-51-33.854124.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1befb7e514670d739da9a2788d15fcb2c51276e905fa8c3506a4545cf11235 +size 42645350 diff --git a/11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl b/11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e29f2657afab87ea3d35a8bb3680ac644b6d1944 --- /dev/null +++ b/11000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__11000_hf/samples_gsm8k_2026-01-07T07-13-06.183388.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b02758c47b6d45aa55b2e22a485fd326918d24daa4bf64393f6976d3ed6ba13 +size 16785743 diff --git a/11000_hf/model.safetensors b/11000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e9a11bb70d985c7613d154047c22aa1713121bb --- /dev/null +++ b/11000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:235b72417bbfb56c2751c19e0f25a32adb947d2706bebbf4e4609653aa04b774 +size 2471645608 diff --git a/11000_hf/tokenizer.json b/11000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/11000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl b/12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5d3708b0213ca2ead70c343b4a706ce6d2e8311e --- /dev/null +++ b/12000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_hellaswag_2026-01-07T03-56-25.498160.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd36fcdebd8ac7068e7bcdbf8bc01d86e47cb419120bfe39bb23ff946521a43f +size 42645332 diff --git a/12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl b/12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1e48dcd3f9810975f6fce90dedcb3af4389d2f07 --- /dev/null +++ b/12000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12000_hf/samples_gsm8k_2026-01-07T07-12-50.487181.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf387ab00ddb718a7f1e12505bb19d8bbc2ef33ca2e4b8e563a8bedb1f6cb91d +size 16777813 diff --git a/12000_hf/model.safetensors b/12000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06f6c394e7864a50410c8c428070220c0c6b2438 --- /dev/null +++ b/12000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4383535dafe33db18207a9a42556cea80b50a307b817dfe4d1872f9ed4245599 +size 2471645608 diff --git a/12000_hf/tokenizer.json b/12000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/12000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/12500/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8076edfa0aed3660aaa5d057979d8323fbe77204 --- /dev/null +++ b/12500/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de752dfa075d595d9ff0bd79e3e14aa9406de32c871c84c54786b77dc2e18555 +size 8388848 diff --git a/12500/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9ef7023d0262aa8cc980919889aaa8e2a1a4f9f --- /dev/null +++ b/12500/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791afb4bbb4ad446e6cf4f47b4259c221389c484681945778f2da09f622e3ae8 +size 12583280 diff --git a/12500/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff7ebc71242a19541aadd497fb5bd62e8032a1a1 --- /dev/null +++ b/12500/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc00fe6a22acdd438d69e35e6840fd08a8f1e2361a0d70551f80012fb4b8115 +size 4192 diff --git a/12500/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36fc5444890587da0f36e5fc0665380809cb0bd6 --- /dev/null +++ b/12500/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da31c672ffcefd55d0c2e5037760793550ca890bc1baa8d3e634cf6571687860 +size 33554672 diff --git a/12500/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32eb1362acb7152014fe76cf723b5f3707a1b76d --- /dev/null +++ b/12500/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081b8f92caa166fbccf754fb2b538231eadd12458a10b11844f952bdf8f29f44 +size 67109176 diff --git a/12500/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00aca5bb574ce71a5e11df4855c098ee3389fced --- /dev/null +++ b/12500/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e7910f69bff45ad276822a8a4039969e72780960a1f206e2cab48ed610e69c +size 4192 diff --git a/12500/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0341fff43ac67e36015897115e1b0441edbf5fa --- /dev/null +++ b/12500/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51f29cd0a288e7d5f90658c1cb8be8e6d6d5af144b3437799f92033d2b8db07 +size 8388848 diff --git a/12500/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69eb6a535a616e18b9296a2a85faa4762fb6dfc7 --- /dev/null +++ b/12500/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5cd25e0ca1372e05cfc50b8058f3fedd83f2b060bf2f7666f4962137d8ff10 +size 12583280 diff --git a/12500/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3570d618f498e5595b1b4731874e67524984bd64 --- /dev/null +++ b/12500/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61797a09f487e85a7f78935cb392dc83153df62abb0721cc46d675611895a8e +size 4192 diff --git a/12500/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e634aa79693849b79ce8b45520627167892e2a0 --- /dev/null +++ b/12500/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3497dc10dabaa645b5790a6b58ce71be9fe9011ef556c43d88b2f836850b5fd3 +size 33554672 diff --git a/12500/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c418af00294b641451f9d0659226da98a4658e0 --- /dev/null +++ b/12500/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b532539a6078a2c375b83a8b543b4fad0f0156aa4e378bfa8bf2adde1c8bfc +size 67109176 diff --git a/12500/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0afbc28022140e8568aaddbc3cc353a540385a16 --- /dev/null +++ b/12500/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2a17ba7087008c78dd9371e283c2e1b806b340f25a0e18c334c73059e36743 +size 4192 diff --git a/12500/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..900fd510b85454d394ab88d9539d8f39926345c0 --- /dev/null +++ b/12500/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee54c2d9a5a63f1cb4fc5b9fc2849b64632ec3518f84a3551c8d89c8165de4c +size 8388848 diff --git a/12500/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdd4aa27aa78b937444a77fa6720c861afc0de86 --- /dev/null +++ b/12500/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87db87d5020c6095523bc2bfd8e15d908eac201cf963df897cc5231a515d3468 +size 12583280 diff --git a/12500/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1ccf58449618215e517b7272bab5a0348668174 --- /dev/null +++ b/12500/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3860cca2549d7828a00907bf3aba4d0f4c05d783f7c3efa034a07b5432deb6a +size 4192 diff --git a/12500/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..622fb2f258a46e1eadd381197a343b7b971fa4e1 --- /dev/null +++ b/12500/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:277c267e1b6df75c38ae6bf801e3dc0e45de60a107ebc61e5b89685c0ec1acbe +size 33554672 diff --git a/12500/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..889111134269d3ae458e7c39d20f94376348a89f --- /dev/null +++ b/12500/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8387fd0023a47667750e88365da9bf861512d4ca7ec9e48cd4764818a690dae +size 67109176 diff --git a/12500/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..932647d0a8cd51ec080488f7884422f8b66001ee --- /dev/null +++ b/12500/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:105a80d91ff6406a19cfc0e749b821f7a321ecab3fee19f5db5e0e5983968577 +size 4192 diff --git a/12500/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df002b0bbf1027a5563a0420e9390c1b430d1576 --- /dev/null +++ b/12500/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42178984f7b83e8bb4912794017a3969bc0ea87da3467d9c845d5dae74ae2fe +size 8388848 diff --git a/12500/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f94d7fb0d7d14b181ad3b86da52086c5e16b583 --- /dev/null +++ b/12500/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7cf0be75e754bc61dd5888c4923cc1fcb11cca44c7aba48c72a4ecb659c4212 +size 12583280 diff --git a/12500/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7503da9998ecfb2c61402d394a3ba1de19690931 --- /dev/null +++ b/12500/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09b4544a0fdc4be8cc2925497ba1ec5b81808a31e9e5ae09ea9f3feb2d90fe8 +size 4192 diff --git a/12500/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4a37a246408e51e986278f10c1be465b60b2209 --- /dev/null +++ b/12500/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d10bc0cc9796510c92b80b604827e591568b495343cb8d9868ff40ddcff450 +size 33554672 diff --git a/12500/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecce1e8dd439f639fbefbbbe9a4f460d3db03300 --- /dev/null +++ b/12500/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fb347639df7f2a04d6032f77447028812fab1340e0780586ae7c53aa87145d +size 67109176 diff --git a/12500/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb07cd518802a2d8386befc7be3408e8df554e19 --- /dev/null +++ b/12500/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc63f1813bd23de425ad80c07d6c5ff4c911b256471ffd82fb653600ab709e22 +size 8388848 diff --git a/12500/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac226aefe6bfed114b991d1edf86c4a307c4df29 --- /dev/null +++ b/12500/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba693135e062a193f2d8bacc2f70d6b4d6ef40e69f65691afd7170dbd5efd2d5 +size 12583280 diff --git a/12500/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a579b99d0a2d95a43046b7cfe448908ac7ffbb2c --- /dev/null +++ b/12500/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc30957b2b1ca3228f88d3189314707d6be8fbb98a1adc6c7548d260c2a1e52d +size 4192 diff --git a/12500/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c5dcea31c1b168924b047ce861bbc6d020cea9f --- /dev/null +++ b/12500/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40d316b20de95f59ad8bbc6e3ef2117185696b0b025732dd7f8ffe38eccd399 +size 33554672 diff --git a/12500/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f24afc560a5ac105215fa1d5cb6d393fc8e5f388 --- /dev/null +++ b/12500/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c048f4f390c9d78887fafd8545ded0aa74e96b23e778c424a66683c6014f908 +size 67109176 diff --git a/12500/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e6c8719bbb24697340400982626c7f0519c042f --- /dev/null +++ b/12500/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ed2cc24fe61072fc6937542b03f51c136cc4660432de97cbb305695768c4b4 +size 4192 diff --git a/12500/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9996168268b706fda93c59eccc25b1b7e4f2fc8 --- /dev/null +++ b/12500/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ab32d90954e22b86daa1e0e129fa94a89e5fb4c523dc837aab0fc959eab7c8 +size 8388848 diff --git a/12500/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d74738e819a365c618bb8802c1b3654da5772a7 --- /dev/null +++ b/12500/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c1d631477d903e1b4149e06736144c5883eb7416993deeb97eb175f0b3c82d +size 12583280 diff --git a/12500/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3aae57bdfb7b661b3edda808e095afef28e35a8f --- /dev/null +++ b/12500/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26be7486e0bcaa54cdf1d12a6385f82c4351d8585df766d7531d57a2d4ee5d5c +size 4192 diff --git a/12500/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3edf12f7f89f8eba35b25274c4a10efaec95186 --- /dev/null +++ b/12500/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff85296c08333208919a4b030216f7822f165f668367507878a20ec33a585e1 +size 33554672 diff --git a/12500/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..520d1879236ba4dda98853ee878970d8f89c3c50 --- /dev/null +++ b/12500/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527ae9ccc33a8adb96adb47ac0e5626e9ea86ac0427144e0e23c551926bfe5e1 +size 67109176 diff --git a/12500/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad1038f4991475d000e61def2ac93a8fbde75ab6 --- /dev/null +++ b/12500/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3172d2ab12f6d67c5b8bf11ead9fa11bbfca770e63c38bd1d3e966d43022a06 +size 4192 diff --git a/12500/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6058862c88fd053f63ff5c4621cc50e02cd84710 --- /dev/null +++ b/12500/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e58e8225a81661d4978904df5df43157a766b42bc88e5fcd85440bc6fa379f +size 8388848 diff --git a/12500/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c83427ec12daa06151dda425c2a954b50be473d6 --- /dev/null +++ b/12500/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cd9892577d5fcc782bd6c8656ab89f56d101a72fac1d81bdf6ac4eee30778c +size 12583280 diff --git a/12500/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a52eaf4a662d0e0157a2ebe610777f04658bdcf4 --- /dev/null +++ b/12500/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0b48395949c83c4e76a45ef9cae2cf0ed474740352414d9856266418af7761 +size 4192 diff --git a/12500/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..856889788ef5ba029a2fe9b189debe98ca7e8953 --- /dev/null +++ b/12500/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de29c7b0c1a447b4112d75ca3bda1cc32feb9a1b3b81ce7419b1c142bf30f68b +size 33554672 diff --git a/12500/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5790950941f0ff8a88f358f6a709d27958304a21 --- /dev/null +++ b/12500/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e840e98d850247da2fbbe4f859149140075ccdc13346be79364525e8c26fb98 +size 67109176 diff --git a/12500/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f611d3333b717bccabad36a9c1d41852c8609c7b --- /dev/null +++ b/12500/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6d4a4f2b151987f7542037f677627f03243431f1b72afbffd2473292826e0b +size 4192 diff --git a/12500/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b32a6c58a9034e4ecf959030357fea4bace49ac --- /dev/null +++ b/12500/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e4894c0f7b33de76d4548709fa63497d03c8cd9fc9aaeda177dcb55eb6507e +size 8388848 diff --git a/12500/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42f82e3fb3753d041e29b791a94eb17ba2e57f51 --- /dev/null +++ b/12500/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44212a4a98e1289ce1ee06a50a0d4656528f605f8dbd23d9f26e56f26669a031 +size 12583280 diff --git a/12500/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b42c6c364d6259921d3f40e8b6d5bdcae651e17 --- /dev/null +++ b/12500/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f877e74c5c81ac6898f58ee197d9bc82b5a5ea7af40eb770bcde19096f83c0b +size 4192 diff --git a/12500/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e486eb7fcfac96c75b455d40241c27b45b9d5273 --- /dev/null +++ b/12500/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7ba196a140bcdad1b259de6fb6ee417a190a70c203d2d528c46eacb31b2a178 +size 33554672 diff --git a/12500/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71864e395e9f5f44ef13bb3b24583599cae6e8b6 --- /dev/null +++ b/12500/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848b8b61677d431782fa9b88ddddd968adb309bd4b4b9055ed75436de2736f7e +size 67109176 diff --git a/12500/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cfe7a09773444061a3acfb22f24f8dd7312f5c1 --- /dev/null +++ b/12500/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23872d6e1cda04b6e2d1571ffa67b87c419f741b1d666333c045b7adbf316439 +size 4192 diff --git a/12500/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9d2f250211c631c9e7b2172de85df729daff11f --- /dev/null +++ b/12500/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a797311a002455262646cafe03f96bb08c7f39aac0935f7864e55126f7fae645 +size 8388848 diff --git a/12500/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8f35edab7773582cb334750691601ba8443fbb3 --- /dev/null +++ b/12500/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de82261ed1ce0aeacff77130067b4c9178241cdf14b38e067454092a584eb5e1 +size 12583280 diff --git a/12500/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bf75a643872d4e8cb56635da63f5dd9cc19adb1 --- /dev/null +++ b/12500/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d254dca8427fee2ba9cdfd1fc0e683bbd494f6ad6d95e8eb4309d01735b94d +size 4192 diff --git a/12500/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d7d581baf9e2681c70b08655f54e91c0048a120 --- /dev/null +++ b/12500/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9034bac23b8690c5dabf3a0063e62b14c85b4f7e88a7f8c0cb09ed67161237cf +size 33554672 diff --git a/12500/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8d97b9206091345d0c678a4f28e2fa8b9b955de --- /dev/null +++ b/12500/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80c00d8013f62cff8676d7e768f841e92af052190cd73b4af422021b731111d +size 67109176 diff --git a/12500/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a9019354848a46da9b887cf5e10a9a19c212666 --- /dev/null +++ b/12500/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598d86743a654afd222243fcda011b0e71928a5a2a1a4a42d9b678fe13ab696a +size 4192 diff --git a/12500/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9be8edf66e5785a4a180087d851e4125b0e1948 --- /dev/null +++ b/12500/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc809185927b5626fd2c76623f3ea0841480e3478fdae6aab680eedf2c4c8a39 +size 8388848 diff --git a/12500/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9f6c95068a569b52375d812536ae771393aeb6f --- /dev/null +++ b/12500/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5864a96f54c11d9fc1ff6e06e07760da2296dc97e05986e40ad3f1d94ba1a2 +size 12583280 diff --git a/12500/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7df341bfe57c7d7eee67c1d4cf79bbe42d3b7b1 --- /dev/null +++ b/12500/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3c14bb1696b762fc53a4b485b883e171954f0720baa84ea4bc7055d4cd5ccf +size 4192 diff --git a/12500/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d95a0ceb017e037c998a869b97ec1ee79e020ffb --- /dev/null +++ b/12500/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3fe4de5941e395cacfdc67d2718d82cfff9f8ad18dac93c80089065345f26bd +size 33554672 diff --git a/12500/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a671f695868f99f0d8de3c286c5382f92b69d01f --- /dev/null +++ b/12500/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83e4ceefaa3182e4f9635ce109f5929acbecb66fa739b49c89835db217ef4bbc +size 67109176 diff --git a/12500/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3e83084b8ad91ba56fdeba1634a5c6d2b55ce72 --- /dev/null +++ b/12500/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6743865d8bf058099a3397f8de1cc6f5ddc608e3bca5fc924bd895908fb839 +size 4192 diff --git a/12500/model/model/final_layer_norm/pp_block/model_weight.safetensors b/12500/model/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89a15bd564ae6f1294cfaf5de58aee68fa08e613 --- /dev/null +++ b/12500/model/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd7752d6a26582baaf3489bbc72342b40815308075efc60454a078c560dd56d +size 4192 diff --git a/12500/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..841468d1cb9ab678a2d6215299c55dee439abcd8 --- /dev/null +++ b/12500/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b696589351c0fc78e293559ab1db6633098df2675e9b532f84857be8c6253158 +size 525336824 diff --git a/2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2025-12-29T05-34-34.282648.jsonl b/2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2025-12-29T05-34-34.282648.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1ba548e944f07014bcb2924320b2861e5a6b911d --- /dev/null +++ b/2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2025-12-29T05-34-34.282648.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79fb57eef7d0bd16361d36de49bc9ee69d7fc7d9c0ccbb1c36e80e5d928a9d3d +size 42644110 diff --git a/2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2026-01-07T03-51-34.536241.jsonl b/2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2026-01-07T03-51-34.536241.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1ba548e944f07014bcb2924320b2861e5a6b911d --- /dev/null +++ b/2000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_hellaswag_2026-01-07T03-51-34.536241.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79fb57eef7d0bd16361d36de49bc9ee69d7fc7d9c0ccbb1c36e80e5d928a9d3d +size 42644110 diff --git a/2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2025-12-29T05-42-21.185272.jsonl b/2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2025-12-29T05-42-21.185272.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b8bc721d84554383f32ba7b6fd2fe44631aa18e2 --- /dev/null +++ b/2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2025-12-29T05-42-21.185272.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a03f5e2e6096428ae0b31375f648e2648d8f20dbbaef6efcf2d5d381147415 +size 16799489 diff --git a/2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2026-01-07T07-11-28.243760.jsonl b/2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2026-01-07T07-11-28.243760.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b8bc721d84554383f32ba7b6fd2fe44631aa18e2 --- /dev/null +++ b/2000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2000_hf/samples_gsm8k_2026-01-07T07-11-28.243760.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a03f5e2e6096428ae0b31375f648e2648d8f20dbbaef6efcf2d5d381147415 +size 16799489 diff --git a/2000_hf/model.safetensors b/2000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bdf4e5b80a33b0f74948597ed6586af88ce44d3 --- /dev/null +++ b/2000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7198b79076ddd8d8240c57e2787d925bd95e2242fc711b48bf8f9c554c2e49d2 +size 2471645608 diff --git a/2000_hf/tokenizer.json b/2000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/2000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2025-12-29T05-34-35.086951.jsonl b/3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2025-12-29T05-34-35.086951.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ab71bb5d9f653b6889617ffd7e7e0e21d04f9fda --- /dev/null +++ b/3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2025-12-29T05-34-35.086951.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0f0530bb23966847b3d1c592f5e2ea453c6920c5bfce913279f3c63960e9c9 +size 42643922 diff --git a/3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2026-01-07T03-56-25.245340.jsonl b/3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2026-01-07T03-56-25.245340.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ab71bb5d9f653b6889617ffd7e7e0e21d04f9fda --- /dev/null +++ b/3000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_hellaswag_2026-01-07T03-56-25.245340.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0f0530bb23966847b3d1c592f5e2ea453c6920c5bfce913279f3c63960e9c9 +size 42643922 diff --git a/3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2025-12-29T05-42-31.325047.jsonl b/3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2025-12-29T05-42-31.325047.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5f04cc13e7a2e1e91c5f6ada0f6265762a5ea5de --- /dev/null +++ b/3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2025-12-29T05-42-31.325047.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:345b3d1cac6381fd3712329f3e039092a9b9c8dc5624a0b0c096e9559ae12135 +size 16848330 diff --git a/3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2026-01-07T07-15-16.192705.jsonl b/3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2026-01-07T07-15-16.192705.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5f04cc13e7a2e1e91c5f6ada0f6265762a5ea5de --- /dev/null +++ b/3000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__3000_hf/samples_gsm8k_2026-01-07T07-15-16.192705.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:345b3d1cac6381fd3712329f3e039092a9b9c8dc5624a0b0c096e9559ae12135 +size 16848330 diff --git a/3000_hf/model.safetensors b/3000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d39a19a55ebc63db784dc02ca345f04d2dd6bb5 --- /dev/null +++ b/3000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7d951aa7f7b2cbe0ed718a8131c516dd66b390363b03c34abe27355ca9ad47 +size 2471645608 diff --git a/3000_hf/tokenizer.json b/3000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/3000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl b/4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3d1e0200bfba517c9a7c804080a252e1b652e70 --- /dev/null +++ b/4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21a84c7fb8519c7d103d94fb87b986071a7aef1e9b06d602a54e674dd7e4709 +size 42644320 diff --git a/4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl b/4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3d1e0200bfba517c9a7c804080a252e1b652e70 --- /dev/null +++ b/4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21a84c7fb8519c7d103d94fb87b986071a7aef1e9b06d602a54e674dd7e4709 +size 42644320 diff --git a/4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl b/4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4bf4469322be75ae6e28f8594e9af21bb3bd852e --- /dev/null +++ b/4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ec3ce4e2f50a1cd1114284e35fadb3774a31d41fd5f7f71e0e0a5aef28c4f +size 16856087 diff --git a/4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2026-01-07T06-59-35.025159.jsonl b/4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2026-01-07T06-59-35.025159.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4bf4469322be75ae6e28f8594e9af21bb3bd852e --- /dev/null +++ b/4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2026-01-07T06-59-35.025159.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ec3ce4e2f50a1cd1114284e35fadb3774a31d41fd5f7f71e0e0a5aef28c4f +size 16856087 diff --git a/4000_hf/model.safetensors b/4000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb8ff5697a8f3567925fa5623cb1da7d2d899005 --- /dev/null +++ b/4000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a356af8c0c806600eb388ea3f13bbb05b54fe4a81a98e821a3b5d4cf5276e6 +size 2471645608 diff --git a/4000_hf/tokenizer.json b/4000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/4000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/7000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_hellaswag_2026-01-07T03-51-33.637505.jsonl b/7000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_hellaswag_2026-01-07T03-51-33.637505.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c89f9344dae0fb2efa54d5dbbecbbfcd34e5ed45 --- /dev/null +++ b/7000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_hellaswag_2026-01-07T03-51-33.637505.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa05058b64d3149b41677c3779b28812fa410d2387ab211f9e9a14aa0f7db7a4 +size 42644690 diff --git a/7000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_gsm8k_2026-01-07T07-00-36.377222.jsonl b/7000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_gsm8k_2026-01-07T07-00-36.377222.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7bf09176a22d3329c7ef05063f84ec45114e02e2 --- /dev/null +++ b/7000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7000_hf/samples_gsm8k_2026-01-07T07-00-36.377222.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d331cef459a741b519f25bf86fb1e73bfa59af6d2298e8806afe0796927fb3 +size 16818860 diff --git a/7000_hf/model.safetensors b/7000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..706a1ba77424be6a8baf64a625f8ac4b701bd215 --- /dev/null +++ b/7000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531aa50018dc0adf03089d1069b3a2fb5d4e7019fec2d47f8b77400e9b6e8c6c +size 2471645608 diff --git a/7000_hf/tokenizer.json b/7000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/7000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/8000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_hellaswag_2026-01-07T03-53-44.066877.jsonl b/8000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_hellaswag_2026-01-07T03-53-44.066877.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..572bb685ce0dd052aff8d8360da20bd0e27d99bf --- /dev/null +++ b/8000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_hellaswag_2026-01-07T03-53-44.066877.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75551a41e21217a197d8054302ab2b239d3b58de6a87fbf9acfaf314843b3889 +size 42644960 diff --git a/8000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_gsm8k_2026-01-07T07-00-06.652270.jsonl b/8000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_gsm8k_2026-01-07T07-00-06.652270.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2a5c0222be263c1ec12198f22afb2d16991d86f8 --- /dev/null +++ b/8000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__8000_hf/samples_gsm8k_2026-01-07T07-00-06.652270.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac050d667011d78e6123a5fdbde946476309d4295a593105dd4e0b0f71df16ec +size 16722923 diff --git a/8000_hf/model.safetensors b/8000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..affdbb742b7ed74d997ad8fb6d4ab855ae8dba35 --- /dev/null +++ b/8000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:441587e4518a145a3798d4a0678673f000aa02753bf149090f3b8d235405eda0 +size 2471645608 diff --git a/8000_hf/tokenizer.json b/8000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/8000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama32-3b-nt/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97ec95c52c5483c71d58ba89202fea299261b03d --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb9d1d36f7fbf609c2ed4151b6236eb63d724f53ea8eb22b43bb0dded52e661 +size 18874608 diff --git a/llama32-3b-nt/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43226433205cb2c30932b5b821c75c3234b667da --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16eb467b5920fd79135a118616fb701761ec5cb52d9026e3e9056c130da908cd +size 31457648 diff --git a/llama32-3b-nt/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3557b8d4450afc7f4ffd36baa2c71d27e715e780 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e97817522f1ac257b3083dde979331231efb60f12a08e00f21d2616feb2289 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bc252398b25f1154eb6555167ed85eb799f4e65 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b36e708043501f559844c53a18318ab4917360b77480f20d4d77e3be58bb50 +size 50331888 diff --git a/llama32-3b-nt/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abc8679f8e1d524f5d7024db374c6de15b6f93d3 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca6e89a7441fc0520a3a89f04761f19cf62545760c812870aa02041612ec5b1 +size 100663608 diff --git a/llama32-3b-nt/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5ae2cfb4f9e2b4edfb6f43c6758b90fc547ef25 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e967ee097c73c5b3047514602a14a0631cd5b248d0fcd189282c399abe0ea983 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fcd649f10cca48c1b2dd735aed8800b1879cb4d --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db748469c2097862052dff089e68720e77b31e7cd27e1881a32823b9a5bd6fa6 +size 18874608 diff --git a/llama32-3b-nt/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b34c90558356478f8dc9a268903db9c3d74f9404 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348129465a16874adeb79cae6faa1357fbe83634f4c37ca6771c18fe340985eb +size 31457648 diff --git a/llama32-3b-nt/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af92c336aece9a87600cd617ad67c4fedb8a893c --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1fd8a8564e09fee93f80e96d19db4136a37d32278e3ff5cec320458f0d4a46 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4187c8487c31d13ad5af691dd65aa97b7852008e --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f485c23f28afa024985f1e16e4621d098a27aa1f1f3588d6ceb6ed50a3cb1d4 +size 50331888 diff --git a/llama32-3b-nt/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c72e9a3185b8cf449ade056e221a455d789e8cf --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb12dcfad2ef199e70a77c0ea283deba248419b162f2fbb6cac4a6aef16c7da +size 100663608 diff --git a/llama32-3b-nt/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c154dc9c3d66979cea9a28554bcef7ec5ea6b34 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ed7471a5a2178194e4b249b162d0dbb12042c351097263b868211bb9e87962 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..409af3d3f5d89a99a0842bce03db7d62016a34c6 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a77c089c61449f9b28e7eaa8a38327d26c8126c1b4ac28d270b53afaf70498e +size 18874608 diff --git a/llama32-3b-nt/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d341d501bfca6e74163557b14be592c25f16b78 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57720e9281cf15294403925a763848bdc8b09e135d12e1a9689f1a5e4fa5de0c +size 31457648 diff --git a/llama32-3b-nt/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10b7f09fec54fd041094e33a12136d66b7af3dbc --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71a095df689e054ff28891efc5b683a3dc5624a13460df76906b5308b062c64 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7780bbff302ac5bfe467a4b9de834780719e988b --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db09114da74f67652607f336815e34d487483fee5560f89e6dcf78b903da97b0 +size 50331888 diff --git a/llama32-3b-nt/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adf1d95e3d9bbd5f6b7b2b6bdc980b4c4421d8f9 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2bf898fb09837188b22f1709481206e1130bcd3800e81f360368fd6b86a4f9 +size 100663608 diff --git a/llama32-3b-nt/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d7e4b44e426ce15946d7f90c6c4ab3efbaf7830 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:022f7b60734cd614a84f991dffeb362a906ae4d6b8de87794b5dc6553c387b26 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/25/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/25/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..271c4c7764be9815da1fb2ae20af3ed5b9d38c32 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/25/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb106119215296ff4f85a8d65997d9200767b31fb28fa578c698aae823bd122 +size 18874608 diff --git a/llama32-3b-nt/model/model/decoder/25/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/25/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea6cd337e5ce3df42afca94b453f637853311369 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/25/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9089f65234c78f742d80a4e39ae060c3913ca0296ddd847c4b0f3229f1f35b +size 31457648 diff --git a/llama32-3b-nt/model/model/decoder/25/pp_block/input_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/25/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1b54475a61bf99336ebb8882becd557a154ed77 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/25/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3705205d7cf5ca6c4fba71c75fda754e0213a2a7ad3d4d583434c1e6a3f432e +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/25/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/25/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2286a075715378cca47a6ee919ce610773752c2 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/25/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9124ceabb8f903d91233e317cf30ae0b71c5437b99d6a16e32d59719bd0bb02c +size 50331888 diff --git a/llama32-3b-nt/model/model/decoder/25/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/25/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a98978dc15b133fe1dd4ee0a500314c75bfb1601 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/25/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350ff0b46318d2de405be43779fd4398ac7ab6a7ba9a417d0dc4985db786533b +size 100663608 diff --git a/llama32-3b-nt/model/model/decoder/25/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/25/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8129165d9eaf18a596cbd5c94a6294fa2b91f832 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/25/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95679f5aba1a39e3c09bff1f3aa04ab0eccc4c7ac1a4d843e48778cb6e0f0fec +size 6240