diff --git a/.gitattributes b/.gitattributes index 03810bdf895f72462fa36616f5a53434226701c8..ad0eb45715faf6b662c60f55c0bd9ac3895c5104 100644 --- a/.gitattributes +++ b/.gitattributes @@ -39,3 +39,6 @@ llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__ll 6000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text 6000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_gsm8k_2026-01-07T06-57-48.977156.jsonl filter=lfs diff=lfs merge=lfs -text 6000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_hellaswag_2026-01-07T03-53-49.653886.jsonl filter=lfs diff=lfs merge=lfs -text +9000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl filter=lfs diff=lfs merge=lfs -text +9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl b/9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6f9852f94db255f0304656c0d2d91acd430dbfaf --- /dev/null +++ b/9000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_hellaswag_2026-01-07T03-49-27.696211.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c951352270e452953eddca955b7b91d59c444b0472e1b823783718174047f7 +size 42644980 diff --git a/9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl b/9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b67087407c536f8e1a05c233f9b7047ca073fb2e --- /dev/null +++ b/9000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__9000_hf/samples_gsm8k_2026-01-07T07-00-26.302516.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e041e7a4dbb3adb96de2e754281310caaed9c4bfc59a9cfd708b88a5b9cef63 +size 16798564 diff --git a/9000_hf/model.safetensors b/9000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30e00bb81be2bb04a9a2914c749ed57ec5532e89 --- /dev/null +++ b/9000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c90c55635cf0ceea9a88f2d3fed29e503b1f5641431a3b2a35a2c1b5dd3b71c +size 2471645608 diff --git a/9000_hf/tokenizer.json b/9000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/9000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama32-1b-nt/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d075231e1813d4986f33af95582cf14138f22d6 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf8ec32f73c8df7edb3dd7727a9729d834dc2af9f5931b5d1b906df44c33b8d +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5eddf7cd47e6abea4d2f15719641f0335349c00 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01be991ceab7faf9d1a9ef49197c66cb138c5454b9ee1dcd68122fc2090fa2b3 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10cbf42d92181209848590a041fb43604291be87 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7cea49eb581e94c8639be7b7241025a34e45665fc65ecd156eca82045a5381 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85789ca495476713255b705a9c723bc31555e98c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f8b68f6b1bbb164111f7cfe2dab91c22c523b3a085ea1130a3151357cb10529 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a2b168385362ca8f0e2c7127a7321c5017b6117 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01317c2dd8d23b2ae73991529701efa622052f5b799b451f8c0edbf5ee87f9dd +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c858851804f0198d1103bad0b108d117a89e1cfb --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64db8ea88659d892d2fbe951e02eed87a9ab70629a5d60f0cceba4084d139e9c +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3845a951a59209eab79ba351cbf1bc647ecc035c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f95ae205171c65231fdc326d0883bec0ed4650f1371f79e9a87200e7423cc2b +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..783c95f1850fbf303c067e72b503707b2854c447 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aebcf09a0af1b43d7c2d085bc097c366b9f74575b6cf56d91c2d717e6787a25d +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40d6f8f7fb6bbcfd0cb44af6494ed12c4fc64bf0 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2156f2df1e51d68b84ebaf5b0f12a853201adc0026642fb9b7bbef5c06a4ef7 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca7f2683be473cef145220111f787c65e0329d5d --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0d5253ae1cd73f3308344ae2dccaadba5f59efca3deeba29073a29900d00987 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..153f8e8ea61d0f674751ffbdf0bc910df55c1fb0 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41fb5a628feaf46caae796b955e9b508d2594a5096cf14a1f8fd4e5a4b86e3e6 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c742f15d10c8799b4111a25321d56210845aa2a --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71eb41eefe8ea9f1524dc9a8170e7d50f28deac6c5d216cf74192d125e497567 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d69b4caa77390e0b7ab9ce1fab6ae69ff0515607 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104343d2bf3568e998ffb42ca2146c24a82570944233c25d26c28b032ba27dae +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a6b33f42663eace1c24d9ed36a939c0191c35ec --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b54bac7e7578f5ee0b3a8ba9ab9684016f538f65d2426eab2c31c0e9210abf6 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe14b9f0795166b46343240ffd0edb3bf6bfd2fe --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29901b09c966155daa1cc1b2f0b6d4d9b51fbc8b3d96197688195ed0d7ffe27d +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc1a55dbbdda336891d3e2a76f4094dff95eea57 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1cf2c5ebbaf548e77e330958557e4b8df8bad02ccb59042388d48ce44302536 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..482b8d55202d6037f51f443000eac19186b6461a --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9468c20253d6e657931981408c6a387e3c4ffef4dd80e94b179d177c34797951 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..563ca545b3290232c7d2ad93ff05915e3cae271c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3808ce3e24b90cedaf8e4c22991fd2070966e9303dae7fc57a1b96dfa7861833 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afa53c206c87a813f86d8d95ea04dd4dd6a9e7d3 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7befa5837d94145ca09d93f89430d65a827a2f409e98c523818bc3eff704c179 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da84470ae4680d764724f1bfa69fee5abfa362a7 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa0a0483c4f9e9663792e40827ed48904751d24c9bd4a8e578d2cd11c42328b +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70f0bb66818209d797737ff9489cc0bc0f095952 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5697969101bd979a80b7fb440385af088ae58fe5410c972ca25a1fb2e294d580 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbc146d0dcafa3bf980cdd613481da394ffa0f71 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c4a7a29176a102a8f9cf411a0bc042b5f2f97a1d1c236e624a9d06ef3bdd6c +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e11cffe2ff97a43f31c65f5188e25f6d9c5ca83 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1587cc94039a6f2bcbf3a618b690d20e59ea52d4819fc49f900382b7f0e8b858 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43fabfb0bd18224a53dd5a3d88d91a8151a23867 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf8050b19aaf0c1501454fc16c44dd8c26f93d9859858915322f24ec8d88161 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e591f8924d68207225269f7aa635dfa29f85083c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae1c6fdaba6f5847700be2e43055312c6ff97d54a3d599850b32cd275be8a5b +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8f5adc23f26b8c119d7ff2c763b8b3e1c723ff2 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d96a043ae27e520880693b9ae3c3fd3fe465b49b470e7565ea0176dcb5f9b03 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f8e1f15ce6b89e491cb708d12c89be861149ef6 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6fde1b2d907fac5bc732d6f456ce1fde86df348350a1801e9572685d6d525dd +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d15e41b71bad911fe4278683ea253112c88f08de --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9902c1a5644e99e22a91171c2cbce04c2118d1ff556472b2dee704656b691f +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43e68dd96f2053d5fd1f3227e3425ab09f4fab4d --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833077da913d352cf4f940f2ae912b3e966605aea5894397118c6b7798f83718 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f7a37f1afa1ed7eb7fabba92b1a3d8960d9cb0a --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26dcf52688014510235340347063d16642aa2eac1d837d3315388400137791d +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00e483bc4c01b28023f97c3d00e6f379277a3f81 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e774bc57792a81b328d939ceae2179458351cc36c1097bc2483786d5bc2d1c1 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5e29dcb4463e09a0425c72491098d58e43b4026 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc313cf53ddad1eb093038d84c8c69667fb20816bb13088641e8e16bcd2f098b +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a332ac4417369acbfdadc7c4790f755776969c2b --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c122f0da7a1a04389dd9ee28927a481e3078a7d1d78e196e4a7ebe3a4ce3dea +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1768c238873417286e764092eb6b966fcd91a9e2 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965547dec7eba0f052e8b90170d43a813a4e60d2e9c9a585c7b79e00390112d5 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5336ec4f9d92fc3dcb8ee099e747dcacf8b7e14 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406cdadd5a55d620a9497e290330c744c8958e5c864f655cdff9574413c372ad +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c400747ea3386ea1f98fda5f9639dda354f467c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b845a5002815f30a73162044270f8d4160c10f795ee4125f0a2e2bd918efac +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9280120aebacc318d99f06e9aa921f02956519ff --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea4deb2b747d546ca4311ca545d482b5f77a1ce7f42e594619389116b9c63f8 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2e3c4db47f4b494b82bc0741d9f0d14ee21ccd5 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff191398cbc507b7e1000dd81d9190fe794ed76296334212414257ed51b6efc +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbd5f71ed0034d988e080a8f26edcefbfcf3bfe4 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22750304d1b25c6bc07110cdd4e908b2b89315743715f6b0a5cba901fa452c81 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ddeb04333c02e855d9d112201b7f913c06ae91f --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:809054c9260ad3a88ef4697fbc36ece2019802c8d1733c93f4f1aef4c699c6c2 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83e7e61af89ef578a0635a2b8e9cb19b1a177068 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de58f7e0109dd72c4f3d84ff464b246607d8266965bdb883afc5f3211ed62fc +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3333656bc9880a092be3d4b3099014de63474a2c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a01bfdadfd9b0dc0e5abb2f4fe0b992ea163adcdb32d79defb746f61e88ce5 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c642f23290c10d1e2f9d4809e903370f45a3b472 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a030cc63b1ca097d13c5b2eb7392eb16f9b474db4e03facd73e83c00e83226d5 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a572f60867b0dc7135b79e91224036e6e44d3e4 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4aa2cccdb5a23e5cf1f6bf1e346ee770308c6760971dcbc70b74ce8643228fe +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34d2f3a9b932c6c8763dff09e4b56d3e99af9a2c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba6d5c261fd0078a04da858ad1ecbf74a06b76a6caa0fbab109b354383237ee +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a0a68028751fbdf3edf58b67087835ec061a250 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0624eb8fded609fb9e66efae9546413ed3d0e168bf95cce776e3b810ec332e02 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..518d356e5f328b6e8069569008a83686edfb539b --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02209b9c8085aa854ea49b839fa809cb023b4d206ac8fc5b62ceed89cba3f78 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c7c9c6dfa0be71963ebff500745876abe3cb318 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eba6f8454418c471307d36f3342b3d8c62f43d83621446379b43a23cbebebe7 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..495146cc5f34cce1363de23fbdd02023b3073b28 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29863d99dd42cb9712704d45c3b7b17b4e656562dee5c6452ba3d92af91557d6 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8439a56c2659e4d4cc2fa1eadf29df0ede446b52 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47679dbd6163c91341a3c4c8e783b0c6f1fcd7320d766772990d4506f6363e9 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84660a5d6c6534d1c4fc12f2ecd6ee393eaa8c20 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957b1252f42e67eeebd290e90e8868b26d6aa7cd04586f65fdc1fc516e247e79 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..425fb5870802251e0fc018a7a248ae66333a9eef --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4773e0d22333b1ac602bc9e608eca6c35f03a2121ac03cb353fbea2c3c027edf +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70ce186d9a9b2d747509844275049c3e4a8b489e --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f39d56fd5cb4f70c18b32863bf30a21b7162da448487d574758b117cbe6ccb29 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afcd907ef4324a01ba14951ca8232f3974f44211 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3ce98cd49b54932bbb1e20e7104e1980ffedb8045545f743e82c4dbbd4bd64 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c77c6889a09e29ced6bdd311fe1fed22ec1f73f --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d519eed4981ec5eb981d0d2f079de453cf9be0caca471bd24c8c0c0c5c257d7 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6483a8c3250acf082f7409d8a6f4b60f01470972 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0243096b78faed933fadee5a31969c238cd34928ca73f9fd8259baf94232c70 +size 4192 diff --git a/llama32-3b-nt/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7adb6251db4ae577197ca176541d99c7b21ffa9f --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06bc3af9fae8d6ce8ebffca946cfec4ce1baa9927ed117331cb5936d09b0a538 +size 18874608 diff --git a/llama32-3b-nt/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55193902a888812e0e583a42646e8267345633de --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d701d0084e841f1557f7d3ac7ae9d43f4b495c9a081bd27f85d04ea4c6a10872 +size 31457648 diff --git a/llama32-3b-nt/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f67fcfc43efc4ecd2ccc7db31296cfbb519af80a --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347df08ccd107aad9396fd8f29a7a623aed81700c461c79cb10652d28423fdf0 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d62463975ac4a33983d94799ebeb61c243e6b718 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59de24bcd67d2e077995836a6cbbf672d1405188ac3c77b7ad7a4dfec23847bd +size 50331888 diff --git a/llama32-3b-nt/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98f8985a0bca732977bc9645606fd0fb56c1767f --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055e38ba29eb5e76275697f36d7f80434f932304b9d1610434f2072881183088 +size 100663608 diff --git a/llama32-3b-nt/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f664405ce6feb80b7d76904d0cc6130ed3b6dc2d --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4347560d461d2c51b2f440941dbd7463ba55bd9f187594fb4751c38386920db +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/26/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/26/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..208083102df89c06902adc77db3ec0667a77e514 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/26/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570b0f79b7631d28a3a39e9fb292ef4936656c10ad818f8de1d6f38b898cf5cf +size 18874608 diff --git a/llama32-3b-nt/model/model/decoder/26/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/26/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fed057814e551717929725612546a2baf4b525cf --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/26/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acfa8082320a212ed4d6a2802839cf19faf9dc214bfb4b9f014a28455fbdaeac +size 31457648 diff --git a/llama32-3b-nt/model/model/decoder/26/pp_block/input_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/26/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71633ab2a9b9b0feec2957f5f6479ed2756023a3 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/26/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89853355c54d7fce3b33e19b87126baf1ec028e67c06e030c23bc7a669c2a58 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/26/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/26/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cc75b5d97e2fc323f7ca18ec01888f0d023735d --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/26/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef675dfbd7f7d14a1b0ccc129d5aa5b40f58e6eeede4a67098eb4147f5386fd +size 50331888 diff --git a/llama32-3b-nt/model/model/decoder/26/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/26/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9958a52024b6488e60c44e8163aada1e54c54dea --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/26/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d3ef5885d1384b27331852bef8c4979a36ba3b8e126660f71a16b6156d62ab +size 100663608 diff --git a/llama32-3b-nt/model/model/decoder/26/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-3b-nt/model/model/decoder/26/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e4979d8d153ad15f336d8fddfa4498c0ea267bf --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/26/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1c99bb29843d0206a12cb46540d5c5c69a428e3fd72d776ec65285f2f2b3b8 +size 6240 diff --git a/llama32-3b-nt/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..290a24c9aca076d718d316ddb638c09a80737ad8 --- /dev/null +++ b/llama32-3b-nt/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b474d386732a42a424fd960dc0e442b27cbe40389f381508b90f7320de715773 +size 50331888 diff --git a/llama32-3b-nt/model/model/final_layer_norm/pp_block/model_weight.safetensors b/llama32-3b-nt/model/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73d32fa0afa189bdb4d82d706b03fbfb704b2928 --- /dev/null +++ b/llama32-3b-nt/model/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77bb24bab96b24334859af1c5751f264596baea8f5c08a928feb713078256953 +size 6240 diff --git a/llama32-3b-nt/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-3b-nt/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b521ac84d3781b9016b3c4e926775baaddb21ff3 --- /dev/null +++ b/llama32-3b-nt/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a39abb76e8026af8150eaa95f12b3c058be5976e1b0f35a1704688637c3e54 +size 788005112