diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..03810bdf895f72462fa36616f5a53434226701c8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +performance_plot.png filter=lfs diff=lfs merge=lfs -text +llama32-1b-hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__llama32-1b-hf/samples_gsm8k_2025-12-29T05-09-52.015180.jsonl filter=lfs diff=lfs merge=lfs -text +6000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +6000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_gsm8k_2026-01-07T06-57-48.977156.jsonl filter=lfs diff=lfs merge=lfs -text +6000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_hellaswag_2026-01-07T03-53-49.653886.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/6000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_hellaswag_2026-01-07T03-53-49.653886.jsonl b/6000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_hellaswag_2026-01-07T03-53-49.653886.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..13d6f70c7d3cc699b4320eeb70944d38cc8bded7 --- /dev/null +++ b/6000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_hellaswag_2026-01-07T03-53-49.653886.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87d614af3012456bc55d5fa3494796ef87ae4e88613eca380be3ef225f01836 +size 42644354 diff --git a/6000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_gsm8k_2026-01-07T06-57-48.977156.jsonl b/6000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_gsm8k_2026-01-07T06-57-48.977156.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..554e2712e590354487270093fffe4c88330fb02f --- /dev/null +++ b/6000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__6000_hf/samples_gsm8k_2026-01-07T06-57-48.977156.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbe7caf99c6b35e29595f9064ca49bed676523e4667c27e5b5f63987749f788 +size 16787503 diff --git a/6000_hf/model.safetensors b/6000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..581758e9b379c7e7fe4fe47b285d8525efc6f3e0 --- /dev/null +++ b/6000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad16ad9e4e169e94cd7d1f89556300d5f2a656932b5c69c1b8fabd8a9a75477 +size 2471645608 diff --git a/6000_hf/tokenizer.json b/6000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/6000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__llama32-1b-hf/samples_gsm8k_2025-12-29T05-09-52.015180.jsonl b/llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__llama32-1b-hf/samples_gsm8k_2025-12-29T05-09-52.015180.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1747665bbf71820c69f3e18a8e2c43b5c2972bdc --- /dev/null +++ b/llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__llama32-1b-hf/samples_gsm8k_2025-12-29T05-09-52.015180.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75192a00e0b1cb90d03c7ad57e4f284bffaca99894fbc40cd9bf163fd836498 +size 16912314 diff --git a/llama32-1b-hf/model.safetensors b/llama32-1b-hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c602c13f77649fcf6a8ce88f0d3c89924a69347d --- /dev/null +++ b/llama32-1b-hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a2e4be76fa709455a60272fba8e512c02d81c46e6c671cc9449e374fd6809a +size 2471645608 diff --git a/llama32-1b-hf/tokenizer.json b/llama32-1b-hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama32-1b-hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama32-1b-nt/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46150f769c0395663b382abfec6f960e2b54ce93 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0276741f3e26b94d3d4aadf34a3e5fef3aa00474baec888035a9f92022f881 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66a0de096c64efb4def973ae66b2b16c7742d26f --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85709f23aaa73da6fcf30f4756b915d72271fc6d0129738c5974da9af23dde2 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a8ae9a0478a373f631bf9c931ce2aba8714205b --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8a340447658116f99ec5a15aa2c53a20b8bb996ab4df8ae47cb9d882419019 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c47b059e47c954decf7778642451f532f8df938e --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c33e2e540288c275e7d9915f1ce636797818c6fac458e6f8e150cda2db3b0d4 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3ddb38725e6a068d1533ae0467faa0313261650 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc478a3d60fcac1e467a70373dd59b334bcb558e15e0215eb0d1c792b50972b +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d32ef5fb2a72de54d84feab81bf26929d37f271c --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b883091770f2faa56fc0e2b09320e8175769972ead464467a7bef58e94db3eb0 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db7e1ed29eb75a92e77c8c06434fd37910f22943 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e185d81c06bfd748aa9ad4022e23930d1924caf8690e6dcf26a42950f361b3a +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37210d8f9658b263196a575e4e9a7efb95755879 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15488f05be50f1a666cc239bcbb5e73d57ce6d7994779fab0b7c07a4aa6b0cd9 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00d4ef68245274c6d6bfb026b2f701ba16d6d3bf --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6875bdf65c020bb9ae736e0e56be3ffe2366503db1f80feb92fee5a80b83e97 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15285af14e9d67e8a05a6c9e70e3ba625e76b13f --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8897195a2355e257c436d311188af105551e6a7f7a22394d9087b73910e4b17 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..139399a750ac92eff4c6b2b6cc081529888b9aef --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a9a04f2dd01d7e2bb41bb465b9631931a731d01c0bd58eb0e161fe65cbc9e8 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7141199dea1ca32dfed4e39367f14e66c4a8e5db --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0193bb4e3d2075f27e8db32256e41ad84077a0ba6e90b3fd7713eb88b645b6 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d40ed5f09c3ac5e4938ba1e2adee547ff04799f1 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2996fbd4c66b207a25eff8714789c83a0503d12e6589d8d2d49c77d6bd91af +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93d712cc7718a385613954b71ca2a02e52da62d1 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:636500da5ccedc8b09aa281e05544841414ef06dc70c2b562415b8e77acdb880 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1973cab5d307f06ef3f3335ea8fc694f7a733fd7 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6709249cd1313d73a34201c7d76f1b7bb491b0a3a0ede9532db9023b5ceba099 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4ed6895882b95cc5f4ae0b02bdd13ed119c6566 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3312605d121d07226257fef5622100befa51609223c091269e0ab3d6490ff18d +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d96e46823b9942bfb8d45381b3cb10acd4f79af --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:171857c49d3140c306198ee94dea845310ab698534112725badec082f5a561b5 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e88c7aa584bac5023f5f16ea791a5b9270f7d5ae --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0852c03a66d037da408614141157d8badf0fd97ea6d1004f683e4cfe024ba0 +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43943677f7829c109c583ab2f194c4a8d4a31d51 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43057a8976243fd952b5c1311112e5b925fe5554fc5ef4cc8823e6f9e3f777b2 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfda49e1923f909235e5e3f6e2af10961f490366 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92b38a363a5f534b727d6870cb5563defd8fd4de54aa7b1fec4c6fb8facd647 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fd4564a0d7834039d9cb580058c77f7acd548c6 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4589939c6928c030b6f01dfe84458c237e9d185c9fd6ee2cb1cd3acc2f106e0 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffd6924dcc2c8826164db6884d56a252065a0958 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072978f2f75de4c89ec3b5953c96e06261ec69049f14e8af0c1e4b62bb9eae1d +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c24a41a9f1929ac433cfd491611c10adbccd35a --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5207f7d2c950a584c8e24d60559dd5252e49df61ecefe692a72cb87c8fa43a7f +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b4096653713dd27c7c1841f6afe5675c3424ae3 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d9df03e74515b298a1aafc56e97c834730d6858fb3840bded1b1b67272c480f +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..138194aac6fe838ce893fd15ad5c263476dfbe72 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5769e58de93ff20eb5e96bc037539de77be4ec0ea6594e4f88edda3f58fd885f +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6d80a98e8f17d0e1515e54a3c12b00df5c7d4d1 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c9cc6ee995c91ef494b5ecd3d381c7af101aaf48ac68d27a21d13dfe446478 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c496686166f2d9cbd3b336030a4a22351498320 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b1a3cbadaf87ae16ea63b53b9b955235e18631c6cc7e78e3a3d57cd180ae85 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8d4b1dff28afe9b78f4311ffb4f948b5e4b428b --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf50392c6293f7a006a9a04e984802988fdccbc6d5be3e2b9d3159c58608f7e1 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..718bc5e1835c05055c8b52cb585515964dd4db8d --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e328528b9b0f77f62a817e46fcf38caca64900de14497ae5b3ac780a9917a3 +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2a74d4d54f662a7465749ed687653c084138ff6 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8c38229d901f63f92064308f9361a279c0ceb0d264a36195aa96ba0131e37f +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f2455fc4c0ad846721d388f691fbf44ac4de1e5 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41f7915085e2e6ba53d4cc7f2f4ca8f16ce0f7bacd206374b8c8fdb659b5c74 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7afdc71f86a4c88a4437069d54081848a46f8b69 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e5366a16839e15b111d7f057c9665d78e4fb115f2653fbd2c974cc160e9bd9 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57273fdb9f066a222cbc7a47905f376c3a4775fe --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb74f375f385133c7181a2f179c9f6d94c0c045ed53c3d002bd618ebdd0cc5a6 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f8e67bd0147af965197feb795ccfc89b8f35b4e --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd7bba3f6d9b1bb8411711c5e38247ad952ff395409ac998af799dff9d74be7 +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b02396844ddd439d00f7aab6fe323a3acbb6ea59 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae785305037ba13386d8bb00a934f49cfb6b3dbf4c4d31b2038f5b06f45431a +size 8388848 diff --git a/llama32-1b-nt/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..394bd7df40b11a87c5a416e7acd6e90ee4c89766 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8302e2113e0b301d0d372bcaac546566162e6ab992284e4e2f8aa63fd525656b +size 12583280 diff --git a/llama32-1b-nt/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..254cdf2be0eb6942ca39cea3f021320e72d5b493 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55dd4e252b319d9ee38c014f942165ebbe45738f22660a84815a75733b55da4f +size 4192 diff --git a/llama32-1b-nt/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0820fd12fbd20055c16f6cbe1d46e063d53af3b --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debdba6aebfa8de486578d40deae3687bc3a5340c6c4d825f8024ad3ada37080 +size 33554672 diff --git a/llama32-1b-nt/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ff1ebc4c6070994fe9d08ba90534f50c9df4a66 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61b0b7e1c09d6e8c7141fcc8e8dae162201c137c1fbaa1910132e2378fb4ac5 +size 67109176 diff --git a/llama32-1b-nt/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/llama32-1b-nt/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a95f18448517ed8007da70c439a5cabc81087d3 --- /dev/null +++ b/llama32-1b-nt/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993d1ca1d195240e987db38c439e4d22e350005771269be85f66b9e05d8e6def +size 4192 diff --git a/llama32-1b-nt/model/model/final_layer_norm/pp_block/model_weight.safetensors b/llama32-1b-nt/model/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d972a18ed0a18d8604918f41e17933aba449750 --- /dev/null +++ b/llama32-1b-nt/model/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bfbd9ba193878c85f567333eb8152849ea503fde1869bd4bd41f756508c291 +size 4192 diff --git a/llama32-1b-nt/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/llama32-1b-nt/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34c23f31d74271379c0ebe35e35d2bffeecbc1c3 --- /dev/null +++ b/llama32-1b-nt/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96fbd336e85cf6f1cf5c09515443e251a65b10b6636f5105ba59809614a3d09f +size 525336824 diff --git a/performance_plot.png b/performance_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..04b749d56e7dfa2e6bd04a321d8f457f9d835c92 --- /dev/null +++ b/performance_plot.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c543445e5b244d3fe3e2a235b48c70b8c08d68a0d15a44bca6a3bc9ab373063 +size 246801