diff --git a/.gitattributes b/.gitattributes index 593eef784b48d35501ad250c856b8b50c786e14f..a67e0194e795e2df5c96935065365193c47aff59 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3459,3 +3459,72 @@ deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.mags filter=lfs d deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/lm_head_weight.fp16 filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_34_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +qwen3-4b-log-unary/model_layers_5_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..97c721e54a951aa8031e3d0665a92a9b47cba088 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf601ab6062858b266017e3ccc33f77bb1a6560510ddb28d0bddd9822adbd0b +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..fa5c3f4361129e89d21fe2ecaff339213555bc39 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa7c60db5e54d8009750dd241125670907a01f6b0741fecf14e3983914efbbe +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..760a6151525ea6488ef57185139c6e16624e4bf9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5e09aa32752f84f4114b5b54e34265df514994b6c81f251137e17aacc70dbb +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..26f42192e9881c5c406f722bf63a924c45a416ee --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137cc07609c4da4aacd304b4fbb93a179102e1f1a78384d3e4f87c40eabb4810 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2d71a7f88521cc5d6ca8c0d22d086bdfab0e36e4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7257bc144ffb551a57b916fbc440d469b66da3d2d62118cb329f4b9ad9684e +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f13e3ccdfd2f6021825bb079460be3f28017d72c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b174d36ebc596462d284b4973fb866ce3a544714469cd4545b2c4cb7da12d3 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2d8e51b78b44da4df6c1f3b20e71b82a9f6a440d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8874205a1141bacaae9fdcf35ac12c56211a602ebefcde5e571e9429da90d4 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..a514f592a4072dd02eb1acf54818148eae09150a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6075f12919d7c179856251bd7f4c167d128f7fe2438d968ab3be9eddbacc85a8 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3b00236f4a5267635eb9a006629b26bbc569bcce --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f0c5fe941e1407a517b0d3ce5f3afad1ec859845e5f8c34fdcfffd5b310316 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7952f2604f475d6905507654afef935438b785cb --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a62b3e005442b9ac9a51a505347398cd9a43d80b5bedf204ae184f08a8c715 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e36c28a25bdfb1ceb9606531201a5b8017ad90bc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552bbc316a158a222d6c94d1b4d54de495b53cde5f3a4883ab9fba02cdbc62b8 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2354d5c4b7a0209bd3ffb708cdb037c4274eb380 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0fde78bfd9522524f658236e046ecd0d2189aa0a092c9ddfb2577c3e982bc70 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..fd6f2e7c532aa5f7ef590882bf23f97b9e76e091 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1d71717a33bae43b6cc287107fa866ebf2d3a053742a709381035b2c234a66 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..9c4893ff0b9b5e75f8af5a3320e7e4d5fe5ad185 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b684bcc0c65183e127d15e995f38140b7c7fa34626ffbdf52377bfd192205a9e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4b0e336e74d3333dd2879317b2d0db8587c2b92e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5206a02260028ee28fbde6edf9df24199656473b0a98f2b038b7fc841608e572 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4944aa9b33a326f107dd23f7df3840cceb4b6830 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a564f99a29539779b4e31ec8e2238c03ccace4c568b8495bd5028f5fc08e9416 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7b765d92a13526d3627d2c1b94bf748309fc7a75 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f627c09a596845e3d789efe2accb30b956cdad47f79f54fb9bc252285435cade +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a85476ad468c989ce30f852ab42aaa8ea8a0dbc8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4b96ddf727cc680c1f0360ff51b143b0e66d45792330cd4e0bb2edfb01ae96 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..babc8ba0b63e5c5b2288c03797bb29f57184a529 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef5a4b93278c3ed4f21af7c482d30379e5399e1a9e60f402ce2e10198375909 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5219ad8b69d3627a68430808591c64a618927dbc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc73697f7d2c00e8a7ba5c1d7a5006479360a95f8a8860700b53aa831558a47 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..dcd1669ef3529c50b99fbd4b7ce4c456c50f1c77 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3841fa4a3deed727ad7bbd14feddf9b931fa10c645be97f7a41d84cbcbd5b85d +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..28c3156b599c2e49eac38a8bdd9fb23ae057b687 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acdad830fec332f219ae63313954f982c97c74289dd00cd982dd76a421434b3a +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a4f9829b0c603bd5d6ad9bf7a4651820659c08c1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a04a4f84162379d5b772eb736221f21122838357c7f701c728b695f7f21f48f +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..56fc5db10d27ac481d30efd51f232d0da9992345 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d4f22d7a9844c265b8737f51512a8b9c3e83f8b3df30a048d2d1312bd89579 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..219838292bc3779765bb0b899ad21dfd3377a016 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca36ec8e518c40248700b7a6d036c79cfe6818c9bee9e9fed9db468c6a3076f +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..65886922c11d9b70e4c1fc288ec6a03beebcca01 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67575cd43df52fa550be594f9da1901d582a2f2e48978a0044e5b366c6b1ae65 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..fb3482c07619577d411da88da055e50d21182a22 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acab9a847d202ed9f67041eb8476855da21ec3afa42076c05927753fb95080eb +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..ff17124f9d467c54aa4ed5a4ef9b19ac812661c4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86cca1f83a0b656b5b9e895a32797309e95088fbac4b80be1b53df380e4c1b66 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a76fee3d266d08df1c9bec133863adf22995d19d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af1a304248edd39a9d41760fd0f5d5029c19e8b9c30b62f1b59b6c55fe0c3d5d +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2a64ef46f96504ff2014bf68061e33e4cf9ead00 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4f9f5baf1b98926d34cf108276fbaa3befccc402d33ed2ed86280c02a46f99 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..67d1cdb5189180b56d5cffdd149db824d6e57de2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85914f8567316fb3a505d76f4efbffddeda82b252fca0f004b115729fc5eece4 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..cc8cfa4971b31665280221619e0f4ecaf610fadd --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7767bff8b90fbd314c81bdad8d407998989525b07c4ebab9ae1f804c7660ee +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..04c60ff456beb13db678b94024586f87d2819f68 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f6eb9e37e5d4eb688cd86bc5ca50b7458115f1c90782281a2af717a743ba43 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b1f96a5896e8e2ea08b0c6eb3886ac8ae2fee0a6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1514a78827202a31b5fabb4a56b194c40d60c75b42c2c473e509e90fd1ceec9 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..426c573d753358b912a19ce138c988544250d6eb --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e1883be31668ba4dea2a96bb00286e7812c2309c109fefc110faf0d2ac552d +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..664159b9ecf5fe8fe0cbb9333e829f3e06a172b3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b2a47c92099e72ef70301bb9d3098db3866a3c904a4172d87039e4d15aee85 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..7da51fda53872b09f7213ad294097a838b90ce41 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20c45b40daed6a805b60745abd5a5e6d541b2b2bfc9ef541b476ca7743af229 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..b4172cfccfb7e5f47754cf34019afbec99fcb194 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7742df58b2eb17912b598df0a2c405b98498924d8ff2523673d0c4d7137fa785 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..9f3ae7ea087d51c357603887118f20a6da531d4b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7554f62302d4402c2a8617792efcdd9dcf3b22b78c3081170478e367959faab9 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..487e8608df5e531d5f29a9e76f665be4e92d1dec --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f989aaae9b5c070f1c5236956b2a7a0c0821b0355b6556e2cf0dcc9f879a6117 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3ae590ecc9eec7521296db8f788ef2f67e0160d4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2950b33b7d98c3eb340722c3a5e376efb73136698d9c0aabdb09d9e2728117c2 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c3ada139a8a0cade93f74390e669c0e56fa578d1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d99cdb9b5656662e94b69d5497d1ddc62b52fe4d5f9788c11249934c8ebc720 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6d18284006e739ef206049225cc91908ce5c340e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6acd236a009c2c646537035d07ed56d1f1a7e4bfd8e9b83a2e6c6ecf7c531bc8 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f86f595119a6eb260e07ffd4fe9f4a628427aadc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2208f57cf0144c098accb4a7145eda1e77cd1ffd2f9ca9f78b768badd53efcd +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..68e9f212feffc96db4d309867ba4b64980eb1767 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93da71f8a8f102d0d16d51aaad2da5559f0196f628efb9364557b2107970952c +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..62eddf3b31147de2f7f32d6002933c85466730b2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1158378bb9adc7b73b040cd10215c0d6ba53d8916bbb732ca37b6bf8f4242eb2 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7ac8274054aee3e444fc610882d2910adc1a60d6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd3b8fb93a12a2e9411ec693f5e761da80b09734ca758f2ddd06cb83365677bb +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..782d8268f7b9fa3e4ef10ad81ebde866984e124c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be5411a8d1c31c6a292e80c20ecd2355a3c84d648686308d01022ad72523b6f +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..ee922a410d659781aae32a67b9082a0aa8980bf2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f794bbcdffd07b8719ce97780dab4eabc04d310bbe111dfba7f8205fb80b684 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5c63d43aceecec558f8c958f09c4d070dfb59101 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c689ba19991552d62f546c445a0d710740086fd0d0c8cf4cd63f845faa292224 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6add3d38b2d1d9a1e8a164f079cf0c1ef62ac239 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082dc967b006eda7fdf89cc81ef5d4fdd5048139757404489f58ce0a12f29b36 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5df012b5d24d77c14cd3ba580b1aa24c6346406e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5504f389d7257777f1fcc688c1626c90d960a9eb59612f3a6877ec2da722cb8 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..738480f892e3ca5a223224a35ddee9347c4571de --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22f88664930100ce0f3d62a9a089a3f43b3262abfbccfbd3ae9756e5cae1bae +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..33ff1adb0a1c8995859541cb2e7d43ca30ee19d8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2a9825976cf00c63870a7f96c226d5a5ab5c022f8b1e4001d6e757ee2ccdac +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..85b4ea8c99cc7fd478a561b81eae836e3cd2bce4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced96e94c28a87d1e54bee42d907e4063b9a70a3253b2a3745850d20146805f5 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..16d56b653b7366d1f5d463217d78ab5cb4ffbf38 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d8f5ae3edc86e8d96da01da76b1536ec45d9e13b42374f8f8a0f538a8457f9 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b37f4b185db84d484adf542217156200d2e6a51c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c80613d8718ea94d819952ef97d7916881ff7dc88a3e0fd1ddf3db70d7675f1 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..71d4d62635d94e65a0176c2adafbfe7e6964eda6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d30d523235d4ec504e15332eb73a80bc8fc29efbf855b19eebdbd6e29f8995 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..d18791cd1d7deff3a55b2a6f070d468dd27e44f9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8902346d42372d9522a6c4ae40a380c6efaec66e66c02d1e4b435d07b97a0358 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..fe5ba90dbc97bd5ce02bf72bcbc6cb1ab7d3acef --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223805c699358cf801143dce8f18ad68d2c74d9083b891d4216d4824d1a8eeb5 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..36a4b65d8d3aeba35ff9cb18a754f97742a49438 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f0b46e00a44205705e163433ae12b802b83b4bab6beae803be9f93c15ac802 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..23166f78fc7398891dc4b7a225fb238bf2c1a4c1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8181b7d57154cadb33048e837aac7f6e569635e65c142365c909cb3675662d +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f4ed03fad3c72d38349a657de2385bf58e98167b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4629e51bbe45ceaefd4d20a9f84ab8d1a68f9f475a0bf68f3243ac84b633a74 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..9856c08855d2b8609aa13cd33825fa55fdcded4d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2d73bf0f38729fad055c4a54e98237e9f22cf2701628c09ff7ef795f243387 +size 393216 diff --git a/deepseek-r1-1.5b-ternary/lm_head_weight.fp16 b/deepseek-r1-1.5b-ternary/lm_head_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..fb4af3ef304753b9796153f2bd5b599471fbca35 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/lm_head_weight.fp16 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca68befcc8201afc0eb54623dd20bd2af92acfe3cff767e6f8e6c0ddad2a397 +size 466747392 diff --git a/qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.planes b/qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..7a5fca6bfc23216871f86089e8a43d6c2ffd7840 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16f4f6c483061fbfaf6b1fdf44b44dc569c5c7834156f4bb2f058f542520762e +size 5242880 diff --git a/qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.planes b/qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..f4a519e95b3424a6f6a1972e86efc440a9de6c21 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c9fb2517b22bb7836f0a5f871506d6671c9d7469c72e54b6deb8402eb5b1e0 +size 1310720 diff --git a/qwen3-4b-log-unary/model_layers_34_mlp_up_proj_weight.sign b/qwen3-4b-log-unary/model_layers_34_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..73dbe8897fd696a46c6030b4fc32a41eb659b2e6 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_34_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae1f4230e0ca8680f3d19291e5535688b467853d038c9c18919fd8e06e94635 +size 3112960 diff --git a/qwen3-4b-log-unary/model_layers_5_self_attn_o_proj_weight.sign b/qwen3-4b-log-unary/model_layers_5_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5549f46f6d94e6a499fe0acab76705ac1ec953b3 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_5_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16548676c8223c1f08da498999c7b5638f50aaee52153df4ae836d7a3f79eb25 +size 1310720 diff --git a/qwen3-4b-thinking-unary/model_layers_17_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_17_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..640dd7c19f1783d7144476bc2401a5bc4cd84893 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_17_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_28_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_28_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b1f7d79067fd78950e19757e27f78cccb11f6b8f Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_28_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_35_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_35_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5a58736e38e4d8eeef283810e7a76e1f08619e14 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_35_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_4_post_attention_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_4_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3ddb49073503a9a10cadaeee84d58bb698f337a0 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_4_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_6_post_attention_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_6_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..78e9320f1563cdfba9707bf290791cde80efe099 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_6_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_6_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_6_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..24727c97db0293939843aff5dec7cc98e0d500c1 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_6_self_attn_k_norm_weight.fp16 differ