diff --git a/.gitattributes b/.gitattributes index d6d602616b8e48d8172549f60bc8e4ae5ed9b8fe..593eef784b48d35501ad250c856b8b50c786e14f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3421,3 +3421,41 @@ deepseek-r1-1.5b-ternary/model_layers_17_mlp_down_proj_weight.neg filter=lfs dif deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-ternary/model_layers_10_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-ternary/model_layers_23_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..82fc191b6b02245d720e074dea800dbbe51c6afa --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aebceeb5c49e7d2c6728a481dce38797e08581a14dd6cfe6ecd6a727521ee36 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..ad057ec33226dd92818e0221690cfab99a2f7ce5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b1f543227944a1af2750e991457b30cb7188988bc96d74d0a5dd89facf6877 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e773326e4ea0d075505e655752ac141a0cbf12b5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556096531abd03fc5e5f04c6d35b923f99fa220b279f136aea4b8d2af372d579 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..37b998d0eefaf783d24cedcf9eb0f7551eb7be2c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97c83c4abb47118c502329a959570a1215507e0dd66f9e84069b310aa1722e8f +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5dfdcf759eccc82f7eaacc6b2b752be9cd23fd22 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2422715d9cb42eaefaff21120cfde70189525f51f0f51971cc5e950f1bdc48b2 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d727fdcbc52972cb349f8b2a486ea52d4dfe07e6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302f4a093385122723dafde2ba2592a94539b2ebe12c523e69f7142ec89a11b4 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f1668e8352d08a2c7411a097fa6010ade485f4c5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:070fc6282e43b0052bce403648d677e8106a2a380c2e9262f38e4dcd5cd24199 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..445b38859aa91dfee64aa73b4e2e49cba18fef7f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b662b9a71e1c30f54e9781190dc2c4847527cdecdb022b85b3909d2b4c11cb +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2f5e8e8918da0c0a64ee68b6d11899222d5a9a01 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf4de964079986f1f878f816dd2eb0aab9f1a62a9fda213ee8520983b495442 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..de4948fddff4ec7953b234d2f61ecffe8fcc9e4a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752879c949a10ddf33d214ca734944c9c231322aa5b8dfccac99bf0f4160694b +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..567a3f29f01f8193e05c049c4da50cd656e71ee3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010448aa44fe1534eb246e0934276a8f7abe72cd6fd654526b33f590239a4ead +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..91f55ea63bf1d731aa8614fbc20be4e41ed7f5f9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba8ebd07fadb183794f9e793b29b2991f95b005ef5f9b2bd139fa6f27a4c5fc +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e2008019eb8913e2c4b7b373d763df446a841ca5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264d743b356d1c334878111aa6d1c1fbd184503bb3bc5e09618c4659ef84c4d6 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..da68358d73dcd24bfcf6bcc23350f14487baec55 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774b53819b9b8a03fadecd394347e52ffc411742c185cf2319980568b3143a4d +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..8c3bbf0a1307fb61e4493886e19829ea3c0dfb9d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6051f5e803aef03a6189d91ed552950db96c01ff8d95f6a8f0b10802cdaca3 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..cf891b6942bde50487b497369527b3dad6c33d98 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f508641bd74b15cab5f8447096b62c8f215d82daab61c0459ebecf37035af34e +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..147ec6682bee7654cc91b0420066581bed12c2b6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3fd1a1415c48fcc8555907931b5c30148ddcca2698ac3c5d75185370fee750 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..dcb3132b5069c1fe60662d4e283bd5c1b017c0e2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946039fa437eaf913c0253718c3c6debe813135bdd56c6f36bfa4e21844e7a9c +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..8f89222fdbe4450d13e09d53c996f539f7b7e4cb --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c226189bbb87071269d4ad03b322b637491172fe6d7d96289a914b071d7477fa +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..77b6a5a8b142f62758de1c166ecced8802586047 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6907845bc4ecc5a36df6c259e44456f8430eef36352b767d990e2c88ab01e609 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2af9cbbc5a6078fbbf4d0a8147c3015a8c4d90dc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f379a20c016047a33b4a3f93f5ba772dde679d1d5380d04194cb5a616ecb49f6 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..43a662d67b2d0fc0a6ad5d4f6401cd0d9b90f06b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:588563f62366d62a26ec4441ec1f916225f291894f4c87b21e0943d645f8cd06 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2f90002c981b68bdae7328a86a7ec0e8f3f1288d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a311a1cd660cd05dac30c09aa50f156dcef871814576d09260d3f3af971aca +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..d36d1c2ebb6cbd5a8eb7a25a8921177fc36981a1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d27e5c15f9b50f5e6a781953c79efe06ce7d7596d476894237df2835027ee6 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..d5a5cbad4fc1285be0d5cd82ccb3194391aa949d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9119d5b576a6262380bbf062dd59b12f0b918f2413febf82d97523d0bc8c662 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..4b518483ac99d8bc7b149b8e0628f90d9b0bfa8f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9192789d7258981d648a90720f5803ae76aa45ef15fd1ad808dceb78528d049 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..876e9f724fc8b7554640a8a901145270af703dd8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f389dd739bf059f5224c6da7b5174dc1100ce403ebfcab503b9cef7c7f2c581d +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..9d4c24212fb1458e2cb29fe61010d94d8e88de28 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4910406d2aff7bd205f58fcf484f9abd4b7efb7bb2a5740d5afc071e55cd2711 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..303eb6f37b00e7b022b20aba64f15aa8e451974f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1bcab82d136da631759d9405c0c749661f2c0885a79a79351613900de7e5c53 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..590ab442633f188dcd6d404689e77aa64d18d88a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19c6b8b0ae5aa69d9cb6b05db7a21daef7e6738961f1b34c2313f49606caa41 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4713697103e199d6d1764fedd842080e3feecd59 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c011f8749c3f0375bfdeacd01cc0543cc143243520ad81a832540af962e82e5 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..25d138ac7ad161538ce403fcf1ec8d9b3c35463d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea502782ca87f1068f0d34260206df8b18c106eee96ce075fe5eecc7e9382cf4 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..1ec5c5f0a483ab90661bdb1fcdfff467a81d43ea --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5be274f0b9b14a48edc65aee157c19e5ff20c73c4d5bd43056413b92777a51b +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3eecdecc7c3a576d4c5d5b014e769f84444bb34d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b26c96f2eaf270ca50aed5b24aefb778b19b3e0e877d09ad9b377e9a6d14fb +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3d738577bf5cf8d3c170bedb8b7bcf2d99cadff6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55cdfc24d76a06204d9e3194fc94b0c3c886d24ce4e28304bd8c7f2461444c70 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..8d4b3ad06fed43ef432893b3fdd8492c66cf696b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83da93fab4f9f1858e84c194fa26e590277deb9000987817247773f2db8bee1c +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e4aa77305e8d5a7ae2e0f2e693c3c019687b37bc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7006658255e51471ba38a583a36085249f53e8043667c346a4ebb3f3062ea816 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f361283654e7fc5226ba3cc4e74acca9abc89e83 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14fde50d93aa4509ffde9814225ad374f98d7513043388d27b48bc5d650d2a8d +size 393216 diff --git a/deepseek-r1-1.5b-unary/model_layers_0_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_0_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4480c766ecab325c11ba7ca372dbd090f47e5b66 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_0_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_10_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_10_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f12798d185f3a26d7b58dca998da4ad94d1d0a4c Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_10_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_12_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_12_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b4076ccf87841fe1ee1143cf43974272853d3b56 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_12_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_13_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..358791dad919c9996e22a57d0a0bd31d4a12663b Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_14_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_14_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e6b9907c05e8517ba0c33c7c2bd8a1ec773f2920 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_14_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a7a656d283a9fd8787490ced2eb477b0a8c85f0e Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_21_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_21_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9cb764c2bc2c54bbf710fb9139675c0fd10f367c Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_21_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_2_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_2_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d9e2acdd45184532118543b69e8e3ab58c2968b5 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_2_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_2_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ca6a2ba88a59c60ec72ccda164705ea6049e2fa1 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_2_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f892a99d0d3d04dde113edba1f500740233354f0 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_8_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_8_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e04ef83dbf00969a6897ae2d80515558c4a2e370 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_8_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_9_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_9_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..33941f2865d211773ccbfaf6eda95bbc072ae10b Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_9_self_attn_v_proj_weight.scales differ