diff --git a/.gitattributes b/.gitattributes index bb6d29ee085af819e98d0b5cc7920b234af67408..d6d602616b8e48d8172549f60bc8e4ae5ed9b8fe 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3372,3 +3372,52 @@ qwen3-4b-log-unary/model_layers_35_self_attn_o_proj_weight.sign filter=lfs diff= qwen3-4b-log-unary/model_layers_14_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_21_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_12_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_10_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_4_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_11_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_21_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_18_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_18_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_5_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_25_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_15_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_2_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_3_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_1_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_21_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_22_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_6_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_5_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_8_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_2_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_7_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_12_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_11_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_26_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_8_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_5_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_17_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_4_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_27_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_17_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_10_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_23_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..523a27c555fd02fa97a9a1d03f2641fb2b98655b Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..2f3bcd850bfcb70102c916ba672767b7fdb86ecf --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de489dd041b078296df2bfdf3957ee757314de4a17e48ef8928c67484928d4a3 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_10_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_10_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..18667444194e68e8e646c46ac39f8a1489ab17ce --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_10_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3462dfd3b9426d07c67c759bc05e072625faa8d8b5c426d818ca336651a4db3 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_10_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_10_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..b5f95cf4f4250598a9f0b460eab5e78df3b11d5f --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_10_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5388a8688a1f1020023b7c64a10a534abc73b9fa667c7418d3c6a1e09b5edb +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..78b0e8e281d8671e4191ce4f61cb230b7ffe2d53 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59da0e8e2bef01955a29588574acbc531714c1ed4be58205e19f1d15077224b5 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_11_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_11_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..8250255b5cb01987b152b7719075ccebf645ba08 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_11_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2234be9dfa8679b5377218d2c9a087d193506fa5ec3466c47baaa924becab497 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_11_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_11_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..696b518c6880a17b13adf71a3dfb6aa9f4b97e5c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_11_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5deb6871e4f603ac8a2902a52a8b50519b3f0db755cf523a713d326fb84b1271 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..11824df332e030dd4c837873619cbe09933192cb --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0488008f7a3492aca2c31600eae8628633cd136e817c5a7a7862ec86d49745 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_12_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_12_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..166d86803835592174016fe1039130876f855ff5 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_12_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b622d6462b3e932e02afd7a6a27e1be3613e825e9a5a64a29903581dbad5bc +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..a5c9d2ac52e98d11c210942fb2194ea104f69313 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbff4fce66e97beac6bb5b73c8b71e3fe08bb0faa73d8dfe220cf7752c4f259 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..46e28b421cee246c862ca1cef87c22a3b5763041 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf01c0f7247214f93de6a50a4f65eb1f16f279be635d0f82923c842559773e7f +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..fb07e2ff3c80c83308f243cb61352bf8a14ba153 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_14_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9007fc19f7189394e897296bb7d64b63129f7f85503046bbb2ebe4dc26e93bc4 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_15_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_15_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..afac718354ff52b0c16d7b95f55dae9d20cfa580 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_15_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dcf9541fd16f739e08d74fd5f517c9902e7051ce4c974fb03b008ee6d84319f +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..84e81caa4efb80175f8e5dfe2e2e1cd6c09e4a5a --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4426c8083aa7e392ca710efdd65264a0c9c4eb6cc2009e3c386e68039f9490 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..2ea066b845be8573cb5bad9dfa96295009b45c2c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_16_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20991dd62db1c465f05535c62d46c4780fdfd5a635bebed811ecda1ca60d7db1 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_17_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_17_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..abccb629c26ab56a5715f4688e16fc09bbf18e43 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_17_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea52af4db0fc5b759bc77415307b5a7fc3a802614f9f2df2ea06c0a086c01f8b +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_17_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_17_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..8b76415daaba4a9136c9c31b99d6060f2f79ae6a --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_17_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35dfff0a7c66c1b0b38fb5f3f1f10e6a105d16cbabb0f553fbd374ee408472fa +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_18_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_18_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..9a0e62b5f89d5a02ea8c1824d17ec3cfdf253789 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_18_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccf0b420c9fa96261d1a6e32ef8d882abc047cf9b1b444abdefd264714c15b46 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_18_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_18_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..516a83ed91f7aaa713254dfc52f44d9f9ff3bdea --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_18_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a35140fef4e3fae793001b49a9742eaa2a482286af9da8687607a2111c421e +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..c685f66310a87a23784170731138c2a4375e58d5 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aeb6783c725134d48dedb4213e1613952e370d0059eb57e1ce0c6b232a9090a +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..3af254d8e9693b58e893c8db07a582dc81e474b7 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_19_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652b2a4a2b436b14806b9fed94989c7fc1dab6508d46099e7c853a5068ed28be +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_1_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_1_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..09034b02d74db6d8cbf3c325f0624c54c7b96fbe --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_1_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba25ff746e015eaab25813f2af4ceb17e9c8f9abb50c9516d70a01e5e3cfaba2 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..a6274de65773b7f472937bc4e6507a9b4de69a86 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425d692aff34d727e9dc018d70935a0e1357916bf873498da28a0bd874bbd63b +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..37e1a61275d2cf0e57dec9eeeca47adc103084bf --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_21_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92bf85e0c847fa1abfc558f3ba06561cb9d1e7f3117808c12490a32c64d15f8d +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_21_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_21_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..0ed27cdaa79e2b1df00fa55f4268a386dd56ccc9 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_21_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b8797ae6228937734c740231274307e3cb0718f6ffb613164a1cc5f779e5c6 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_21_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_21_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..93e83d0cddeaada3c1f61530a9fac2d4dd3bfcca --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_21_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a051535127ce040530c92b18e8b18800d5f259c081e52a480bb117898c5aedf +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..31791c98c13d77bfe020d095cb418c73e13ea2e1 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de898333032d190f4887fa86619c319f0a8697656a119468a131ec4fe16b9159 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_22_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..98850292bd496ec64240b896b15194c40986d868 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425892268cb48f981b1ce4dfd4ddad025ff5506894d4e73cb18238e4de1db1d3 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_23_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_23_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..8c4a97771d3df857bc55a2b97e5b24c1536be901 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_23_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d0d34ec2e1d7d1bef900d0a2aa397d31c7a8b51253a83c616b128cad64b53a +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..018fa1460e36418f4c222fda604397a84c11298d --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7e311cb4d16d846ccafdb92b4499d2ddbdb6ad648f0d9f5212803bd9580daa +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..c2e24f9fba7f0a55a6f0f8aef23eed48dbfae72e --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d813b92570faff377d0bebab455d5c1f7c4de52a5543e2e950047afcadeb102 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_25_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_25_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..425403a1f8e0a4142c51ed981b98519397099ec4 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_25_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e9071416181392dc6c267e32bb8d81c2ec6e10e140037c22dd42f1b7c64d3d +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_26_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_26_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..4f0d0591fa84577b86c73c9fccd16474a15d2601 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_26_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b819c077f00e2d1ce42f8d69fe7505568c9db0f75064c0163635be8d0cb8ccdd +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_27_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_27_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..7f178583a933074023a9016504d314a395097f1c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_27_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60dbb21674219ecf6de83e0af4c0ca27fabde724995da0e8b2ba405bf3e859f +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_2_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_2_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..da794d79f6e6091e3940344844c03f77ce1c60b1 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_2_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b3889d44fe85ed05c0b0fc5b97a44df2452ec2d83d83920e375f62bf6c7e0c +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_2_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_2_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..21a20663a5b07ec59b23d32755743e4860f1ae68 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_2_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98161995eb25076ed4fdd35a313307fb1bf6deb1be35704b977cf45da53866a3 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..3973f00fb73ba4e99731fdc2f0155f596ad5dd14 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f4cfe1994854ecfe3661ee6e26aebe41c8c083b3e771fcc869edc56d99ce11 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..d6473f9b51162013142996a1610af8350d968948 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5c215f2d6e5d315f435d4cfc784a90edeb3ad2959307a9ca1e43a2c8af9711 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_3_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..8093e851ee6c70afb90e2dcda051915ca2435481 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af187d3d7cabb7d40532bd1b1e3a39f4df40bde94282c639ec764ef8c232489 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_4_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..3968cf6edb49e6760aab895f3ccbaca35b7c372b --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a04cd57597475d07339c7967ce3777bdf6d28cb41408539c88a4a5353db8380 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..b0d1c79f21b8847fc06593176e1d2e88dda16d23 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b072bf0ed483cdd4de445d55a5155e026785176234858caad086ed6914dbc1b +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_4_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_4_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..51633d184a1f01754a61ee356d0cd64992149f9c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_4_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba22d2a6324dc7be784edd4247df14d6c143f5026b8b110f08a013351bce8be5 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_5_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_5_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..c7acb644811a2ce1382f5cb644a689c7cf2946a2 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_5_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:478cbfe9e6a0b41164a6ab7b3809c2120203cbc8adb96ae084a7b6de49038e04 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_5_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_5_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..aa0aae663facab0168a063970ac3b438d7262cd5 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_5_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0f5faf3513cd391086d7c1337684e1818b4af5efae1f5345704b70da3df607 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_5_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_5_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..ed0ab3ed0fca754b4f3b43f2061e4b438a5fa064 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_5_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:005c37f33205eacaf398a7cf62c315ce0cd576abc4fe7f0bfe0802eebe23e178 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_6_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_6_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..2970a228b6191de09002e4d49db3b689962f32df --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_6_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4a9b0f7f109076bd1a9293648dddb2c355980efaae72954bee4dff0babf1ba6 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..0d27d99d05f85dfb9b1211eb6b6b04cdd220408a --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ffd2c5dd95f95a2178f3bd7698e954681356d8aa8518213a65b30333c1ab01 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_7_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_7_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..903d756cbc6eb9640c1cbbeed636d715d57de751 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_7_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f85a1678addaff23f2e763505b855d2e1605c5fac8828d0e8497e0720077b0d +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_8_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_8_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..e2a2d6457d76b352eb784b2a0b6df4539e316267 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_8_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0067e74da702cb0a0c04039ec4a8629652e1ebdcbe7e36f4854ab1bcdc7dcef5 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_8_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_8_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..d91d11b8b11fd8cefd9f4aecb61e736f7251387f --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_8_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6efda9ecb72e978e9edc86c62426cc13864467dfc38052d607a96f0b94d2e64 +size 1720320