diff --git a/.gitattributes b/.gitattributes index 0730de8543c876ba0b7aa2def195d8c7b312ab08..b848baeda59422018d85e80bf99f0394cb9ebfd0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3232,3 +3232,34 @@ qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.planes filter=lfs diff qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.sign filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_27_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_13_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_12_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_6_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_15_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_5_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_26_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_12_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_3_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_21_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_24_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_26_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_19_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_20_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_13_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_1_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_8_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_25_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_11_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_25_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_26_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_7_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_15_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_14_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..7c4eb74ef6941432d2fd7ea2322498896d40e60c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_10_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3219043486dbae59c2b67bbe335fa99c97a12363bab0b049bec80ba47e208e9 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_11_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_11_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..c14294ece0657a4deef3750384df7bbf9dc49ce0 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_11_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6121e942585b457d197d6460dfdd693ce544acad8d97be9f5472c0ee09b1093 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_12_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_12_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..b047dd05f763bba53dd87e123024a6a0c428a705 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_12_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42007875947bdaf3867b60facdd7f9c9e64487cfd9b350ec186ef884c691ef19 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_12_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_12_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..b6ce641b5bec6bf55effd2a4514c518e3e54e748 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_12_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615b7807bf718aaf811ec54722315a2663ec5e8f46a519eb9c1a1207468a7480 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_13_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_13_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..4d53927f1eddbe19542cb947783d7f78014ab106 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_13_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede9305151ecac8e169f64dcd10e903d4b1f542c0ab952a9bdb4427605378bdf +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_13_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_13_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..4a92ce307b2f230b1adb40db990e39325ad0ea72 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_13_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db74416504f8dbe868293124fbae567b0de09f58b80b1382ea253d4fee2fb76 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..aada9cd28848bfd23a7891488c47d5af21f54ba9 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_14_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f2e650fe8017b2925e9d78fec93b8829e4e0ef6916fee26246754219fbc7eb3 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_14_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_14_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..5fdba03e68757cece3e6a4edf6f295e6adfca5d5 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_14_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d470eb097a253bb241760be942b2b0fcac8933768be4b3fad19d03854c7633f8 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_15_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_15_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..3de5e3eb8243c1b052f8a63c340c666632589a3c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_15_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:630ef541ef172c024bd1ee9f3bcb2db91c6c3c1b7ac24a4ee4db5dc601d21d1d +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_15_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_15_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..7365c39ff0507cdb98d33393f1e28223061046f3 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_15_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe3a3f00f81c650926f634fcd5374deb35a307a911d7bb4fbb4f190b7b383ce +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..2f225d43ca20ac6a4b9c6f8920e9dd95b1e4bd59 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f275de47fd65df82147056e373b968d0bd11d3879cea5844939eeace1f7d77c0 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..901d4a0bb883d12d1f7e0c9639ebe941f2bb70af --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_18_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c81c38d4c401106e3f10f95cdda545b4d3307b386f868d8d59fb9798cecbd00 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_19_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_19_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..ecb093216e047409804f5ac191d0544b6cf59472 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_19_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaebe5842363c5354a5a2afb3cd6fa02931fc402c25271ced949305ee0b80f4f +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_1_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_1_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..df920ed6e59a7a235613059511139bf86f035037 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_1_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a815e3e856c8cafe8078d40d9062d83408e1b388a218e8007d4d6f54265802e9 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_20_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_20_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..10e81b37543eb9487e7d68cecf2dc987e01d8bec --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_20_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba38a2db9baffbeb85ff35bf8607a6abaf1274049e6e3f5a8ca40c56eec2623 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_21_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_21_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..8ef87adc7ffdf61306f38d21dd58507b0efb31a0 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_21_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a040d0fe2e4f47e68897c0eadbf13d265faeb82f24e2d7a125b7b4239e13c3cb +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..feeccf2c01da349cf26a6f538cdfcd1dfc8f78ed --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_23_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1726b282393f54777e7708786ac284cf0de8fc4afa1d5d85ebe7dd71528bfea3 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_24_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_24_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..51fc1c9e37176dab304cddcf11478f87961c0b81 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_24_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a37fee03c94e1ccf22e5068df68bab2c5bb8b7fcbcbba8250ca316dd45cbf9 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_25_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_25_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..5bbfb3639e913b07ae04ac372608f6d3845f877c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_25_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b08f276defa535f96f1125f34fecd554964f9e235de989e0f995b819cf5b64 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_25_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_25_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..d91d9d766df14369c156aab58bfb6d6b8a6d522f --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_25_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdaaab57de6df8e9b2ee666a175a62bd9c44610a5eed7cd2cda159abe12c87d7 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_26_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_26_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..38e1f2c2bf75c3939114987038acb19c698161f9 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_26_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1895b4a47d16357d1ef606e5907e17617e9b09ca6f32cff60e0881fb6765b10 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_26_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_26_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..ae366c8993339ba4d708ea71ee6d03cf2079da53 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_26_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d517473f2ddd0936aca1f6ee1191d1543db8405ca3ac45919c09919166ebeb +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_26_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_26_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..e3a3562d4b832b662ae330343a767b05e99296f4 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_26_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d299eaa6b418388971671d706aab8ab90cb30151356cb9981fc6d3cba6c93e8d +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_27_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_27_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..156ab03d16abd83cfee091dcfce629b5630df1f1 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_27_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:716bd70955ee24bbe346691641068d528df6c57e0f5398cc4f2b49fc31622646 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_3_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..c9e12bd9d02008c9861bb20d06b8621d38fc8972 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_3_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c874147270d86a22c92080a5cd466481ee44b40e007b03e179329f4658d85eb6 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_5_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_5_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..9ba3eca07f3276f6d530eef8121f169d618e27a1 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_5_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1188f623b15b89ccfabe6d967c5ef0136845f685e1c2d321fa34283d31c9948 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_6_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_6_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..302f2d3a591d26e722a12f42c8335d17ce4c1764 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_6_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb399f3ee5642e440e6bc9fc2e950134048bbb70587b3664004ea6faa9a45f08 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..0dccc96d391c2fed7cdac98894d01acfe00cdebc --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_7_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5609c675d2f3084b82d82ee2728f13f808713f525f6262890f297b336ef4af3e +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_7_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_7_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..8f595f399dca0c246a46c83858af5de302b2ba62 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_7_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:841011e739626de31758b4f1e01879b9c0ee4890891bb6ef51533d20fe36aff9 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_8_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_8_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..a496c222d5324944fc79416079be7e5752c0bace --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_8_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9746c8acdc6101e41f4ae2de4536f616a13ea39c5412758cebdf51465fe1c369 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..02bdb82c5578fb790eb2d1e7dda7a8412e498b05 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e038e4336db2e45d85c099397a1c687a830db23974883fcb48c5afb63daf52 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d905c70a96d19c29b3ccd87a3abbe7c0ee4ca05d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..062db56c99f1e0f7bf63d93fa5f28fe511ff6724 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..87742a2fe0b0324b37227ec7682d74d5e4a099bd Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..fc82d2ce243529e924112e93801b2c31a9313d50 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_13_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..40b686b83607f6f85048ff11f9cbd63cf1b152cc Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b9efd30638712632c06d558c0021f908a67b781d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e5876032eaa1d29719c88f2b615f066d349e86c8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0dd449924833cb0e97881467455293bdc9f18947 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..51c269b227a055320052a795f0302fae60c05577 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c4a9452345d31cf2acb2cb43c8fb5bca0b8cda38 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_17_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f65011ae1846ecb6b63f5a65c0ddbbfc830647e7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..879dbcd56affcfbd20af9b3450a827e8ac23ac7d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..daf019fc1b165bc90b46edda11b8a4d32ca57ed4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8c11b3e37584b9f60aa081bfab76f150119307ad Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_18_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2ff9fc8328968f5077b4ed41a2b51418c2497e1c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d9ddb4a3209540873b3521a477abe12191a64c56 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4a3cb661de572e92ca2eabe5520d09b7c23ea1ec Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0bab0171e436222839a887fddae7f86aadeec2c7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..63ca14253b5a8c44e50964e27ebb1a20a14fadd7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_21_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ec4f1d9e6e111004533d729fba9ea0804f85e64f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..39b54d034f1d366e4a276d72bef601f089936fdb Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..51103b9cea5888331cf394ba8948f1014a260a27 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..a6bf2c514bd4821cc4ebdd3e4771816fe0cba1ad Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_23_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e58a9cd071083fb0d971e7be3858bccd1e181435 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..c42e816d956112f842229ee1c8bc6cf6e6e5273f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..eb6638a94a285d4a1475833b26c02f53813f38b8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b2cebd507f77f82b0e250ca262e52c7edc415c29 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9eefacb90ea4163998301d95dfa346dc3f80483d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c7f7dc7857eab292a0f751695af0a26c0ad9d15a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0b177a802d200b4e03f55ae744af7b1387590f21 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f539f21b24dccaf70ef816ef1f6ed12c92890a36 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_4_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a03ea4fd346e87a8d6485b675c1b00490291f03 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..32727c09c4182eaf65f1aecc21dd60379271e680 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..384e5550886585bdd202059a7247c072f880b9bd Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..99bc57ff6909824367c8f9c23a52ab49ba43bc9f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3e83b22c90d1a1d8d811f6d3775f594a5413fa17 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4d7d37acff808911ab6a4711f4ca04ef7750f7e3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_6_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f6158498a312a5b3a2e8841c68ab65ff5807e776 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..db6c4087a62ab997ea93da9955d9737ac2c25eb4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..576a52e8dd0a1e8d80a1cf150cbb1e8cdce0e7e1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7b91e716d2d80792a3728ce561b577fe645f7d96 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..aa212e434c44b3ef8b26dd0712636133490962b4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2f3e61f7ed055aa428d7ec4632011ec41baa2a9f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..12ee0005328aa1373c1de0a0cf994582824e1bcb Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_q_proj_weight.scales differ