diff --git a/.gitattributes b/.gitattributes index a67e0194e795e2df5c96935065365193c47aff59..751faafc01b91b36672c8cfd22955661deaddfe0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3528,3 +3528,4 @@ qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.planes filter=lfs dif qwen3-4b-log-unary/model_layers_34_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text qwen3-4b-log-unary/model_layers_5_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_24_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..42afc2c542fb85d733210571142d849e30764516 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5ddb67908a3e1b5a8fab7a2e821812244d7471c2 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..184cdd91769cea30eb5effe47e6a3f294d339aff Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_2_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2748fa2d55b1db412a89fe2d6dd1b34e4b2a9908 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_4_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_4_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7c97d9dd0ea718ded20befe3f46a8222a3a4ae71 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_4_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_8_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_8_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..773d9f4cd8132578aea15925ad5cf523650e2337 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_8_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-ternary/model_layers_24_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_24_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..d88c146554927db5103409159cfee207712e8985 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_24_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd740d654b330f3a920457211bf4f825d798e43b5d1cf584a6492b8f464a6388 +size 1720320 diff --git a/deepseek-r1-1.5b-unary31/model_layers_10_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_10_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6c092067f3012e8106f156918e4781b6526bd5dc Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_10_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_2_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_2_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..47ad0b90191114648f2abfdb3c36b6d5ff258c15 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_2_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_2_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary31/model_layers_2_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..54e10b0a1996c9562c5bca8bc222428cc18cb1b3 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_2_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..dcbae3d66e1d4829433d93c4bc97d7db93a6b241 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..21ddc24e9014b1b98b26b3fdb74cfa1cb93d4e57 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f6412c8c456dc1a8c48a49f9e951416f24149c1d Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_3_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_4_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_4_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5efd93a8505b5e7c1631887a0e81d7873223b3e5 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_4_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_5_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_5_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..deae47566054bd6b486741891abdb6cb80dca5b6 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_5_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_7_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_7_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ba1203607869d932e1afa770ab53d62254e10849 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_7_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_7_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary31/model_layers_7_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b75512f1b5c81055d03cccc3681c031d9d14ac34 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_7_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_7_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_7_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2a20d0281d23198acabce5a6a1e3794389a41b0d Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_7_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary31/model_layers_9_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary31/model_layers_9_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e7dbb1f83f68123dfa06683199bb80ed01bbf494 Binary files /dev/null and b/deepseek-r1-1.5b-unary31/model_layers_9_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/config.json b/deepseek-r1-1.5b-unary4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..efab9d9e451a33aa92ed12929160ab7c22d3081b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/config.json @@ -0,0 +1,13 @@ +{ + "hidden_size": 1536, + "intermediate_size": 8960, + "num_attention_heads": 12, + "num_key_value_heads": 2, + "num_hidden_layers": 28, + "vocab_size": 151936, + "head_dim": 128, + "rope_theta": 1000000.0, + "rms_norm_eps": 1e-06, + "n_planes": 4, + "quant_type": "unary" +} \ No newline at end of file diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..77bc0cd0a1c0a7914539e4078368c9dd2a8f523e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..202da7f7c1bb2fb58b9cb1c2033d5033632cdab5 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..37c91a27fa24debad38b68b4937718c96e4bfad4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0c19f75d180454ad933a8eab241a280058abd582 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f2e7132b28d31ac29918deb2363c7417e50f215f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9da89618ed771fa669605de33535f3816ec7dcf3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..997ebb01485f09a08d0fd78cdd6fc75244703bde Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f601f2e1864f19a6d7ddfc795020788ab3818902 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_17_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..00f889b99c2652a77daf31ccc53d9ced249c3ba3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..355487ecbcaf644f2d869f4ae3ab685bf60edaf4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8d2d3fc9d5d469020c465d7553edaf3e95fe74ec Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..56f5b49ac5d1a30f808692813610c02ccef558b2 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1f9cca2118f3e4b4a45572a2d8c7cd28a419d79a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f5de4f28cb8842372c5cebd3507c771046f0effd Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f1e1175627aaadb9325d72b57300b82b25c3043f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_20_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8a042895077821880f716b03d5f483d8669cad26 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..13380dd77e67f8820917ed3d0786c488271419cb Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9a873db319b7fe8e0a36ae6b08fa51af7b551406 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..53d0869d0c112037abb21061383f3dda3df9c92d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a24b609981f171e55285999f43dafb0495932f0a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..410431ac742d3ccc6fd4615eb565773e2aa1f755 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..989a0bba49e703f8f30ef4a088a69eccd1f5872a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..994220e221473aecf342ddb7beb4e4371e48c55f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bf714dc3ac081bbd4e49b21d0502e5b7b93b11f9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..770c264cb5354136f1f8a94488d3d68584801d04 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c597261c35e22e89a2b7e0df32f38e79ec84d31e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fbbb7f5b7ce6c3d75c5c8189b41140114f16e51e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ee7d9d76fb8cd89d756b6e6e59276f5a4c3f77b1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..66d95813be7a1cc30ac2e2ea3bef606235a0a5c9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..481af2f5d19a52afd43711747fe2739a46864394 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_q_proj_bias.fp16 differ