diff --git a/deepseek-r1-1.5b-unary/model_layers_0_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_0_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2c352bc4acbcfe7d8e68b1208ac895328786874d Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_0_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_10_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_10_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e8f951739849562d4f2fbb27a5c0333dda8de198 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_10_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_11_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_11_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..eacc9d59d93c4c2cb731f52dbd3be00600d8ba57 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_11_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_11_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_11_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..41f9b5d273d1c2d6ceb54d356c658c27f17489af Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_11_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_11_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_11_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e178e4dea14f8cbc2158e78e9453ffbe74924c6a Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_11_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_12_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_12_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..71e678c8408277f6354d2def303a82d7a8668cb2 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_12_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_13_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_13_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..408a4fe4330502360320954c9e4c94350380eafe Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_13_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_13_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1e31d78b3bad6f908d241f9fb1e85e3219d36310 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_13_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3037fc4357b76a73522638553ed5df0a87ece153 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_13_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a3117acfad675e99118bcbf2cc87a0db9725a61b Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_13_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_14_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_14_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..90cf66bf7f23dbce533a9156464500f3ce2edc41 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_14_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_14_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_14_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ddd17bd040c43fb89bc3225fddacecf6a4194e9c Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_14_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_15_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_15_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6a3082c006e0687aaff28354dbf020b4d36b5cde Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_15_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_15_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_15_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9319de5540d0184a233728259d48981117c00b6b Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_15_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_15_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_15_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..bedd656cb61e1b16ec719cd5cefd43ad5eea50d2 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_15_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_15_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_15_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c8cbb0662fcce6048efb4bee412893018c075bfd Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_15_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_16_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_16_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..548b0e9d38f20d99d5ced9d83853d0196af54777 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_16_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_17_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_17_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bfcbab7ba64ced75e0ff5024d5143ae5ad230774 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_17_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_18_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_18_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed1a9c348a0afdb0628a14416dd97e4f101469b7 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_18_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_19_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ccc5b5c9a54c602105f624d8752647d1b4afe0c0 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e9bfa94937c80e227e43b2591120196b57471c90 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0385ee4a6ec2b0e55b92dbd7e5ac8995d8a1d4b4 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a20a9baba7e7b5f11f0c545db427ef58d85c276 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ec9921ab603b168cf8ce262260d0fe6c6bab2787 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8679c894b553ce64de5b70a825a0a873fa8e9ca2 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_19_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_1_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_1_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..58cfcc62ae22017c2355f5abacc4896ccebb9722 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_1_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_1_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_1_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d66603de162864edd6c64e2fa5a308470e3bd81 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_1_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_1_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_1_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c75ebdad2866070c227bef111584780a52937315 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_1_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_1_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_1_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b3c2a5838c67fa4a37e84df45a2ca5ca5edf8c05 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_1_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_20_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_20_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..81aaba296ad7c841e53d65df5e15458ed2313ae7 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_20_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_20_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_20_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0a6d6e2b784b2d33898c9e65c145cd61d20f4fdc Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_20_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_20_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4fc29fa38d779bbae8e206934f8ba89d47b83486 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e1a165cb2b8233c01da78474d8e7d4efaca539a8 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bc4db5c3a83292cf690fcdd0723c9b76763707e2 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7c7099ee79fcef9517f82a2ba04b1dbce25e2c35 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_20_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_21_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_21_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d5442af4c06c3d73058f742d5dcc76b443362488 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_21_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_21_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e1c39bb6c95dce200a86359bf369be6a4a716c95 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_21_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..95c9f9b4423843e5e0fa6cae0d263ffb747471a5 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_21_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..52070528d3cccbe104ed1b0c462add5e96f45f7f Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_21_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..81e039cac497040a37965af24aae244a5cf88db4 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_21_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_22_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_22_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..677116e91b49a6f95f0d6e293a414409c46eb320 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_22_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_23_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_23_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ecb3e1f157d5d622d190d3e47209636fdb0da335 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_23_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_23_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_23_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..43d15ee5f7410a6e40f817bf031986f8e1d6b7a3 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_23_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_23_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_23_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4ef81943afb80885f5e238cf86434993ea70d8a2 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_23_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_24_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_24_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..93e70fde36a686eebd1aec80445a130a6481199c Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_24_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_24_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_24_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6226d34b57ddca78f50e8ca2f9f5652bed9dc7a1 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_24_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_24_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_24_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..046c15a55ce5ddec13995b9ab8d5521af62f8ffc Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_24_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_24_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_24_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7b90562f5a55c088f01ab31e2c421883eb6920bf Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_24_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_25_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_25_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0839104ae1d5b25c794d5e5c12b32e14f007b85d Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_25_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_25_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_25_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..142ca0c79cb9915517d2f204fcfc606ac025a0a0 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_25_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_26_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_26_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2183616ef709effd4d4e1324fc40032818936bc3 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_26_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_26_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_26_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..dd99f004d6c98e411bf9172a770ad5f195453c7e Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_26_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_26_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_26_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2c70b4025d2fcb30057f94c604e21dcd3d32c7f4 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_26_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_27_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_27_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..708e7b09ea3bdacf953c85c2016e72a4401f6a04 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_27_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_27_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_27_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1b0fd017c7c2cda198fde0abbdaaf1bf12c66bc2 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_27_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_27_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_27_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3b2da7f82a76df77be16abb40741fd5e9fd83570 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_27_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_2_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..47abc435f78dfc3d42f09ec2bf2a6b00f4690472 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_2_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..54e10b0a1996c9562c5bca8bc222428cc18cb1b3 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_2_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_3_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_3_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ad465cdd9a80e575767bd43668970d6c3d90ba56 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_3_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_3_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b595fc36fe9aa3fc0a983a8b1c45139997e9ba3c Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_3_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..65d7ea8949935a03cf84d0b438dc5d900b918011 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_3_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..760615eb9d3eb79e7f79be55b13bbd874909e423 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_3_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f6412c8c456dc1a8c48a49f9e951416f24149c1d Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_3_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_4_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_4_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9759f6063d7b643556860d11f1a24b45ed57fde6 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_4_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_4_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_4_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..bc0c148982304816c4fa3512b7ca23a3472c72c4 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_4_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_4_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_4_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..90912a036c4f11efe066bffd33c85cbc3b6c8843 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_4_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_5_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_5_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8e7dbb2d076dba9b00e64dca081de6af929c0449 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_5_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_5_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_5_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..40e9ab2af87c5ae211badf3c6370cb9d339f1134 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_5_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_5_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_5_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4e0516674c8589adce63faca186bbce066216aab Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_5_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_6_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_6_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..849adf4d99d232847f9395e47713b9c9f67c9193 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_6_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_7_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a100651bc62c5a0ec14f2ee20c3b9e4baff985f4 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_7_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..16d446efd8ff0030b440243dabae3079b0a139bb Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_7_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..85f1dbc37c726908dba8c06d47206b3535ca8f1b Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary/model_layers_7_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4746b01a76a4bc8f7a9d77f39085d4892ab22ce8 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..589cae7e3d3b84aaeb145ccadc7e8aae3818aa45 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b75512f1b5c81055d03cccc3681c031d9d14ac34 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7747456bc56704e56aa59c5c01c7f2cf2c39ce4f Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..14221fa1e5c8786aae8a1af0f58a90713768558f Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary/model_layers_7_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..343883ef9a26092688b7b2732a9a0e08ab645158 Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_7_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_9_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_9_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8d6f613a5885b56f74c7c00739ee8ba9877b9bde Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_9_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary/model_layers_9_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary/model_layers_9_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..37a8459fc5a5bcc4f56f13cccbd3a69ece29b02f Binary files /dev/null and b/deepseek-r1-1.5b-unary/model_layers_9_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/config.json b/qwen3-4b-thinking-unary/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6988f134db143052042f2bd6e0c897bc6a605189 --- /dev/null +++ b/qwen3-4b-thinking-unary/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/qwen3-4b-thinking-unary/model_layers_0_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_0_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d1fc93585fd0cfe51bd59489d07bb0f8a46476f Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_0_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_0_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_0_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..dedfe8d7c5b8710e33df08d13ab0b5300bf8bff9 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_0_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_10_mlp_up_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_10_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2f0fb2addca5062643c6687dea4af261f8ddeb2a Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_10_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_12_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_12_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4e0fd22aa09ad5349c794fad86f23fd7e91f0b70 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_12_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_12_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_12_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cd3c098c45847b10e16970bbf545ea7cbb3915d0 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_12_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_14_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_14_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b65728e3acbe6fd2a44a7a299c65352c6e4d02cc Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_14_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_15_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_15_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f0c84b7a558514f30af25686878e4e9485e09391 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_15_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_16_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_16_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..da62b19455c2176911c5a9f1d3d13521c523839d Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_16_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_16_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_16_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..926f80083b0ab34002cf1d4ff5f76983fe6946a3 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_16_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_16_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_16_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f6c47923e11dd03bf24631be9cb543ecdfed83e9 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_16_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_17_input_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_17_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..653d0b85e2bf4cae8442c2a4c26b39a000860738 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_17_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_17_post_attention_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_17_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0dfaf07c2fcc38d53204ad26c8468a355bab10c8 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_17_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_17_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_17_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e9bdfe642e1df69c0bed9e298f20193e7555bf05 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_17_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_18_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_18_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4ef55337b6c5af0f008b69bf70d6ecc982a16c1c Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_18_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_1_mlp_down_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_1_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ff5d967278be672a5131827f37ff30a999bf01cc Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_1_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_1_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_1_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f5cded39b9721e7f6a11a5026401eb6c11be6251 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_1_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_20_input_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_20_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a4274965b1448a5a2187a81c288c052ea215035 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_20_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_20_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_20_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..87c8350bf5a5e0e45544f572a2aef59d93a83de3 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_20_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_21_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_21_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..80e44a681ab1214a8185c184eb85dcf0bb9c8cad Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_21_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_22_mlp_down_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_22_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c2282c7ac19529c6453cc43de84e3459012b5cea Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_22_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_23_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_23_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8d00d2c6daab398491cacbf06e753e3554f8908a Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_23_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_23_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_23_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f5a4abb27b84c45d85c6a4e8849f25417980f013 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_23_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_26_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_26_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1213d1736ddab6ee762112c5bcf456651022d16e Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_26_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_27_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_27_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e26bd1d80b5b72a4d8111fe10c70e3be921041dd Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_27_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_27_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_27_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8c4536e70bb80387e1eed6532ac2a92ef5428794 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_27_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_29_input_layernorm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_29_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e103712921dc7639ba128fa7954d50310365407a Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_29_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_29_mlp_down_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_29_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f6674d864e787d554c78b4233dc65042bc656ee3 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_29_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_29_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_29_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7b0f77454e999f1c86fbc03432b12084f9d4a6d6 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_29_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_30_self_attn_q_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_30_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8806a86c0dbe66f3e1923ab3a0ea79c0fc97ef19 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_30_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_31_mlp_gate_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_31_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4e2bbba8151e3d58678a7c32bb0fa1c8546363dd Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_31_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_32_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_32_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..31bf9b9fd0b59cfc43751daaff3537eb992030c6 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_32_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_33_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_33_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6faefe32aacc084c791d53d78f520f52c3504718 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_33_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_34_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_34_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..342c4095d1aebd5d1772c0e7b7d55ffa7886975d Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_34_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_3_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_3_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a3f7e671929e7c73c9c272d716babd3123cf2439 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_3_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_3_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_3_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7a656f800afe602b28ba54a4094d38a2ab1494da Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_3_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_3_self_attn_v_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_3_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d6a11bda2afc0b04a06219c904f94d14e69d8c56 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_3_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_5_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_5_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a60973943789fa00633d9a85abd40099062cd605 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_5_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_5_self_attn_o_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_5_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6c9897dd59b157cfee0e1b01e4aa57fb38cd60ef Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_5_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_5_self_attn_q_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_5_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5df43c454ab4fe0fef5a3038d5ca059c2a1d9482 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_5_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_7_self_attn_k_norm_weight.fp16 b/qwen3-4b-thinking-unary/model_layers_7_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4058b3fec31f47fecd9af500f4d4739294be11d9 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_7_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-thinking-unary/model_layers_7_self_attn_k_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_7_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f410874ab3cdc4fe691d2aef8327fc13b3780184 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_7_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_8_self_attn_q_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_8_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b95e879c0a1b18b60c185f3a1c57aa2ab530cb82 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_8_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-thinking-unary/model_layers_9_mlp_up_proj_weight.scales b/qwen3-4b-thinking-unary/model_layers_9_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d4fa55bf68e7afc7f50d170473e97a262ddca5d8 Binary files /dev/null and b/qwen3-4b-thinking-unary/model_layers_9_mlp_up_proj_weight.scales differ