diff --git a/.gitattributes b/.gitattributes index b4de4042e416af4374d86af7d7580b3ff86826ba..154dc814f1d76d215cd8f1e23a48dad24d323e4b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -637,3 +637,163 @@ deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_weight.planes filter=lf deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text deepseek-r1-1.5b-unary4/model_layers_22_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_22_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_11_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_11_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_14_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_3_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_5_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_22_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_23_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_1_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_17_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_8_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_14_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_11_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_14_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_19_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_weight.planes filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_9_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-unary4/model_layers_10_mlp_down_proj_weight.sign filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..75cad0c29e92a6b5512a747b290a86b56de3d1b6 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa94b58beb0bf8adb0bc8d39f8759491a5a6b3f176c7412970ce55355a0dd1eb +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..293859258542826af281b691251a49f1d0a37ac6 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857224fb95d2540934f18e7300d711e018b3dcf113591e1321c06f4258cb12f0 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..fa5c3f4361129e89d21fe2ecaff339213555bc39 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa7c60db5e54d8009750dd241125670907a01f6b0741fecf14e3983914efbbe +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..3a9726c7f444e86e942274c339f2c7d261e5c8ef --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27e73cf0f5f5912b11c3b8a2373ebef8b8e67a690c34ac5565941d02ac567b0 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..ae598d6be4dad9378c37e15d816774d15830cd9c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53f83594c6850d67064cfcb38e7ee64ecacd8bb665a32fff41a7c38d6c787e1 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7a09e94f674f967f120dd2f0984281b08932fc84 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5430535790e7ee20be30386c6fa4478877dc3885dae5ad930fb9c09a1eeedf +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..497f864b16a2660ed1c044b4c6bb785aafe6dbec --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4774a75c6515de3177e286d22d948777c4c78b1cba28c86393a760f3b30d4f8a +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..f68b7f45ad427a5029c789ccb3dc33d496408c81 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5303be2ea3d388e414f18e13cde77013753c3899fe6edd697a695a2003b231c2 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..1d50e783243a00834ff0fc7e7090d549545a7469 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8d38e33a29916b3a384d78f3154474f9b47aa396ce1f0b4ab1c44518164ce8 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..5c7b19469c3d783cff89c24fccd6642e93cd7502 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc09c6a4f16a82e36b3efcd5c89d9950359c3d6dc7f8620649bb682ff3a483c +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..de34a5860283ba1e667717f5225b2b91b14617d8 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8abf5343f513d54c98bc5542c8abf0dec557d6f9fdb60a7377bee7af183ed1a9 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_11_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5dfdcf759eccc82f7eaacc6b2b752be9cd23fd22 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_11_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2422715d9cb42eaefaff21120cfde70189525f51f0f51971cc5e950f1bdc48b2 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_11_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ad6ad61fcea9489f97b12214fb38093361fedf78 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_11_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7a5f3a1c4f5b00d240b49aefc7889546755e6776dec3abec018307942480c7 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..c14c996c8f925d1b77296dfad9a4fd4cec11b277 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ca7913f4d4daba5dbe6a2282cd8f8c022c992f381a2e4f587014f350e6be45 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..be772cac113895bd0760b95353794f43df943d50 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d611b70c73fe8e6eba72a5ba82b60fd569b34ceb4e7293ddc306bcbd89e6b31 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7b4e2cc6e90ecb5b16e7a48d3c44158983d98520 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60aa5f439af1750b697977171496e97a96c845dd439232a9a82e60e6ddb6d45c +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5ac60ec5cbeba9c0589371ab52afe1130066ef55 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529f227fe6dcc789d5e630591de08f934b14595c485810502b41fac46cfe5f9e +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b66921958f52e7db7af0781819ba0765f2c79cc3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a6ff0c0a5ef27a1a4555ec6ad44c4a63c4d03112961ca830f93ae06eecc7f7 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b90a7e3ea8a7d99e4a84645aa2dfda2eb5aef2b3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a377c5b8d79227e31d6ed9baf7f97edd69ab8501e07aa7cce5229192c2e41a8 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..26823f636f1d9e2c3f33665b2ff8b45c4d5fcfc9 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1457d0402e211b3ace67d73e6557b16c2bb570384bcf80a736957f6602646e90 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..705c215db59af64e212c21399fd7562b5e135c8c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac26d0f8d18ed6d0c3fe742dff6f0f6108ff35cab28cd8e090e5cdecfa989388 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..eb2210b7a33b707db5dc76623575208cf4b17335 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3217b5edb53900b88a21b72534fa586628816ac7e456dbdf9cd853bda8336d9f +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..2d8e51b78b44da4df6c1f3b20e71b82a9f6a440d --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8874205a1141bacaae9fdcf35ac12c56211a602ebefcde5e571e9429da90d4 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..38932cbf7cda5c22fdba8c7a2e0cdf66ed7c149e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ea0c1e1fff9039f309d8fb6fcb4a0a3e1bf6e0b654aedfd69f9e4779666602 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f4a346515c3b6c7dd77036523cb0770b909dc462 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19b277401a0cbc2723d6fc65f8c1d4765b0a03aeeb190adb422769425e8f5b3 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..9a418422c2f0eefaf86544f687523b5b3f407481 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac99282038cf8cd8e834990e63fbecfca747e61ea02130e4b88ba39c57ee625f +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..33902a82194b1836feec031e3b67fc4b18b8b532 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658a3e7cfbbbc9864d5a25f76433fea533f8675ec8b145d76dd1f0ee770a0947 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..3d50475758618f39926446feaf40c5feb85baee3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff4b70c3b221a7d0855b311d1358611893ea1f455ad773798b07e2b5860a1b9 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..85e6554a4b74072f8e135620ce71d54e7847313b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c52f49f7109f6b3b8fa48d71e2d0b1c3a6934eff0e61b5b613e10aaa047e706 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..d77450bff18a034ffc4474ff0ed34a3360cf856f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94843ca28be62ce519030003fb00c05f190531028b570d188f0202642bba3060 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8560c57126c3c4a029dcb3f975f1d2d59d65aa09 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f327cc707dc55c907d977eb0e379e910794574fb36268752913fc042a383b8a9 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..a530ba57142c28ab6d86fad8bcf9dcc1ff46f972 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e99fcf7ac870047d47d0c8d8e9410e91e7983a800dea53e3d764fa8a52f837c +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..7f198900bd0b278f12f7b99df04bb8cc68a73c40 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a97a54886af2e995d3f93945a65067ae6d6da27fde52d273912743901a03fcaa +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_14_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..bf0ea36f02310adb3bd1bfcf63dd2d398226920f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_14_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c83cb5a88b3300dec9bedff5b81c980a47652ae26afc602560199012eb3c77e +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_14_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7952f2604f475d6905507654afef935438b785cb --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_14_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a62b3e005442b9ac9a51a505347398cd9a43d80b5bedf204ae184f08a8c715 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_14_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b28694e44b645096e929cf1cb93f4fe7259107bc --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_14_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad5ac7a001aad2ba4ff8666731b7b197faf8ec964894eac5cf39ea59c9eb07d +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..335260b837fe42f1e5c7ad846709a4f77e0a9a9b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dfc0d17810c2240928b2d31fa07d24fc163f72b4557b06851903ca17fa8a1e9 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b31b3911d48ce6eadb08ed60258fe6ea27a069ac --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab51db9b7c598f9dbfbf25f941a0115e5bea9799b9cf3db8d837cb60156687c +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..67fb482ce1804cded2dba7f120dec26d9248234e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a32faef3e164c23a3222d470186c15bc31aa5a84f2e7b8e9f546cad03aa5f8 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9c4893ff0b9b5e75f8af5a3320e7e4d5fe5ad185 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b684bcc0c65183e127d15e995f38140b7c7fa34626ffbdf52377bfd192205a9e +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..c12a94a0aca37140df9741a84643d19544dbdcc8 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37eaf4b08800fc8155b2341202a8d45e8e98a50d65e8a58fe0ce8052d0828f63 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..9061354c7071bd365217a4d70ee7055bdc4c2e98 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f94a7a6c53e6df8e0ef8e8bd1211637f8f40f597baa134c82fa904b4c69faf8 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4b0e336e74d3333dd2879317b2d0db8587c2b92e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5206a02260028ee28fbde6edf9df24199656473b0a98f2b038b7fc841608e572 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..bef4efa9640782d5639112e8ff3d0a9d7d28a7bf --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca749add2e918b0fe50d7447cbf4239643d036af4172199c71c24e7acc9f43de +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..e819f52540cabded2238b712c0793ec21f60dcea --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:991adbb977f284d68a1d8677e10e13fcab2295e549095743f5022b426c6b20dd +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4944aa9b33a326f107dd23f7df3840cceb4b6830 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a564f99a29539779b4e31ec8e2238c03ccace4c568b8495bd5028f5fc08e9416 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..dacb2b21ce0063faabbda378e0730eb542bce4ed --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:176f0daff6ca09d4537c37b0e2c3f2bdce26ad312d47035c2eca74c9aa14ccac +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..a1d258b32d9d8f79bf13ccc91baff40f14a06774 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:948dd3df371302f40faa36ab26de7f6b12e1bc3acb9bb0e7b6c11be8ffce8b73 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..e3f9244ca2b8ec4bc645e5042387de8d70bf778b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c72becc9c81e37613cbedddd79303bdf7d08f6e76c96aaa9948b6f21af1effc +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7b765d92a13526d3627d2c1b94bf748309fc7a75 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f627c09a596845e3d789efe2accb30b956cdad47f79f54fb9bc252285435cade +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..4cbfb6ddbc31bc6aaebf4b06cc67e18adedcf5cc --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70dff773ddbaa8e81275ab85ad9b02f74316ebcd5bdd8d227f02a10eb96db545 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b0db4a1dccf512a67567295255e3a9f1c924821b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_16_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e63fd23e4b5551f3fec479e2dd079c70b8215f7963c4e64c0434f3d982c126 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..afe356596d9a29058c61e3f7e63e7bd3a1260069 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa668aa4ad56b4c5d114d7539cdaf4fc25858eb0f05d8ba0b29b1658605b089 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..ee3817fd651822b0a42566d3e8ccb119f76d5e66 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1430be9ce1602d021fc493de45d5d0dd78c51fb2dbe3c7aff20067a84d607386 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..100a696a9623e794f8c45773a6b9886dc8e4647d --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c92ce5216e108768a4553e7c27023da52de363851e1044fdc46f929a28d35db +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..5ccb83c706fb88b966ed933d087c7ceadf1b144f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c8340d3df62e2f24a23963f99e734a1d7a905c51c476c505d077a04f62e493 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..2f5e8e8918da0c0a64ee68b6d11899222d5a9a01 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf4de964079986f1f878f816dd2eb0aab9f1a62a9fda213ee8520983b495442 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..88db8c4f3c3d5b7494390a92acfa3c037f6e21cd --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1af0c0db2c406667c5d8b7c93675b9495e909db4604253b8c76f10fc469e8e9 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b38406c680fe214229219380a6a2b7c5b5b2e98d --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22792969fd0693e81af1729639f47838029a814d265709d32046d859081f5da7 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..45fad4f208ea11dc81ac90bfa70fd9d856dc0c14 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a554b14fd5746bac41c3ef535010ae89c748931bff48458b26544b80c7e08249 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..51129d8c487818ee3c82a787d1f4356cdc6c97c3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36bf8912c8e925d1923fd6ac908507164cd2b2cddff60ac1f466dd2467a4da7 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..8cf0385cdb4834a51baa836e62e81b05faee75c2 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_18_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355d3b5453535e36f675e7432aec1c53177dee759659462b93403bf8b028d5bc +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..6722ae54944fcad56784ef30f7dfeedbac1d24fb --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b50049b64f5410d0a383dd7be0a127f7d977dfe81fb3bc8ec0e49b5773bd43 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..ffeca290c27990fe96979b52a41e67bba57e4796 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc81c46014cb5f49b797d672a1c79bc669beb3e2e51bc70faea935c03d04274 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..aa67948e1887958ae84b8a20650d784bafb6bcd8 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30e16a04eaa48910c269be49ac7613bfb35e8820558d02e3c31ec4353f0e480 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..567a3f29f01f8193e05c049c4da50cd656e71ee3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010448aa44fe1534eb246e0934276a8f7abe72cd6fd654526b33f590239a4ead +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..dcd1669ef3529c50b99fbd4b7ce4c456c50f1c77 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3841fa4a3deed727ad7bbd14feddf9b931fa10c645be97f7a41d84cbcbd5b85d +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..61d05fca977f9e5561856d94c0a5e734b797e8cb --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97940a2eb8e8a74df35049f61b66056fed1f6649ec275f5a3e5796462fb7a59b +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..a4f9829b0c603bd5d6ad9bf7a4651820659c08c1 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a04a4f84162379d5b772eb736221f21122838357c7f701c728b695f7f21f48f +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..e77158ae4d623a5f76eccf8377aa15653ec294e6 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23de363ab2a99229fb1cb3b018bb1da93cdd2645c3e516f2099c8c4c540376ca +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6f7be89666c6a71b56ea9d3ab3eb9f366393a8c8 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:431585ec6292217d6b8d7f6c56e402c3a5067273ae6f01ff773f6b546c295222 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..e760140a86e30951ea0fa762d0e62e8dcc774115 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a161d226b750496245767146f576d75554387c2831fabca60a236b62c0c1e68 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..2594156a2b921d8c6afef98b286eecd81384ef2f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1012faab320eb45922fb0cb308db47a5f1abf405d8210928e3aba393b82415b3 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..e2008019eb8913e2c4b7b373d763df446a841ca5 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264d743b356d1c334878111aa6d1c1fbd184503bb3bc5e09618c4659ef84c4d6 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..c05f426e1df0c7b3884eb56d107195ac853e0428 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f604b47bd694877e2e3740f7b22fdb51653f57ea121a0ebda66e99f5601e04e7 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..65ab18d9509d233916566996fe44a06ecbbd20c1 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d156f6fd6f9c059867791db7238a436bef5f394350d26f783602f0969c68e9f +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..fdea8e225a4ab9e8fd57a96fd4c7d24a08767624 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82eb6254b67e39cb4f75ca2cff5178f7958b2c9933640c8d5a3bbdafbc1ede2e +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..202461edfe21a8497841ecc99836cc8ccde12935 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58389ba704cacd399459b0b3857e6ff89c9f713bd92e8c7c604c36c3796f64a7 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6d025c3333f5f11bf7d4b61d6753cb635fe4f42f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b4acc9b871a03e3ca9e6aa439b6d57e8364a974a4ad281b8ce01faef153fa4 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..a6ef5dd38daed3e13f143f9bd5d959966e6d45c0 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e731202a7b61bdd187361e3d24f2c4241205e4f56071f3f9504f1b6d1a2e565 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..e2144d72989f5c6b7c1ac0f0b7252f431aba7f0c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4a585a32f0371e342ebb4bee0cf81a3cb7d7df8d4febd3f35b0ad8afa95450 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8c3bb3bf479a92d4667029d95d67b24b0310f672 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddba45471e861b67e2ed1cdf00a5e6c5822907130dbaa78f99fc7d21cd50c72c +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..9979cbee1d287a65455285a9c1d30920b5dff66c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873ac174f501c6706a7cd27b35f718acebf0489a9261498406ab45ea6e017f85 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..48d5bed3a5faf7b1475174dc489a9af3124a14d0 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54322be0e137aed1d376a348e8e4727f22de6daf4886c8230e912c80d1683b95 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..556166d452dcf759b9cb7de78a433d2c3a7e29f5 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c5c223d3469abf946437a1c4647aef057a6a3b959a0ee258395e2dc9f4a672 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..292bb361fae7f2df3edcd3a9351d441678e12618 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a1a4312d28fecede0d93821a65cac0b3f476920169c3ca0f01dca662f56bfcf +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9798872450798b53cb76d19d9b789c9368978af6 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58786212aae211d7b578bbd5f4eee321435b1dfa8580749bc795f0c2877b23c +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..0f5959400473a657e88c84ee9a6356c4bf95aa73 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca22f2945b7c5d0e7dcf0ba6bcad468f5a7e60e5ad079d663797c001166fc7f9 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7b57522b6c6ff40011d05f5c340fe6b6e0a2eb7b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774a2f29e2e687db57bbf73f525217559147f289ab18852b086ba29905049ed7 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..d6702f04cf7b27409e409b593680b2a80dce3e02 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_22_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30b358a69d79f451d5868c4e84117d67a8cc0cb3d58b8c4a3dfafb0d134cde3 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..2d6cfdc2e81a7c148393e0d56281e7ca1f6ccf10 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48382dbfd7638ed0b4ae9efa05e873fdc0bf945921dde55afbcfbf51fcc4efcf +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..5827818a3ac0750ae2e7e4790b6250fc321cfb3f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98effbb7242b0c36a7af7453b59e293baedfffbe43ae703affa8f7db6c8d8a58 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..1bb05f7e72f3d7ddb8116dfbf51fde120d873cf1 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ab91beccbd5a75ff7a1ae04f39e842b58e138fb8b31199659e1f7b3030aaee +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7cf69dd6647e009397ae498513220e4d2cd640b3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11761f933ffa146c3907301466cde0665cc48ae51077a3eb12e656deec5d92fb +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..2870dd8fc996d9f566c5088a68f36b020c0bed81 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d895590e6284c2dd1405d2240c8ad6aae1c796cde290546a82bdebe8473f38 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..6365c3e119b1e9945c4582e97ec92d4f4ce512e2 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70796b985a0fe19a5671f4d64ec6528d5214d53daf10774ff79f69b810dc1cc +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..d9b9dd3425bb5977b10865486c86e061404d3581 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120320421f014c8a7096139661e69386ad52a30f18cf8459ddb48296329f4d1d +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..86ac71da37870984aedd298c6990b4115c9a3145 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faac08716671b206a9d8d63223b0b5e0f1299a5921028c3609f797946b0199b0 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..a76fee3d266d08df1c9bec133863adf22995d19d --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af1a304248edd39a9d41760fd0f5d5029c19e8b9c30b62f1b59b6c55fe0c3d5d +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..ccf5b71d38b7dc60f0954aa2a4446cc97fa8c00e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6f64ab4d2a764feb0eb837bbcf1dae21256f55e43d65f36904b9a22e2d047a +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b1956c1282de770289182b9878e14622592f8570 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de1f47f2dd202d609358e36d4c4722c2a45175f693e9ed4833f6d1a9770d474a +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..191ea6a59658781ac4afc562d65bf899dca18260 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bfbc184af548b8befda9ea2005aa24d8ab6d352e4d58ab89bd7cffcd95751f2 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..05db9d5be7060963c702e1f704bee97bee80574f --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab246089cf4d5420fb6adf6c77e78ea8f9702d5119d7227dab8eed6eba0e811 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b10d78a5a7a02c67f672951abc9879bb274dd025 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7d7b634cc4ede5ec9fa0b32b77697ae3929682ecefc77b09660063b1e5b76b +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..14bb49af69ba1c4b3377a5f2a9758ca00cf2e2e8 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ce1b399351e9657477c25670f61498d8e08279888235a85d7455eb8220d596 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..2699081c585d8b5dc09f75b297c6d10cfcf3b62c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b37c8f5ea9f97f53f50e136c28c7483193688af30a43cbd5577448f9c5b3d19 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4f9de3be71c5ec1dfd99d8cd8230838e8d6845cf --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222a87faa874fbea24fdabacff0bb11c2e89869abc86fec0b09f5c556c061112 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..6d904581139e6766f504d30cb74e5c124c6e3a47 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54e789ca818fdd5b21798dc183dc4f87e64fb6ed79308636c7c88b68137ff18 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..2405fb909639a87f03a8c42137efd135508abe4a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9461c80697b88fbab7366081bf976875bdc01e1e28a4696fa66302c031ba7c +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..426c573d753358b912a19ce138c988544250d6eb --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e1883be31668ba4dea2a96bb00286e7812c2309c109fefc110faf0d2ac552d +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..32a349aa28770628b71714ec71b121f4c09f0c9e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5630e94dff4dc0735ea110949ff69247ffcc06d9db22a336eb76c7cc270c6755 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..a0cc2d8f129ec44d37cde1247a6f8559a5855983 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af48ab912d635a24d9d936c80c234ce23690a5809a5cdae9f73d625f1de0db5a +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..f253155abcfa6a862513e3df89a18621984ec02a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e57a483695ee7ef2a54fbe25d012e1543f58baa9bf4cd4da4415f58118a02dc +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..656446e6e32aa05bd912731ddbd3dc3e5acb3644 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b62e97a66895a842e89cc1253b6112b25fc1fc1d60e244e46d11078c67e5074 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..c52f1eb64000b65778e3c168cbe9fb3f0dad6a0b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd1cfb7f16776c3f4fa43a8ca87826059ab82022e3fae154177d351ba60a7aa +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..487e8608df5e531d5f29a9e76f665be4e92d1dec --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f989aaae9b5c070f1c5236956b2a7a0c0821b0355b6556e2cf0dcc9f879a6117 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..64ab74564c53d85d96e43fa5097e0f8e3c61fc0a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8b0014cbcfbaf7cb3eb956b4d7e17a2e92882759775dd6c7b4229fde725b8a +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9d4c24212fb1458e2cb29fe61010d94d8e88de28 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4910406d2aff7bd205f58fcf484f9abd4b7efb7bb2a5740d5afc071e55cd2711 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..f64db1016fda99375b29d73a582d0c68dd87f27d --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ad59ea3824607dc9e43bff8520d426ea05aed8b0fc805fc0db78d4c3fb136c +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6d18284006e739ef206049225cc91908ce5c340e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_2_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6acd236a009c2c646537035d07ed56d1f1a7e4bfd8e9b83a2e6c6ecf7c531bc8 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..f6b0efc1f665166bc291afa7428486962bc25bf9 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18ab3f5ec39d694360a2d4d8ab04b87ed627eec5c8106e410b646c7e54ce0f2 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b1149a040f4608797494122c589687e6b82c302c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c94a8af543524fc2d9d45ab7151b7c7f3dd90a4ec01a55fcf8b76ec0fb5b5a5 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..52e922f3080b72069141255de73cc9d0e12aee8a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57341d0892f69622a981437f3b4b369b161ba0b89313a6f7a6594e9bb69b7e54 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..d355acc9b8fb5d0881e6fc107d99dd57fafa4479 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1d0309fa2fb63c3bfb39366553febdc29122a41f2d64b24af8f4e3ec45f1d0 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7ac8274054aee3e444fc610882d2910adc1a60d6 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd3b8fb93a12a2e9411ec693f5e761da80b09734ca758f2ddd06cb83365677bb +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..1027440f4b5a223a6414b3502631501acaece820 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61210f097448039ecc43b0db4882c87565ed5f5e099493253b6bd6432b0116b4 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..0e739836902e3c4990564f64891377a766072ae9 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc34d110524c3c49e75dcd699faae93ac1851bd275c06e68ca26f519441a0f1 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..6add3d38b2d1d9a1e8a164f079cf0c1ef62ac239 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082dc967b006eda7fdf89cc81ef5d4fdd5048139757404489f58ce0a12f29b36 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..3af0b17afa4cc24021014c7ff42fbebeee037071 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25957c37b037784e3f9a14c3daf0867c44071e33e334906dec1aa685ef85e1f1 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5df012b5d24d77c14cd3ba580b1aa24c6346406e --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5504f389d7257777f1fcc688c1626c90d960a9eb59612f3a6877ec2da722cb8 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..adabdd1c4f8bdb0064685f8d4522a3b0c9feb7bd --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be655bd7503d4742366a2e39fe7e038679a2b4b303a0bd7bd3d9e81eb1a9e347 +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..590ab442633f188dcd6d404689e77aa64d18d88a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19c6b8b0ae5aa69d9cb6b05db7a21daef7e6738961f1b34c2313f49606caa41 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..2b78727150e33bbeadd5cf4033d99ecdba03bc80 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6772860836f452f1220aba03974650463bc389b693c7c48c7ffff26bf12ef19 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b6d315417c3d50166d4b2fb277f44adaaee4f719 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480a23f93eb763fe243109aff55a64ab2e83432703a30608c4cc738f638604a2 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..d4f11fd2cb034cd562044d6dfc4cd58339429c0c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45fbbe8647edf44b099797b2180f6358914ae23627fb690fcbb0d4e0073dc16 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5b737a8d28c18cbcc1b29da860cdea1929072482 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a68e7336430778e9733edc4b4ad39c93bc9cd99b62f04a7ec57c10996cf289d +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..837f7f0e30fc6462d67101f64bcfc27a17acf323 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2794c03d5b53c95e9ea1b9be220d786e81dd151d886e9d1217089fc67b5ae5d +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..2b83049a6bcf2bef96c73f21549f241c75c5b80a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c20790dbd8bffbb8a189bd894d7d9810d4e3a790466265c64ff8215eaf05cf7 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..b0c1f899837fbb5e16f02ffb6b493e9af9cfb0bc --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaf007e93f9e2226d1a782350d7550a3652452775b55a5b8429f1bf1b4cc9f4f +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..1d48a0de7d9fed4927c02c5fcee14b6c97ac973a --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7733f53c7de34f482c3981b14a78b153dad482a9aa7f0bd4242faedcd450b68d +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..0754e6a06c9501de214ada3754502e280a97886c --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842af1be567f6205afde16eabcd079cd6c9785107f8fb61c564603a41c4f9036 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7f6535489ceff0c44c943fe6782be067f31729ad --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946ab4cebd19429bf274a62cfe4903c99103ffe90478a708f97c589a71e5b102 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9cdee53a2f1714ba63faf4d5d106c486b0b05943 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1940e1c5c0ec03694254094fffaa692ef5927a3f3e47872358d279dc42d539b9 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..07e7c7a73ba6bcc7adecb137b602ac7fd2210595 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a9f590a35b581691627817b2942cd6eae33f8341c64adafef9beacf9c33f98 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..dd0f784b73d04762f550a4cfee4abb8d767eb103 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d01e4ee171edbf32521fb4fb4600773ab6652c60971be5f497e97e41f7e911 +size 294912 diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..7421d50cd3ec51e0d2d5226027f6be118ea9bf40 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e2d6a32eee7723183cd0ec5db57e944ca013f74bd3f793bbde4b155d4b5267 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..8747604dc9c7145608711af5295c9f506987dd19 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74bdd5718e7934dc74600e39dcdd7fed829c7070b14b9ea4fc551362340ea040 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..1aae40d0cdbe62563b4c8b6f00647be0b40d86af --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba504a45b037adaa0c85e4690e695cb1f5b8c1733736967f9ec1de592db3519 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..655e5409900fa1913895d7bd21450d8f1756b2c3 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8b52265338818b83fdb5e2be4b1b9bee3712c537b58b483b08b5432e26faa9 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5d5022b9a01f5bcae7fe698e6ca2909f00650dbb --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6f24414e4afc9611a2f38800fa26723f5c774be244dac3330acefc141cba18 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..e72c99e062552317ee04f9c516668be77999740d --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a06fb19530d48fb5833ec36a6d95adb8aabaec4d36d69dee39d9626016c1bc4 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..1340a1447f132fca9c0a97810416590739d77829 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b147df9cdc62e23e654a6d42c51e2c49222a600a0e1755db94b087bd17c3f00a +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..488cd4b04f5025b17af0da67fbaa95314248cbbf --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7331f544dcb979ffed98ebd6aa85fa6d23ac8418aca7ddb071488af55427bb +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..380f74aa09648185dfc441e8f80aaed19098a32b --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7279ec5ff3ecd8c1901dbd95b2945e6c3e0cdc02fb77abecb494ddc9dc220747 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..a616a4e561fdcaa0db512af7695a0b25ebd76774 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bdc5e4cb4528a61b79af8aa0bcfd63b1b893b41892a24c3b58eac85fcc5e88c +size 196608 diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_mlp_down_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_down_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8ee4ee400036ceeb2424a22029a7b85384701bde --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_down_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183f4dc7071f39c281ae75ebc315a3d78341fd143e2af80c38726e17df818d42 +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..207a31b3d1430bede8527fad2e80205be6b8fef6 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6f9eb1d40551d5493bfba795a373cdddfa758a577faac3b98d91b7a39d3c16 +size 6881280 diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..0ed617cb33adfad64ba79a154492eb4216bc6010 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.sign @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c32a9787b099b2afbd50f74d79759336805cffc85ec9ff2137042c5e98c88e +size 1720320 diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..81124911afc358063c30d5707f5afa994e199e66 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8ab1b9f0e51b0f61ccaf818037a7ec75b49d3f7c4104380359dc72d0628279 +size 1179648 diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.planes b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.planes new file mode 100644 index 0000000000000000000000000000000000000000..d175a1cfd9491d000a65bf4f42fad083265e4af5 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.planes @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f677d029bd98318da48b612936891e24e2bb216b76e0ef24651a003d7ecb23b +size 196608 diff --git a/qwen3-4b-log-unary/manifest.json b/qwen3-4b-log-unary/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..7799dac24c46e0ef61a5c7242455c2f52338a709 --- /dev/null +++ b/qwen3-4b-log-unary/manifest.json @@ -0,0 +1,1486 @@ +{ + "unary": { + "model.layers.0.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.0.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.0.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.0.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.0.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.0.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.0.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.1.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.1.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.1.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.1.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.1.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.1.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.1.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.10.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.10.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.10.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.10.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.10.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.10.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.10.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.11.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.11.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.11.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.11.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.11.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.11.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.11.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.12.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.12.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.12.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.12.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.12.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.12.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.12.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.13.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.13.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.13.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.13.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.13.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.13.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.13.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.14.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.14.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.14.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.14.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.14.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.14.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.14.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.15.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.15.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.15.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.15.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.15.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.15.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.2.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.2.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.2.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.2.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.2.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.2.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.2.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.3.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.3.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.3.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.3.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.3.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.3.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.3.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.4.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.4.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.4.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.4.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.4.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.4.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.4.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.5.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.5.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.5.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.5.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.5.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.5.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.5.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.6.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.6.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.6.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.6.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.6.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.6.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.6.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.7.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.7.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.7.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.7.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.7.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.7.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.7.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.8.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.8.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.8.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.8.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.8.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.8.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.8.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.9.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.9.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.9.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.9.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.9.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.9.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.9.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.15.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.16.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.16.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.16.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.16.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.16.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.16.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.16.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.17.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.17.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.17.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.17.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.17.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.17.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.17.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.18.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.18.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.18.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.18.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.18.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.18.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.18.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.19.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.19.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.19.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.19.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.19.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.19.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.19.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.20.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.20.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.20.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.20.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.20.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.20.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.20.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.21.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.21.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.21.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.21.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.21.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.21.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.21.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.22.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.22.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.22.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.22.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.22.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.22.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.22.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.23.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.23.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.23.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.23.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.23.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.23.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.23.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.24.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.24.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.24.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.24.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.24.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.24.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.24.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.25.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.25.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.25.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.25.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.25.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.25.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.25.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.26.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.26.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.26.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.26.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.26.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.26.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.26.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.27.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.27.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.27.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.27.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.27.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.27.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.27.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.28.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.28.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.28.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.28.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.28.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.28.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.28.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.29.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.29.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.29.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.29.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.29.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.29.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.29.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.30.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.30.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.30.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.30.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.30.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.30.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.30.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.31.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.31.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.31.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.31.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.31.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.31.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.31.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.32.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.32.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.32.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.32.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.32.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.32.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.32.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.33.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.33.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.33.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.33.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.33.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.33.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.33.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.34.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.34.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.34.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.34.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.34.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.34.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.34.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.35.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.35.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.35.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.35.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.35.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.35.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.35.mlp.up_proj.weight": [ + 9728, + 2560 + ] + }, + "fp16": { + "model.embed_tokens.weight": [ + 151936, + 2560 + ], + "model.layers.0.input_layernorm.weight": [ + 2560 + ], + "model.layers.0.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.0.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.0.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.1.input_layernorm.weight": [ + 2560 + ], + "model.layers.1.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.1.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.1.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.10.input_layernorm.weight": [ + 2560 + ], + "model.layers.10.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.10.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.10.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.11.input_layernorm.weight": [ + 2560 + ], + "model.layers.11.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.11.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.11.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.12.input_layernorm.weight": [ + 2560 + ], + "model.layers.12.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.12.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.12.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.13.input_layernorm.weight": [ + 2560 + ], + "model.layers.13.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.13.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.13.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.14.input_layernorm.weight": [ + 2560 + ], + "model.layers.14.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.14.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.14.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.15.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.15.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.2.input_layernorm.weight": [ + 2560 + ], + "model.layers.2.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.2.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.2.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.3.input_layernorm.weight": [ + 2560 + ], + "model.layers.3.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.3.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.3.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.4.input_layernorm.weight": [ + 2560 + ], + "model.layers.4.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.4.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.4.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.5.input_layernorm.weight": [ + 2560 + ], + "model.layers.5.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.5.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.5.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.6.input_layernorm.weight": [ + 2560 + ], + "model.layers.6.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.6.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.6.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.7.input_layernorm.weight": [ + 2560 + ], + "model.layers.7.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.7.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.7.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.8.input_layernorm.weight": [ + 2560 + ], + "model.layers.8.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.8.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.8.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.9.input_layernorm.weight": [ + 2560 + ], + "model.layers.9.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.9.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.9.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.15.input_layernorm.weight": [ + 2560 + ], + "model.layers.15.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.16.input_layernorm.weight": [ + 2560 + ], + "model.layers.16.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.16.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.16.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.17.input_layernorm.weight": [ + 2560 + ], + "model.layers.17.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.17.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.17.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.18.input_layernorm.weight": [ + 2560 + ], + "model.layers.18.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.18.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.18.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.19.input_layernorm.weight": [ + 2560 + ], + "model.layers.19.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.19.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.19.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.20.input_layernorm.weight": [ + 2560 + ], + "model.layers.20.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.20.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.20.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.21.input_layernorm.weight": [ + 2560 + ], + "model.layers.21.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.21.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.21.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.22.input_layernorm.weight": [ + 2560 + ], + "model.layers.22.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.22.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.22.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.23.input_layernorm.weight": [ + 2560 + ], + "model.layers.23.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.23.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.23.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.24.input_layernorm.weight": [ + 2560 + ], + "model.layers.24.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.24.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.24.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.25.input_layernorm.weight": [ + 2560 + ], + "model.layers.25.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.25.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.25.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.26.input_layernorm.weight": [ + 2560 + ], + "model.layers.26.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.26.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.26.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.27.input_layernorm.weight": [ + 2560 + ], + "model.layers.27.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.27.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.27.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.28.input_layernorm.weight": [ + 2560 + ], + "model.layers.28.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.28.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.28.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.29.input_layernorm.weight": [ + 2560 + ], + "model.layers.29.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.29.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.29.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.30.input_layernorm.weight": [ + 2560 + ], + "model.layers.30.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.30.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.30.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.31.input_layernorm.weight": [ + 2560 + ], + "model.layers.31.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.31.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.31.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.32.input_layernorm.weight": [ + 2560 + ], + "model.layers.32.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.32.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.32.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.33.input_layernorm.weight": [ + 2560 + ], + "model.layers.33.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.33.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.33.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.34.input_layernorm.weight": [ + 2560 + ], + "model.layers.34.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.34.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.34.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.35.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.35.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.35.input_layernorm.weight": [ + 2560 + ], + "model.layers.35.post_attention_layernorm.weight": [ + 2560 + ], + "model.norm.weight": [ + 2560 + ] + }, + "n_planes": 4, + "n_layers": 36, + "encoding": "log_unary", + "config": { + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 + } +} \ No newline at end of file diff --git a/qwen3-4b-log-unary/model_layers_0_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_0_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7d6a6d361decbfdbfad4cb43a425cca8a93133a7 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..07c6510d8b70cc878df0ea3eb8074894c461cee0 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_0_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_0_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1fea5786832c937d5545490f1eac6531bda9daca Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_0_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_0_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cbdac684d76fdeea954871811038fd6ba065bd94 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_0_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_0_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ddf2325d89c2a0c2969bef8d0733a097cd8ddd92 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_0_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_0_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4d00d29cc66b13f42c01300d45a544d6370be781 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_0_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_0_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0f3f0185f1747efd8160c7efd0bf693d7cfcfa9e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_0_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_10_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_10_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..26c86c34b8d25ee220c434b512c27fdafdbc3dd0 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_10_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_10_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_10_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..02758bc568552a8c6ec08aa9dc432d6711a79e35 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_10_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_10_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_10_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..49e9df6d3cdcc99b85b7f9119218f4fa6116770f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_10_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fdd8e177c5a3146609dea0cf253a42aa647f0345 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_10_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_11_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_11_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f73e4854bc1e99b7b322e9eb0a28634301f6279e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_11_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_11_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9a8d2fc8d1677186637834ecd2feea525ca5e093 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_11_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_11_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8b6fe113eaf570e9394c551d35b58abb5279bd23 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_11_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_11_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..dbf915a35ef7d2e1bd2a93fcf15751d4343f01f6 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_11_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_11_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..543283bf4e4b671bc550d9dc89312b71c4f26212 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_11_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_11_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ad3a6bee2f38e060138cdd27efc5f7227a85eb2c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..eb8c8007afe24438d73d34e5bd7f36bc47ddbf0c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_11_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_11_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c4693d2251b5ca4d237704c45d22f5a620ec1e48 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_11_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_11_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..467799d041f904193423ae90b067fefaa2c87d0c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_11_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_12_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_12_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4e598e8c6795e9fc76ce4038a8c8ed846e0bd8fc Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_12_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_12_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_12_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c72775782c008188e316cf30a6b6fdd14c1f5b5f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_12_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_12_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_12_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0883ab99614ba954f1fb5bc15b471437c5f1891e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_12_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_12_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_12_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..504ad300474a7a2ec467cabc7699bdd8e6f8e8e3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_12_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_12_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_12_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6ae7b44064a2d75915764b846116e35e55dc6d27 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_12_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_12_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_12_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e7378cfc7b3e9a247e43fd223d9d8e1de082e7e3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_12_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_13_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_13_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..beb37eff4467a0b11e54393fb9910a918ea0be11 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_13_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_13_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_13_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..59cc18e033ba99e9a92e8e9fdf061502ced34b2d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_13_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_13_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_13_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a570d2e3c4bfa60d0a58d8fec2e2608a1983dae1 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_13_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_13_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_13_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7fd9a51c4239df92bbc31390adec0caddb7bcc54 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_13_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_13_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_13_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0a4d39af86b88f606943fbebedb9a86207cda8d2 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_13_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_13_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_13_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7702d475facf2cfc79844d7ffbedd067d3b995d1 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_13_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_14_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_14_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..11da36bdf7e6d2f8df7ab1c9445f5190a41c11cf Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_14_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_14_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1f3f541132e522744f2d0c838a0c049241ce02d5 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_14_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_14_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f5a5b9226d4ec8d6a65ce0c448d4c547534b37f7 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_14_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_14_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b081241e31201fc433b5d2d1af7ceb760ded4ac1 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_14_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_14_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a236be9659b02635488cb8616abd215f61833a76 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_14_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_14_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2fffffef45b17a70cebaf5f0ff390a75892c1378 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_14_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_14_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1bf0cc5f99daa76be3392ff19bf1fddfa4ffee87 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_14_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_14_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..935b826adfbaebd335adedee6aa43f4f92254cfc Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_14_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_15_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_15_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1ccef3e33c55d557c20b1aab4784a71dd418cd3a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_15_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_15_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_15_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8995b2d2fb7e37d6cf8f4f6470bd4a21a8aa0528 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_15_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_15_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_15_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e3b334b95d9efcb5153e0b6413ad7f09c1dc9ca7 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_15_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_15_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_15_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..766c14e53d76639c9ffb39ca01a9cd72364ea5c9 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_15_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_15_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_15_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a60423a45cf58e938194dc4f568ebd2bd79949b2 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_15_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_15_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_15_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..724ad34147a4055d8b0ca3ccae35af69e72fa596 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_15_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_16_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_16_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c70b74f08ffeefef3a5a3fa00b106cc036989562 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_16_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_16_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_16_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..573a010455e2f67be22a112a4eec168f66d67615 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_16_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_16_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_16_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b469747ae9b8a06bd2b2144248fadcef1abf4712 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_16_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_16_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_16_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..352d1bd399185c977c572312cc65e1aecaf74066 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_16_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_16_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_16_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f05561e2e62859d73001db0a12f877fc48712afa Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_16_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_16_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_16_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..67d7366dbfc165df30344bdcd31758cf01ffe096 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_16_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_17_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_17_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b9f4c5d9925905870e3f090bf435289836ea1ba4 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_17_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_17_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_17_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ac1b61e002ca09295b263727c5972b496509b4fa Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_17_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_17_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_17_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6224764e3ebfb0a63e9da10f64554f2c24304eb8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_17_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_17_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_17_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..42ab2ab5978bc6d932b93d44f49bb5dbcbf7c719 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_17_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_17_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_17_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bca8aeaad5d56dc0cc7cb633dfa73eba924f784e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_17_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..951ebfb2ee4ec4a992496619f18e5bafa55d2649 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_18_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_18_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_18_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d3896479d9b1f8a80366bb11ba7c7595ef41c974 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_18_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_18_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_18_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..24f4e07cb36a8a3cd989a1e1fa4cf7fc18d5d58b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_18_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_18_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_18_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ee28b2b10a2c7272ca7bef05a72dfe6282306f9c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_18_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_18_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_18_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0e8808fe022ea95dd0ac9c7a36167a82898bd8f3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_18_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..706ea604b6c2594f68654ece9785ba2ec1cc8df3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_18_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_19_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_19_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a67441406b0a13a3b88da5a65454f45ccfbafb53 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_19_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_19_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ba06ff9f6da86358f5024ee2cdd27c2ec7d4b537 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_19_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_19_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7c3c0b70c0ddc4098b407718d6ebe71e660e867d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_19_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_19_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..48a0376387ce163419853ff017fec3d0851c3cd8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bec7c49db3ecca10f25c51f88fc9cb64cd4ba9a4 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_19_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_19_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..38ed5d47cabb6141a61ff5306df221963b384651 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_19_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_19_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..31600e7c68da3eef82b131fd1e87059b700966a8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_19_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_19_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a674e57b0d3783592d70799369343986792ab532 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_19_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_1_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_1_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6cd201175102c40b23a4ee170ebabb7e90a7671f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_1_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_1_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3cfb6c82c2eb41e0827a5110b1aa76722b2ae73a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_1_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_1_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ff46977406529a84c96ca35dc8b2fc98b9ecf17f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_1_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_1_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..696f1e824dcade6562adf0da8bf4dca4bec97fe6 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_1_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_1_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..51ec9e86fbe1df70b55e2a0e382df2ace928af42 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_1_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_1_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ba78fb8ed7135d6aaf3af1d7f6387d4e910ce335 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_1_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_1_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6abeb1202736fbe5c828587783475164da8e814c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_1_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_1_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6aecd99a5a0114da6d9b4d9780754d1910ef0e08 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_1_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_20_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_20_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2aab9a9087b14f3934b1ef6f1e67f035204fab94 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_20_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_20_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_20_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8811c78176d8e1c264777f8ff59a772bff4a5400 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_20_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_20_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_20_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a08f67cbf67e5030ebf024e373aa57a0514dc8b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_20_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_20_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_20_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4c660e5dfc2c81a48706a8dea2d2a301258004e8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_20_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_20_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_20_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..53ee52c055b5838e85d566cc3c60f05ca2a08f72 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_20_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_21_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_21_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ac4647868df95bf7531d43e3abf02caffe081323 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_21_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_21_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_21_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d1a27f09a13ee47144476a083a2ad791bc9f3bef Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_21_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d607fc35e4aaea650d535309245624b2627bc701 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_21_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_21_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_21_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c7073f3a34e086f04bd24f34447b717ccd0eb0c5 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_21_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_21_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_21_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f7c37448aa568f7d5072f87a41ae603361500068 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_21_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_22_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_22_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..62961011519284ad7ef0ed01c6fd14d6b15b8d69 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..940369ff5256d57fd07983b88be9a19f0de0a5c8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_22_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_22_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3605aac990e6a2b83c209737024d39b3544f0298 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_22_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_22_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..77ea4121ce19ebb37bf749bdb6c8ad56ad5a4a22 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_22_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_22_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..348522603383999a2c6710946b0d688434a25f12 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_22_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_22_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b928f52858f39b005286bf4aebb42e827717e90b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_22_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_22_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..754f5c3ce4c04b6aedb4c4b2ab7d9bd1770855b6 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_22_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_23_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_23_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..685713efcd07f419a52762fc6176a4ea4916462c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_23_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_23_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_23_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..58fcd3512614649e284cd0959a81005fe19cee9b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_23_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_23_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_23_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..edd1ee750c5b58691dffc8d61779d407e78ab34f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_23_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_23_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_23_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7fd7ed66bf2f9531dcc4d402dc0d29c127d00833 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_23_self_attn_k_norm_weight.fp16 @@ -0,0 +1 @@ +=>@=(=.?`??=:=8=:>=P:>>@h@>>h;X@@@@@?p>C@@X@@C@8@0@x@=P@X@ @?@`G@?BA@@@@@@X@@h@@ AA?2`9=)8>`+(=0=?(?0@P@(?X?h@@?@<(=(@H@x?(?>>?P@x@@5@@@@(@4h?@@0@@H@p@@@(@=x@?hA`@pABAx@@@`@`@@@@@ \ No newline at end of file diff --git a/qwen3-4b-log-unary/model_layers_23_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_23_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9d075c188b9c0b923e8a0ee666b2d3a4ddd79dad Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_23_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_23_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_23_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d4cebf0293c74d27eee9ea978a9f2b37f8ad6440 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_23_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_23_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_23_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..47c9cff36235dfca060addc5dee19b826b90816e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_23_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_24_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_24_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..99d2344b99c7d3ba3ba3ba3a6789dab7951b4c48 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..60c042c5b0862489b8a648b0de54c6c1a1ff0694 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_24_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_24_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0412a21ab149581679de6bc4fa643b8549f57bea Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_24_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_24_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a79973a802bc625916935d47702ae0c767624b2c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_24_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_24_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a336450282cc5cfefd6ed6c67ab49db2a9d3624f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_24_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_24_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a6bd6fd560b4dad44974a4bc5200d84742868aab Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_24_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_24_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f5f6c18bc501e4b673e235b19294a07b5fea6902 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_24_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_24_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ed0d3acc153fb3f9f953b8aee47c362adab5ed73 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_24_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_24_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..02ebe2ff752a014b8760b672d18d62cfb98e6471 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_24_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_25_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_25_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c3b883c85234fbfda9408486c19136177f7a8179 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_25_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_25_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c57919dd83fca240c30304f90782e07b2eff90ad Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_25_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_25_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..16901d61dbaecd8d6cd7e0a2e44e16326ffa6680 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_25_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_25_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..463a7b6f7d23c0fa7b1df9b5fbd15808f90b6f69 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_25_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a25d8735a35f2b0380b40819df02211332d6d368 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4cbe15d446964beb821078e57c865106f654ea5a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_25_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..57399fb5c7bc44ed8718e5df0cfd98d4a59c78a3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_25_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..59fd8bffaa290247c5719a6d865d5698ea39f2b1 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7f440b6650f55939b536ed4e3e07e05011de948c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_25_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..86e7505287ee79314cbeb5b2acecee5aec14859e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_26_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_26_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c1d45b35ddc1fdca14fcfb2be1568938463cf70a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_26_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_26_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8b5351749cde7cbe8e766144e43738c05214a7b9 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_26_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_26_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..69c77a7b3b4650c3ed7a118daa61a311e171dd2c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cc66bfe3ed3d758cf7554eeaa29d12607933cc0e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_26_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_26_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7abdcbe867f25606fd2522855c0c75352a562862 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_26_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_26_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2c78c5c88a02545e62835362d020caf4a513e24e Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_26_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_26_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c0acff50b7a639d85306b655a55a32fb5894aa2d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_26_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_27_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_27_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..476a87d7a409eccc97de9c91f423371ed0ba4d1d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_27_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_27_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bb79051c90139a2215534b4946ff947c670a0bde Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_27_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_27_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..df3c1bf195c9a8f95ec6f21060a74a44829eae00 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_27_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_27_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..cfb8971084397f0880a1bdf1f2c254a021dba92c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_27_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_27_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c5b854db5ab51c3b337c4759ec972e70bd3c9ab1 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_27_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_27_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..94ff1af49f83f42c6a19802a1ace61ae2d4bfbc1 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_27_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_27_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..71ef112e50cd976142a4e44dd2fa292175e8a394 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_27_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0a8f720b8d66548f632e185aa80615018c810961 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_28_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_28_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_28_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c9160dd2efaad195039cd73bde743aa826a7eb74 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_28_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_28_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_28_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f23764e023fda3642110cec97866208f90479f7a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_28_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_28_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_28_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a0ec838b134222936aa4a98cd28de97e9d1ce8b1 --- /dev/null +++ b/qwen3-4b-log-unary/model_layers_28_self_attn_q_norm_weight.fp16 @@ -0,0 +1 @@ + >@>0>>>h=>?>=> >X??P>>8?? ?>p@P?H?>>=(?h??h@??Cx?@h? ????B?(??P?8?>-X>p>@>@>h>>>8?0>X>=8>8>?P>p$(@<>x?h?>? ??>>>>p?>>=>?p?????>=H?h? 8h?@>?@?>?:(??`?p?>>E>>P>H?>? >@>>>>0>@? \ No newline at end of file diff --git a/qwen3-4b-log-unary/model_layers_29_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_29_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..52dd998a45bb36020cf203f8030f7bc1dee706b2 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_29_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_29_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d703bb82504f382b954d1629ce3947993e05c5ca Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_29_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c230b4dd3dc3a2385a8ca144dcbbbf3f6656b332 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1b8745c870b6d15cd3544c32348affd6a85b9b0b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_29_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..51aeb3919eb4ab65b89d1cde6a79c1761e6fcae5 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_29_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9167a6d95459f03f8c698064620b7c4d78b06296 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..56ad177e76b9f8a7f0ef234525ad0d3c26dabb96 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_29_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_2_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_2_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..52fba755b6dba7d9499687ce8c6ed48409dcc802 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_2_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_2_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ae42dcb182b17df32f7a690bd29c0e2a29bac4e7 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_2_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_2_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..caa6ce385b5f79576ba426ddab05bc1c46aa5519 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_2_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_2_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3c09b1c5dafea8caa581b98fa2cb6aa2a5d79386 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0f9c8ed0e4e8a748399278a81a83bcb4c80d354f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_2_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_2_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a2375f141a0dbb68bf7ac76f2464ca2375d7e11a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_2_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_2_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7ba8df72ffdc76e22bf6f9c28d5f20ab6eda1ddf Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_2_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_2_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e006d18525117bf7f746414a3d20b76954228672 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_2_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9bfcbfcdbf0566c9e87f1c83c439e992d381d9c6 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_30_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_30_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_30_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c2940d94f0e770bd352e87ef61e715e95cfc1ded Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_30_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_30_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_30_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3a2ba9ddc15f2fb1f59fdb586e46c2227d81fef3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_30_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_30_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_30_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8bfefacdd0f307b91ade4e6ff79076ddaef6a8b8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_30_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_30_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_30_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f447b5a5c84bd6e0a09d2720ac7519aed02b8687 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_30_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_31_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_31_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4fe8cd33e4fa44ae996077beb1d68e5cf53eefc6 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_31_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_31_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e48203045e3342e57d8328fe517dcc39eed17ebc Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_31_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_31_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9a000b83c63ed33e604431c09f93d734de855994 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_31_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_31_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ac52a7eb7ff5a36cc7ba29362d85815128c8cad9 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4cb8d6d4dde12742f3a67d8031737e787d1136e4 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_31_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_31_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e35439f16eef8b627e7f450444fb5f684908b01c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_31_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_31_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5d92b235223f457d6749995796b4dbd224273d76 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_31_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_31_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6cff78a2f5ce00b5cb32bc61f1fda856bfbf0bc2 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_31_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_31_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a6512b732ed404cd5833dc3b2ad680930847dc53 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_31_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_32_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_32_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6f84265fb39beb01bb0ef18d4e6512532f4169ea Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..036f73bfc42801c0e7d5e3a5fa5ec01f962e88be Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7f9265e921426f5cd326e31565049b5f6cc78f2c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_32_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_32_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a26039bc9a2cd8822e89dae8896dad42cd187920 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_32_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_32_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..40d444def0a89ca19a8430068f9df2283a6df0bb Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_32_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_32_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..68de0d2f3413181560e756a02cc80d660038a5fc Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_32_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_32_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..67f243b504a93bfa8785786d840f880f144c95b5 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_32_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_33_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_33_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..546ae40bed2482ee99f14f49bef83da9e88fe2c0 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_33_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_33_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..20bc8a0f6e2a0b948e81116aae67773998d3b56b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_33_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_33_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..fd384281bdda70a6e9e8cd505d2acc5f2614020f Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_33_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_33_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8afc4c6cbeaec8ad2720a2327ded888b3c1e9f4c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_33_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_33_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..fa9a71517e9feec0697a24e10277e1f8421720d4 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..800629da2dacd0cf8dceec160ac33612a89ab084 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_33_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_33_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9cd3676f5be2f759342f15648cd525ee7fe193dc Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_33_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_34_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_34_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0e7634bd0aca7c3b3f4a2d40565130711ad298f5 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_34_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_34_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e02d966f990ed6a544191b1266d69fbaf830c518 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_34_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_34_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8d108355318d466d183957adfa48ab05dfba8d52 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_34_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_34_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..617190809e5489a3aac619a27247945faadac639 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_34_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_34_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c1d9f15d6869b8dce2ea39822b2dad73e33caf40 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_34_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_34_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8f05f363141cf1d049ec9ecced011934b334e94b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_34_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_34_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..196485dd8dac09585947d1b29f9b48585c641edc Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_34_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_35_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_35_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ff0012f22d441c988180c4597747a740641a163a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_35_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_35_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7a25f5c26bfcc04b1e0298a2578fbc944d427aa9 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b97849e9734b33e3ed0d11d98ca03b4fe160bf5c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_35_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_35_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..db93efddc4cc82dc5a9c5d8bc065e57c9720e85d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_35_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_35_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d12fd5fd41d9bcee8eb9338d7fccf3adec55a457 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_35_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_35_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f5579fe0c9bd3094712608109a891c45eddbcedf Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2590cad0d3dad0755c6ee8cdf4438abfd8c9fa38 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_35_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4c27f6c34a1515c93c908aa903d534ddf5dde914 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_3_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_3_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_3_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cc53b06faf5ae4c917c56faa8270202490aab141 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_3_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_3_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_3_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f73a3e2b96880ee0c0664aea172b965d4455aae8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_3_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_3_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_3_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..50ba44e8887376265cb4df8a761bd3c327395059 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_3_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_4_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_4_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e44ae9067c229054114f6d04946dc6981c4df6f8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_4_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_4_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..376096a58b3334c478e3d979fc7d11676a7e4c98 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_4_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_4_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..949e994e527b89563fdeea0b2b13abf505e47696 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_4_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_4_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8c2d377e2a315e96ae70a9c3aa305e5ce300fee8 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_4_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_4_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6debe817ce6410c0d06405cdbfd9835a72fee41b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_4_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_4_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..274e83ecc4e8da9cf20855616b39cb046e776c11 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_4_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_4_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7bc276757d1ee76e8673ed48dca3dce47430c683 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_4_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_5_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_5_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6aba1869e49e06ab5c6a49f3d47fd2db4e9f1900 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_5_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_5_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_5_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ab4e265e252cf283ef425dc369498144d1537244 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_5_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1097ca80c6fd285523143792c92f2d8bbed685b7 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_5_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_5_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_5_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0eae8d8e1f57232441920f368db466e037d10b3b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_5_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_5_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_5_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f8727032d79fea0119c084cd0f28458fea2abedb Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_5_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_6_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_6_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..db1a79d6fa9a1d7b2b8328cdc10d034daa59eb48 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_6_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_6_mlp_down_proj_weight.scales b/qwen3-4b-log-unary/model_layers_6_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c52f6caff70d3f1722d4685725b4910dd410b928 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_6_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_6_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_6_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..00f72a968abacce22e61db59f695e00626cc4e23 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_6_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..34b7d81cbebcfaed8f92403e523d259c6fd1889d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_6_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1bc56f739681df735250a0965d233fbb880b4b59 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_6_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_6_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_6_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a423002923bd135cd1a9ace60b58c9df86ab862b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_6_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_7_mlp_gate_proj_weight.scales b/qwen3-4b-log-unary/model_layers_7_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6505cc9a075ff75253116df666d1accd9ad29b06 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_7_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_7_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_7_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..483977c89d6c4d6bbd35dc37610e9b6cc9ab7acd Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_7_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_7_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_7_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6c738ff8cd5e80c551fce760b6449fb255dcb565 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_7_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_7_self_attn_q_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_7_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3b96c8c921f14b44298e503d46ee16392ab3e772 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_7_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_7_self_attn_q_proj_weight.scales b/qwen3-4b-log-unary/model_layers_7_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f6d809349d7020778334a54c8de812c369dc4dcf Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_7_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_8_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_8_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7dfd72b16dd478f920c223f4e937ec132848df58 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_8_mlp_up_proj_weight.scales b/qwen3-4b-log-unary/model_layers_8_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5a9f3a1280f621004769c9ee387ac5c73b0b51bf Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_8_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_8_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..461f3d92a416feda72f3e42103975fada160278c Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_8_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_8_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..128450801eff49c63339396743dfdb6262885a03 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_8_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_8_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6443c7859af98cc059f34c2811e70e8b5c0a2753 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_8_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_8_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fbc5134b7c3dd1491dcafb4aeeba71e5acfa2666 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_8_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_8_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f60faf879c89ba11e5c5de30a43e0439b65801f3 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_8_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_9_input_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_9_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..937efdde2436741ea88c78e4e0b9ba38b1dd6fe0 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_9_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_9_post_attention_layernorm_weight.fp16 b/qwen3-4b-log-unary/model_layers_9_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..be863fa42560e40943b61288efae5f6caf3d9ac0 Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_9_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_9_self_attn_k_norm_weight.fp16 b/qwen3-4b-log-unary/model_layers_9_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed6eb308122b8f6fb0a442d1865b87ded5ad94bf Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_9_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.scales b/qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6776796ac3e530bd8a65fe4634664f456d574b0d Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_9_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_9_self_attn_o_proj_weight.scales b/qwen3-4b-log-unary/model_layers_9_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..58875541ef8810d6c31ab8340912b128e03bea1a Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_9_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_layers_9_self_attn_v_proj_weight.scales b/qwen3-4b-log-unary/model_layers_9_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cd98bfc0f1b51cd79acd3959caaf8b72cbacd14b Binary files /dev/null and b/qwen3-4b-log-unary/model_layers_9_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log-unary/model_norm_weight.fp16 b/qwen3-4b-log-unary/model_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..88950773dc3cd2f63e02b5cb9c817ee99ce237dc Binary files /dev/null and b/qwen3-4b-log-unary/model_norm_weight.fp16 differ diff --git a/qwen3-4b-log-unary/tokenizer_config.json b/qwen3-4b-log-unary/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8dec7d58dda385ce95b469aa2d277ec162168e58 --- /dev/null +++ b/qwen3-4b-log-unary/tokenizer_config.json @@ -0,0 +1,239 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "add_bos_token": false +} \ No newline at end of file