diff --git a/deepseek-r1-1.5b-unary4/manifest.json b/deepseek-r1-1.5b-unary4/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..c4acc78fa65d1689ccc415e7101e3b017a7a2fe7 --- /dev/null +++ b/deepseek-r1-1.5b-unary4/manifest.json @@ -0,0 +1,1221 @@ +{ + "unary": { + "model.layers.0.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.0.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.0.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.0.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.0.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.0.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.0.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.1.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.1.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.1.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.1.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.1.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.1.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.1.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.2.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.2.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.2.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.2.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.2.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.2.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.2.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.3.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.3.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.3.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.3.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.3.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.3.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.3.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.4.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.4.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.4.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.4.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.4.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.4.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.4.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.5.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.5.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.5.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.5.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.5.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.5.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.5.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.6.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.6.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.6.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.6.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.6.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.6.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.6.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.7.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.7.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.7.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.7.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.7.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.7.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.7.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.8.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.8.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.8.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.8.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.8.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.8.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.8.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.9.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.9.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.9.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.9.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.9.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.9.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.9.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.10.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.10.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.10.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.10.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.10.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.10.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.10.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.11.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.11.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.11.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.11.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.11.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.11.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.11.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.12.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.12.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.12.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.12.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.12.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.12.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.12.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.13.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.13.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.13.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.13.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.13.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.13.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.13.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.14.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.14.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.14.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.14.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.14.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.14.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.14.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.15.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.15.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.15.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.15.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.15.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.15.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.15.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.16.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.16.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.16.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.16.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.16.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.16.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.16.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.17.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.17.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.17.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.17.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.17.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.17.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.17.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.18.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.18.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.18.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.18.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.18.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.18.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.18.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.19.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.19.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.19.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.19.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.19.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.19.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.19.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.20.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.20.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.20.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.20.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.20.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.20.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.20.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.21.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.21.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.21.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.21.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.21.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.21.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.21.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.22.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.22.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.22.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.22.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.22.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.22.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.22.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.23.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.23.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.23.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.23.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.23.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.23.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.23.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.24.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.24.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.24.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.24.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.24.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.24.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.24.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.25.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.25.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.25.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.25.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.25.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.25.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.25.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.26.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.26.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.26.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.26.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.26.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.26.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.26.mlp.down_proj.weight": [ + 1536, + 8960 + ], + "model.layers.27.self_attn.q_proj.weight": [ + 1536, + 1536 + ], + "model.layers.27.self_attn.k_proj.weight": [ + 256, + 1536 + ], + "model.layers.27.self_attn.v_proj.weight": [ + 256, + 1536 + ], + "model.layers.27.self_attn.o_proj.weight": [ + 1536, + 1536 + ], + "model.layers.27.mlp.gate_proj.weight": [ + 8960, + 1536 + ], + "model.layers.27.mlp.up_proj.weight": [ + 8960, + 1536 + ], + "model.layers.27.mlp.down_proj.weight": [ + 1536, + 8960 + ] + }, + "fp16": { + "model.embed_tokens.weight": [ + 151936, + 1536 + ], + "model.layers.0.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.0.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.0.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.0.input_layernorm.weight": [ + 1536 + ], + "model.layers.0.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.1.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.1.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.1.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.1.input_layernorm.weight": [ + 1536 + ], + "model.layers.1.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.2.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.2.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.2.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.2.input_layernorm.weight": [ + 1536 + ], + "model.layers.2.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.3.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.3.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.3.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.3.input_layernorm.weight": [ + 1536 + ], + "model.layers.3.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.4.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.4.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.4.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.4.input_layernorm.weight": [ + 1536 + ], + "model.layers.4.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.5.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.5.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.5.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.5.input_layernorm.weight": [ + 1536 + ], + "model.layers.5.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.6.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.6.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.6.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.6.input_layernorm.weight": [ + 1536 + ], + "model.layers.6.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.7.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.7.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.7.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.7.input_layernorm.weight": [ + 1536 + ], + "model.layers.7.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.8.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.8.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.8.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.8.input_layernorm.weight": [ + 1536 + ], + "model.layers.8.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.9.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.9.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.9.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.9.input_layernorm.weight": [ + 1536 + ], + "model.layers.9.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.10.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.10.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.10.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.10.input_layernorm.weight": [ + 1536 + ], + "model.layers.10.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.11.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.11.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.11.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.11.input_layernorm.weight": [ + 1536 + ], + "model.layers.11.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.12.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.12.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.12.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.12.input_layernorm.weight": [ + 1536 + ], + "model.layers.12.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.13.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.13.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.13.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.13.input_layernorm.weight": [ + 1536 + ], + "model.layers.13.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.14.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.14.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.14.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.14.input_layernorm.weight": [ + 1536 + ], + "model.layers.14.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.15.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.15.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.15.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.15.input_layernorm.weight": [ + 1536 + ], + "model.layers.15.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.16.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.16.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.16.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.16.input_layernorm.weight": [ + 1536 + ], + "model.layers.16.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.17.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.17.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.17.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.17.input_layernorm.weight": [ + 1536 + ], + "model.layers.17.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.18.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.18.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.18.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.18.input_layernorm.weight": [ + 1536 + ], + "model.layers.18.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.19.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.19.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.19.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.19.input_layernorm.weight": [ + 1536 + ], + "model.layers.19.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.20.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.20.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.20.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.20.input_layernorm.weight": [ + 1536 + ], + "model.layers.20.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.21.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.21.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.21.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.21.input_layernorm.weight": [ + 1536 + ], + "model.layers.21.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.22.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.22.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.22.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.22.input_layernorm.weight": [ + 1536 + ], + "model.layers.22.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.23.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.23.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.23.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.23.input_layernorm.weight": [ + 1536 + ], + "model.layers.23.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.24.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.24.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.24.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.24.input_layernorm.weight": [ + 1536 + ], + "model.layers.24.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.25.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.25.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.25.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.25.input_layernorm.weight": [ + 1536 + ], + "model.layers.25.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.26.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.26.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.26.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.26.input_layernorm.weight": [ + 1536 + ], + "model.layers.26.post_attention_layernorm.weight": [ + 1536 + ], + "model.layers.27.self_attn.q_proj.bias": [ + 1536 + ], + "model.layers.27.self_attn.k_proj.bias": [ + 256 + ], + "model.layers.27.self_attn.v_proj.bias": [ + 256 + ], + "model.layers.27.input_layernorm.weight": [ + 1536 + ], + "model.layers.27.post_attention_layernorm.weight": [ + 1536 + ], + "model.norm.weight": [ + 1536 + ], + "lm_head.weight": [ + 151936, + 1536 + ] + } +} \ No newline at end of file diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_0_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..233ce643b3f9c528046d3dc5afdddb05313de8c2 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_0_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c5edb2a9ee9ac6fff297b20aaa08f1cc40fa74f7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_0_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4480c766ecab325c11ba7ca372dbd090f47e5b66 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..87a82d9ff51341d58789ded13420f29d793fb7e0 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2c352bc4acbcfe7d8e68b1208ac895328786874d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..006e47383cfb0e4b91fb990c71ca79e92b5d1766 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_0_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..043f4841cdab123d3ffdf032616ebc01070ce28c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f12798d185f3a26d7b58dca998da4ad94d1d0a4c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1cb5745a6d4f8a70c62790e920a7b2438b1df63d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9c9aa7dd8fa9dad35a7b165b0c1241a0675cd4d5 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1f3165213bff8155723b1ad8450afb9bab120399 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..152cbd78b7a9a9ccc5c6880bea5d6937c884fab7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_10_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_11_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..eacc9d59d93c4c2cb731f52dbd3be00600d8ba57 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_11_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..593945fa0a671c367e505da46abdf8e0a833cb44 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_11_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..07dbda66c47ff7da482e86524713d24c5b2007eb Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..56650b7fb5f7b24136370d812abba3683327314f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f802f66e6b93746d41e2fcc658629febf5d95166 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_11_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..784a227c05edf932b52ff6578a745ceec1a60f96 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5cc517bc5a7e48428d253aaeafe5729f7a796f5f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_12_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0fb4e1e8dc6a981526052aec4e271aa819bd9dd0 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..414a66d8c673cc979bd5a898a973593fa31cf455 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..48ddbf6e5ee891fbc08367f4d314ec3ba9aa207c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b4076ccf87841fe1ee1143cf43974272853d3b56 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_12_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b7b14d141b18a6cdfada6daf868548ac073b735d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_13_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..408a4fe4330502360320954c9e4c94350380eafe Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..358791dad919c9996e22a57d0a0bd31d4a12663b Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..533c121fa72662d3055f10007f4a7074e67df1de Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3037fc4357b76a73522638553ed5df0a87ece153 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a3117acfad675e99118bcbf2cc87a0db9725a61b Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4807fec777b7dc5e34562ed636ec818c9e37cb1a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_13_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_14_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e6b9907c05e8517ba0c33c7c2bd8a1ec773f2920 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_14_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..90cf66bf7f23dbce533a9156464500f3ce2edc41 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..76ea4059bdeab2deffabfad4bd11dfd773a92fb5 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fb749c3c3aa1d822e04387763fdd29fa7552b412 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8b1984ac7f912f049cb3614d3f1991e7560e41c0 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7138c277023025b789169b37ebd809c89de6b0be Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..01e324806553a0716da2e3246eefec5d353afa55 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ddd17bd040c43fb89bc3225fddacecf6a4194e9c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_14_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_15_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..14ef0f2d011bfcb50bd99d8abb99dce2bbe26139 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_15_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6a3082c006e0687aaff28354dbf020b4d36b5cde Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5f3f811ab08b222b08bbf35215d38486e6eda690 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..18abf778957958340bf1598dc12ab07038c277eb Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..bedd656cb61e1b16ec719cd5cefd43ad5eea50d2 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fb871b76530450d3108b1fa84c3d2e0d704055c7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..35ede3442b96c602595389e8aa53db0f1d9b7b7e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_15_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bf41be2fded4b656929bb341d58ceb09ebda30a9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b76a6ffb4a768ef38196027184786b8a4bed9cf6 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_16_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1393ff00f96508993f4424eb2b95d6309ff2e68e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a3eca1cc2b95148e02246bdae28b18f26d2124c1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d44cca0dd8ea6a11bbd4a9bbdc6575b50c34f948 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5bfc95ecfd4a6ff76e5c7e64ecb810384de3bc8e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e1330e28f676fcdf827d481bed580bb4e1ae166d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_17_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_18_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e23a2ddccc2a06ebab048ad1c8a62957d82222f1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_18_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed1a9c348a0afdb0628a14416dd97e4f101469b7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ceb6c8052cd154fb367ace8eac711dffacdc01e1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a2c380b46eeef61b2c45053765784a3940b0857a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8aba38e12f9ca74ade5997916a7a3bb25535de37 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_18_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c85df1020528e98453f9b59180f6e54d35846abe Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..08e2349b55db9fec709a2de05cbcf7fab82a081f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0bdb9d0899b645f3933bdeda3559e4d774c6470e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_19_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..04ca4d44cbedf77c6ad428dfb3974df793140ec8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e9bfa94937c80e227e43b2591120196b57471c90 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6a8a3bf2bcbf09c2459e2294a837b1934aca5f13 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0385ee4a6ec2b0e55b92dbd7e5ac8995d8a1d4b4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5aa275924695833039dce3c5f3a86f2a8b24b670 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a20a9baba7e7b5f11f0c545db427ef58d85c276 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4eaac0852823905a8a2e2699fa8685dc25088620 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8679c894b553ce64de5b70a825a0a873fa8e9ca2 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_19_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_1_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..58cfcc62ae22017c2355f5abacc4896ccebb9722 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b350162dc115c0b5a8695c587bcb3953d3a22ae9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_1_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d66603de162864edd6c64e2fa5a308470e3bd81 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c75ebdad2866070c227bef111584780a52937315 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..03141edd00880f0b86b88c6636653c35cb13e52b Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d1ccccc947ea65d650410fb4de629e5c94c0ff9c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b3c2a5838c67fa4a37e84df45a2ca5ca5edf8c05 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_1_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c214f05c1933b5f4e8b6e95556b6a16fe8af113e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a90f60b4b9d12c2e7f7d7d5dbf1ac1a8310cd5f1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a27234f0b9e43970b4fa298341f8815b304ca0b3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d64e0578f68bd40110897ac34978e73d2e461b1a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a39c032c1ced7e0141094db8784a8d2fea70fcf7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e1a165cb2b8233c01da78474d8e7d4efaca539a8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a35407fc9975edf0d5a09310615c08ba7ef873bd Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7c7099ee79fcef9517f82a2ba04b1dbce25e2c35 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_20_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_21_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d5442af4c06c3d73058f742d5dcc76b443362488 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a7c9e1fba988dba752094df6585916a708a4a219 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..65127e11c746e2b6941c20231cdb986d64dd740e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..083440a6f38d676cbade32f6fdc6359a643a4d9c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..edf1e9e0c832699df6b4e48f2cccf8a6052ceec6 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..52070528d3cccbe104ed1b0c462add5e96f45f7f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..81e039cac497040a37965af24aae244a5cf88db4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_21_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_22_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..754f2a4d90debcf3aeb44f2778b04eecfb1374df Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_22_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e5cc5cafbd37ea08214564cf9ab44fc1a3b5e27e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ff1a8c461ee206363ef2e09a6280e56f03f90b28 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..91987a78e2814b3ecb6aa1d8b4b29b5d1f981c04 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7f2b2ebf279ef9735c9b788475890f5baf33c229 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_22_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9ad2d5be87283a63227fbfd432c27d8510182134 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_23_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..82154a9c197746ddd042cfd153b56df23baa21a8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d03c26d3b58413586dc34d6931998c7814b4989d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..43d15ee5f7410a6e40f817bf031986f8e1d6b7a3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2780d7b47d2e7d6937eff680bbee5ff9e3545934 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c7aec4bdf5f00a9f5bd181215748bf29e0a733a9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4ef81943afb80885f5e238cf86434993ea70d8a2 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_23_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_24_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..93e70fde36a686eebd1aec80445a130a6481199c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..161a6cbf15e603f26666b3074176d3e152f01549 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_24_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..079e34a4c59205a8a7a736082a01921b74c20300 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2a5b9aeb2b16259650090bdba110720957538c68 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..53fdf1e76c793f2624656520e93e5c145d29105a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9ee6e812da92a0d995fd40e88d4453bedac4c151 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..dad0478b9205249ed10763b623d54d145ccd9f7c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_24_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ea72a6aba280937fe4cd070e540a02880c9eef35 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a4d10d2a2551103b7dda08086d70834a6751c4b0 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..47663dbb841099526b97d74737cd5d0a279a3079 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_25_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..71643d2c327be4bc02a592d44cd0b39cbfcdbdfd Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4d58902ea7ed176717ccd1f2727ab8dfd533f954 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..142ca0c79cb9915517d2f204fcfc606ac025a0a0 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9a854b00a5400d5b8b93a75e67a156be8ba5b0d0 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_25_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8ba5ea535bc11c5209b0360930b8a9e0346085f4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5e3308b0a65d0d2533b8ee502062bb636c14fac7 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..dd99f004d6c98e411bf9172a770ad5f195453c7e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d914873aef70629ff06a9c508f1ba4afd0209112 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2c70b4025d2fcb30057f94c604e21dcd3d32c7f4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_26_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_27_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..708e7b09ea3bdacf953c85c2016e72a4401f6a04 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..98182fd7dd28ee2df4d2677c9fbed52d4ea2d247 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b88e3c9ccb08c42fa787033fea6fe2175d61c802 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_27_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..556cc9f350ff7645f4505a070ddacdeef1913989 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..244af6b40b0013cbb5a1d1b8d9c28ab07348f08e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..d128a1ded366538fb300a15041d18af63b7cf768 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e1be101f2f87a76552f0d278e8b82d89432c6a1c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..aaef25656c40ebbfa5da1c1f6dfb9f3a7db0db08 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e4fd8e8655a0ca9a1ee2d2fda440d4f046a78b09 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..075911e4210586b9cb0d3a01902ec36c2d905586 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_27_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_2_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d9e2acdd45184532118543b69e8e3ab58c2968b5 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3c2a444bdd963184da296a901f278840932f2146 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ca6a2ba88a59c60ec72ccda164705ea6049e2fa1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ca3b183212f972289494ac6782a809ede3062e30 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d817c1d6d24bfca1c159b43793459dd86d5eefa9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f39d64c7602bb0d84432f2ed20373fc28eda139f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4a88b20d700691ff43f2f3fef8033ba8d903c147 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..54e10b0a1996c9562c5bca8bc222428cc18cb1b3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_2_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..996cceb5d07ee3d8df915bd3fe32eefaf1eee6c3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..12a76d8e649ac23342ca85c8341b63512633c4f9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_3_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ad465cdd9a80e575767bd43668970d6c3d90ba56 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b595fc36fe9aa3fc0a983a8b1c45139997e9ba3c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d468b5c64304d02850919327253389402b9fa30a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7498db3ff36f39cffccb804a1920907a5c97f787 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c314b054b2a3defb00bbd905f639c375ff0e3b46 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..56f78dfa4c2382c6ae97b723eb578835f421efdd Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f6412c8c456dc1a8c48a49f9e951416f24149c1d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_3_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_4_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9759f6063d7b643556860d11f1a24b45ed57fde6 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b4fbe7f472e0b0d562395aa07e606eefd393c646 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c80ab70faefc5922b79ad4895a7e0dc80478d3d3 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..bc0c148982304816c4fa3512b7ca23a3472c72c4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3c3b4c7fedfcffae47b40b55f64da22b10141895 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..06606bd7836ce36691a7ed5fded936bb471b100a Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5e9a333c25f3936527245b9f270b9c80343b135b Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_4_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..bb5207de077fae17de79b0edb154b33dbba46ff6 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7479fbd639108a96305e7332d72066ccec144db9 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1759a6d513e05261322ed619247870a3bd05b36e Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_5_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8e7dbb2d076dba9b00e64dca081de6af929c0449 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e28ef361a0a47daf1f0573fb8b1b6bcef30c9328 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4e0516674c8589adce63faca186bbce066216aab Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_5_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c52252e4fb42ac974622d029a65e122b4468e885 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..80b651956b9dcb811cab8587d15c8bf2cfa4b6a1 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7bd516815d3ec6aa99accaa4ed4c8dfc783a5b85 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..20b71a85bb386857dca347d6e19c883375a87b65 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_6_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_7_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a100651bc62c5a0ec14f2ee20c3b9e4baff985f4 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a1cd5a715e1161d8e1cce29a47343715605fd344 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_down_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5e81b8e86b96793a24da379feb8c1c7b40d95ee2 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0a58c50d6af775511f1561ee3284d4840bf30303 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_7_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4746b01a76a4bc8f7a9d77f39085d4892ab22ce8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..589cae7e3d3b84aaeb145ccadc7e8aae3818aa45 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b75512f1b5c81055d03cccc3681c031d9d14ac34 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c2df4fc5e9b3ddfb3a7439f914ae26db5bb8080f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..aca4529ddb03e8ab54254e4ea6558ff79ffaff7c Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_q_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..14221fa1e5c8786aae8a1af0f58a90713768558f Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..218a05db643cd27cbae9894de50293c82d9f0439 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_7_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5d581fd8f36c9b6993ea7cc69826245b7e1b5a32 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_mlp_up_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_layers_8_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..60c464b1cc41331c65df1bcaf3c73a9090c56ec6 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..49e7b29758b2a97d0f0ae70b1abb28d7714dd94d Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6adb11461a1a90101a9642cb0155a42c1f79c2af Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_k_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2067a120b42fa824f9cbb0a59268474461b6a054 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e04ef83dbf00969a6897ae2d80515558c4a2e370 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_8_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..afe9b185f6dde7bcdc25e36de1bdd693148c6644 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_9_mlp_gate_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6a74f2aaa99276b2ec89b211603daab8cd238dba Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_o_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.scales b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b0dd8d4b605a914272f33d195fc538ada21ae67b Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_layers_9_self_attn_v_proj_weight.scales differ diff --git a/deepseek-r1-1.5b-unary4/model_norm_weight.fp16 b/deepseek-r1-1.5b-unary4/model_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e028ca2722863bfa678baf3b160412fa384f30a8 Binary files /dev/null and b/deepseek-r1-1.5b-unary4/model_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/config.json b/qwen3-4b-log5-unary/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6988f134db143052042f2bd6e0c897bc6a605189 --- /dev/null +++ b/qwen3-4b-log5-unary/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/qwen3-4b-log5-unary/manifest.json b/qwen3-4b-log5-unary/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..362ece95d154843d99ee16106fece0c8b47677f3 --- /dev/null +++ b/qwen3-4b-log5-unary/manifest.json @@ -0,0 +1,1486 @@ +{ + "unary": { + "model.layers.0.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.0.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.0.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.0.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.0.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.0.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.0.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.1.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.1.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.1.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.1.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.1.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.1.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.1.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.10.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.10.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.10.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.10.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.10.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.10.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.10.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.11.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.11.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.11.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.11.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.11.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.11.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.11.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.12.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.12.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.12.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.12.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.12.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.12.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.12.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.13.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.13.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.13.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.13.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.13.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.13.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.13.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.14.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.14.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.14.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.14.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.14.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.14.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.14.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.15.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.15.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.15.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.15.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.15.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.15.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.2.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.2.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.2.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.2.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.2.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.2.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.2.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.3.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.3.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.3.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.3.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.3.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.3.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.3.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.4.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.4.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.4.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.4.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.4.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.4.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.4.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.5.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.5.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.5.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.5.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.5.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.5.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.5.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.6.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.6.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.6.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.6.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.6.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.6.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.6.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.7.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.7.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.7.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.7.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.7.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.7.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.7.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.8.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.8.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.8.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.8.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.8.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.8.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.8.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.9.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.9.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.9.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.9.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.9.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.9.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.9.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.15.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.16.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.16.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.16.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.16.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.16.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.16.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.16.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.17.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.17.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.17.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.17.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.17.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.17.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.17.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.18.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.18.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.18.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.18.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.18.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.18.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.18.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.19.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.19.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.19.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.19.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.19.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.19.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.19.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.20.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.20.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.20.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.20.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.20.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.20.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.20.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.21.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.21.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.21.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.21.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.21.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.21.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.21.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.22.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.22.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.22.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.22.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.22.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.22.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.22.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.23.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.23.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.23.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.23.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.23.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.23.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.23.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.24.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.24.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.24.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.24.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.24.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.24.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.24.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.25.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.25.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.25.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.25.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.25.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.25.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.25.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.26.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.26.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.26.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.26.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.26.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.26.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.26.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.27.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.27.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.27.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.27.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.27.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.27.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.27.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.28.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.28.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.28.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.28.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.28.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.28.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.28.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.29.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.29.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.29.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.29.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.29.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.29.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.29.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.30.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.30.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.30.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.30.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.30.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.30.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.30.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.31.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.31.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.31.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.31.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.31.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.31.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.31.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.32.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.32.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.32.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.32.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.32.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.32.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.32.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.33.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.33.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.33.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.33.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.33.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.33.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.33.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.34.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.34.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.34.mlp.up_proj.weight": [ + 9728, + 2560 + ], + "model.layers.34.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.34.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.34.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.34.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.35.mlp.gate_proj.weight": [ + 9728, + 2560 + ], + "model.layers.35.self_attn.k_proj.weight": [ + 1024, + 2560 + ], + "model.layers.35.self_attn.o_proj.weight": [ + 2560, + 4096 + ], + "model.layers.35.self_attn.q_proj.weight": [ + 4096, + 2560 + ], + "model.layers.35.self_attn.v_proj.weight": [ + 1024, + 2560 + ], + "model.layers.35.mlp.down_proj.weight": [ + 2560, + 9728 + ], + "model.layers.35.mlp.up_proj.weight": [ + 9728, + 2560 + ] + }, + "fp16": { + "model.embed_tokens.weight": [ + 151936, + 2560 + ], + "model.layers.0.input_layernorm.weight": [ + 2560 + ], + "model.layers.0.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.0.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.0.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.1.input_layernorm.weight": [ + 2560 + ], + "model.layers.1.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.1.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.1.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.10.input_layernorm.weight": [ + 2560 + ], + "model.layers.10.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.10.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.10.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.11.input_layernorm.weight": [ + 2560 + ], + "model.layers.11.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.11.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.11.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.12.input_layernorm.weight": [ + 2560 + ], + "model.layers.12.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.12.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.12.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.13.input_layernorm.weight": [ + 2560 + ], + "model.layers.13.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.13.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.13.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.14.input_layernorm.weight": [ + 2560 + ], + "model.layers.14.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.14.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.14.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.15.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.15.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.2.input_layernorm.weight": [ + 2560 + ], + "model.layers.2.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.2.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.2.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.3.input_layernorm.weight": [ + 2560 + ], + "model.layers.3.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.3.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.3.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.4.input_layernorm.weight": [ + 2560 + ], + "model.layers.4.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.4.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.4.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.5.input_layernorm.weight": [ + 2560 + ], + "model.layers.5.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.5.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.5.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.6.input_layernorm.weight": [ + 2560 + ], + "model.layers.6.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.6.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.6.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.7.input_layernorm.weight": [ + 2560 + ], + "model.layers.7.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.7.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.7.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.8.input_layernorm.weight": [ + 2560 + ], + "model.layers.8.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.8.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.8.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.9.input_layernorm.weight": [ + 2560 + ], + "model.layers.9.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.9.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.9.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.15.input_layernorm.weight": [ + 2560 + ], + "model.layers.15.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.16.input_layernorm.weight": [ + 2560 + ], + "model.layers.16.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.16.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.16.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.17.input_layernorm.weight": [ + 2560 + ], + "model.layers.17.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.17.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.17.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.18.input_layernorm.weight": [ + 2560 + ], + "model.layers.18.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.18.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.18.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.19.input_layernorm.weight": [ + 2560 + ], + "model.layers.19.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.19.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.19.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.20.input_layernorm.weight": [ + 2560 + ], + "model.layers.20.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.20.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.20.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.21.input_layernorm.weight": [ + 2560 + ], + "model.layers.21.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.21.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.21.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.22.input_layernorm.weight": [ + 2560 + ], + "model.layers.22.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.22.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.22.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.23.input_layernorm.weight": [ + 2560 + ], + "model.layers.23.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.23.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.23.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.24.input_layernorm.weight": [ + 2560 + ], + "model.layers.24.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.24.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.24.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.25.input_layernorm.weight": [ + 2560 + ], + "model.layers.25.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.25.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.25.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.26.input_layernorm.weight": [ + 2560 + ], + "model.layers.26.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.26.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.26.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.27.input_layernorm.weight": [ + 2560 + ], + "model.layers.27.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.27.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.27.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.28.input_layernorm.weight": [ + 2560 + ], + "model.layers.28.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.28.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.28.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.29.input_layernorm.weight": [ + 2560 + ], + "model.layers.29.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.29.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.29.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.30.input_layernorm.weight": [ + 2560 + ], + "model.layers.30.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.30.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.30.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.31.input_layernorm.weight": [ + 2560 + ], + "model.layers.31.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.31.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.31.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.32.input_layernorm.weight": [ + 2560 + ], + "model.layers.32.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.32.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.32.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.33.input_layernorm.weight": [ + 2560 + ], + "model.layers.33.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.33.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.33.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.34.input_layernorm.weight": [ + 2560 + ], + "model.layers.34.post_attention_layernorm.weight": [ + 2560 + ], + "model.layers.34.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.34.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.35.self_attn.k_norm.weight": [ + 128 + ], + "model.layers.35.self_attn.q_norm.weight": [ + 128 + ], + "model.layers.35.input_layernorm.weight": [ + 2560 + ], + "model.layers.35.post_attention_layernorm.weight": [ + 2560 + ], + "model.norm.weight": [ + 2560 + ] + }, + "n_planes": 5, + "n_layers": 36, + "encoding": "log_unary", + "config": { + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 + } +} \ No newline at end of file diff --git a/qwen3-4b-log5-unary/model_layers_0_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_0_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6e2a7673dfb06e55580e770a64aa3d4a8dc78c11 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_0_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_0_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..63d36b198607918f2c743a723dadb8317d66e1a1 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_0_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_0_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d1fc93585fd0cfe51bd59489d07bb0f8a46476f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_0_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_0_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4cd76c3e982902418053eed408b948c28884ff05 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_0_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_0_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..815785e2efaaa8396aa888f46c78ffea3446d44d Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_0_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_0_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..32841e753c9524c1f25cfd8ca325dbe991953120 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_0_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_0_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..37a119826086863f8bb40c355fc30052f7254fb2 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_0_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_10_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_10_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6a8f990803324ace40d5f252584aa5f8cf227886 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_10_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_10_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_10_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..18951812aa8d141fbee9d5d2a585f34134908401 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_10_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_10_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_10_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..00622175e31990980ac532f71803700d5ee6f3b1 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_10_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_10_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_10_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..02758bc568552a8c6ec08aa9dc432d6711a79e35 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_10_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_10_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_10_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3213b380a0bef9d633e8f69535016a9da6361927 --- /dev/null +++ b/qwen3-4b-log5-unary/model_layers_10_self_attn_k_norm_weight.fp16 @@ -0,0 +1 @@ +è9˜;¸=8ˆ8x=9X=à=8=¸=˜=(>P:8=¨>P>0x=à=p>0>8–(>h>H>è=p@@>Ø<à=ð=XBH=H>8>x9@>ˆ< >X>8>HB >€>x=à@à<¸;HC°H8=>¨?@À?¨B`?ˆ@H>ðAx@@C >H>p<2h<¨=Ø6È?˜=X¸=à:Ð=ˆ=˜=h=à€=À=H>>ð>X>x>è-@?°>h>À@P>h@Ø>È>˜>h)ð>Ð>P@˜<¸@è@Ð<.€@P?ð=>ð=à=H@8=@>è?Ø>hB \ No newline at end of file diff --git a/qwen3-4b-log5-unary/model_layers_10_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_10_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cc20ad91d786a58c45d46f0064e7b167fea80ddb Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_10_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_10_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_10_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1e3f17c40c9f71c12b469a9e533aba593bb94a9b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_10_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_11_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_11_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9cf24b49c317307b62db4e87cc4afc0c173c949c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_11_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_11_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1f261e906400e2757169d4773797b74b464232ab Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_11_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_11_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..543283bf4e4b671bc550d9dc89312b71c4f26212 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_11_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_11_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..73bc00684bb812debb7439079090c55288182f84 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_11_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_11_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c97d0ad3f8c0415fe727e57eb75ed0e78f8f5b2b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_11_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_11_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c4693d2251b5ca4d237704c45d22f5a620ec1e48 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_11_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_11_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5248494634718747ec17909bc5a43ab9b8f145da Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_11_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_12_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_12_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..08ff3d995132b6e1f1780acbec091d84fbc3457c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_12_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_12_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6883f39e5ed2df37167732f3d8c9e33ed196ddbd Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_12_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_12_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0883ab99614ba954f1fb5bc15b471437c5f1891e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_12_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_12_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7066c770608c82ed064f0eb8d8e9aad5562e4581 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_12_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_12_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1d9707b707d155e7708949aeb668947ab349e38d Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_12_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_12_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..608c32166f7068a76f671d73eefd5719d7f570d1 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_12_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_12_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..31dcee4109deb177da4743886179bd7e86d1ce2f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_12_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_13_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_13_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3d9eab9bef0050afccb6912b293ac2ea05c0617f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_13_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_13_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_13_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..eb531a647c1a839bdd6fcb0d69967d7c3fc50325 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_13_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_13_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_13_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3fc2ae47c4cc25e0954b7b70f904246f73c9cd89 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_13_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_13_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_13_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f12e989cbad27cc461337cc73d9cedef55de4bcc Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_13_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_13_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_13_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9e372011589ae1ffd1a824b647bf8473957062c8 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_13_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_14_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_14_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3914860f72752f7159744dbbb6781c51f4e501a0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_14_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_14_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_14_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..66c82fcd6818aa551eb9513ea656a71cbefb1716 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_14_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_14_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_14_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..510ae72d17ab6167b24711c97867936be2b7232e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_14_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_14_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_14_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6270653eab27a1814a14190b01466219246f1d55 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_14_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_15_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_15_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1ccef3e33c55d557c20b1aab4784a71dd418cd3a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_15_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_15_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a2c9d66afe810ecfab5b635eab7fba710202045b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_15_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_15_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fd41230416b43edfed5ccb958232e72cfa5de9c1 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_15_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_15_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9b9666b01a68f8393ef85bb245d91a3d8192a726 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_15_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_15_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c14a84ae0d4231de09e7820fc1709df4a0cb4cd2 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_15_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_15_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3ab9c7d7ff8cf3fe019f18fe69a03d11163e805f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_15_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_15_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..724ad34147a4055d8b0ca3ccae35af69e72fa596 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_15_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_16_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_16_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c70b74f08ffeefef3a5a3fa00b106cc036989562 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_16_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_16_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b90d1d2d374c95d672d1b27b3a4336feaae426f4 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_16_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_16_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a436ba57e27dd9f80fabc809c4c57c1323ec2fb5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_16_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_16_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b44e3191bb3c4363dc3b3bf8ba5d0d3222102438 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_16_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_16_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b469747ae9b8a06bd2b2144248fadcef1abf4712 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_16_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_16_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..926f80083b0ab34002cf1d4ff5f76983fe6946a3 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_16_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_16_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f05561e2e62859d73001db0a12f877fc48712afa Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_16_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_16_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..6d20880dc308832f0742c22a478226b95c172a0e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_16_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_16_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..be767918faf40ecdde9fdd8236d0872ee030fe15 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_16_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_17_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_17_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..653d0b85e2bf4cae8442c2a4c26b39a000860738 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_17_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_17_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2d2993aa5fea7ac0fd960ec1bb421e1396583ae9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_17_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_17_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0dfaf07c2fcc38d53204ad26c8468a355bab10c8 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_17_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_17_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ac1b61e002ca09295b263727c5972b496509b4fa Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_17_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_17_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1265bb024d2ffcc62e78b57ca2a2a06ca0a9743a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_17_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_17_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..eb573804e85a61bc872dbbc2ef441ed6aec5d4c6 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_17_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_17_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a01b0f619d75de99087735d435639f054729c917 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_17_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_17_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..500f603a96b4f15425cdacf186e54e73ede7564e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_17_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_18_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_18_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1d3a157bbd1e7461d60465598c65768f9de35807 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_18_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_18_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e3043f004999f80c68c5dcb51bde8963567ad29a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_18_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_18_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1b34e639855bd979c8cb628e87822401977cc954 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_18_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_18_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..24f4e07cb36a8a3cd989a1e1fa4cf7fc18d5d58b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_18_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_18_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1f812b1b19161107aa9f8acde7011c0113c48e46 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_18_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_18_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1e8b47617559f902c6de4e0845ffebdf0681091e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_18_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_18_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ee28b2b10a2c7272ca7bef05a72dfe6282306f9c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_18_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_18_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2fd94aa25cace105e135da72f6e888e8392264d7 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_18_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_19_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_19_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a67441406b0a13a3b88da5a65454f45ccfbafb53 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_19_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_19_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_19_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fa9cb85d4760f915139426c9585c697952b2351b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_19_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_19_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_19_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2809723ae2ee0f2ef33e8274c6f88628a739e7aa Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_19_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_1_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_1_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..61617c6d4bd406279a0327f4b56d45e743ac9c83 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_1_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_1_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_1_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..dc6ccbc21ce2cbb828145f62865384d85882d4e6 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_1_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_1_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_1_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9df669eb5abd260b22441ce967342a4cb6917fca Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_1_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_1_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_1_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d9f8ef8113d63eb5698d00fb6f3c5d14e6efd20e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_1_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_1_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_1_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f18924923848555594d31f64d397907101c32a86 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_1_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_1_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_1_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a2ff5881c7f474338c875f510151dcb1e508bc46 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_1_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_20_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_20_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a4274965b1448a5a2187a81c288c052ea215035 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_20_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_20_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_20_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..98d90ccd1acc80943d6c0c85ac253b7ac64b9bcb Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_20_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_20_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_20_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a08f67cbf67e5030ebf024e373aa57a0514dc8b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_20_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_20_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_20_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..87c8350bf5a5e0e45544f572a2aef59d93a83de3 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_20_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_20_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_20_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..699fb04cfb4f34f225b557a3a16e63ec9aed5fa5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_20_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_20_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_20_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..39d8e3d74112cc389cb97a0f69447823c6f217e9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_20_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_21_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_21_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..315ffd11267ecd0dbed31b09d918623635c54350 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_21_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_21_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_21_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c79a01362792eb7102a34e4bba0c43319734683e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_21_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_21_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_21_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c7073f3a34e086f04bd24f34447b717ccd0eb0c5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_21_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_21_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_21_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..e6e45390b495dabd4a127ad2c3d015e7ed2a0c52 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_21_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_21_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_21_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0a469012d31d8566c63ae4837127e49b0e8a580c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_21_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_22_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_22_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..62961011519284ad7ef0ed01c6fd14d6b15b8d69 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_22_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_22_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3b2888cbd5cf60fd932d8804f8ecaed4b8f0ec77 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_22_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_22_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..24d42b7fdfa970bdb58594ffe586f642cb6fd091 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_22_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_22_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0fd1a0273e164790bff168519b537b1f06d709b4 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_22_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_22_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..659cb8102a6c1425ed4455c560d60c701280f7ed Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_22_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_22_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..8cfb124726e640073267697c757b1e5ded791c95 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_22_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_22_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d76ee6287059b7d3871ece33ad323bff5e434c2e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_22_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_23_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_23_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..987be2f4e16467cf356f26fad8ed264024163280 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_23_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_23_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_23_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f07377c1ad05c2fea106799ea4ae45967a18a43b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_23_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_23_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_23_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..fe03c51548d2d73ac63770b6d0020d9498f605bf Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_23_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_23_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_23_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..edd1ee750c5b58691dffc8d61779d407e78ab34f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_23_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_23_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_23_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b1e1b15a5abce9a5430bb3fc859447fbed34b7e8 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_23_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_23_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_23_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9d075c188b9c0b923e8a0ee666b2d3a4ddd79dad Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_23_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_24_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_24_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a6b8f4a6bde1ba42a1f69dfd78804e85fce5849c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_24_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_24_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_24_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7b4405061a383d2b11d31d02cdcecec71eaa4c9f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_24_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_24_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_24_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a79973a802bc625916935d47702ae0c767624b2c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_24_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_24_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_24_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a86f5fd1801d3f5e5c63dda3f0a00f0a46ffb580 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_24_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_25_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_25_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c3b883c85234fbfda9408486c19136177f7a8179 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_25_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_25_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_25_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f0f8d739d81a54ebc072967feeee78313730fea4 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_25_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_25_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_25_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ba427e302835c55063c9ad5581c138f547c43a60 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_25_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_25_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_25_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f87b43bd0d12b6d9cd0bbacfb7466f15e6d6bec7 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_25_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_25_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_25_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d0f6c539cf931366bf9661ae6b8a657403d1259f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_25_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_25_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_25_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..829cb8088f4b04325d2ed393b792bf8fec16d702 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_25_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_26_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_26_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3c20cbd596bd68bb3fbab112294d80e5e55dcb5b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_26_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_26_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5c4f1bab6888adefba9141873ad4f4832fb82a10 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_26_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_26_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8b5351749cde7cbe8e766144e43738c05214a7b9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_26_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_26_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1213d1736ddab6ee762112c5bcf456651022d16e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_26_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_26_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..15adbfdb5c5af2071f79564d204c8745d1563dd5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_26_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_26_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7abdcbe867f25606fd2522855c0c75352a562862 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_26_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_26_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9e1877156e8c6626d27da01271096009c122fca9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_26_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_26_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..51541d8f9ba3ca49b1632d4f3f4a78809f4790b9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_26_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_27_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_27_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c8d57bd7bf98f6c93f29918e06c730f89b092af0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_27_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_27_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_27_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b1cfead52deae58a31e1fb5e02624699f50e7df2 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_27_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_27_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_27_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3c2d9dfbaa890d1be18dfed0e86b4bbdd705ba4c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_27_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_27_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_27_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..956cfcbeca5f9807366dd6f359ca11e44ed293e6 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_27_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_28_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_28_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9183bfcea4ec9f1ee29797c89616862e1fdf4c05 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_28_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_28_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_28_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3ad1b015fdcdf96b124d6bda23cd15a1e5f74b34 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_28_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_28_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_28_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d5edd64b253db86bda96e85f0a59ac96f6c6225e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_28_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_28_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_28_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d1d996efc97a257b5c62c9bf46f8d42733eedd80 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_28_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_29_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_29_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e103712921dc7639ba128fa7954d50310365407a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_29_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_29_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..d53ac3ff257cb2bc1fd255b80aea305014f7a6df Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_29_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_29_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..64b565e63c708402a96d38f428004cbcfcbc3f16 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_29_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_29_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c230b4dd3dc3a2385a8ca144dcbbbf3f6656b332 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_29_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_29_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..21d07a2e75e7eebf47d88d2aa15375ce34c09068 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_29_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_29_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..73570b1cf3f864766119ac03ece325ae08df2b39 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_29_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_29_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3de7ae8e8ba2676370928efd36a87a66e66664c0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_29_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_2_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_2_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..5342f80a71fc35eb596abc135546d5f8d4cbc8b1 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_2_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_2_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_2_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cada603faaafd246d4ce24c6dd5300c6f928410d Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_2_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_2_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_2_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0ced821c17527da72ba3087e1d79dfe1042d721c Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_2_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_2_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_2_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..caa6ce385b5f79576ba426ddab05bc1c46aa5519 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_2_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_2_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_2_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3c09b1c5dafea8caa581b98fa2cb6aa2a5d79386 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_2_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_2_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_2_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1c6d3032040098f1b44884ebf1cb00b3646a2138 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_2_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_30_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_30_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..152fcc26ed498d9716f0bc8256fe6501f7caa980 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_30_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_30_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_30_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2693a255a8a96edd20fc2b7e8fe5829c925fe47e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_30_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_30_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_30_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9af0d0170b9b92c59f94a8a5fc4a803d59c6dc85 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_30_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_30_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_30_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c2940d94f0e770bd352e87ef61e715e95cfc1ded Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_30_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_30_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_30_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..795d50adb3cdbb22b241bc465431da4d7306b545 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_30_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_30_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_30_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8806a86c0dbe66f3e1923ab3a0ea79c0fc97ef19 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_30_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_31_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_31_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0cf29560743739361cad3db96b76d543256aee31 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_31_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_31_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_31_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..500a8684e6a02912e7c30895817e94afd0bf4831 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_31_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_32_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_32_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6f84265fb39beb01bb0ef18d4e6512532f4169ea Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_32_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_32_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..291e17d6697e9ee5f81582ae542d80be63777200 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_32_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_32_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..31bf9b9fd0b59cfc43751daaff3537eb992030c6 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_32_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_32_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..cfc79e047f424e2cdaf7aa9d9e2b123a76e65363 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_32_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_32_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..40d444def0a89ca19a8430068f9df2283a6df0bb Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_32_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_32_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..65e5247212667137c46ccb16c402e37dfe5005dd Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_32_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_32_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9f9a0a93a4be30ee4e6781f32298f106ffc5e231 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_32_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_33_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_33_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a0fb3ab433df5b95b28c54619a56cf27f2467f84 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_33_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_33_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_33_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..b37c85f46b7e25656615afc6ba14a27c671c134a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_33_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_33_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_33_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..44d5c48fb1bce244f2c552d62eb059cd5ab60c0e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_33_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_33_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_33_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3413029692f861396aabdd24dff2a81bb0ccb632 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_33_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_33_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_33_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9ed00f78f83a984005e8b57dd60f4569bc384d0f Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_33_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_34_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_34_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c1cedcfa9e6fa5be0323fa902f481a25594136e3 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_34_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_34_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_34_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..3dabe80c8b7ab0931d18fc818c56c90469b4b33b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_34_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_34_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_34_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..75573eba0e6a28ed0d33466a258b2467c31c66e9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_34_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_35_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_35_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ff0012f22d441c988180c4597747a740641a163a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_35_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_35_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_35_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f812b944af4122fce54ed8b57b589b65117b6af2 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_35_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_35_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_35_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d12fd5fd41d9bcee8eb9338d7fccf3adec55a457 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_35_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_35_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_35_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0e68129f2a3cc6f76a16f43f9ede5fd6732c552b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_35_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_35_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_35_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..960463eb3a9f05f6617302642adf59586bd7cabb Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_35_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_35_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_35_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0ce21c0ae632728d1de61f82cbb69e83f67d361a Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_35_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_3_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_3_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..324818264e751c5ef0d6e5390922aaab936a3da5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_3_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_3_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_3_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f7d5a018774e1cd54c4b701b3f0d26c7decb1053 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_3_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_3_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_3_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a3f7e671929e7c73c9c272d716babd3123cf2439 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_3_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_3_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_3_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..2fe5ebb3757c48317995e751288e4d328f58feea Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_3_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_3_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_3_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ef3de572ef35cdd4a0d5ddd88b97546f7f122f99 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_3_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_4_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_4_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..4190c4901441603a43ee29035cdee8b31a061537 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_4_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_4_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_4_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3ddb49073503a9a10cadaeee84d58bb698f337a0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_4_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_4_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_4_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..90620cb01ed256b82c2843f245eb5caa553c15ad Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_4_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_4_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_4_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6dc554e676a6c463ac15e3e6f728f548ca028463 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_4_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_4_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_4_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..dca5981f58969de7c6d9415f5637dd0fd97ae0fe Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_4_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_5_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_5_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6aba1869e49e06ab5c6a49f3d47fd2db4e9f1900 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_5_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_5_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_5_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0beed40daed0426e772d5fd54f1462206dfe4e74 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_5_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_5_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_5_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9678dd5335600702820ad7afdd625e2c277745d6 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_5_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_5_self_attn_o_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_5_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0647b108c513f3010839328f39f6b520464f7548 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_5_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_5_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_5_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5df43c454ab4fe0fef5a3038d5ca059c2a1d9482 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_5_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_5_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_5_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..c443b92385f06ebefae9fa922bde04f688149258 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_5_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_6_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_6_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..db1a79d6fa9a1d7b2b8328cdc10d034daa59eb48 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_6_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_6_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..032c856c0521cb7424e9441f0355d28d9f23f393 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_6_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_6_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..78e9320f1563cdfba9707bf290791cde80efe099 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_6_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_6_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..24727c97db0293939843aff5dec7cc98e0d500c1 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_6_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_6_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..ef9b9805078cbc7e01556b5ad327121eebf84582 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_6_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_6_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..32ce55ab2fa665601bb3480d6394169cb0721a85 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_6_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_6_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..1e07ce141d5fac2808c10908ef078e02e69cf8c5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_6_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_7_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_7_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4058b3fec31f47fecd9af500f4d4739294be11d9 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_7_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_7_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_7_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..16e8a797f4922ef1ef306ccb22c94e044514f017 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_7_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_7_self_attn_q_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_7_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3b96c8c921f14b44298e503d46ee16392ab3e772 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_7_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_8_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_8_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7dfd72b16dd478f920c223f4e937ec132848df58 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_8_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_8_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_8_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..0c2fe469910170957203a64583e487bbcf2d0ff0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_8_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_8_mlp_gate_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_8_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..35112c40c3132d777cef21549ae031ba36055386 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_8_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_8_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_8_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..128450801eff49c63339396743dfdb6262885a03 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_8_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_8_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_8_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..f3aaa8c7ab329d4a8ce293905dd1259d17dd0851 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_8_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_8_self_attn_v_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_8_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..39c958da377b47a1b3c4314ee77c68a012d7d97b Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_8_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_9_input_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_9_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..937efdde2436741ea88c78e4e0b9ba38b1dd6fe0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_9_mlp_down_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_9_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..656ee7610157e7bd687da9cbe8f5c9348bda4bf7 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_9_mlp_up_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_9_mlp_up_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9ceea4faef3af5231a364e29fc2e21579a0fb85e Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_mlp_up_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_9_post_attention_layernorm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_9_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..be863fa42560e40943b61288efae5f6caf3d9ac0 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_9_self_attn_k_norm_weight.fp16 b/qwen3-4b-log5-unary/model_layers_9_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed6eb308122b8f6fb0a442d1865b87ded5ad94bf Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/model_layers_9_self_attn_k_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_9_self_attn_k_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..86955a4f838887512544a9296da5e02e61f722d5 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_self_attn_k_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_layers_9_self_attn_q_proj_weight.scales b/qwen3-4b-log5-unary/model_layers_9_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..dcbeed3e786e35100592346d23ba056e364e09e7 Binary files /dev/null and b/qwen3-4b-log5-unary/model_layers_9_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-log5-unary/model_norm_weight.fp16 b/qwen3-4b-log5-unary/model_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..88950773dc3cd2f63e02b5cb9c817ee99ce237dc Binary files /dev/null and b/qwen3-4b-log5-unary/model_norm_weight.fp16 differ diff --git a/qwen3-4b-log5-unary/tokenizer_config.json b/qwen3-4b-log5-unary/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8dec7d58dda385ce95b469aa2d277ec162168e58 --- /dev/null +++ b/qwen3-4b-log5-unary/tokenizer_config.json @@ -0,0 +1,239 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "add_bos_token": false +} \ No newline at end of file