diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..5301e2e54ef45cdcf9e16394cece4217117c7217 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,255 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_22_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_4_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_20_self_attn_o_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_15_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_1_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_9_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_6_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_3_self_attn_q_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_9_self_attn_o_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_16_self_attn_q_proj_weight.pos filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_15_mlp_gate_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-ternary/model_layers_1_mlp_down_proj_weight.neg filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_0_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_18_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_24_self_attn_k_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_11_self_attn_v_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.signs filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.mags filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.signs filter=lfs diff=lfs merge=lfs -text diff --git a/deepseek-r1-1.5b-gunary/model_layers_0_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_0_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..233ce643b3f9c528046d3dc5afdddb05313de8c2 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_0_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_0_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_0_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4480c766ecab325c11ba7ca372dbd090f47e5b66 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_0_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2c352bc4acbcfe7d8e68b1208ac895328786874d Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..747d43c704cb26f615b746c886dd36115a4ab01a Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..006e47383cfb0e4b91fb990c71ca79e92b5d1766 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_0_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f12798d185f3a26d7b58dca998da4ad94d1d0a4c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..1f3165213bff8155723b1ad8450afb9bab120399 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..8f3316ba755940f16a97099c4f82bd041ec6afa2 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_10_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_11_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_11_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..eacc9d59d93c4c2cb731f52dbd3be00600d8ba57 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_11_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_11_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_11_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..07dbda66c47ff7da482e86524713d24c5b2007eb Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_11_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_11_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_11_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..48cfa83f5840c506b5f7cc47b5d84e6ca78fe969 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_11_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_11_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_11_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..80bd6049f22167066524580d3d1ca4f7eb685a59 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_11_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_12_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_12_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0fb4e1e8dc6a981526052aec4e271aa819bd9dd0 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_12_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..414a66d8c673cc979bd5a898a973593fa31cf455 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..48ddbf6e5ee891fbc08367f4d314ec3ba9aa207c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b4076ccf87841fe1ee1143cf43974272853d3b56 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_12_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_13_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_13_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..408a4fe4330502360320954c9e4c94350380eafe Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_13_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..b4f4d00575c83564f631e4a8be61d88175822fb7 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..358791dad919c9996e22a57d0a0bd31d4a12663b Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3037fc4357b76a73522638553ed5df0a87ece153 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a3117acfad675e99118bcbf2cc87a0db9725a61b Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_13_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_14_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_14_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e6b9907c05e8517ba0c33c7c2bd8a1ec773f2920 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_14_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_14_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_14_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..90cf66bf7f23dbce533a9156464500f3ce2edc41 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_14_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8b1984ac7f912f049cb3614d3f1991e7560e41c0 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..70d895f268ad622636cba3fbe71bc0c9bddc71e6 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ddd17bd040c43fb89bc3225fddacecf6a4194e9c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_14_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_15_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_15_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6a3082c006e0687aaff28354dbf020b4d36b5cde Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_15_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5f3f811ab08b222b08bbf35215d38486e6eda690 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..bedd656cb61e1b16ec719cd5cefd43ad5eea50d2 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..3f7dd7a3c5463add66ef92c6ed66d44dc58f3878 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..35ede3442b96c602595389e8aa53db0f1d9b7b7e Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_15_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d44cca0dd8ea6a11bbd4a9bbdc6575b50c34f948 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..9474288a3f20bb7db05a5371f02683b910863a29 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..5bfc95ecfd4a6ff76e5c7e64ecb810384de3bc8e Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e1330e28f676fcdf827d481bed580bb4e1ae166d Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_17_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_18_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_18_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed1a9c348a0afdb0628a14416dd97e4f101469b7 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_18_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ceb6c8052cd154fb367ace8eac711dffacdc01e1 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..7499eb8782085fda10c70508df9f2cb8f71490b3 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..569c51b8f42ded1d666c92a1ea6a2c236fd15448 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_18_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_19_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..04ca4d44cbedf77c6ad428dfb3974df793140ec8 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e9bfa94937c80e227e43b2591120196b57471c90 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..97d813cd920ed5e711f51fca30464e028116509c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0385ee4a6ec2b0e55b92dbd7e5ac8995d8a1d4b4 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3a20a9baba7e7b5f11f0c545db427ef58d85c276 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..7eb006cd1b562906665916407dc5e4705a5b342e Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..8679c894b553ce64de5b70a825a0a873fa8e9ca2 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_19_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_1_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_1_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..58cfcc62ae22017c2355f5abacc4896ccebb9722 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_1_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_1_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_1_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d66603de162864edd6c64e2fa5a308470e3bd81 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_1_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c75ebdad2866070c227bef111584780a52937315 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b3c2a5838c67fa4a37e84df45a2ca5ca5edf8c05 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..f5ec48e8570271d8cc8936054af4316c983c7ca8 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_1_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..8b757ea1f1a393c99bf3e6b2763a07fdaf626e2d Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e1a165cb2b8233c01da78474d8e7d4efaca539a8 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..28e0bee493684aabdcde42858955b48f346e1814 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7c7099ee79fcef9517f82a2ba04b1dbce25e2c35 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_20_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_21_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_21_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d5442af4c06c3d73058f742d5dcc76b443362488 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_21_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..52070528d3cccbe104ed1b0c462add5e96f45f7f Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..81e039cac497040a37965af24aae244a5cf88db4 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_21_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_22_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_22_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e5cc5cafbd37ea08214564cf9ab44fc1a3b5e27e Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_22_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_22_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_22_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ff1a8c461ee206363ef2e09a6280e56f03f90b28 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_22_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_22_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_22_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..e5a9cec82efbe50a563c89d1a0146b8975c07391 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_22_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..43d15ee5f7410a6e40f817bf031986f8e1d6b7a3 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..feb3ae0bdb304db1d56da1427502329fb5eaf559 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4ef81943afb80885f5e238cf86434993ea70d8a2 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_23_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_24_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_24_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..93e70fde36a686eebd1aec80445a130a6481199c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_24_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_24_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_24_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..079e34a4c59205a8a7a736082a01921b74c20300 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_24_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..2b13677f44e61908555c4e022e85e1124a92a82f Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..fec9c706f3c63b781d6a8d192e5f08a07ff81827 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..dad0478b9205249ed10763b623d54d145ccd9f7c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_24_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_25_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_25_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..71643d2c327be4bc02a592d44cd0b39cbfcdbdfd Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_25_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..ac49c453601c026461d35c82afb3da3f3ed58b25 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..142ca0c79cb9915517d2f204fcfc606ac025a0a0 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..9a854b00a5400d5b8b93a75e67a156be8ba5b0d0 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_25_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_26_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_26_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..dd99f004d6c98e411bf9172a770ad5f195453c7e Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_26_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_26_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_26_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2c70b4025d2fcb30057f94c604e21dcd3d32c7f4 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_26_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_27_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_27_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..708e7b09ea3bdacf953c85c2016e72a4401f6a04 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_27_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_27_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_27_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..556cc9f350ff7645f4505a070ddacdeef1913989 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_27_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..d128a1ded366538fb300a15041d18af63b7cf768 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e4fd8e8655a0ca9a1ee2d2fda440d4f046a78b09 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..075911e4210586b9cb0d3a01902ec36c2d905586 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_27_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_2_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d9e2acdd45184532118543b69e8e3ab58c2968b5 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..c07f1aa7ac171c7b5ef3f3949ccde3268c15bec1 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..ca6a2ba88a59c60ec72ccda164705ea6049e2fa1 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..d817c1d6d24bfca1c159b43793459dd86d5eefa9 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..347c9c5598e4d0113849d00f8c7ec374d2b546b5 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..54e10b0a1996c9562c5bca8bc222428cc18cb1b3 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_2_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_3_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_3_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ad465cdd9a80e575767bd43668970d6c3d90ba56 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_3_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..b595fc36fe9aa3fc0a983a8b1c45139997e9ba3c Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..ad475e865fcb658f9113c3d2c81a4ccb4989dc1a Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..7498db3ff36f39cffccb804a1920907a5c97f787 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_q_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_q_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..56f78dfa4c2382c6ae97b723eb578835f421efdd Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_q_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..f6412c8c456dc1a8c48a49f9e951416f24149c1d Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_3_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_4_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_4_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9759f6063d7b643556860d11f1a24b45ed57fde6 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_4_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_4_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_4_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..bc0c148982304816c4fa3512b7ca23a3472c72c4 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_4_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_5_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_5_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..8e7dbb2d076dba9b00e64dca081de6af929c0449 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_5_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_5_self_attn_v_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_5_self_attn_v_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..4e0516674c8589adce63faca186bbce066216aab Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_5_self_attn_v_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_input_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_7_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a100651bc62c5a0ec14f2ee20c3b9e4baff985f4 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_input_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_7_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4746b01a76a4bc8f7a9d77f39085d4892ab22ce8 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..589cae7e3d3b84aaeb145ccadc7e8aae3818aa45 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..77c6acc4d38d431cf0209613f0673717eb94e9cd Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_weight.sign b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_weight.sign new file mode 100644 index 0000000000000000000000000000000000000000..b75512f1b5c81055d03cccc3681c031d9d14ac34 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_k_proj_weight.sign differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..14221fa1e5c8786aae8a1af0f58a90713768558f Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_v_proj_weight.gscales b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_v_proj_weight.gscales new file mode 100644 index 0000000000000000000000000000000000000000..ae91372705f3951c3dfa56f393f2ee06cfe0bc6a Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_7_self_attn_v_proj_weight.gscales differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_8_post_attention_layernorm_weight.fp16 b/deepseek-r1-1.5b-gunary/model_layers_8_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..60c464b1cc41331c65df1bcaf3c73a9090c56ec6 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_8_post_attention_layernorm_weight.fp16 differ diff --git a/deepseek-r1-1.5b-gunary/model_layers_8_self_attn_v_proj_bias.fp16 b/deepseek-r1-1.5b-gunary/model_layers_8_self_attn_v_proj_bias.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..e04ef83dbf00969a6897ae2d80515558c4a2e370 Binary files /dev/null and b/deepseek-r1-1.5b-gunary/model_layers_8_self_attn_v_proj_bias.fp16 differ diff --git a/deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..06762306a547084b9cc3e2a0a63ed9441a28b00f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cde165986cba3780a2e8b6c73d1cebffa9bdfc1701f41eb65527ccba14550c +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..c75e9f7883463882c7fed4e8c9b3f7d130cfe90e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b49338069c9db0f188c0b6e9b2a9bbb1c3ee94d02d186706f7d297872e2711a7 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..694ff6e11114994cc664ce2c576effcaebc34dbf --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034225e03d6ff2020c24b8fbd7057cfd4048724b42b20406a22e179f96a9e631 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..dda6671e77c6f7d400d2d0d7c9fa48c094b48194 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e135467da5a3fe46e654c33b50b0f7ebe06933cf5e627929a4e83934d8280b5 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..bb9d54ac80c4e53d6e603eb3d0a2dbd8cdd5eee6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1b873a74bbf2b060af3babb15abb9cb8f74d423b17f4ee72c3f147ad46ee4a +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..0189743b69cde7d0a0cdc0bf21233071ab1516c7 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b783fd25f5bc84ab7adff61b840dedbc9f445b1e9b3ba6eef61e0a94e365e6 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_0_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..fe01057ccbbe24743397a9614f1092c53fcd0ff0 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_0_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ca64cd4e1adc89274a6085087f46d4c7c4f50014f0553db9c4f89ab8d8f194 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7a09e94f674f967f120dd2f0984281b08932fc84 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5430535790e7ee20be30386c6fa4478877dc3885dae5ad930fb9c09a1eeedf +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b4f8fcd66d88fb93d716723c04a9b88cd171b476 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b6c1e360a11c97dd0fd08d3e76782c08eaf17dc107a4a22383911bcdcf5c39 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..9a8f223f9fe8fc9869aed75a9dfd41594096bf16 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a44f8cc78acaecf3ce04f9c82ae74c8b981ec9c16ab2fc83b53d970cd02d12 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..8b53660185d604efa08fd9e4b2ce104571bac146 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3ce09697100d5f8f7f48e26f29debb5bf8a5603c415685679ebb692757b6ee +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5d0ebaf4bb66e941e2bd848ee8e8e18fbb6cc197 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8355289ac6533d2836bc04745b381ac670388c423c8395087d74cbcf0f4788 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..1d50e783243a00834ff0fc7e7090d549545a7469 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8d38e33a29916b3a384d78f3154474f9b47aa396ce1f0b4ab1c44518164ce8 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..135cdee0c53c16c0f2d3c21f0f91163430d39661 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6b6c64bf0922af1c4d9b65fb109ce2026dd0d3a4569aee6b7bf6e004096e77 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e4fdbf2d1500d88e97184bedfa89f8e16ba73441 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe42ad280bffdb73f427b1fcb4601b4c336923d9c34223edcbd271ce82dd90b +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_10_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2b3f76d4fd65d7a68296870939955207b668bf64 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_10_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61240e32a61a0d9afd922b8348e720b5a8b97d44200b7db14f40fb50c94fd090 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..87b15e2fb23b16375f41964f37de56d4c71d4074 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0077f5055296f08bb4d3a9e2d56b4cc70a70a032f4b5e2b5a9ac9e10749d13 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..9944ab22f7fe39cad155cb35656fa9ba641b8686 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b07cb8f37f8c3fb5f800b008ee48cc27f089db360aa642d7498bc09b8314e21c +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c6adf867e463f279e56ab52b200855eb321498c6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92c1611822f694db7a265552f30f3574a633c2e14a824d7320158674ff8980ff +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..ad6ad61fcea9489f97b12214fb38093361fedf78 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7a5f3a1c4f5b00d240b49aefc7889546755e6776dec3abec018307942480c7 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..7fe3b44529eaa3babb088f5e16d61060d763b3ce --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ba354261b47b1613ac4966d5ff4fc2c9b5c7bf42c736e741dd5a6652d58b37 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..be772cac113895bd0760b95353794f43df943d50 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d611b70c73fe8e6eba72a5ba82b60fd569b34ceb4e7293ddc306bcbd89e6b31 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3eb143f7f3f2d828a29139c2325ad94f5a4e6296 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabf3790f90d19f61f6347a320fd1a52530936e4e204b3f33e482b3e1c806063 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7b4e2cc6e90ecb5b16e7a48d3c44158983d98520 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60aa5f439af1750b697977171496e97a96c845dd439232a9a82e60e6ddb6d45c +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_11_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..a6d8b56697a3f8ac3f51fd4225d010f3b59780e5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_11_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d6aeac9f105239ba41c9b2f56eb7b744fbdcc99b17bc45f4bd37c983a44749 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..28905e7461280364924dd41e4ccb9422c4d9dba6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d15462d52b33f25bc53f90bd507236ddb9aa73c1d32b5c869c1e34f1b54cb5 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5ac60ec5cbeba9c0589371ab52afe1130066ef55 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529f227fe6dcc789d5e630591de08f934b14595c485810502b41fac46cfe5f9e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2b9b21163fe6fb7c4c9dcfab243b6cdfd1802024 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c836480f99d47c2adf0b21e043d62a6155dee5954123aa932b8476ff5cea8e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5c15b3fef0c70c54cc3ffadca9c2b52020e559c2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe142254f7346f82336c46d936f0f9adfc54f5940b80e9965f49ef48885ec4b +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..26823f636f1d9e2c3f33665b2ff8b45c4d5fcfc9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1457d0402e211b3ace67d73e6557b16c2bb570384bcf80a736957f6602646e90 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..96fa6527f4bd8fcb9b9b2a9db5dc33fcc6038288 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4788f1dfd1f9c2c7a8e55ff73f529625f93e6277b707468a7318e9a7d937c38 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..31a849cf51cb4fd703ff30438375234e84339ce1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ec504425b33b25fc9d3bca15c2a4737a7f12279cdb2f366bce4daa2d09e9cc +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..718d38385a80fdb17109749d972d2896fa839c64 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99c13197a9fd4d1233f95ba7aeaff88e06a93d91c95bb771fda4f56567a3cf2 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e15142f34766d1ef262f740e82de139dfb6c6111 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d61f01f4d2a6ec387c8879a96d1a73f621b6263769cac9d6de03fd12a840732 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_12_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..65f7fc96b60c96d9da3d5f3a2204dfe0560479e5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_12_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd7b29ca36362df6352e7b27acab88ad6e1ae0277d543bf62612f178f39a0d0 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..f4a346515c3b6c7dd77036523cb0770b909dc462 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19b277401a0cbc2723d6fc65f8c1d4765b0a03aeeb190adb422769425e8f5b3 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_13_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..33902a82194b1836feec031e3b67fc4b18b8b532 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658a3e7cfbbbc9864d5a25f76433fea533f8675ec8b145d76dd1f0ee770a0947 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c94fd999650767f7992293ef2a349b0bfdba8b02 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3a68e18d0116a5e6e07db580865acedba0892474a9e9adf5e3919fd028a813 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..85e6554a4b74072f8e135620ce71d54e7847313b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c52f49f7109f6b3b8fa48d71e2d0b1c3a6934eff0e61b5b613e10aaa047e706 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..6fca7ea884a529dc984e3372f67f6bab5e0f1fa5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434e466a5d471e7532a5417ac5f90a8f8570b3641d39a9bc89773c67c3d1f00d +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..971a8e30308802fee9831b19f2bbdb94a5716be0 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed5775ba80abcbb85cc7eb57c3a69c59f74f7cac39dcba368fc6f48abf954cb +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..8560c57126c3c4a029dcb3f975f1d2d59d65aa09 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f327cc707dc55c907d977eb0e379e910794574fb36268752913fc042a383b8a9 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..68521e8c93bf651fd7a829721cbb52c9e02530d2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77567267cb678b75a75436084cd6345d32a1fefcadc4ae5a11f4b7908598bec +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_13_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3b2b5de4561a9a4bd61d7add8f8042c2289d019b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_13_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016308148265f732290e37c3e5912675099bd5ea693006b4127199024590e044 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..375530cd5b3b3195d6e72ba796765a8d16b72020 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02f8784d888aeff388a8065e5a89c62d5e76147bc8b80e49bda42802ff80477 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..bf0ea36f02310adb3bd1bfcf63dd2d398226920f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c83cb5a88b3300dec9bedff5b81c980a47652ae26afc602560199012eb3c77e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..05a7573be43130813e7ed8fa1058b63299d48cfc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ec99f70880b62d193ab90fdc7621c1456ff3446c45453bd6ce8c0025c00e61 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..20aa1bd99aa57e1318906c52120c8c7107d0cff5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71036189bc6c7101b6bebd0447e5613b8373df5f4687f35de2606e06905c73e1 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..48c05f2539d4b2cc0c69ef486ea3f04636f3543b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324a52d578cacede78e72ad5e3f74659eb93bd12d23da469c89c0c3e321ac183 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..8c97b807846c1fda19caac90adcf549bb21e83a2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c55ab3e056864ef5c38cc5b3bd264bd6019b4431fe1a6d2ec5e8477e2a730004 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..ac3dcdb89c95d8aa1233eac555b5301277fc5d83 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd493cd3ef01617b4b4b50d531ae73cdcbd4d74604cc616bd5a7a11572d8ae66 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_14_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e1b60ead923e8f51b06a0bd3d363c3bf884b2411 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_14_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9d9af130ad15dbf7dbbc9d1a7e6a690b899e66430dfe037d44d4d092240daa +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3ac2a89573bb3b2c97c17c21d02faed8670bd365 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012d4bb55047ff41b30557a92fbdccc422d1810599a6667ec72d5af54883beb2 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4a204bf08c49122a190d0ecf100ef2ae4391f1b6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b636e697246faedb0d90b580a65fd0a1ad443ed906f864412a077102ee0a90c0 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..cbad6ec7764a129db93feacc01fefb2807861b91 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2712e665dc3960040a47d496b5eb77d914f6100164e1515b376b2ef5445c4ac5 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..a021fe99fcd880e60bfd8a9d535433272ecfe6d2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac84d89ecdd5a5069a48dccbc6da6f0624626c261b1c36fd69bcacfe7217d561 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e40b0a2a5936e89e70fc01b1ab1ef89eb1753696 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730f2b553a16d219001ac1202c426fa7500d69ec089774b8bcb9b45a87ec26ea +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5acf4372efc6de8624fb150e1933c1664fbff7c7 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df7e83b894e8ee3a61d20edf8fbb549fac349d95e4e52467223e4a240aa94b5 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a1d258b32d9d8f79bf13ccc91baff40f14a06774 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:948dd3df371302f40faa36ab26de7f6b12e1bc3acb9bb0e7b6c11be8ffce8b73 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_15_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2ee021d8b01314da7bd620ad75c46c9fd78e51c9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_15_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2c1e4bf4c9e84bc893e1635dd354cd2ffd2c87dfd74812cf31277a8ff5db3e +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..ed40a24b4e27d95ca0b5f3e46e90f542ef014207 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fee34f5aa311738b3f247d9b6fb86df66b0bddf3a4d076dc4bbb53888dd4702 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..61c3aa9c1b67f6ce93ca777ad68d14bc7a5e3e62 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038c616c02e43af78c6092a7b27f091815d14b393f7fc35318054a75fc88ff86 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..68db0f64d31e402df933296b9f0b756032552aa9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9578a63294e0ce37f60c77926cab85701ef420a7faaa82200671e446ce22168a +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..1a025bda0f8a692b95e8d2fa4d47840428c1ce85 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be67e9efca219a0401ff94e238782e3348cd305f678e8a2dead3e67110a67e17 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..b0db4a1dccf512a67567295255e3a9f1c924821b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e63fd23e4b5551f3fec479e2dd079c70b8215f7963c4e64c0434f3d982c126 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..7e58cdf53c24842c483fe33e200c1183fab6fa8d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e51ac96c6905d9fa6cc385f762ae388e67b1fec7740298b63a4009dd33e226d +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d22d1506766be2b2ca2d5c89b1e907b5f182bd2d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248c756a50c049dea8756c1340a66a306f13e33c7957d847923f389b98228a42 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..1ac0e33cf711ae46dae47102cbd4dac632cf7eb2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91325410c7ea5375d0c8b2e936c612415c16715845b78db0414d7d1c644ea2dc +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..98d270a234a20e9cd20771d295101eaec73317b7 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b813fc3e9787f2d0bc03c73674ad124a10e24382e18b0b29acba670d9f8e33 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_16_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d1f0c811637472eb1912088158344f8b94a44435 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_16_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65c6888e14883624c38ff0dffb370f520c0d36d9fa89a8855b7161bfe60dfb7 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..1324f72132bc20f7c500225bc1bc04981f8dc394 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6ae029c327e02c89d6a1291c264d37b0383752e090d25ff7125d5701c15bca +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d4aef5efc8fa7b5be6ce3811ba6e72d262f526a3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2832e537d503bac316c0eea4761d43dfef86bfd3f8e2264ed738fbfddcb4c4 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..17f04ddb334a495ff21e6635683cea07e3d5e0e2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86319ebaa077ff7db80cb7f75863a4637f5223e17e815842d846ec9beedcb78d +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..88db8c4f3c3d5b7494390a92acfa3c037f6e21cd --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1af0c0db2c406667c5d8b7c93675b9495e909db4604253b8c76f10fc469e8e9 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e5797bd6a6eb15cee6ce7a362787d78e2576fc44 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b9da91c7c9a53e48f563cc732367269eb256403a41029771128d98e1c753a3 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..502f4ce210913a22292f0e37c907e794c85a3308 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de54529e71dc30f4eff70e4a8d625a3543616eb0f023aef2019546cae5e49f30 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..c8412571cdf7c7d7a235d8e4e1e77ac4c5277398 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7057ab1398767efb88fd1695756e64064a78d4fb909c74dde10113e98ea3961 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..1d660d97d4cd3ce350290846c621c654d44b0dd6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c57f807c1768521045dd73c75044877fe39266194469d79914dca388b0f29b9 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..45fad4f208ea11dc81ac90bfa70fd9d856dc0c14 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a554b14fd5746bac41c3ef535010ae89c748931bff48458b26544b80c7e08249 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_17_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d4e9d0908db071e600a99ab707317de05aebb983 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_17_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c02c055680b29f102303556a81e7ab7e058830e722f918317926719ccd6d7e +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..14e4ccc13c96ceca49bd0c3f400f6f8c211525b4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b03652a7621c54e0a614a2ac98df2680596997f80eba92f855d3ebbec962911a +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7bc648b61480d4d8efee98ca25d05cbc8161e131 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc89df5a705e127c12e7632293733fe7834ed756b75e2222e2b0385dcc5c969 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b43bf1c6f5375e5fc050d37d9e6e1cc4387b7bdc --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e353459960d53e338baf42635ab796385259cbe53a6703c2e3d8cf163e74549 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..3a7e4174524872129dd9b1f87493b0021bec5ccf --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da4270e466c8d0be4527cc144f503fb296a5a5707d1049a1f5ebba2fa5cdb10 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..b0f9725b406834a08492d7d164664fa27841b26a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043ea7daa9281049fa820608b120e4c985a3577629f4e72f6054edc745347496 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5331b82d78dc722c1ba8d87cb8e464bd73a96d95 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d66dc68c075cdacfb5011237de39d4b3c74b0074c0ff205783e14fd34279b3f3 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..3eae6b26572f860e4ea119ec70a0a128cc7a7a56 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7809020bf04c489051e22e07494b20557ab1d7b99b6ac74741b55206a6092b4 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..ba57a33af1c106714a357218a1ac0242a6a2eb8f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6098facb6b4c35a29b49336149e35f3dd8a43e92c21930975eb02920d69c4f52 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_18_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..75dc02bcfef599f6d4f183a26be932f924d38d1a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_18_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab36e58091df382c19378b9c80d133d952603ce37e361ff46a68368a25b2ad9 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e746005a35718b5fbdd3f70e8382dce6036b5f44 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fa243b8ee33f015b57f3c9be8545aba89f04a5de91898ab015d350fb606541 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5f9976e907854b947bff403af659f5c132b1ca5b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64fd51c317484412e955621f0cf690d3782e854499888ea1946c7656464b002f +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..20ce039977f8cf40e892bf52753941624109bb55 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0459dbe8cf37b4ad0f54995742779721b980b2c7a01c5ee0132b019f5402a4f6 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e7b344f3748683daf4e1b2e9907d6a9e683ef9a3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9cc6eb8376e97ed8572a93f8ff005098b2b74b2b799821ec950392efcb0c6db +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e77158ae4d623a5f76eccf8377aa15653ec294e6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23de363ab2a99229fb1cb3b018bb1da93cdd2645c3e516f2099c8c4c540376ca +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..58e57818734d459b39ef75b71dff44f7d4023963 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a3c9aeee2e28227b8dfcbdf22bd7a6e7a1ebc110f81cbfdb354043f8f30d71 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..fe59371560baa115b55a7b935e80a4de8d985360 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ac6c40f7e941c95fa70bdb6d6de89acf222c37738fc3e9e1d529c5229ed176 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_19_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..88351ea668a203e192f70ac5416d81d2f6932431 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_19_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c4a124452a48fb634a6645ce7f491dfcee24c1f675c90b658a79d4f30ab38d +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c6485646d1b02d4d93750c64e7332e77edba08d3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36510b213637052552a476309b7dc0bc510f529027343fc8c78b702efda0a002 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6f7be89666c6a71b56ea9d3ab3eb9f366393a8c8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:431585ec6292217d6b8d7f6c56e402c3a5067273ae6f01ff773f6b546c295222 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d7681f640097785a13121b0f39b5bc7b5a51a367 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4cda39ecbdbbadfb7b673916142b686d13542dc8b4afcecbf1198317f307ccc +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e760140a86e30951ea0fa762d0e62e8dcc774115 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a161d226b750496245767146f576d75554387c2831fabca60a236b62c0c1e68 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2931c1d39d8c1c7f718ca386f58c3298a9293fdf --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5f73c61792eb736a617bd0079964a6f45b61f85d003ad65959593c0e07bc87d +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..9f4847a0137c900932f1e1d301c9f75985f2b560 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b1a1df17db7fdae1e516aada63f6b69b3e2f91fb176d9d26a53a0fd9ec91af +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b293b5dea649a053b6197319bce709fcfca64bda --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9aa7d96a563ace2bbdc5035d89b1fa11400496c5ac02c0070791fd7e8e7cc9f +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a626a86fffa107c2a2802ba32636c7d9d750cf0d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bcf90e3fe150f12fb44938fb4e6d4837e2b673b6f2942a018af7d3c0ade3bd5 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_1_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e935426ebb4dce40e78f662227c121d0f9ba91a8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_1_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a37cff92b195553bbfb4f487bfd8394bcedc42c967266e72ddf98a0e55fb9c +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..6cd9bf5c997225ce1671bdc3a1b00da41cdad6fe --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae5d82ca18e4282a1f422bcd4bdac73373e5b2d11876ec22a1ca3a80e88b80a +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..fdea8e225a4ab9e8fd57a96fd4c7d24a08767624 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82eb6254b67e39cb4f75ca2cff5178f7958b2c9933640c8d5a3bbdafbc1ede2e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2b4f79463bb2b595c64037bcacedbe62b9e467f4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07442a99df983c9fac40743de5c9ff1feb2d29cfac6ca3a4d4aae1ecaf51adf1 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4cd99d3feb54c5dc8adbaa8f062bf3159f34f925 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca09eda29f10b54121f931046c3432e47e930285cf2ec0b9ec936f4bf8d9e7ea +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..deb94b30378dc1fff06f2bc0ac80be175f6884e4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241e41a468311d3c442c27a8abe4a67e4c5d106d0ffa5fff2dda727331aebd61 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6d025c3333f5f11bf7d4b61d6753cb635fe4f42f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b4acc9b871a03e3ca9e6aa439b6d57e8364a974a4ad281b8ce01faef153fa4 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3cc2c642b0a92252aed52fd7116ea4002afcdacf --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c737ff4687fb802ee8f133204882c6b50a4ca88d8e0abd2584cdcaf0d5832504 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..29383457a32d467ccad6405323a40734a7f733bb --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114e83b071948f279c185d439730928bdca57d1cfe8cbda51a07651bb09e8f16 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e2144d72989f5c6b7c1ac0f0b7252f431aba7f0c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4a585a32f0371e342ebb4bee0cf81a3cb7d7df8d4febd3f35b0ad8afa95450 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..da8ff2d84e454c6cab1e34957a5c580e080bf640 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea5b5daf1290630c2fe1b2639d23ba492416568bc1f5f9fe4df0bb259d1f5f1 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..8c3bb3bf479a92d4667029d95d67b24b0310f672 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_20_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddba45471e861b67e2ed1cdf00a5e6c5822907130dbaa78f99fc7d21cd50c72c +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..556166d452dcf759b9cb7de78a433d2c3a7e29f5 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c5c223d3469abf946437a1c4647aef057a6a3b959a0ee258395e2dc9f4a672 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c39f07391fd137f35e652948c93e96d490a335d2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295a8122f787f8f471a3d7b4460d19b2fe36605ed9799301cde19e96ba0710dd +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..f00efb51c0274828ebb2f4edcfc0f44f2a57f10e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acadc8064fe5a979ab0f239764a43335dab33aad1b7cf3c645355e0abc0b5e22 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..baf9221961a7393153b94142283d105e5df2c498 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154c898127685a5d1f62f4d132b2ddb359cf897cf0567784b03eff0baa5c31fb +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..9798872450798b53cb76d19d9b789c9368978af6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58786212aae211d7b578bbd5f4eee321435b1dfa8580749bc795f0c2877b23c +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e6aa377589866d43a053227a1668b015e15dc215 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fa7ac820790544c75d101935c32cc68513e49af827369efc5a28f964b36da1a +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..76a6dffe65dd11314ec5764d82aecad61358cf41 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de5f2a6748bfae662d5f29cc9974111ade6445151f4ccf652d60c179f5d44f7 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_21_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..156cd874fa39cce0c044025b1a3907426de2cd96 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_21_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bda7e5e508c33bf79c9774d2bfafdfa6aba7d62fb8aed9167dc4b02369998c3 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7b57522b6c6ff40011d05f5c340fe6b6e0a2eb7b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774a2f29e2e687db57bbf73f525217559147f289ab18852b086ba29905049ed7 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..d8334f2f3038dfd5d0cc1b892c53b1545f0ceb96 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cead535970fe45255a6aace0b589ee94e9ccc3daf2499e7af6e07d56599e3c3 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..2010c1f896a93c1ea88bf58655ab13bc89cd87ec --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab249e22fc4b8be9899010300d5f36dda94452adf321fe4e984351ea2a36210b +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4cbb5ad6e5f6417217bfa1af6ca7c523183bee89 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98cc45f5b31b9cbbd8cbe13eeef8525a34715dfef49bf561f0878465e6199f3e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c138d18054be2f708ec711df464ac65549791557 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880a15d20479df0a4b81d7f69884e7bbd01d431a061fcf4475524c9f8b6cfb4e +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f30d02eb2b7e89e23fd2cc852d6661b271c47b63 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b764bfccb5e0539f4fa64afb493fe6ff8af850a11a30006f037dc58ce187371e +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..1bb05f7e72f3d7ddb8116dfbf51fde120d873cf1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ab91beccbd5a75ff7a1ae04f39e842b58e138fb8b31199659e1f7b3030aaee +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_22_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..51fd6007d32f3c119ce2ba1f7009625441c199e8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_22_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d0c9e0ee81bf05b4a9bfafe04536c2a3355e29e83c98e97c4be405aa8a3b41 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..55c8830c26296c88b4df9987a70e8576e3205bd4 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386eed6f020adf30c70f603ee3a06fcffb87779ffe53e862c22eb065bdb47805 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7cf69dd6647e009397ae498513220e4d2cd640b3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11761f933ffa146c3907301466cde0665cc48ae51077a3eb12e656deec5d92fb +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2870dd8fc996d9f566c5088a68f36b020c0bed81 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d895590e6284c2dd1405d2240c8ad6aae1c796cde290546a82bdebe8473f38 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f5bcbe8d77745c82a3f5437cd4ebd0df04113c7e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf534cd5c5ae753cd5570e0da388bfd59ed38f58715c6e7c0a1182542077b0d +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..33f134083cd0fa3854d858e8afa9366603777a05 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0683d8ec1bf8eb9f5a1df2d9e837cb1ce319e14d49e5082b027c54f17ccf2ff +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5c566d9148dfdbfb8f7c32858aba5473e47ab4ee --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d51c7f64ae56ad3e6daee234a65ee0462febc7bd172bc1d5307f8e37dc05ed +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..31d3f3b7f1bcc3a1b1c08fc262d2453660fb84a0 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdefd0f92b4e58e5c6e3ce260db526d051692773779103061ddf321b9ca42bae +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..55a527bb3c1ecbbd996713b8b7b663867ebe5474 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77a2daacf3897a92ae31cf0ce42214a146ac3839e2a21aa0ebcd531ca44d0a5 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..770ea108b2f19ab9971e8442142b3690eabcdedd --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dbc1b22282650845acd88fda9ea59e18cb168225c4767a399748b924c6b158 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..0ebcbc6a03a0b38051e288c48351634986f0a34a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_23_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a84a087a3c34184a09309c0b21a4bc37b5621c648b79c96925b070e2f898ac85 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f245ede975e3192b6dccb76e34b5c6d92e99c67f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9bdca05b66860b67faba221ffcd9ddbb24a5bd590a130d96068e89161156185 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..0d03529981a9d4414307b2b6dfb36d0e4a5f5f14 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4e7087c7e22838eb1cf18c0ab598b88cd7e7bb97eec319561f72ec77f9cd31 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..4c30e310f6b3358dad28c11cb1308aab96b0e9bd --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cb94128a9af4544cea9be717572cb1b4b702ff4aae798245c8e7dbe9829d52 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..08e7b9ab0826f74243f5c2e603ca4375e9dec3d2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1902e7982921f1cee1d119405a2017b9e63ea63a39fdddb61c3d0dfcd4d566d2 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..06d12eedd0a70cc72a56cfe77b6ca1b88d2d176a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7c31d29a680e2497be66a19aab4d01ac9ec87f09f1a163969668e642da925b +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..a5f2523febdc3fad883409174a0ecdb8985db6de --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7611818441555a8479b3dcaeaf3aa36e43f493ec55e0f25b58ed7fb66404261 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a08ab5cc8dc08690389002eeaec14fa28c01d57a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f162c150955a6eb1b2e7582a717b768fb622d2f80caa5768334a71b942ffbe11 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..b86bb243cc4ea176534eba12ba16b86ba0324f7d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2639dc04465f655b2aee10c1b40a78f672901f2bc61f27e1c8f1654582595525 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_24_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..c479721f0b9605154dded6cd40f964c228ea2e0e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_24_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00682b1b5041ad4b5e6132ac154c80341ab32758096f383d3d528400cc78c6ec +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..85a91004f373f8a888716d769a9a3e96ee07eb8c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91e8af2800c3dcb1a14ccf86f53eb69e8cab2cc3fa3ce70adbd2fd30c07c2d25 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..14bb49af69ba1c4b3377a5f2a9758ca00cf2e2e8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ce1b399351e9657477c25670f61498d8e08279888235a85d7455eb8220d596 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..029e7fd498c6a03a4d77987040c95ae92e84192e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608a40a71477bfe962404ecb6080cb0754af4b511dedef04f552aed16a521e46 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2699081c585d8b5dc09f75b297c6d10cfcf3b62c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b37c8f5ea9f97f53f50e136c28c7483193688af30a43cbd5577448f9c5b3d19 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..869b29ad26d5ca90e78c6e428ab70e1d94e556fa --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29379c93764dda453c0c7b85c02f2d94ae2e355d705494b5b5a29bd53c57520f +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..a1561fc25ab0c8f747a502e9a5d461e444d65de1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3291fbc35435b504be3e41c726627b5fba5a2cf95e3117b5b005aae328e44682 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e57a782950f8082d55801a2c6cb9841624b85d20 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41a8acf50424c766664126dcee6aece665ed6b2c94278b4dde9b9bf1a1d1f00 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..00981cc6002c15e0706ad076d0290e6971602f73 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df33914df14e8b7e5d563e83e3fe6b42fc7ef7c614eebc00779618ce83e04d5d +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f8c4d0e3441d081d2d44ceaab52dfb5d6b268d37 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_25_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c93c39df96c8bb5a6bcd65e3f737448be05a1bbe37e1df279be8f5bbcfd3c5 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4f9de3be71c5ec1dfd99d8cd8230838e8d6845cf --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222a87faa874fbea24fdabacff0bb11c2e89869abc86fec0b09f5c556c061112 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b00e809678bfd70ed5d185b74a9170e3a261fff1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a6037d6b8a6ca110bd1d1b400c20c826614e3c07c149a5deebad55a0407ff2 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..34cb9c5845fcca93bbfda851426bf39a45655e63 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13416bd3258b9f68fdc1764a0ae820e60c18e20fe2df5e8bf99e445bbf3d46ea +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..a65d35f1243d2a05bc5eab991a1fb0bd7e53d14a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c17ae27019c4f2af83030dc6e0f043d5f8e58cd55d7efe2d3cd2ad99bbcbaa +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..67ca84b0c15c07bf2c4aa655281537f3dc9877b0 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757ea9a79e7333e9d3ffdfed17a1b737856303d8239ccccc1f4ebae812957770 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6725398a58da59592c86abd7b7bc5e9cd102ee2b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6eb1cde78520b1d0f41f169ef73551a302396b269a2dfb2fcaef6e50f4391d +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b31b0b92ef9e4f858e719a8fd4db222d63775d2b --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6fe5522255a088ad0b4d95e4acc5df9877a77745703d5d8394d6996444f698e +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..efa6edd3a260dfde31f724e0b24f794570a0edcf --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_26_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64cd45387e12da3c6e36de138ee43d14b376ff1d9e29a5c578423ced5d588610 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..fc423d43412af779c6ef35c3cb1c927e235cb8ea --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62499808b595d1c797caf60ba342552690625cd54e616a55033ed8efd17a2d1c +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..dd59fc4384c09fc059b4a2e44a20aae992e4c6ba --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3c436e498a4ccdf3285b8ff407f6e68b05d1977fc9e053991f430c5f723856 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_27_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f2a20b78508f11db2d506590c44a8cbbf484d949 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_27_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0560436977f811bd152ce1048b8cfbb86a9d38de7c63788a63678f8c46d3fffa +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..c5417a844321188a2cbf1efaa7e9af9620da0ba3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2959ca73da13885c8bd2b97eba55955a11c54af86f4ded2216013ef7283830ac +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..820848ddda008045203361a74a44538600a37ecd --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f76adc67cba3fd8e9a6fc24eafececdf27cdf90a84a9edbef7e1686d7a9bfc +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..6bdb47274afe7b479308ff1e0e08817667ddc79a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0edc31bccea6141e2ade10e4d590449d4adc6791379bfb8a9a3298b650f76ad +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..dedc1ef084d019c2871fabc2d811b83990c74081 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b68e9039d9c4eac2bc530a3c6f30ec6b4bef42514ad763d6426927b5ef6245d +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..d10f37c35d5a1f5357cc4e447316c3e4ac8cabb2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816d513cdeac8dc0605a5808e97cfc152dddd76d3c3ac8451508ba3b140cbf6b +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_2_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..d32dc8db02c17171573ebd5880257e0308dc9af0 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_2_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534507f1a6b77ff9dd24420294b126247b952d4d3eec250b093ff646ddfc5cde +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b3e2b461faff7f20c4227e3c257f38a4976d5b14 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a841f978aef509f46a7d63288e7609b4ec76429a047e0058e99a897c9971f76 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5d82add41ab3145e77b2776f89a496bc68103076 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff2e7d5e2d1eb45e4d6d632bfdd4f4b541720f921e8198927099700e4a6f91b +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..79ae815ede60992f7b7b31c8479bad27ffae0d27 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed6dedbb80c9baa1b0fd9fd803ae3624decb83b512f388a176dcd3f2b31a1b8 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a8a4a34f4627de28681a7ef6baf444341add0356 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71f770399800a7dd1df24cc413f67013848e44dd3c181d48d80aaa11dc45202a +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b50a83fc4e91d6f06864b8089099b15c0e49c587 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f109c99b6772a1e109af05082a1ae47a7913106c720c2c53abb50d0cafbbcf +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..12a90022c8521c3b7ca0b9c273dc1a34fa7bfa9e --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93826f191766743d7070d498832af10a3f28fd35f190eb9bdd1940a4494912b +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_3_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..9ebc6c16351cca927233585491892aaddc33ef71 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_3_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a7301373e76dbdb32f7201df385d495b5fe29912745259bbbcd24b52f5cc1c5 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..33d508a2e176f3794e4e10cec9fbf87f01f53401 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08adb4c2e2839b8a1d2bd41b09f8942d12138884a96dc849ad3ffa3c21797b11 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..85f1804740d2fa63cb5919cfc7c5f201e6f71db6 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74526432e6daa0d6f46fcb15e7a06a0dce50ab8453df5aade0383b27da9b1eaa +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..6795711f3ea885223a264750268da3ec1ddaf9f8 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d41ad79dfbf2dc7519fef7d0d30ec32cb18c1b1685deac755215d2fe733eadc +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..558d0f59e83c30ea332c99afd92639b69088df66 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d65e1005be102763e6523d8912e0032cd3491d84f8272957adfdf36debab27 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..3f246421fff3cc75099053ae98a32e072e430460 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d7220389e68baee6d6566fe4316c455f9c4882bbf15387f94c4a9af7796a58 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..b6d315417c3d50166d4b2fb277f44adaaee4f719 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_4_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480a23f93eb763fe243109aff55a64ab2e83432703a30608c4cc738f638604a2 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..ba61332d675a10b73344d81dfdb7044046d6bee1 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934dbc8868942978982842a44ea67ec0afa6e367c380a135e52925b36b1b5d9f +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..d4f11fd2cb034cd562044d6dfc4cd58339429c0c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45fbbe8647edf44b099797b2180f6358914ae23627fb690fcbb0d4e0073dc16 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..1c8ff86b96ca2a578875ea4c2897129ce1305f75 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e98f67109c36e2cf33be574333159e9bafc62ab847fb53ac21e78e61a6a1513 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5b737a8d28c18cbcc1b29da860cdea1929072482 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a68e7336430778e9733edc4b4ad39c93bc9cd99b62f04a7ec57c10996cf289d +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_5_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..2b83049a6bcf2bef96c73f21549f241c75c5b80a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c20790dbd8bffbb8a189bd894d7d9810d4e3a790466265c64ff8215eaf05cf7 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..e1b46a09e4bcde8cddbbdf24c4547ea6191f6569 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:282f62abffc52f7eb5ff2be6f5580973d4d499b200c5a953f2cb58b75f220fc3 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..953e4b2872f8fa5511d2888a5c53fee1ae73a444 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_5_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c946c69a7633288db0051017203fcf61325dea3060c181ea2d8caf5ae2d3fb +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..bec0c227405a5f45ff74221c48a1cc6c47596508 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01aee0d87063f9854153149a8f9bf88179cc4fe7a577ed2bae2165feaef8a9f +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7f6535489ceff0c44c943fe6782be067f31729ad --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946ab4cebd19429bf274a62cfe4903c99103ffe90478a708f97c589a71e5b102 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..272abdca64b20ade65524dd024e0ad6f2a013ece --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e180a4ae9cc25c2421f55b4ebdd4c71efaab5c26d7972b102be41ae44202f339 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..9cdee53a2f1714ba63faf4d5d106c486b0b05943 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1940e1c5c0ec03694254094fffaa692ef5927a3f3e47872358d279dc42d539b9 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..126b6ea10867d50a1cf9884ff0124f22d00da762 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a417890ac35239d5d049441cd278bc352031fe6da1ded58c7ba88597eb8898c +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4efa1ba0a0818418d8100943433e79f8532e40ea --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37d33709c502641f073aa35f245dbf6d0cf1476003e40b205adf392cc2850e22 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..53135a48d5dac4407be8508b0a0afa5b535928e2 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207880b787544e01dfad4de096460966f5437e40b8e5f117fafc97ed7a8f77dd +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..72c05f28798691e9f47a84ce0ad076c4e1cd45a3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac08f9f1d20b448a545ee5b3d4ba5417536eed3a1f9ab700c5b9ddf5164afd81 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..46d6420a9430d1b062d396a0c5cb1b5957080532 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9407b2be695b2fc56693a989ceb16834f017fa040b0ac7e91b5891a89acfbd7 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..dd0f784b73d04762f550a4cfee4abb8d767eb103 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d01e4ee171edbf32521fb4fb4600773ab6652c60971be5f497e97e41f7e911 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_6_self_attn_v_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_v_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..f4fda4e8b547187c460e30fcb762117557b014d7 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_6_self_attn_v_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2070ae7d32df2e2d5dba7fb0ce78c82dc510159c0a8338dde6aa0a931f30d34 +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..f2bc88d3ecc168172d9815944b90a249f1f3a15c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087599244f33e6fd7a31116288b6b30df58d20c8976398a921c514a79c77a640 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..b1d0f1179083c2195b48d27882237ea6113ac59c --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c88a2e6e9a0e0e0789d49155e2fc65f8aaa26db5ccf88ccd9d6a3d700a90319 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..af05444b368793368db939e4e67d77a9a8af0d3f --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25cf76554e2555174bc8ffcd13dccd0a36ab2de593a8b4e2e25e548f88dfc044 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..017d33393099f1c1c9530d04ff1c610ca7f57790 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c011103da141ff9516c992ab4dd5b9b13d0d507be97c4714311c62dd7faac3d +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..aded4023cba036eca0ecd82bf3aaa624cea0a720 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1e0801032006a240e34b0e33090b88eb2685815df23fe0aa880a76d5b3c0fc +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..4d6433dbbb230ba3c9bb6fe2ad65ebe996566e12 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b8961d1cbe2020f8bae5485e7b78fbb1f950e54ae311abc242bc26fe868554 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..7a1dcdb625148705a2a21c0cd7220f963de170e7 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243a84945cef5a58cdcee12513312f65571425c7afe8a72a83845fa599e20a4b +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..e4317211761b1e83b5112d19e59467e8a51bd1c3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_7_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0f5174ef1e027e26b7f72c9decec1e1cd168ee0abdcede355593c9aa2d0955 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..5d5022b9a01f5bcae7fe698e6ca2909f00650dbb --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6f24414e4afc9611a2f38800fa26723f5c774be244dac3330acefc141cba18 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..1340a1447f132fca9c0a97810416590739d77829 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b147df9cdc62e23e654a6d42c51e2c49222a600a0e1755db94b087bd17c3f00a +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6cd856e61b79e2b67a3acccacd108895063741c3 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f6aa79044e0dc8b68a383cbbe079719694dd1d5d0cdc3b5b41a12fb68288dc +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..11024896e3fd4043b517db6e5d4957092aa3f96d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f666a8cd5f2a199e54e260146db742cea1ccc6515de6f37f06fc621bc9dd83ec +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..5023e04bd8772f32f50dea6d4d557611d56eab7d --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_o_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7d8bef278e22dd78993b0ab74e7b3fdb2cc5bc89d5a0721c6f0e309f9ec295 +size 2359296 diff --git a/deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..a585069c982ec6830f80482e7004cbd25ee16f16 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_8_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ab3edffd1314453ca9dbf8f4c4dcc8ce603ae6691e2997b71f8d7c2596c963 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..11ff30296b2332768a869e1ba8ff41af6d11a781 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dea54a861924206217a59dd2d04386ba1e3acd1928542dfc1c0fa543c9bdf97 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..8ee4ee400036ceeb2424a22029a7b85384701bde --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_mlp_down_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183f4dc7071f39c281ae75ebc315a3d78341fd143e2af80c38726e17df818d42 +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..477fc46e16ee8742c68354ab6c386a925b318497 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6a33cdf35656c5f2b742176e728ec97ec97621294cdb56f226808a3906b27c +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..0ed617cb33adfad64ba79a154492eb4216bc6010 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_mlp_gate_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c32a9787b099b2afbd50f74d79759336805cffc85ec9ff2137042c5e98c88e +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..ebd0acce9533b9263fbeffe3d4f52562af44e755 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efdac7fb1e83b0711510d2f234e69db2ccae3f9c71424096a208cb622b1d9472 +size 13762560 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..6b23d7902cb40d7d8ea83d221cd2894883a95d6a --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_mlp_up_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f125038add4d7f7ba79af7fd1067d156c54caa304de922e0b527a588c8446db +size 1720320 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_self_attn_k_proj_weight.mags b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_k_proj_weight.mags new file mode 100644 index 0000000000000000000000000000000000000000..4d534baecdb26f85b3bdd5cd2e95f81a2589bf95 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_k_proj_weight.mags @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38893ab7b4d0e064b57c167e0a35745660b31c7f196bbe030decdda29c5ad48c +size 393216 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..7779f95a229a2f7df437f13048d611beca13a501 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_o_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086eb2f7afe654b0dc8994b401a412e911b5a95214c23afcb78fca520086e017 +size 294912 diff --git a/deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.signs b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.signs new file mode 100644 index 0000000000000000000000000000000000000000..df93509d64067043b6f37b652ff5aae9ef8f6ca9 --- /dev/null +++ b/deepseek-r1-1.5b-packed/model_layers_9_self_attn_q_proj_weight.signs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b073e52123219e49d4c0d15a9a4f5f075c5d9d8d6171ef8e0bb462e030f00a +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..8cba03c8e0fb60f9cc34e2a9bfd4219adfc1d1dd --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_0_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1007e1529a7d8a3d6076b02dd7c526f92612a0ba1fc90f751876a89452df3bd +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..b490f4c97eedb35f0278a4ce65c37a77c53b812c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_12_mlp_up_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02572444a3d9ef22024cfce3466aeb076148098982bf460c3198567eb28a8b0 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_15_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_15_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..e6cabf354ac956256e64affda9dfc410822eeaee --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_15_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781899f30c8b8b5bb03c797c681f90b1d8fdfd0029cbc07fd89ea258ea28435d +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_15_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_15_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..eb56e7faf433b82de833187de2b3f79acf9aa5ac --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_15_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421d7cbc0421e87be083a7c868a24ca1e6dc7e703012b2fe646a729f6c7fffac +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_16_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_16_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..0346289b954386924485ae37d0b05a09ece21df6 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_16_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09a5e84a08a8dccfff0270937260cd67a003264d0b424e8cdad4e347125876f +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_1_mlp_down_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_1_mlp_down_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..2e415465249ec92147d409b9fd93e6027f41fd59 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_1_mlp_down_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:825a5d86b535c5c0d9c0eac49c6729890278049cdf479c12f368b37a2ad2f47a +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_1_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_1_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..0cae83e8a239aa3a10af2ca8a0a79899551f17f8 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_1_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa845cba2fb0ee7adc78e1121d4dbbd98b1c5d9199789724644e2434212595cf +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_20_self_attn_o_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_20_self_attn_o_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..a26a5a6643f3dcd93f57d747bde2098d4b4d883d --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_20_self_attn_o_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734fbec963028ec265c082057534a302c6a1f7eb34ff304956d50e8baabe2a08 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..77b63df0985aa9c70607ded6822a3fa1f2e9efc9 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c051d0691eea8778ae7cff442c5ec49a3564b5a8cbad091f5e159b7555c7d4e8 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_22_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..022542324dfbaa00dae078eb11a7c496e487bf78 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_22_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b76d24daecec9ae03796f82d23ac92075b0fc35557975c7450a0056e5c6cdb2 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..820c3cbbd7c14c03d4f10dab70725595648d1d19 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_23_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7357db7ead54e5a81d639544945f1cae403e5761246ee6d2cdd7f19864f9ae0f +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_3_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_3_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..6daf212196d82874bdbdc9c5e8575cdbd7693cda --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_3_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a23e18f008e545319f7fae5f8e114d2f5efb7ec65c2c57d453bcd80a195c96 +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_4_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..dd4d5e122fc063f618166b57eb390edf1a40f8a3 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0813c3bec10ea7af05096b4910f29d7ff159c772cd90ade22a738f02c077802c +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..d5bb1d6a21d3d970fce528bcb0792010033656be --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_4_mlp_up_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe805990ee4259bda51ead06cfa109d7716c139c204906b91898408f75b647a +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..64c10f29d8b34f4ca1e3771e841b1358c5c666c4 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4aefbe67daba25eb80f5b7876e5eecd56d34432ab34cdd5a8e95ded4e58aa0 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..d9508ef1d8e0fe0cfa7035dce570e008c330a5b1 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_6_mlp_gate_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d9ea1cd530bcbfde978ec6c754516b75db2996098da7a9e8960124137c89e3 +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_6_self_attn_q_proj_weight.neg b/deepseek-r1-1.5b-ternary/model_layers_6_self_attn_q_proj_weight.neg new file mode 100644 index 0000000000000000000000000000000000000000..854a6609fa8fa7b929131bd64764c40c8daa333c --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_6_self_attn_q_proj_weight.neg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a974db9bcef4a5ad6522b4d4d73c41773882be2442c3498094682204aadfcfcf +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_9_mlp_down_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_9_mlp_down_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..fc17ccd34446548816d9a6e57b34a2744ecd9a9e --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_9_mlp_down_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae2d91c47a5fbf78d4c018edb9420c18dc14e506914bf03bb8a879a2c95c7aa +size 1720320 diff --git a/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_o_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_o_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..a085d0c6560e3760c16ce20a89bb694512021565 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_o_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:111a6a5c812ba927e98b985e063450bd9382f409ba2b4c5c6451df7e37bd377f +size 294912 diff --git a/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.pos b/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.pos new file mode 100644 index 0000000000000000000000000000000000000000..fd902ced018a1caaff52eb91b15f1cbb82cf5e95 --- /dev/null +++ b/deepseek-r1-1.5b-ternary/model_layers_9_self_attn_q_proj_weight.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a19297812081c8faf46a956e7235573737fd5d36dddaf5ae56e6772d0576a9bc +size 294912 diff --git a/qwen3-4b-proper-unary/config.json b/qwen3-4b-proper-unary/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6988f134db143052042f2bd6e0c897bc6a605189 --- /dev/null +++ b/qwen3-4b-proper-unary/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/qwen3-4b-proper-unary/generation_config.json b/qwen3-4b-proper-unary/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20a8a9156fc8c3f25295ca067f61fdf120d517c5 --- /dev/null +++ b/qwen3-4b-proper-unary/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.51.0" +} \ No newline at end of file diff --git a/qwen3-4b-proper-unary/model_layers_0_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_0_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7d6a6d361decbfdbfad4cb43a425cca8a93133a7 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_0_mlp_down_proj_weight.scales b/qwen3-4b-proper-unary/model_layers_0_mlp_down_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..7d58534345e822204c977f49c685283d413a0998 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_mlp_down_proj_weight.scales differ diff --git a/qwen3-4b-proper-unary/model_layers_0_mlp_gate_proj_weight.scales b/qwen3-4b-proper-unary/model_layers_0_mlp_gate_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..a2b8faf70bf05aa3ec9daa63a2a62bf545af5a1d Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_mlp_gate_proj_weight.scales differ diff --git a/qwen3-4b-proper-unary/model_layers_0_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_0_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..2d1fc93585fd0cfe51bd59489d07bb0f8a46476f Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_0_self_attn_o_proj_weight.scales b/qwen3-4b-proper-unary/model_layers_0_self_attn_o_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..370705ffe895fa678f23721b6dcf04f8e8290a23 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_self_attn_o_proj_weight.scales differ diff --git a/qwen3-4b-proper-unary/model_layers_0_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_0_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..815785e2efaaa8396aa888f46c78ffea3446d44d Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_0_self_attn_q_proj_weight.scales b/qwen3-4b-proper-unary/model_layers_0_self_attn_q_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..9a0acdc92205363756982f6444556d99d845c85a Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_self_attn_q_proj_weight.scales differ diff --git a/qwen3-4b-proper-unary/model_layers_0_self_attn_v_proj_weight.scales b/qwen3-4b-proper-unary/model_layers_0_self_attn_v_proj_weight.scales new file mode 100644 index 0000000000000000000000000000000000000000..11cc3f4aa1e10355a7107502a0170e01ce7ed09b Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_0_self_attn_v_proj_weight.scales differ diff --git a/qwen3-4b-proper-unary/model_layers_10_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_10_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6a8f990803324ace40d5f252584aa5f8cf227886 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_10_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_10_post_attention_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_10_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..02758bc568552a8c6ec08aa9dc432d6711a79e35 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_10_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_10_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_10_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3213b380a0bef9d633e8f69535016a9da6361927 --- /dev/null +++ b/qwen3-4b-proper-unary/model_layers_10_self_attn_k_norm_weight.fp16 @@ -0,0 +1 @@ +9;=88x=9X==8===(>P:8=>P>0x==p>0>8(>h>H>=p@@><==XBH=H>8>x9@>< >X>8>HB >>x=@<;HCH8=>?@?B`?@H>Ax@@C >H>p<2h<=6?=X=:===h===H>>>X>x>-@?>h>@P>h@>>>h)>>P@<@@<.@P?=>==H@8=@>?>hB \ No newline at end of file diff --git a/qwen3-4b-proper-unary/model_layers_11_post_attention_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_11_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..dbf915a35ef7d2e1bd2a93fcf15751d4343f01f6 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_11_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_11_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_11_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..543283bf4e4b671bc550d9dc89312b71c4f26212 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_11_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_11_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_11_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c4693d2251b5ca4d237704c45d22f5a620ec1e48 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_11_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_12_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_12_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..08ff3d995132b6e1f1780acbec091d84fbc3457c Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_12_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_12_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_12_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..0883ab99614ba954f1fb5bc15b471437c5f1891e Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_12_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_12_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_12_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..504ad300474a7a2ec467cabc7699bdd8e6f8e8e3 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_12_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_13_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_13_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3d9eab9bef0050afccb6912b293ac2ea05c0617f Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_13_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_15_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_15_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..724ad34147a4055d8b0ca3ccae35af69e72fa596 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_15_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_1_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_1_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..9df669eb5abd260b22441ce967342a4cb6917fca Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_1_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_2_post_attention_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_2_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..caa6ce385b5f79576ba426ddab05bc1c46aa5519 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_2_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_2_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_2_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3c09b1c5dafea8caa581b98fa2cb6aa2a5d79386 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_2_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_3_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_3_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..324818264e751c5ef0d6e5390922aaab936a3da5 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_3_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_3_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_3_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..a3f7e671929e7c73c9c272d716babd3123cf2439 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_3_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_4_post_attention_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_4_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3ddb49073503a9a10cadaeee84d58bb698f337a0 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_4_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_4_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_4_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6dc554e676a6c463ac15e3e6f728f548ca028463 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_4_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_5_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_5_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..6aba1869e49e06ab5c6a49f3d47fd2db4e9f1900 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_5_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_5_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_5_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..f8727032d79fea0119c084cd0f28458fea2abedb Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_5_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_5_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_5_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..5df43c454ab4fe0fef5a3038d5ca059c2a1d9482 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_5_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_6_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_6_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..db1a79d6fa9a1d7b2b8328cdc10d034daa59eb48 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_6_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_6_post_attention_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_6_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..78e9320f1563cdfba9707bf290791cde80efe099 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_6_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_6_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_6_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..24727c97db0293939843aff5dec7cc98e0d500c1 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_6_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_6_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_6_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..32ce55ab2fa665601bb3480d6394169cb0721a85 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_6_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_7_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_7_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..4058b3fec31f47fecd9af500f4d4739294be11d9 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_7_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_7_self_attn_q_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_7_self_attn_q_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..3b96c8c921f14b44298e503d46ee16392ab3e772 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_7_self_attn_q_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_8_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_8_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..7dfd72b16dd478f920c223f4e937ec132848df58 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_8_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_8_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_8_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..128450801eff49c63339396743dfdb6262885a03 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_8_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_9_input_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_9_input_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..937efdde2436741ea88c78e4e0b9ba38b1dd6fe0 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_9_input_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_9_post_attention_layernorm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_9_post_attention_layernorm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..be863fa42560e40943b61288efae5f6caf3d9ac0 Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_9_post_attention_layernorm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/model_layers_9_self_attn_k_norm_weight.fp16 b/qwen3-4b-proper-unary/model_layers_9_self_attn_k_norm_weight.fp16 new file mode 100644 index 0000000000000000000000000000000000000000..ed6eb308122b8f6fb0a442d1865b87ded5ad94bf Binary files /dev/null and b/qwen3-4b-proper-unary/model_layers_9_self_attn_k_norm_weight.fp16 differ diff --git a/qwen3-4b-proper-unary/tokenizer_config.json b/qwen3-4b-proper-unary/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8dec7d58dda385ce95b469aa2d277ec162168e58 --- /dev/null +++ b/qwen3-4b-proper-unary/tokenizer_config.json @@ -0,0 +1,239 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "add_bos_token": false +} \ No newline at end of file