| { |
| "metadata": { |
| "total_size": 38575088320, |
| "total_parameters": 68562622464 |
| }, |
| "weight_map": { |
| "lm_head.biases": "model-00008-of-00008.safetensors", |
| "lm_head.scales": "model-00008-of-00008.safetensors", |
| "lm_head.weight": "model-00008-of-00008.safetensors", |
| "model.layers.0.input_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.input_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.router.classifier.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.router.classifier.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.router.classifier.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.router.e_score_correction_bias": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.gate_proj.biases": "model-00004-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.gate_proj.scales": "model-00004-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.gate_proj.weight": "model-00004-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.up_proj.biases": "model-00004-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.up_proj.scales": "model-00004-of-00008.safetensors", |
| "model.layers.0.mlp.switch_mlp.up_proj.weight": "model-00004-of-00008.safetensors", |
| "model.layers.0.mlps.0.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.0.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.mlps.1.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.post_attention_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.post_attention_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.0.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.0.self_attn.1.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.input_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.input_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.router.classifier.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.router.classifier.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.router.classifier.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.router.e_score_correction_bias": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlp.switch_mlp.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.0.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.mlps.1.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.post_attention_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.post_attention_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.0.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.1.self_attn.1.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.10.input_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.input_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.router.classifier.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.router.classifier.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.router.classifier.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.router.e_score_correction_bias": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlp.switch_mlp.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.0.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.mlps.1.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.post_attention_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.post_attention_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.embed_q.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.embed_q.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.embed_q.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.kv_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.o_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.o_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.o_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_a_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_a_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_a_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_b_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_b_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.q_b_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.unembed_out.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.unembed_out.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.0.unembed_out.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.embed_q.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.embed_q.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.embed_q.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.kv_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.o_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.o_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.o_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_a_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_a_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_a_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_b_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_b_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.q_b_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.unembed_out.biases": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.unembed_out.scales": "model-00007-of-00008.safetensors", |
| "model.layers.10.self_attn.1.unembed_out.weight": "model-00007-of-00008.safetensors", |
| "model.layers.11.input_layernorm.0.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.input_layernorm.1.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.router.classifier.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.router.classifier.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.router.classifier.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.router.e_score_correction_bias": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.11.mlps.0.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.0.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.mlps.1.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.post_attention_layernorm.0.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.post_attention_layernorm.1.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.embed_q.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.embed_q.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.embed_q.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.kv_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.o_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.o_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.o_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_a_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_a_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_a_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_b_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_b_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.q_b_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.unembed_out.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.unembed_out.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.0.unembed_out.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.embed_q.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.embed_q.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.embed_q.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.kv_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.o_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.o_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.o_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_a_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_a_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_a_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_b_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_b_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.q_b_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.unembed_out.biases": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.unembed_out.scales": "model-00008-of-00008.safetensors", |
| "model.layers.11.self_attn.1.unembed_out.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.input_layernorm.0.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.input_layernorm.1.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.router.classifier.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.router.classifier.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.router.classifier.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.router.e_score_correction_bias": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlp.switch_mlp.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.0.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.mlps.1.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.post_attention_layernorm.0.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.post_attention_layernorm.1.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.embed_q.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.embed_q.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.embed_q.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.kv_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.o_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.o_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.o_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_a_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_a_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_a_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_b_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_b_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.q_b_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.unembed_out.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.unembed_out.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.0.unembed_out.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.embed_q.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.embed_q.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.embed_q.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.kv_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.o_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.o_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.o_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_a_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_a_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_a_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_b_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_b_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.q_b_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.unembed_out.biases": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.unembed_out.scales": "model-00008-of-00008.safetensors", |
| "model.layers.12.self_attn.1.unembed_out.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.input_layernorm.0.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.input_layernorm.1.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.router.classifier.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.router.classifier.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.router.classifier.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.router.e_score_correction_bias": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlp.switch_mlp.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.0.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.down_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.down_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.down_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.gate_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.gate_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.gate_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.up_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.up_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.mlps.1.up_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.post_attention_layernorm.0.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.post_attention_layernorm.1.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.embed_q.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.embed_q.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.embed_q.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.kv_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.o_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.o_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.o_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_a_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_a_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_a_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_b_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_b_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.q_b_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.unembed_out.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.unembed_out.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.0.unembed_out.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.embed_q.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.embed_q.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.embed_q.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.kv_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.o_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.o_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.o_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_a_layernorm.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_a_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_a_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_a_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_b_proj.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_b_proj.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.q_b_proj.weight": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.unembed_out.biases": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.unembed_out.scales": "model-00008-of-00008.safetensors", |
| "model.layers.13.self_attn.1.unembed_out.weight": "model-00008-of-00008.safetensors", |
| "model.layers.2.input_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.input_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.router.classifier.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.router.classifier.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.router.classifier.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.router.e_score_correction_bias": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlp.switch_mlp.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.0.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.mlps.1.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.post_attention_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.post_attention_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.0.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.2.self_attn.1.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.input_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.input_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.router.classifier.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.router.classifier.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.router.classifier.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.router.e_score_correction_bias": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.0.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.down_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.down_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.down_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.gate_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.up_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.up_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.mlps.1.up_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.post_attention_layernorm.0.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.post_attention_layernorm.1.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.0.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.embed_q.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.embed_q.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.embed_q.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.kv_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.o_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.o_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.o_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_a_layernorm.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_a_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_a_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_a_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_b_proj.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_b_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.q_b_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.unembed_out.biases": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.unembed_out.scales": "model-00005-of-00008.safetensors", |
| "model.layers.3.self_attn.1.unembed_out.weight": "model-00005-of-00008.safetensors", |
| "model.layers.4.input_layernorm.0.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.input_layernorm.1.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.router.classifier.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.router.classifier.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.router.classifier.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.router.e_score_correction_bias": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.gate_proj.scales": "model-00005-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.gate_proj.weight": "model-00005-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlp.switch_mlp.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.0.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.mlps.1.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.post_attention_layernorm.0.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.post_attention_layernorm.1.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.0.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.4.self_attn.1.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.input_layernorm.0.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.input_layernorm.1.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.router.classifier.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.router.classifier.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.router.classifier.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.router.e_score_correction_bias": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlp.switch_mlp.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.0.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.mlps.1.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.post_attention_layernorm.0.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.post_attention_layernorm.1.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.0.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.5.self_attn.1.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.input_layernorm.0.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.input_layernorm.1.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.router.classifier.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.router.classifier.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.router.classifier.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.router.e_score_correction_bias": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlp.switch_mlp.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.0.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.mlps.1.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.post_attention_layernorm.0.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.post_attention_layernorm.1.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.0.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.6.self_attn.1.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.input_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.input_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlp.router.classifier.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.router.classifier.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.router.classifier.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.router.e_score_correction_bias": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.down_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.down_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.down_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.up_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlps.0.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.0.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.0.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.0.gate_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlps.0.gate_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlps.0.gate_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.mlps.0.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.0.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.0.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.7.mlps.1.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.post_attention_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.post_attention_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.7.self_attn.0.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.0.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.embed_q.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.embed_q.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.embed_q.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.kv_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.o_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.o_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.o_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_a_layernorm.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_a_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_a_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_a_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_b_proj.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_b_proj.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.q_b_proj.weight": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.unembed_out.biases": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.unembed_out.scales": "model-00006-of-00008.safetensors", |
| "model.layers.7.self_attn.1.unembed_out.weight": "model-00006-of-00008.safetensors", |
| "model.layers.8.input_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.input_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.router.classifier.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.router.classifier.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.router.classifier.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.router.e_score_correction_bias": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlp.switch_mlp.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.0.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.mlps.1.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.post_attention_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.post_attention_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.embed_q.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.embed_q.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.embed_q.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.kv_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.o_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.o_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.o_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_a_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_a_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_a_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_b_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_b_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.q_b_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.unembed_out.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.unembed_out.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.0.unembed_out.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.embed_q.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.embed_q.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.embed_q.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.kv_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.o_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.o_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.o_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_a_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_a_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_a_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_b_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_b_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.q_b_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.unembed_out.biases": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.unembed_out.scales": "model-00007-of-00008.safetensors", |
| "model.layers.8.self_attn.1.unembed_out.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.input_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.input_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.router.classifier.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.router.classifier.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.router.classifier.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.router.e_score_correction_bias": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlp.switch_mlp.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.0.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.down_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.down_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.down_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.gate_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.gate_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.gate_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.up_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.up_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.mlps.1.up_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.post_attention_layernorm.0.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.post_attention_layernorm.1.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.embed_q.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.embed_q.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.embed_q.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.kv_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.o_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.o_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.o_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_a_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_a_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_a_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_b_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_b_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.q_b_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.unembed_out.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.unembed_out.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.0.unembed_out.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.embed_q.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.embed_q.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.embed_q.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.kv_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.o_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.o_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.o_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_a_layernorm.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_a_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_a_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_a_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_b_proj.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_b_proj.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.q_b_proj.weight": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.unembed_out.biases": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.unembed_out.scales": "model-00007-of-00008.safetensors", |
| "model.layers.9.self_attn.1.unembed_out.weight": "model-00007-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.0.biases": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.0.scales": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.0.weight": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.1.biases": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.1.scales": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.1.weight": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.10.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.10.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.10.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.11.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.11.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.11.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.2.biases": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.2.scales": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.2.weight": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.3.biases": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.3.scales": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.3.weight": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.4.biases": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.4.scales": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.4.weight": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.5.biases": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.5.scales": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.5.weight": "model-00002-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.6.biases": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.6.scales": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.6.weight": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.7.biases": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.7.scales": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.7.weight": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.8.biases": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.8.scales": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.8.weight": "model-00003-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.9.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.9.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.embedders.9.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.0.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.0.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.0.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.1.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.1.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.1.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.10.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.10.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.10.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.11.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.11.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.11.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.2.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.2.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.2.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.3.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.3.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.3.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.4.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.4.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.4.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.5.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.5.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.5.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.6.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.6.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.6.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.7.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.7.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.7.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.8.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.8.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.8.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.9.biases": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.9.scales": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.post_projs.9.weight": "model-00004-of-00008.safetensors", |
| "model.ngram_embeddings.word_embeddings.biases": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.word_embeddings.scales": "model-00001-of-00008.safetensors", |
| "model.ngram_embeddings.word_embeddings.weight": "model-00001-of-00008.safetensors", |
| "model.norm.weight": "model-00008-of-00008.safetensors" |
| } |
| } |