| { | |
| "mapped": [ | |
| "model.blocks.0.attn.W_K_cmp.weight", | |
| "model.blocks.0.attn.W_K_sel.weight", | |
| "model.blocks.0.attn.W_K_win.weight", | |
| "model.blocks.0.attn.W_Q.weight", | |
| "model.blocks.0.attn.W_V_cmp.weight", | |
| "model.blocks.0.attn.W_V_sel.weight", | |
| "model.blocks.0.attn.W_V_win.weight", | |
| "model.blocks.0.attn.out.weight", | |
| "model.blocks.0.mlp.fc1.weight", | |
| "model.blocks.0.mlp.fc2.weight", | |
| "model.blocks.0.norm1.weight", | |
| "model.blocks.0.norm2.weight", | |
| "model.blocks.1.attn.W_K_cmp.weight", | |
| "model.blocks.1.attn.W_K_sel.weight", | |
| "model.blocks.1.attn.W_K_win.weight", | |
| "model.blocks.1.attn.W_Q.weight", | |
| "model.blocks.1.attn.W_V_cmp.weight", | |
| "model.blocks.1.attn.W_V_sel.weight", | |
| "model.blocks.1.attn.W_V_win.weight", | |
| "model.blocks.1.attn.out.weight", | |
| "model.blocks.1.mlp.fc1.weight", | |
| "model.blocks.1.mlp.fc2.weight", | |
| "model.blocks.1.norm1.weight", | |
| "model.blocks.1.norm2.weight", | |
| "model.blocks.10.attn.W_K_cmp.weight", | |
| "model.blocks.10.attn.W_K_sel.weight", | |
| "model.blocks.10.attn.W_K_win.weight", | |
| "model.blocks.10.attn.W_Q.weight", | |
| "model.blocks.10.attn.W_V_cmp.weight", | |
| "model.blocks.10.attn.W_V_sel.weight", | |
| "model.blocks.10.attn.W_V_win.weight", | |
| "model.blocks.10.attn.out.weight", | |
| "model.blocks.10.mlp.fc1.weight", | |
| "model.blocks.10.mlp.fc2.weight", | |
| "model.blocks.10.norm1.weight", | |
| "model.blocks.10.norm2.weight", | |
| "model.blocks.11.attn.W_K_cmp.weight", | |
| "model.blocks.11.attn.W_K_sel.weight", | |
| "model.blocks.11.attn.W_K_win.weight", | |
| "model.blocks.11.attn.W_Q.weight", | |
| "model.blocks.11.attn.W_V_cmp.weight", | |
| "model.blocks.11.attn.W_V_sel.weight", | |
| "model.blocks.11.attn.W_V_win.weight", | |
| "model.blocks.11.attn.out.weight", | |
| "model.blocks.11.mlp.fc1.weight", | |
| "model.blocks.11.mlp.fc2.weight", | |
| "model.blocks.11.norm1.weight", | |
| "model.blocks.11.norm2.weight", | |
| "model.blocks.2.attn.W_K_cmp.weight", | |
| "model.blocks.2.attn.W_K_sel.weight", | |
| "model.blocks.2.attn.W_K_win.weight", | |
| "model.blocks.2.attn.W_Q.weight", | |
| "model.blocks.2.attn.W_V_cmp.weight", | |
| "model.blocks.2.attn.W_V_sel.weight", | |
| "model.blocks.2.attn.W_V_win.weight", | |
| "model.blocks.2.attn.out.weight", | |
| "model.blocks.2.mlp.fc1.weight", | |
| "model.blocks.2.mlp.fc2.weight", | |
| "model.blocks.2.norm1.weight", | |
| "model.blocks.2.norm2.weight", | |
| "model.blocks.3.attn.W_K_cmp.weight", | |
| "model.blocks.3.attn.W_K_sel.weight", | |
| "model.blocks.3.attn.W_K_win.weight", | |
| "model.blocks.3.attn.W_Q.weight", | |
| "model.blocks.3.attn.W_V_cmp.weight", | |
| "model.blocks.3.attn.W_V_sel.weight", | |
| "model.blocks.3.attn.W_V_win.weight", | |
| "model.blocks.3.attn.out.weight", | |
| "model.blocks.3.mlp.fc1.weight", | |
| "model.blocks.3.mlp.fc2.weight", | |
| "model.blocks.3.norm1.weight", | |
| "model.blocks.3.norm2.weight", | |
| "model.blocks.4.attn.W_K_cmp.weight", | |
| "model.blocks.4.attn.W_K_sel.weight", | |
| "model.blocks.4.attn.W_K_win.weight", | |
| "model.blocks.4.attn.W_Q.weight", | |
| "model.blocks.4.attn.W_V_cmp.weight", | |
| "model.blocks.4.attn.W_V_sel.weight", | |
| "model.blocks.4.attn.W_V_win.weight", | |
| "model.blocks.4.attn.out.weight", | |
| "model.blocks.4.mlp.fc1.weight", | |
| "model.blocks.4.mlp.fc2.weight", | |
| "model.blocks.4.norm1.weight", | |
| "model.blocks.4.norm2.weight", | |
| "model.blocks.5.attn.W_K_cmp.weight", | |
| "model.blocks.5.attn.W_K_sel.weight", | |
| "model.blocks.5.attn.W_K_win.weight", | |
| "model.blocks.5.attn.W_Q.weight", | |
| "model.blocks.5.attn.W_V_cmp.weight", | |
| "model.blocks.5.attn.W_V_sel.weight", | |
| "model.blocks.5.attn.W_V_win.weight", | |
| "model.blocks.5.attn.out.weight", | |
| "model.blocks.5.mlp.fc1.weight", | |
| "model.blocks.5.mlp.fc2.weight", | |
| "model.blocks.5.norm1.weight", | |
| "model.blocks.5.norm2.weight", | |
| "model.blocks.6.attn.W_K_cmp.weight", | |
| "model.blocks.6.attn.W_K_sel.weight", | |
| "model.blocks.6.attn.W_K_win.weight", | |
| "model.blocks.6.attn.W_Q.weight", | |
| "model.blocks.6.attn.W_V_cmp.weight", | |
| "model.blocks.6.attn.W_V_sel.weight", | |
| "model.blocks.6.attn.W_V_win.weight", | |
| "model.blocks.6.attn.out.weight", | |
| "model.blocks.6.mlp.fc1.weight", | |
| "model.blocks.6.mlp.fc2.weight", | |
| "model.blocks.6.norm1.weight", | |
| "model.blocks.6.norm2.weight", | |
| "model.blocks.7.attn.W_K_cmp.weight", | |
| "model.blocks.7.attn.W_K_sel.weight", | |
| "model.blocks.7.attn.W_K_win.weight", | |
| "model.blocks.7.attn.W_Q.weight", | |
| "model.blocks.7.attn.W_V_cmp.weight", | |
| "model.blocks.7.attn.W_V_sel.weight", | |
| "model.blocks.7.attn.W_V_win.weight", | |
| "model.blocks.7.attn.out.weight", | |
| "model.blocks.7.mlp.fc1.weight", | |
| "model.blocks.7.mlp.fc2.weight", | |
| "model.blocks.7.norm1.weight", | |
| "model.blocks.7.norm2.weight", | |
| "model.blocks.8.attn.W_K_cmp.weight", | |
| "model.blocks.8.attn.W_K_sel.weight", | |
| "model.blocks.8.attn.W_K_win.weight", | |
| "model.blocks.8.attn.W_Q.weight", | |
| "model.blocks.8.attn.W_V_cmp.weight", | |
| "model.blocks.8.attn.W_V_sel.weight", | |
| "model.blocks.8.attn.W_V_win.weight", | |
| "model.blocks.8.attn.out.weight", | |
| "model.blocks.8.mlp.fc1.weight", | |
| "model.blocks.8.mlp.fc2.weight", | |
| "model.blocks.8.norm1.weight", | |
| "model.blocks.8.norm2.weight", | |
| "model.blocks.9.attn.W_K_cmp.weight", | |
| "model.blocks.9.attn.W_K_sel.weight", | |
| "model.blocks.9.attn.W_K_win.weight", | |
| "model.blocks.9.attn.W_Q.weight", | |
| "model.blocks.9.attn.W_V_cmp.weight", | |
| "model.blocks.9.attn.W_V_sel.weight", | |
| "model.blocks.9.attn.W_V_win.weight", | |
| "model.blocks.9.attn.out.weight", | |
| "model.blocks.9.mlp.fc1.weight", | |
| "model.blocks.9.mlp.fc2.weight", | |
| "model.blocks.9.norm1.weight", | |
| "model.blocks.9.norm2.weight", | |
| "model.embed.weight", | |
| "model.lm_head.weight", | |
| "model.norm.weight" | |
| ], | |
| "missing": [ | |
| "model.blocks.0.attn.gate_fc1.bias", | |
| "model.blocks.0.attn.gate_fc1.weight", | |
| "model.blocks.0.attn.gate_fc2.bias", | |
| "model.blocks.0.attn.gate_fc2.weight", | |
| "model.blocks.1.attn.gate_fc1.bias", | |
| "model.blocks.1.attn.gate_fc1.weight", | |
| "model.blocks.1.attn.gate_fc2.bias", | |
| "model.blocks.1.attn.gate_fc2.weight", | |
| "model.blocks.10.attn.gate_fc1.bias", | |
| "model.blocks.10.attn.gate_fc1.weight", | |
| "model.blocks.10.attn.gate_fc2.bias", | |
| "model.blocks.10.attn.gate_fc2.weight", | |
| "model.blocks.11.attn.gate_fc1.bias", | |
| "model.blocks.11.attn.gate_fc1.weight", | |
| "model.blocks.11.attn.gate_fc2.bias", | |
| "model.blocks.11.attn.gate_fc2.weight", | |
| "model.blocks.2.attn.gate_fc1.bias", | |
| "model.blocks.2.attn.gate_fc1.weight", | |
| "model.blocks.2.attn.gate_fc2.bias", | |
| "model.blocks.2.attn.gate_fc2.weight", | |
| "model.blocks.3.attn.gate_fc1.bias", | |
| "model.blocks.3.attn.gate_fc1.weight", | |
| "model.blocks.3.attn.gate_fc2.bias", | |
| "model.blocks.3.attn.gate_fc2.weight", | |
| "model.blocks.4.attn.gate_fc1.bias", | |
| "model.blocks.4.attn.gate_fc1.weight", | |
| "model.blocks.4.attn.gate_fc2.bias", | |
| "model.blocks.4.attn.gate_fc2.weight", | |
| "model.blocks.5.attn.gate_fc1.bias", | |
| "model.blocks.5.attn.gate_fc1.weight", | |
| "model.blocks.5.attn.gate_fc2.bias", | |
| "model.blocks.5.attn.gate_fc2.weight", | |
| "model.blocks.6.attn.gate_fc1.bias", | |
| "model.blocks.6.attn.gate_fc1.weight", | |
| "model.blocks.6.attn.gate_fc2.bias", | |
| "model.blocks.6.attn.gate_fc2.weight", | |
| "model.blocks.7.attn.gate_fc1.bias", | |
| "model.blocks.7.attn.gate_fc1.weight", | |
| "model.blocks.7.attn.gate_fc2.bias", | |
| "model.blocks.7.attn.gate_fc2.weight", | |
| "model.blocks.8.attn.gate_fc1.bias", | |
| "model.blocks.8.attn.gate_fc1.weight", | |
| "model.blocks.8.attn.gate_fc2.bias", | |
| "model.blocks.8.attn.gate_fc2.weight", | |
| "model.blocks.9.attn.gate_fc1.bias", | |
| "model.blocks.9.attn.gate_fc1.weight", | |
| "model.blocks.9.attn.gate_fc2.bias", | |
| "model.blocks.9.attn.gate_fc2.weight", | |
| "model.norm.bias" | |
| ], | |
| "extra": [ | |
| "blocks.0.attn.gate.fc1.bias", | |
| "blocks.0.attn.gate.fc1.weight", | |
| "blocks.0.attn.gate.fc2.bias", | |
| "blocks.0.attn.gate.fc2.weight", | |
| "blocks.1.attn.gate.fc1.bias", | |
| "blocks.1.attn.gate.fc1.weight", | |
| "blocks.1.attn.gate.fc2.bias", | |
| "blocks.1.attn.gate.fc2.weight", | |
| "blocks.10.attn.gate.fc1.bias", | |
| "blocks.10.attn.gate.fc1.weight", | |
| "blocks.10.attn.gate.fc2.bias", | |
| "blocks.10.attn.gate.fc2.weight", | |
| "blocks.11.attn.gate.fc1.bias", | |
| "blocks.11.attn.gate.fc1.weight", | |
| "blocks.11.attn.gate.fc2.bias", | |
| "blocks.11.attn.gate.fc2.weight", | |
| "blocks.2.attn.gate.fc1.bias", | |
| "blocks.2.attn.gate.fc1.weight", | |
| "blocks.2.attn.gate.fc2.bias", | |
| "blocks.2.attn.gate.fc2.weight", | |
| "blocks.3.attn.gate.fc1.bias", | |
| "blocks.3.attn.gate.fc1.weight", | |
| "blocks.3.attn.gate.fc2.bias", | |
| "blocks.3.attn.gate.fc2.weight", | |
| "blocks.4.attn.gate.fc1.bias", | |
| "blocks.4.attn.gate.fc1.weight", | |
| "blocks.4.attn.gate.fc2.bias", | |
| "blocks.4.attn.gate.fc2.weight", | |
| "blocks.5.attn.gate.fc1.bias", | |
| "blocks.5.attn.gate.fc1.weight", | |
| "blocks.5.attn.gate.fc2.bias", | |
| "blocks.5.attn.gate.fc2.weight", | |
| "blocks.6.attn.gate.fc1.bias", | |
| "blocks.6.attn.gate.fc1.weight", | |
| "blocks.6.attn.gate.fc2.bias", | |
| "blocks.6.attn.gate.fc2.weight", | |
| "blocks.7.attn.gate.fc1.bias", | |
| "blocks.7.attn.gate.fc1.weight", | |
| "blocks.7.attn.gate.fc2.bias", | |
| "blocks.7.attn.gate.fc2.weight", | |
| "blocks.8.attn.gate.fc1.bias", | |
| "blocks.8.attn.gate.fc1.weight", | |
| "blocks.8.attn.gate.fc2.bias", | |
| "blocks.8.attn.gate.fc2.weight", | |
| "blocks.9.attn.gate.fc1.bias", | |
| "blocks.9.attn.gate.fc1.weight", | |
| "blocks.9.attn.gate.fc2.bias", | |
| "blocks.9.attn.gate.fc2.weight" | |
| ] | |
| } |