nsa-117m-byte / logs /logs_mapping.json
seconds-0's picture
NSA 117M initial export
3558023 verified
{
"mapped": [
"model.blocks.0.attn.W_K_cmp.weight",
"model.blocks.0.attn.W_K_sel.weight",
"model.blocks.0.attn.W_K_win.weight",
"model.blocks.0.attn.W_Q.weight",
"model.blocks.0.attn.W_V_cmp.weight",
"model.blocks.0.attn.W_V_sel.weight",
"model.blocks.0.attn.W_V_win.weight",
"model.blocks.0.attn.out.weight",
"model.blocks.0.mlp.fc1.weight",
"model.blocks.0.mlp.fc2.weight",
"model.blocks.0.norm1.weight",
"model.blocks.0.norm2.weight",
"model.blocks.1.attn.W_K_cmp.weight",
"model.blocks.1.attn.W_K_sel.weight",
"model.blocks.1.attn.W_K_win.weight",
"model.blocks.1.attn.W_Q.weight",
"model.blocks.1.attn.W_V_cmp.weight",
"model.blocks.1.attn.W_V_sel.weight",
"model.blocks.1.attn.W_V_win.weight",
"model.blocks.1.attn.out.weight",
"model.blocks.1.mlp.fc1.weight",
"model.blocks.1.mlp.fc2.weight",
"model.blocks.1.norm1.weight",
"model.blocks.1.norm2.weight",
"model.blocks.10.attn.W_K_cmp.weight",
"model.blocks.10.attn.W_K_sel.weight",
"model.blocks.10.attn.W_K_win.weight",
"model.blocks.10.attn.W_Q.weight",
"model.blocks.10.attn.W_V_cmp.weight",
"model.blocks.10.attn.W_V_sel.weight",
"model.blocks.10.attn.W_V_win.weight",
"model.blocks.10.attn.out.weight",
"model.blocks.10.mlp.fc1.weight",
"model.blocks.10.mlp.fc2.weight",
"model.blocks.10.norm1.weight",
"model.blocks.10.norm2.weight",
"model.blocks.11.attn.W_K_cmp.weight",
"model.blocks.11.attn.W_K_sel.weight",
"model.blocks.11.attn.W_K_win.weight",
"model.blocks.11.attn.W_Q.weight",
"model.blocks.11.attn.W_V_cmp.weight",
"model.blocks.11.attn.W_V_sel.weight",
"model.blocks.11.attn.W_V_win.weight",
"model.blocks.11.attn.out.weight",
"model.blocks.11.mlp.fc1.weight",
"model.blocks.11.mlp.fc2.weight",
"model.blocks.11.norm1.weight",
"model.blocks.11.norm2.weight",
"model.blocks.2.attn.W_K_cmp.weight",
"model.blocks.2.attn.W_K_sel.weight",
"model.blocks.2.attn.W_K_win.weight",
"model.blocks.2.attn.W_Q.weight",
"model.blocks.2.attn.W_V_cmp.weight",
"model.blocks.2.attn.W_V_sel.weight",
"model.blocks.2.attn.W_V_win.weight",
"model.blocks.2.attn.out.weight",
"model.blocks.2.mlp.fc1.weight",
"model.blocks.2.mlp.fc2.weight",
"model.blocks.2.norm1.weight",
"model.blocks.2.norm2.weight",
"model.blocks.3.attn.W_K_cmp.weight",
"model.blocks.3.attn.W_K_sel.weight",
"model.blocks.3.attn.W_K_win.weight",
"model.blocks.3.attn.W_Q.weight",
"model.blocks.3.attn.W_V_cmp.weight",
"model.blocks.3.attn.W_V_sel.weight",
"model.blocks.3.attn.W_V_win.weight",
"model.blocks.3.attn.out.weight",
"model.blocks.3.mlp.fc1.weight",
"model.blocks.3.mlp.fc2.weight",
"model.blocks.3.norm1.weight",
"model.blocks.3.norm2.weight",
"model.blocks.4.attn.W_K_cmp.weight",
"model.blocks.4.attn.W_K_sel.weight",
"model.blocks.4.attn.W_K_win.weight",
"model.blocks.4.attn.W_Q.weight",
"model.blocks.4.attn.W_V_cmp.weight",
"model.blocks.4.attn.W_V_sel.weight",
"model.blocks.4.attn.W_V_win.weight",
"model.blocks.4.attn.out.weight",
"model.blocks.4.mlp.fc1.weight",
"model.blocks.4.mlp.fc2.weight",
"model.blocks.4.norm1.weight",
"model.blocks.4.norm2.weight",
"model.blocks.5.attn.W_K_cmp.weight",
"model.blocks.5.attn.W_K_sel.weight",
"model.blocks.5.attn.W_K_win.weight",
"model.blocks.5.attn.W_Q.weight",
"model.blocks.5.attn.W_V_cmp.weight",
"model.blocks.5.attn.W_V_sel.weight",
"model.blocks.5.attn.W_V_win.weight",
"model.blocks.5.attn.out.weight",
"model.blocks.5.mlp.fc1.weight",
"model.blocks.5.mlp.fc2.weight",
"model.blocks.5.norm1.weight",
"model.blocks.5.norm2.weight",
"model.blocks.6.attn.W_K_cmp.weight",
"model.blocks.6.attn.W_K_sel.weight",
"model.blocks.6.attn.W_K_win.weight",
"model.blocks.6.attn.W_Q.weight",
"model.blocks.6.attn.W_V_cmp.weight",
"model.blocks.6.attn.W_V_sel.weight",
"model.blocks.6.attn.W_V_win.weight",
"model.blocks.6.attn.out.weight",
"model.blocks.6.mlp.fc1.weight",
"model.blocks.6.mlp.fc2.weight",
"model.blocks.6.norm1.weight",
"model.blocks.6.norm2.weight",
"model.blocks.7.attn.W_K_cmp.weight",
"model.blocks.7.attn.W_K_sel.weight",
"model.blocks.7.attn.W_K_win.weight",
"model.blocks.7.attn.W_Q.weight",
"model.blocks.7.attn.W_V_cmp.weight",
"model.blocks.7.attn.W_V_sel.weight",
"model.blocks.7.attn.W_V_win.weight",
"model.blocks.7.attn.out.weight",
"model.blocks.7.mlp.fc1.weight",
"model.blocks.7.mlp.fc2.weight",
"model.blocks.7.norm1.weight",
"model.blocks.7.norm2.weight",
"model.blocks.8.attn.W_K_cmp.weight",
"model.blocks.8.attn.W_K_sel.weight",
"model.blocks.8.attn.W_K_win.weight",
"model.blocks.8.attn.W_Q.weight",
"model.blocks.8.attn.W_V_cmp.weight",
"model.blocks.8.attn.W_V_sel.weight",
"model.blocks.8.attn.W_V_win.weight",
"model.blocks.8.attn.out.weight",
"model.blocks.8.mlp.fc1.weight",
"model.blocks.8.mlp.fc2.weight",
"model.blocks.8.norm1.weight",
"model.blocks.8.norm2.weight",
"model.blocks.9.attn.W_K_cmp.weight",
"model.blocks.9.attn.W_K_sel.weight",
"model.blocks.9.attn.W_K_win.weight",
"model.blocks.9.attn.W_Q.weight",
"model.blocks.9.attn.W_V_cmp.weight",
"model.blocks.9.attn.W_V_sel.weight",
"model.blocks.9.attn.W_V_win.weight",
"model.blocks.9.attn.out.weight",
"model.blocks.9.mlp.fc1.weight",
"model.blocks.9.mlp.fc2.weight",
"model.blocks.9.norm1.weight",
"model.blocks.9.norm2.weight",
"model.embed.weight",
"model.lm_head.weight",
"model.norm.weight"
],
"missing": [
"model.blocks.0.attn.gate_fc1.bias",
"model.blocks.0.attn.gate_fc1.weight",
"model.blocks.0.attn.gate_fc2.bias",
"model.blocks.0.attn.gate_fc2.weight",
"model.blocks.1.attn.gate_fc1.bias",
"model.blocks.1.attn.gate_fc1.weight",
"model.blocks.1.attn.gate_fc2.bias",
"model.blocks.1.attn.gate_fc2.weight",
"model.blocks.10.attn.gate_fc1.bias",
"model.blocks.10.attn.gate_fc1.weight",
"model.blocks.10.attn.gate_fc2.bias",
"model.blocks.10.attn.gate_fc2.weight",
"model.blocks.11.attn.gate_fc1.bias",
"model.blocks.11.attn.gate_fc1.weight",
"model.blocks.11.attn.gate_fc2.bias",
"model.blocks.11.attn.gate_fc2.weight",
"model.blocks.2.attn.gate_fc1.bias",
"model.blocks.2.attn.gate_fc1.weight",
"model.blocks.2.attn.gate_fc2.bias",
"model.blocks.2.attn.gate_fc2.weight",
"model.blocks.3.attn.gate_fc1.bias",
"model.blocks.3.attn.gate_fc1.weight",
"model.blocks.3.attn.gate_fc2.bias",
"model.blocks.3.attn.gate_fc2.weight",
"model.blocks.4.attn.gate_fc1.bias",
"model.blocks.4.attn.gate_fc1.weight",
"model.blocks.4.attn.gate_fc2.bias",
"model.blocks.4.attn.gate_fc2.weight",
"model.blocks.5.attn.gate_fc1.bias",
"model.blocks.5.attn.gate_fc1.weight",
"model.blocks.5.attn.gate_fc2.bias",
"model.blocks.5.attn.gate_fc2.weight",
"model.blocks.6.attn.gate_fc1.bias",
"model.blocks.6.attn.gate_fc1.weight",
"model.blocks.6.attn.gate_fc2.bias",
"model.blocks.6.attn.gate_fc2.weight",
"model.blocks.7.attn.gate_fc1.bias",
"model.blocks.7.attn.gate_fc1.weight",
"model.blocks.7.attn.gate_fc2.bias",
"model.blocks.7.attn.gate_fc2.weight",
"model.blocks.8.attn.gate_fc1.bias",
"model.blocks.8.attn.gate_fc1.weight",
"model.blocks.8.attn.gate_fc2.bias",
"model.blocks.8.attn.gate_fc2.weight",
"model.blocks.9.attn.gate_fc1.bias",
"model.blocks.9.attn.gate_fc1.weight",
"model.blocks.9.attn.gate_fc2.bias",
"model.blocks.9.attn.gate_fc2.weight",
"model.norm.bias"
],
"extra": [
"blocks.0.attn.gate.fc1.bias",
"blocks.0.attn.gate.fc1.weight",
"blocks.0.attn.gate.fc2.bias",
"blocks.0.attn.gate.fc2.weight",
"blocks.1.attn.gate.fc1.bias",
"blocks.1.attn.gate.fc1.weight",
"blocks.1.attn.gate.fc2.bias",
"blocks.1.attn.gate.fc2.weight",
"blocks.10.attn.gate.fc1.bias",
"blocks.10.attn.gate.fc1.weight",
"blocks.10.attn.gate.fc2.bias",
"blocks.10.attn.gate.fc2.weight",
"blocks.11.attn.gate.fc1.bias",
"blocks.11.attn.gate.fc1.weight",
"blocks.11.attn.gate.fc2.bias",
"blocks.11.attn.gate.fc2.weight",
"blocks.2.attn.gate.fc1.bias",
"blocks.2.attn.gate.fc1.weight",
"blocks.2.attn.gate.fc2.bias",
"blocks.2.attn.gate.fc2.weight",
"blocks.3.attn.gate.fc1.bias",
"blocks.3.attn.gate.fc1.weight",
"blocks.3.attn.gate.fc2.bias",
"blocks.3.attn.gate.fc2.weight",
"blocks.4.attn.gate.fc1.bias",
"blocks.4.attn.gate.fc1.weight",
"blocks.4.attn.gate.fc2.bias",
"blocks.4.attn.gate.fc2.weight",
"blocks.5.attn.gate.fc1.bias",
"blocks.5.attn.gate.fc1.weight",
"blocks.5.attn.gate.fc2.bias",
"blocks.5.attn.gate.fc2.weight",
"blocks.6.attn.gate.fc1.bias",
"blocks.6.attn.gate.fc1.weight",
"blocks.6.attn.gate.fc2.bias",
"blocks.6.attn.gate.fc2.weight",
"blocks.7.attn.gate.fc1.bias",
"blocks.7.attn.gate.fc1.weight",
"blocks.7.attn.gate.fc2.bias",
"blocks.7.attn.gate.fc2.weight",
"blocks.8.attn.gate.fc1.bias",
"blocks.8.attn.gate.fc1.weight",
"blocks.8.attn.gate.fc2.bias",
"blocks.8.attn.gate.fc2.weight",
"blocks.9.attn.gate.fc1.bias",
"blocks.9.attn.gate.fc1.weight",
"blocks.9.attn.gate.fc2.bias",
"blocks.9.attn.gate.fc2.weight"
]
}