{
"model": "llava-hf/llava-1.5-7b-hf",
"transcoder_type": "Cross-Layer Transcoder (CLT)",
"architecture": {
"hidden_dim": 4096,
"feature_dim": 8192,
"num_layers": 31,
"expansion_factor": 2.0
},
"training": {
"steps_per_layer": 5000,
"learning_rate": 0.0003,
"batch_samples": 16,
"sparsity_penalty": 0.01,
"validation_interval": 200
},
"results": {
"dead_features_pct": 0.0,
"avg_sparsity_early_layers": "2-4%",
"avg_sparsity_middle_layers": "3-7%",
"avg_sparsity_late_layers": "7-29%",
"avg_reconstruction_loss": "<0.1 for most layers"
},
"features": {
"mlp_to_clt_mapping": "Included (co-activation based)",
"decoder_weights": "Included (CLT \u2192 MLP reconstruction)",
"mapping_shape": "[4096, 8192]",
"description": "Each transcoder includes MLP\u2192CLT feature correlation mapping computed during training"
},
"usage": {
"transcoder_files": "transcoder_L{0-30}.pt",
"mapping_files": "mapping_L{0-30}.pt",
"note": "Layer indices match LLaVA's language model layers (0-30 out of 31 total layers)"
}
}