{
  "model": "llava-hf/llava-1.5-7b-hf",
  "transcoder_type": "Cross-Layer Transcoder (CLT)",
  "architecture": {
    "hidden_dim": 4096,
    "feature_dim": 8192,
    "num_layers": 31,
    "expansion_factor": 2.0
  },
  "training": {
    "steps_per_layer": 5000,
    "learning_rate": 0.0003,
    "batch_samples": 16,
    "sparsity_penalty": 0.01,
    "validation_interval": 200
  },
  "results": {
    "dead_features_pct": 0.0,
    "avg_sparsity_early_layers": "2-4%",
    "avg_sparsity_middle_layers": "3-7%",
    "avg_sparsity_late_layers": "7-29%",
    "avg_reconstruction_loss": "<0.1 for most layers"
  },
  "features": {
    "mlp_to_clt_mapping": "Included (co-activation based)",
    "decoder_weights": "Included (CLT \u2192 MLP reconstruction)",
    "mapping_shape": "[4096, 8192]",
    "description": "Each transcoder includes MLP\u2192CLT feature correlation mapping computed during training"
  },
  "usage": {
    "transcoder_files": "transcoder_L{0-30}.pt",
    "mapping_files": "mapping_L{0-30}.pt",
    "note": "File indices match the layer indices of LLaVA's language model; transcoders cover layers 0-30 (31 layers in total, matching architecture.num_layers)"
  }
}