{
"model": "llava-hf/llava-1.5-7b-hf",
"transcoder_type": "Cross-Layer Transcoder (CLT)",
"architecture": {
"hidden_dim": 4096,
"feature_dim": 8192,
"num_layers": 31,
"expansion_factor": 2.0
},
"training": {
"steps_per_layer": 5000,
"learning_rate": 0.0003,
"batch_samples": 16,
"sparsity_penalty": 0.01,
"validation_interval": 200
},
"results": {
"dead_features_pct": 0.0,
"avg_sparsity_early_layers": "2-4%",
"avg_sparsity_middle_layers": "3-7%",
"avg_sparsity_late_layers": "7-29%",
"avg_reconstruction_loss": "<0.1 for most layers"
},
"features": {
"mlp_to_clt_mapping": "Included (co-activation based)",
"decoder_weights": "Included (CLT \u2192 MLP reconstruction)",
"mapping_shape": "[4096, 8192]",
"description": "Each transcoder includes MLP\u2192CLT feature correlation mapping computed during training"
},
"usage": {
"transcoder_files": "transcoder_L{0-30}.pt",
"mapping_files": "mapping_L{0-30}.pt",
"note": "Layer indices match LLaVA's language model layers (0-30 out of 31 total layers)"
}
}