| { | |
| "trainer": { | |
| "dict_class": "CrossCoder", | |
| "trainer_class": "CrossCoderTrainer", | |
| "activation_dim": 768, | |
| "dict_size": 49152, | |
| "lr": 0.001, | |
| "l1_penalty": 0.03, | |
| "warmup_steps": 1000, | |
| "resample_steps": null, | |
| "device": "cuda", | |
| "layer": 3, | |
| "lm_name": "tiny-gpt-27M-mixtral-5l-active-27M", | |
| "wandb_name": "L3-mu3.0e-02-lr1e-03-64-mixed", | |
| "submodule_name": null, | |
| "use_mse_loss": false, | |
| "code_normalization": "MIXED", | |
| "code_normalization_alpha_sae": 0.7, | |
| "code_normalization_alpha_cc": 0.3, | |
| "target_rms": 1.0, | |
| "num_layers": 2 | |
| } | |
| } |