{ "trainer": { "trainer_class": "BatchTopKTrainer", "dict_class": "BatchTopKSAE", "lr": 0.0002, "steps": 200001, "auxk_alpha": 0.0, "warmup_steps": 10000, "decay_start": 160000, "threshold_beta": 0.999, "threshold_start_step": 1000, "top_k_aux": 512, "seed": 21, "activation_dim": 1024, "dict_size": 8192, "k": 50, "device": "cuda:6", "layer": 20, "lm_name": "hubert", "wandb_name": "BatchTopKSAE", "submodule_name": null } }