| { | |
| "trainer": { | |
| "trainer_class": "TopKTrainer", | |
| "dict_class": "AutoEncoderTopK", | |
| "lr": 8.944271909999159e-05, | |
| "steps": 12207, | |
| "auxk_alpha": 0.03125, | |
| "warmup_steps": 0, | |
| "decay_start": null, | |
| "threshold_beta": 0.999, | |
| "threshold_start_step": 1000, | |
| "seed": null, | |
| "activation_dim": 10240, | |
| "dict_size": 81920, | |
| "k": 128, | |
| "device": "cuda:2", | |
| "layer": 40, | |
| "lm_name": "blah", | |
| "wandb_name": "AutoEncoderTopK", | |
| "submodule_name": null | |
| }, | |
| "buffer": { | |
| "n_models": 2, | |
| "d_submodule": 5120, | |
| "io": "out", | |
| "n_ctxs": 256, | |
| "ctx_len": 512, | |
| "refresh_batch_size": 64, | |
| "out_batch_size": 4096, | |
| "device": "cuda:2", | |
| "rescale_acts": false | |
| } | |
| } |