| { | |
| "trainer": { | |
| "dict_class": "AutoEncoder", | |
| "trainer_class": "StandardTrainerAprilUpdate", | |
| "activation_dim": 2048, | |
| "dict_size": 8192, | |
| "lr": 0.001, | |
| "l1_penalty": 0.1, | |
| "warmup_steps": 1000, | |
| "sparsity_warmup_steps": 1000, | |
| "steps": 10000, | |
| "decay_start": null, | |
| "seed": 42, | |
| "device": "cuda", | |
| "layer": 0, | |
| "lm_name": "gelu-1l", | |
| "wandb_name": "StandardSAE_gelu-1l_8192_trainer_0", | |
| "submodule_name": null | |
| }, | |
| "buffer": { | |
| "d_submodule": 2048, | |
| "n_ctxs": 3000, | |
| "ctx_len": 128, | |
| "refresh_batch_size": 32, | |
| "out_batch_size": 1024, | |
| "device": "cuda" | |
| } | |
| } |