| { | |
| "trainer": { | |
| "dict_class": "AutoEncoder", | |
| "trainer_class": "StandardTrainerAprilUpdate", | |
| "activation_dim_per_model": 896, | |
| "total_activation_dim": 1792, | |
| "n_models": 2, | |
| "dict_size": 57344, | |
| "lr": 0.0001, | |
| "l1_penalty": 0.1, | |
| "warmup_steps": 0, | |
| "sparsity_warmup_steps": 0, | |
| "steps": 610, | |
| "decay_start": null, | |
| "seed": null, | |
| "device": "cuda:2", | |
| "layer": 7, | |
| "lm_name": "blah", | |
| "wandb_name": "StandardTrainerAprilUpdate", | |
| "submodule_name": null, | |
| "frac_features_shared": 0.04, | |
| "shared_l1_penalty": 0.02, | |
| "num_shared_features": 2293 | |
| }, | |
| "buffer": { | |
| "n_datasets": 2, | |
| "n_models": 2, | |
| "d_submodule": 896, | |
| "io": "out", | |
| "n_ctxs": 512, | |
| "ctx_len": 256, | |
| "refresh_batch_size": 1024, | |
| "out_batch_size": 16384, | |
| "device": "cuda:2", | |
| "rescale_acts": false | |
| } | |
| } |