{ "trainer": { "dict_class": "VSAEMixtureGaussian", "trainer_class": "VSAEMixtureTrainer", "activation_dim": 2048, "dict_size": 8192, "lr": 0.0005, "kl_coeff": 100, "warmup_steps": 500, "sparsity_warmup_steps": 500, "steps": 10000, "decay_start": 8000, "resample_steps": null, "var_flag": 0, "n_correlated_pairs": 0, "n_anticorrelated_pairs": 0, "use_april_update_mode": true, "seed": null, "device": "cuda", "layer": 0, "lm_name": "gelu-1l", "wandb_name": "VSAEMix_gelu-1l_d8192_lr0.0005_kl100_corr0_anticorr0_trainer_0", "submodule_name": null }, "buffer": { "d_submodule": 2048, "n_ctxs": 3000, "ctx_len": 128, "refresh_batch_size": 32, "out_batch_size": 1024, "device": "cuda" } }