File size: 948 Bytes
33b4244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
{
    "trainer": {
        "dict_class": "VSAEMixtureGaussian",
        "trainer_class": "VSAEMixtureTrainer",
        "activation_dim": 2048,
        "dict_size": 8192,
        "lr": 0.0005,
        "kl_coeff": 100,
        "warmup_steps": 500,
        "sparsity_warmup_steps": 500,
        "steps": 10000,
        "decay_start": 8000,
        "resample_steps": null,
        "var_flag": 0,
        "n_correlated_pairs": 0,
        "n_anticorrelated_pairs": 0,
        "use_april_update_mode": true,
        "seed": null,
        "device": "cuda",
        "layer": 0,
        "lm_name": "gelu-1l",
        "wandb_name": "VSAEMix_gelu-1l_d8192_lr0.0005_kl100_corr0_anticorr0_trainer_0",
        "submodule_name": null
    },
    "buffer": {
        "d_submodule": 2048,
        "n_ctxs": 3000,
        "ctx_len": 128,
        "refresh_batch_size": 32,
        "out_batch_size": 1024,
        "device": "cuda"
    }
}