File size: 678 Bytes
a3606b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
{
    "trainer": {
        "dict_class": "CrossCoder",
        "trainer_class": "CrossCoderTrainer",
        "activation_dim": 768,
        "dict_size": 49152,
        "lr": 0.001,
        "l1_penalty": 0.03,
        "warmup_steps": 1000,
        "resample_steps": null,
        "device": "cuda",
        "layer": 3,
        "lm_name": "tiny-gpt-27M-mixtral-5l-active-27M",
        "wandb_name": "L3-mu3.0e-02-lr1e-03-64-mixed",
        "submodule_name": null,
        "use_mse_loss": false,
        "code_normalization": "MIXED",
        "code_normalization_alpha_sae": 0.7,
        "code_normalization_alpha_cc": 0.3,
        "target_rms": 1.0,
        "num_layers": 2
    }
}