Marmik committed on
Commit
a3606b8
·
verified ·
1 Parent(s): cf39def

Upload crosscoder-tinymoe-tinygpt-27M dictionary model

Browse files
Files changed (1) hide show
  1. trainer_config.json +23 -0
trainer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "CrossCoder",
4
+ "trainer_class": "CrossCoderTrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 49152,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda",
12
+ "layer": 3,
13
+ "lm_name": "tiny-gpt-27M-mixtral-5l-active-27M",
14
+ "wandb_name": "L3-mu3.0e-02-lr1e-03-64-mixed",
15
+ "submodule_name": null,
16
+ "use_mse_loss": false,
17
+ "code_normalization": "MIXED",
18
+ "code_normalization_alpha_sae": 0.7,
19
+ "code_normalization_alpha_cc": 0.3,
20
+ "target_rms": 1.0,
21
+ "num_layers": 2
22
+ }
23
+ }