Marmik committed on
Commit
cc797f2
·
verified ·
1 Parent(s): cded2db
Files changed (3) hide show
  1. checkpoint_28000.pt +3 -0
  2. config.json +22 -0
  3. model_final.pt +3 -0
checkpoint_28000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb103ebfaef05e2a2d18fd1c147c34133dba092eee6a603fd59a7548c3442a27
3
+ size 604198864
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "CrossCoder",
4
+ "trainer_class": "CrossCoderTrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 49152,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda",
12
+ "layer": 3,
13
+ "lm_name": "tiny-gpt-27M-mixtral-5l-active-27M",
14
+ "wandb_name": "L3-mu3.0e-02-lr1e-03-64-mixed",
15
+ "submodule_name": null,
16
+ "use_mse_loss": false,
17
+ "code_normalization": "MIXED",
18
+ "code_normalization_alpha_sae": 0.7,
19
+ "code_normalization_alpha_cc": 0.3,
20
+ "target_rms": 1.0
21
+ }
22
+ }
model_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57050bf18b863d2c43f8113d53477f11f134cdbaec6966ec321b54b83ca2b23d
3
+ size 604198789