zachdata committed on
Commit
2294543
·
verified ·
1 Parent(s): 04bb618

Upload 2 files

Browse files
Files changed (2) hide show
  1. ae.pt +3 -0
  2. config.json +31 -0
ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d26e91b1692cf23fed9edb8974e090cccdd39add152ed5a185770c727a02e96
3
+ size 67131048
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "VSAEMultiGaussian",
4
+ "trainer_class": "VSAEMultiGaussianTrainer",
5
+ "activation_dim": 2048,
6
+ "dict_size": 8192,
7
+ "lr": 0.001,
8
+ "kl_coeff": 50,
9
+ "warmup_steps": 500,
10
+ "sparsity_warmup_steps": 500,
11
+ "corr_rate": 0.0,
12
+ "var_flag": 0,
13
+ "steps": 10000,
14
+ "decay_start": 8000,
15
+ "use_april_update_mode": true,
16
+ "seed": null,
17
+ "device": "cuda",
18
+ "layer": 0,
19
+ "lm_name": "gelu-1l",
20
+ "wandb_name": "VSAEMulti_gelu-1l_d8192_lr0.001_kl50_corr0.0_trainer_0",
21
+ "submodule_name": null
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 2048,
25
+ "n_ctxs": 3000,
26
+ "ctx_len": 128,
27
+ "refresh_batch_size": 32,
28
+ "out_batch_size": 1024,
29
+ "device": "cuda"
30
+ }
31
+ }