zachdata committed on
Commit
33b4244
·
verified ·
1 Parent(s): 17ca3d3

Upload 2 files

Browse files
Files changed (2) hide show
  1. ae.pt +3 -0
  2. config.json +33 -0
ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8757126c7ec57ec3c5fc67eb6de5ebee09acfab7eb92698d14b1bb40792b8525
3
+ size 67131048
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "VSAEMixtureGaussian",
4
+ "trainer_class": "VSAEMixtureTrainer",
5
+ "activation_dim": 2048,
6
+ "dict_size": 8192,
7
+ "lr": 0.0005,
8
+ "kl_coeff": 100,
9
+ "warmup_steps": 500,
10
+ "sparsity_warmup_steps": 500,
11
+ "steps": 10000,
12
+ "decay_start": 8000,
13
+ "resample_steps": null,
14
+ "var_flag": 0,
15
+ "n_correlated_pairs": 0,
16
+ "n_anticorrelated_pairs": 0,
17
+ "use_april_update_mode": true,
18
+ "seed": null,
19
+ "device": "cuda",
20
+ "layer": 0,
21
+ "lm_name": "gelu-1l",
22
+ "wandb_name": "VSAEMix_gelu-1l_d8192_lr0.0005_kl100_corr0_anticorr0_trainer_0",
23
+ "submodule_name": null
24
+ },
25
+ "buffer": {
26
+ "d_submodule": 2048,
27
+ "n_ctxs": 3000,
28
+ "ctx_len": 128,
29
+ "refresh_batch_size": 32,
30
+ "out_batch_size": 1024,
31
+ "device": "cuda"
32
+ }
33
+ }