AntonKorznikov committed on
Commit
245fbe8
·
verified ·
1 Parent(s): c482888

Delete trained_saes

Browse files
trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fd9e44e2ace29771e0dd496366882948650e4b72fbfa522726eda50261580b0
3
- size 302066710
 
 
 
 
trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/config.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "BatchTopKTrainer",
4
- "dict_class": "BatchTopKSAE",
5
- "lr": 0.0003,
6
- "steps": 2441,
7
- "auxk_alpha": 0.03125,
8
- "warmup_steps": 1000,
9
- "decay_start": 1952,
10
- "threshold_beta": 0.999,
11
- "threshold_start_step": 1000,
12
- "top_k_aux": 1152,
13
- "seed": 0,
14
- "activation_dim": 2304,
15
- "dict_size": 16384,
16
- "k": 320,
17
- "device": "cuda:6",
18
- "layer": 12,
19
- "lm_name": "google/gemma-2-2b",
20
- "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
21
- "submodule_name": "resid_post_layer_12"
22
- },
23
- "buffer": {
24
- "d_submodule": 2304,
25
- "io": "out",
26
- "n_ctxs": 1953,
27
- "ctx_len": 128,
28
- "refresh_batch_size": 4,
29
- "out_batch_size": 2048,
30
- "device": "cuda:6"
31
- }
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 52.62875, "l1_loss": 1287.76, "l0": 310.368125, "frac_variance_explained": 0.81931640625, "cossim": 0.93048828125, "l2_ratio": 0.92916015625, "relative_reconstruction_bias": 0.99939453125, "loss_original": 2.6491796875, "loss_reconstructed": 3.083125, "loss_zero": 12.4375, "frac_recovered": 0.95533203125, "frac_alive": 0.99993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 128}}