Delete trained_saes
Browse files- trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/ae.pt +0 -3
- trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/config.json +0 -32
- trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/eval_results.json +0 -1
trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/ae.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7fd9e44e2ace29771e0dd496366882948650e4b72fbfa522726eda50261580b0
|
| 3 |
-
size 302066710
|
|
|
|
|
|
|
|
|
|
|
|
trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/config.json
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"trainer": {
|
| 3 |
-
"trainer_class": "BatchTopKTrainer",
|
| 4 |
-
"dict_class": "BatchTopKSAE",
|
| 5 |
-
"lr": 0.0003,
|
| 6 |
-
"steps": 2441,
|
| 7 |
-
"auxk_alpha": 0.03125,
|
| 8 |
-
"warmup_steps": 1000,
|
| 9 |
-
"decay_start": 1952,
|
| 10 |
-
"threshold_beta": 0.999,
|
| 11 |
-
"threshold_start_step": 1000,
|
| 12 |
-
"top_k_aux": 1152,
|
| 13 |
-
"seed": 0,
|
| 14 |
-
"activation_dim": 2304,
|
| 15 |
-
"dict_size": 16384,
|
| 16 |
-
"k": 320,
|
| 17 |
-
"device": "cuda:6",
|
| 18 |
-
"layer": 12,
|
| 19 |
-
"lm_name": "google/gemma-2-2b",
|
| 20 |
-
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
|
| 21 |
-
"submodule_name": "resid_post_layer_12"
|
| 22 |
-
},
|
| 23 |
-
"buffer": {
|
| 24 |
-
"d_submodule": 2304,
|
| 25 |
-
"io": "out",
|
| 26 |
-
"n_ctxs": 1953,
|
| 27 |
-
"ctx_len": 128,
|
| 28 |
-
"refresh_batch_size": 4,
|
| 29 |
-
"out_batch_size": 2048,
|
| 30 |
-
"device": "cuda:6"
|
| 31 |
-
}
|
| 32 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trained_saes/batch_top_k/google_gemma-2-2b_batch_top_k/resid_post_layer_12/trainer_0/eval_results.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"l2_loss": 52.62875, "l1_loss": 1287.76, "l0": 310.368125, "frac_variance_explained": 0.81931640625, "cossim": 0.93048828125, "l2_ratio": 0.92916015625, "relative_reconstruction_bias": 0.99939453125, "loss_original": 2.6491796875, "loss_reconstructed": 3.083125, "loss_zero": 12.4375, "frac_recovered": 0.95533203125, "frac_alive": 0.99993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 128}}
|
|
|
|
|
|