Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/ae.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_0.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_244.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_2441.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_24414.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_772.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_7720.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_77203.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/config.json +32 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/eval_results.json +1 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/ae.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_0.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_244.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_2441.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_24414.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_772.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_7720.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_77203.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/config.json +32 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/eval_results.json +1 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/ae.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_0.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_244.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_2441.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_24414.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_772.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_7720.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_77203.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/config.json +32 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/eval_results.json +1 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/ae.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_0.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_244.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_2441.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_24414.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_772.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_7720.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_77203.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/config.json +36 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/eval_results.json +1 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/ae.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_0.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_244.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_2441.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_24414.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_772.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_7720.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_77203.pt +3 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/config.json +36 -0
- sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/eval_results.json +1 -0
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe40bd7b4805a2188d03f27b59e7a105ec9687088b52c7a061967190aff0a2d2
|
| 3 |
+
size 1208232982
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:912a6613ae5d68427a340080cda9b03d1a1b1f75232d5a1df2a3fe60ca6b977b
|
| 3 |
+
size 1208233130
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e187cc55ff8c2e08aa0620181c5c4bfcbf26780f2acfdeb9da567ac8d03ca8df
|
| 3 |
+
size 1208233150
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb6ee91c798ea2891af54167358a7eb5af2d11355c307d0e361777c3be801165
|
| 3 |
+
size 1208233224
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_24414.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f40a0f45f9b0dfa4b4e7dce85425f70c6fe0b040db4f8622728829ab82a2929a
|
| 3 |
+
size 1208233426
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a749e26d3c3cac3417bef8fc41041782419dc827d50ad2fb1a4449a833585ee9
|
| 3 |
+
size 1208233150
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0fbb7e452d9216ef23d8116fc01bbe7c537bc25ec749625458093e1b44b3187
|
| 3 |
+
size 1208233224
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_77203.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36bcb18f08f39f5697ac0d8d83fd9d2a2cd0c506c7b7bda47f8fbf8732a82d27
|
| 3 |
+
size 1208233426
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1152,
|
| 13 |
+
"seed": 0,
|
| 14 |
+
"activation_dim": 2304,
|
| 15 |
+
"dict_size": 65536,
|
| 16 |
+
"k": 20,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "google/gemma-2-2b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
|
| 21 |
+
"submodule_name": "resid_post_layer_12"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 2304,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 244,
|
| 27 |
+
"ctx_len": 1024,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 60.7625, "l1_loss": 284.62, "l0": 19.9164794921875, "frac_variance_explained": 0.8149609375, "cossim": 0.9034375, "l2_ratio": 0.90369140625, "relative_reconstruction_bias": 1.0023828125, "loss_original": 2.152646484375, "loss_reconstructed": 2.321103515625, "loss_zero": 12.4375, "frac_recovered": 0.98353515625, "frac_alive": 0.7290802001953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d842493f0ce04c1ad670224583cc3329238e9911ee028caa725d0d654c4cff8e
|
| 3 |
+
size 1208232982
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79ffdfb53b16678d51f52d270dc2402ed7930058c28f95188dc908f8641b92fa
|
| 3 |
+
size 1208233130
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c53a3f799a8f85c82b59ee32d3f25f262777c7650ac4caa2cf16ac96b3b4e6b9
|
| 3 |
+
size 1208233150
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdffbe6d35fe2de78df5a6ff6a28d03c0a559730b4aceed9a42845d88d1853ae
|
| 3 |
+
size 1208233224
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_24414.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c63b05bb9b3347a3aa22736fa9fe96b8367deb3867f4c5e6908aa6b3ac79cc
|
| 3 |
+
size 1208233426
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:510aed3e97b7b1ccee2b323e282a28f5f25a689e8897a0f08baf20057895971f
|
| 3 |
+
size 1208233150
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c076f1ae18d980e83453d66362aaaf8709e9e471ff663e01a40ef2bd45f5c63
|
| 3 |
+
size 1208233224
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_77203.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b3580d7e3a4c723859ef9a6b8e397f981552a7b13be8c00982fcce47256d7a0
|
| 3 |
+
size 1208233426
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1152,
|
| 13 |
+
"seed": 0,
|
| 14 |
+
"activation_dim": 2304,
|
| 15 |
+
"dict_size": 65536,
|
| 16 |
+
"k": 60,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "google/gemma-2-2b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
|
| 21 |
+
"submodule_name": "resid_post_layer_12"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 2304,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 244,
|
| 27 |
+
"ctx_len": 1024,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 52.39125, "l1_loss": 458.7, "l0": 59.78791015625, "frac_variance_explained": 0.86453125, "cossim": 0.92958984375, "l2_ratio": 0.92955078125, "relative_reconstruction_bias": 1.0026953125, "loss_original": 2.152646484375, "loss_reconstructed": 2.22775390625, "loss_zero": 12.4375, "frac_recovered": 0.99208984375, "frac_alive": 0.807220458984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84c2a7cb34c2e6e14b65061f386d59e4a316251467e3a858d83afed1d8cbba94
|
| 3 |
+
size 1208232982
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:076547e7ee110a4a2270daa29aa0597ee04225b858ddfd9061977a6513f4de15
|
| 3 |
+
size 1208233130
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89dbbd979289cad69668e49c70a180b29b4b414ea44932d18dfca7cd9cc9da3e
|
| 3 |
+
size 1208233150
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dca9f4fbe1c94e6e61cab1588e0983399dd05695ea603228570697c3b95f86fd
|
| 3 |
+
size 1208233224
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_24414.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:161597924c347f5e7ac0f7c283ac5dd307f475b12308d273910edbf9763982c2
|
| 3 |
+
size 1208233426
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a245faf12c3097abf61bbea9e990727acd982b09c048ed8305717935b50f900a
|
| 3 |
+
size 1208233150
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b79d42f02cd180f3217077b4e5e6d3385777fc6e3081d8e6138f55a2d0187005
|
| 3 |
+
size 1208233224
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_77203.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d03518c1a5a5ae2426c0ef4113a59ce6ac9df5bb2fe861961ac5ecba3e960462
|
| 3 |
+
size 1208233426
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1152,
|
| 13 |
+
"seed": 0,
|
| 14 |
+
"activation_dim": 2304,
|
| 15 |
+
"dict_size": 65536,
|
| 16 |
+
"k": 100,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "google/gemma-2-2b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
|
| 21 |
+
"submodule_name": "resid_post_layer_12"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 2304,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 244,
|
| 27 |
+
"ctx_len": 1024,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 48.6875, "l1_loss": 586.82, "l0": 99.3204052734375, "frac_variance_explained": 0.88283203125, "cossim": 0.93861328125, "l2_ratio": 0.9382421875, "relative_reconstruction_bias": 1.0021484375, "loss_original": 2.152646484375, "loss_reconstructed": 2.211435546875, "loss_zero": 12.4375, "frac_recovered": 0.99359375, "frac_alive": 0.8199920654296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0181eb6c7dad35f8433bff4b82b370e8173fe7e00b97690bbee182deb534786
|
| 3 |
+
size 1208496226
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2bd97a1caee8e0089218bddb6d1fcec9dd8fa2a035daf2e47182dbd373932b3
|
| 3 |
+
size 1208496706
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccf4dc0ffab6fff93b3cd12743830e0c4ab530a9464846a30137560b860a6e8f
|
| 3 |
+
size 1208496738
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84fc339799f7ae291f8b53661f51f168b3e60d19e315064440159c4c8b849210
|
| 3 |
+
size 1208496882
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_24414.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e1ed7c43fdbf9446c838608fe662d2c0cd736b748761cf8355109adf067afcc
|
| 3 |
+
size 1208497090
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8921c3032da8e059b4c5e1df8d8126ba1d71f99fbc73f976d8cfebef43fab9f
|
| 3 |
+
size 1208496738
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d29a018a8aff008f1b84074cf6d0dcb7a89c47666ee7d3b24e8695fd4e0af3e0
|
| 3 |
+
size 1208496882
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_77203.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:edf304d0a47ad8a44d9f1f7516f096933492d79c02e2f7619b15ff0506041329
|
| 3 |
+
size 1208497090
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "SampledActivationTrainer",
|
| 4 |
+
"dict_class": "SampledActivationSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "entropy",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 1152,
|
| 17 |
+
"seed": 0,
|
| 18 |
+
"activation_dim": 2304,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 100,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 12,
|
| 23 |
+
"lm_name": "google/gemma-2-2b",
|
| 24 |
+
"wandb_name": "SampledActivationTrainer-entropy-google/gemma-2-2b-resid_post_layer_12_trainer_12",
|
| 25 |
+
"submodule_name": "resid_post_layer_12"
|
| 26 |
+
},
|
| 27 |
+
"buffer": {
|
| 28 |
+
"d_submodule": 2304,
|
| 29 |
+
"io": "out",
|
| 30 |
+
"n_ctxs": 244,
|
| 31 |
+
"ctx_len": 1024,
|
| 32 |
+
"refresh_batch_size": 4,
|
| 33 |
+
"out_batch_size": 2048,
|
| 34 |
+
"device": "cuda:0"
|
| 35 |
+
}
|
| 36 |
+
}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 137.01, "l1_loss": 209.49, "l0": 44.241494140625, "frac_variance_explained": 0.1137109375, "cossim": 0.48615234375, "l2_ratio": 0.7834765625, "relative_reconstruction_bias": 1.46609375, "loss_original": 2.152646484375, "loss_reconstructed": 13.5984375, "loss_zero": 12.4375, "frac_recovered": -0.11469314575195312, "frac_alive": 0.04339599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbf4dac5de0633da4d582ff1b017df174f0c6445429b24c2a35892c602c0678d
|
| 3 |
+
size 1208496226
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60d0dc8174d9f25f4182bf9e185dbf38103f34407c9edf4ecbef4b06a8af6bb4
|
| 3 |
+
size 1208496706
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd6e99e27e6273548600979e431bdb0ab1bd997a1ea691357366781c598e18b9
|
| 3 |
+
size 1208496738
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ebc829633f44514df1904bf06c5e2c60adf023f449980d62d323a72f5aab830
|
| 3 |
+
size 1208496882
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_24414.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a62f183c68da69920db46f0d814f0e2a5bbc5201c80c6ad733647406cbb411a
|
| 3 |
+
size 1208497090
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2371dc896b18e9197a275547edffb1c80d6b904ea3abf37064786d39faff439c
|
| 3 |
+
size 1208496738
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abcb73ae56274da47594bf21f5a331b635ddaa8b5901dfd7f845d81eef200c9e
|
| 3 |
+
size 1208496882
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_77203.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f93ffbb48a158bb8213c606e4e6b5a1797d3fdd7178576970fd5ec9af4babe5
|
| 3 |
+
size 1208497090
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "SampledActivationTrainer",
|
| 4 |
+
"dict_class": "SampledActivationSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "entropy",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 1152,
|
| 17 |
+
"seed": 0,
|
| 18 |
+
"activation_dim": 2304,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 20,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 12,
|
| 23 |
+
"lm_name": "google/gemma-2-2b",
|
| 24 |
+
"wandb_name": "SampledActivationTrainer-entropy-google/gemma-2-2b-resid_post_layer_12_trainer_4",
|
| 25 |
+
"submodule_name": "resid_post_layer_12"
|
| 26 |
+
},
|
| 27 |
+
"buffer": {
|
| 28 |
+
"d_submodule": 2304,
|
| 29 |
+
"io": "out",
|
| 30 |
+
"n_ctxs": 244,
|
| 31 |
+
"ctx_len": 1024,
|
| 32 |
+
"refresh_batch_size": 4,
|
| 33 |
+
"out_batch_size": 2048,
|
| 34 |
+
"device": "cuda:0"
|
| 35 |
+
}
|
| 36 |
+
}
|
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 167.82, "l1_loss": 83.60875, "l0": 7.6167822265625, "frac_variance_explained": 0.0778125, "cossim": 0.09481430053710938, "l2_ratio": 0.6545703125, "relative_reconstruction_bias": -1.485078125, "loss_original": 2.152646484375, "loss_reconstructed": 11.581875, "loss_zero": 12.4375, "frac_recovered": 0.08792800903320312, "frac_alive": 0.021514892578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|