Narmeen07 committed on
Commit
10504ca
·
verified ·
1 Parent(s): 13d91c1

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/ae.pt +3 -0
  2. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_0.pt +3 -0
  3. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_244.pt +3 -0
  4. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_2441.pt +3 -0
  5. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_24414.pt +3 -0
  6. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_772.pt +3 -0
  7. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_7720.pt +3 -0
  8. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_77203.pt +3 -0
  9. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/config.json +32 -0
  10. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/eval_results.json +1 -0
  11. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/ae.pt +3 -0
  12. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_0.pt +3 -0
  13. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_244.pt +3 -0
  14. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_2441.pt +3 -0
  15. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_24414.pt +3 -0
  16. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_772.pt +3 -0
  17. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_7720.pt +3 -0
  18. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_77203.pt +3 -0
  19. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/config.json +32 -0
  20. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/eval_results.json +1 -0
  21. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/ae.pt +3 -0
  22. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_0.pt +3 -0
  23. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_244.pt +3 -0
  24. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_2441.pt +3 -0
  25. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_24414.pt +3 -0
  26. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_772.pt +3 -0
  27. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_7720.pt +3 -0
  28. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_77203.pt +3 -0
  29. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/config.json +32 -0
  30. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/eval_results.json +1 -0
  31. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/ae.pt +3 -0
  32. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_0.pt +3 -0
  33. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_244.pt +3 -0
  34. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_2441.pt +3 -0
  35. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_24414.pt +3 -0
  36. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_772.pt +3 -0
  37. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_7720.pt +3 -0
  38. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_77203.pt +3 -0
  39. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/config.json +36 -0
  40. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/eval_results.json +1 -0
  41. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/ae.pt +3 -0
  42. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_0.pt +3 -0
  43. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_244.pt +3 -0
  44. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_2441.pt +3 -0
  45. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_24414.pt +3 -0
  46. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_772.pt +3 -0
  47. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_7720.pt +3 -0
  48. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_77203.pt +3 -0
  49. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/config.json +36 -0
  50. sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/eval_results.json +1 -0
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe40bd7b4805a2188d03f27b59e7a105ec9687088b52c7a061967190aff0a2d2
3
+ size 1208232982
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912a6613ae5d68427a340080cda9b03d1a1b1f75232d5a1df2a3fe60ca6b977b
3
+ size 1208233130
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_244.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e187cc55ff8c2e08aa0620181c5c4bfcbf26780f2acfdeb9da567ac8d03ca8df
3
+ size 1208233150
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_2441.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6ee91c798ea2891af54167358a7eb5af2d11355c307d0e361777c3be801165
3
+ size 1208233224
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_24414.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f40a0f45f9b0dfa4b4e7dce85425f70c6fe0b040db4f8622728829ab82a2929a
3
+ size 1208233426
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_772.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a749e26d3c3cac3417bef8fc41041782419dc827d50ad2fb1a4449a833585ee9
3
+ size 1208233150
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_7720.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fbb7e452d9216ef23d8116fc01bbe7c537bc25ec749625458093e1b44b3187
3
+ size 1208233224
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/checkpoints/ae_77203.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36bcb18f08f39f5697ac0d8d83fd9d2a2cd0c506c7b7bda47f8fbf8732a82d27
3
+ size 1208233426
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1152,
13
+ "seed": 0,
14
+ "activation_dim": 2304,
15
+ "dict_size": 65536,
16
+ "k": 20,
17
+ "device": "cuda:0",
18
+ "layer": 12,
19
+ "lm_name": "google/gemma-2-2b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
21
+ "submodule_name": "resid_post_layer_12"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 2304,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 60.7625, "l1_loss": 284.62, "l0": 19.9164794921875, "frac_variance_explained": 0.8149609375, "cossim": 0.9034375, "l2_ratio": 0.90369140625, "relative_reconstruction_bias": 1.0023828125, "loss_original": 2.152646484375, "loss_reconstructed": 2.321103515625, "loss_zero": 12.4375, "frac_recovered": 0.98353515625, "frac_alive": 0.7290802001953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d842493f0ce04c1ad670224583cc3329238e9911ee028caa725d0d654c4cff8e
3
+ size 1208232982
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ffdfb53b16678d51f52d270dc2402ed7930058c28f95188dc908f8641b92fa
3
+ size 1208233130
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_244.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c53a3f799a8f85c82b59ee32d3f25f262777c7650ac4caa2cf16ac96b3b4e6b9
3
+ size 1208233150
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_2441.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdffbe6d35fe2de78df5a6ff6a28d03c0a559730b4aceed9a42845d88d1853ae
3
+ size 1208233224
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_24414.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c63b05bb9b3347a3aa22736fa9fe96b8367deb3867f4c5e6908aa6b3ac79cc
3
+ size 1208233426
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_772.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510aed3e97b7b1ccee2b323e282a28f5f25a689e8897a0f08baf20057895971f
3
+ size 1208233150
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_7720.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c076f1ae18d980e83453d66362aaaf8709e9e471ff663e01a40ef2bd45f5c63
3
+ size 1208233224
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/checkpoints/ae_77203.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3580d7e3a4c723859ef9a6b8e397f981552a7b13be8c00982fcce47256d7a0
3
+ size 1208233426
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1152,
13
+ "seed": 0,
14
+ "activation_dim": 2304,
15
+ "dict_size": 65536,
16
+ "k": 60,
17
+ "device": "cuda:0",
18
+ "layer": 12,
19
+ "lm_name": "google/gemma-2-2b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
21
+ "submodule_name": "resid_post_layer_12"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 2304,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 52.39125, "l1_loss": 458.7, "l0": 59.78791015625, "frac_variance_explained": 0.86453125, "cossim": 0.92958984375, "l2_ratio": 0.92955078125, "relative_reconstruction_bias": 1.0026953125, "loss_original": 2.152646484375, "loss_reconstructed": 2.22775390625, "loss_zero": 12.4375, "frac_recovered": 0.99208984375, "frac_alive": 0.807220458984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c2a7cb34c2e6e14b65061f386d59e4a316251467e3a858d83afed1d8cbba94
3
+ size 1208232982
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076547e7ee110a4a2270daa29aa0597ee04225b858ddfd9061977a6513f4de15
3
+ size 1208233130
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_244.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89dbbd979289cad69668e49c70a180b29b4b414ea44932d18dfca7cd9cc9da3e
3
+ size 1208233150
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_2441.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dca9f4fbe1c94e6e61cab1588e0983399dd05695ea603228570697c3b95f86fd
3
+ size 1208233224
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_24414.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:161597924c347f5e7ac0f7c283ac5dd307f475b12308d273910edbf9763982c2
3
+ size 1208233426
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_772.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a245faf12c3097abf61bbea9e990727acd982b09c048ed8305717935b50f900a
3
+ size 1208233150
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_7720.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79d42f02cd180f3217077b4e5e6d3385777fc6e3081d8e6138f55a2d0187005
3
+ size 1208233224
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/checkpoints/ae_77203.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03518c1a5a5ae2426c0ef4113a59ce6ac9df5bb2fe861961ac5ecba3e960462
3
+ size 1208233426
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1152,
13
+ "seed": 0,
14
+ "activation_dim": 2304,
15
+ "dict_size": 65536,
16
+ "k": 100,
17
+ "device": "cuda:0",
18
+ "layer": 12,
19
+ "lm_name": "google/gemma-2-2b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
21
+ "submodule_name": "resid_post_layer_12"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 2304,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/BatchTopKTrainer-google_gemma-2-2b-resid_post_layer_12_trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 48.6875, "l1_loss": 586.82, "l0": 99.3204052734375, "frac_variance_explained": 0.88283203125, "cossim": 0.93861328125, "l2_ratio": 0.9382421875, "relative_reconstruction_bias": 1.0021484375, "loss_original": 2.152646484375, "loss_reconstructed": 2.211435546875, "loss_zero": 12.4375, "frac_recovered": 0.99359375, "frac_alive": 0.8199920654296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0181eb6c7dad35f8433bff4b82b370e8173fe7e00b97690bbee182deb534786
3
+ size 1208496226
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2bd97a1caee8e0089218bddb6d1fcec9dd8fa2a035daf2e47182dbd373932b3
3
+ size 1208496706
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_244.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccf4dc0ffab6fff93b3cd12743830e0c4ab530a9464846a30137560b860a6e8f
3
+ size 1208496738
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_2441.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84fc339799f7ae291f8b53661f51f168b3e60d19e315064440159c4c8b849210
3
+ size 1208496882
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_24414.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e1ed7c43fdbf9446c838608fe662d2c0cd736b748761cf8355109adf067afcc
3
+ size 1208497090
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_772.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8921c3032da8e059b4c5e1df8d8126ba1d71f99fbc73f976d8cfebef43fab9f
3
+ size 1208496738
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_7720.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d29a018a8aff008f1b84074cf6d0dcb7a89c47666ee7d3b24e8695fd4e0af3e0
3
+ size 1208496882
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/checkpoints/ae_77203.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf304d0a47ad8a44d9f1f7516f096933492d79c02e2f7619b15ff0506041329
3
+ size 1208497090
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "SampledActivationTrainer",
4
+ "dict_class": "SampledActivationSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "entropy",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 1152,
17
+ "seed": 0,
18
+ "activation_dim": 2304,
19
+ "dict_size": 65536,
20
+ "k": 100,
21
+ "device": "cuda:0",
22
+ "layer": 12,
23
+ "lm_name": "google/gemma-2-2b",
24
+ "wandb_name": "SampledActivationTrainer-entropy-google/gemma-2-2b-resid_post_layer_12_trainer_12",
25
+ "submodule_name": "resid_post_layer_12"
26
+ },
27
+ "buffer": {
28
+ "d_submodule": 2304,
29
+ "io": "out",
30
+ "n_ctxs": 244,
31
+ "ctx_len": 1024,
32
+ "refresh_batch_size": 4,
33
+ "out_batch_size": 2048,
34
+ "device": "cuda:0"
35
+ }
36
+ }
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_12/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 137.01, "l1_loss": 209.49, "l0": 44.241494140625, "frac_variance_explained": 0.1137109375, "cossim": 0.48615234375, "l2_ratio": 0.7834765625, "relative_reconstruction_bias": 1.46609375, "loss_original": 2.152646484375, "loss_reconstructed": 13.5984375, "loss_zero": 12.4375, "frac_recovered": -0.11469314575195312, "frac_alive": 0.04339599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbf4dac5de0633da4d582ff1b017df174f0c6445429b24c2a35892c602c0678d
3
+ size 1208496226
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d0dc8174d9f25f4182bf9e185dbf38103f34407c9edf4ecbef4b06a8af6bb4
3
+ size 1208496706
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_244.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6e99e27e6273548600979e431bdb0ab1bd997a1ea691357366781c598e18b9
3
+ size 1208496738
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_2441.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ebc829633f44514df1904bf06c5e2c60adf023f449980d62d323a72f5aab830
3
+ size 1208496882
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_24414.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a62f183c68da69920db46f0d814f0e2a5bbc5201c80c6ad733647406cbb411a
3
+ size 1208497090
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_772.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2371dc896b18e9197a275547edffb1c80d6b904ea3abf37064786d39faff439c
3
+ size 1208496738
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_7720.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abcb73ae56274da47594bf21f5a331b635ddaa8b5901dfd7f845d81eef200c9e
3
+ size 1208496882
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/checkpoints/ae_77203.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f93ffbb48a158bb8213c606e4e6b5a1797d3fdd7178576970fd5ec9af4babe5
3
+ size 1208497090
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "SampledActivationTrainer",
4
+ "dict_class": "SampledActivationSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "entropy",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 1152,
17
+ "seed": 0,
18
+ "activation_dim": 2304,
19
+ "dict_size": 65536,
20
+ "k": 20,
21
+ "device": "cuda:0",
22
+ "layer": 12,
23
+ "lm_name": "google/gemma-2-2b",
24
+ "wandb_name": "SampledActivationTrainer-entropy-google/gemma-2-2b-resid_post_layer_12_trainer_4",
25
+ "submodule_name": "resid_post_layer_12"
26
+ },
27
+ "buffer": {
28
+ "d_submodule": 2304,
29
+ "io": "out",
30
+ "n_ctxs": 244,
31
+ "ctx_len": 1024,
32
+ "refresh_batch_size": 4,
33
+ "out_batch_size": 2048,
34
+ "device": "cuda:0"
35
+ }
36
+ }
sae_bundle_google_gemma-2-2b_batch_top_k_sampled_sae/resid_post_layer_12/SampledActivationTrainer-entropy-google_gemma-2-2b-resid_post_layer_12_trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 167.82, "l1_loss": 83.60875, "l0": 7.6167822265625, "frac_variance_explained": 0.0778125, "cossim": 0.09481430053710938, "l2_ratio": 0.6545703125, "relative_reconstruction_bias": -1.485078125, "loss_original": 2.152646484375, "loss_reconstructed": 11.581875, "loss_zero": 12.4375, "frac_recovered": 0.08792800903320312, "frac_alive": 0.021514892578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}