Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/ae.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_0.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_24.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_244.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_2441.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_77.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_772.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_7720.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/config.json +32 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/eval_results.json +1 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/ae.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_0.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_24.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_244.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_2441.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_77.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_772.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_7720.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/config.json +32 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/eval_results.json +1 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/ae.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_0.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_24.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_244.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_2441.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_77.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_772.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_7720.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/config.json +34 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/ae.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_0.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_24.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_244.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_2441.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_77.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_772.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_7720.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/config.json +34 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/ae.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_0.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_24.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_244.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_2441.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_77.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_772.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_7720.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/config.json +32 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_3/ae.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_3/checkpoints/ae_0.pt +3 -0
- home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_3/checkpoints/ae_24.pt +3 -0
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75ccc08fcd272b0cb276969b00e8d56dc800158116a6f49814a6f75c5f5b1b77
|
| 3 |
+
size 1208232982
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:912a6613ae5d68427a340080cda9b03d1a1b1f75232d5a1df2a3fe60ca6b977b
|
| 3 |
+
size 1208233130
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3195652411607107207f72b3cf15955ba67e9840cdde11cb0fa3594ac0b9cd5d
|
| 3 |
+
size 1208233140
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df2e345156fe0041769e6c09cfaf6bbd0eefd9aa1a256fea853342eee338d2ce
|
| 3 |
+
size 1208233150
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:382c651f5b2236f840c3b0b07efb2fe2a203276f9ec05e87a85415d3989e3ac4
|
| 3 |
+
size 1208233224
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_77.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa5db0c4f45d775a42507fa2f8c876c7b07ab97f509785e598ecbc0f2ab57fe5
|
| 3 |
+
size 1208233140
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a53185f964d31aa970f6d00153c3d1013e648e14c9ac39bb347ef2190648ee45
|
| 3 |
+
size 1208233150
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b612c3b2f931ff91d718a8780d554cce00f9b3269b1ecba6d76c51a0b9a6497d
|
| 3 |
+
size 1208233224
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 24414,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 19531,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1152,
|
| 13 |
+
"seed": 0,
|
| 14 |
+
"activation_dim": 2304,
|
| 15 |
+
"dict_size": 65536,
|
| 16 |
+
"k": 20,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "google/gemma-2-2b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
|
| 21 |
+
"submodule_name": "resid_post_layer_12"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 2304,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 244,
|
| 27 |
+
"ctx_len": 1024,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 65.3975, "l1_loss": 289.87, "l0": 19.8868505859375, "frac_variance_explained": 0.78646484375, "cossim": 0.887578125, "l2_ratio": 0.88759765625, "relative_reconstruction_bias": 0.99931640625, "loss_original": 2.152490234375, "loss_reconstructed": 2.3981640625, "loss_zero": 12.4375, "frac_recovered": 0.975390625, "frac_alive": 0.463592529296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ab86a9d0f5bb031303a4773a773e8df03987bd0c28c4e8f50b432902ac567b8
|
| 3 |
+
size 1208232982
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:781a4fb5540fb733307a7c1c92a07194d3c9ecf12ceb6c43336d0bf51aa228bb
|
| 3 |
+
size 1208233130
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34990f22af8bd09deed1818ce273dcd7478411d072195fc670e6e3126e4d8cf4
|
| 3 |
+
size 1208233140
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bb0ce33af7079a2772fcbb1ed5b87db9a24a8966bb1256a27ddce8fed419acb
|
| 3 |
+
size 1208233150
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a44d3d47adbc1b3fe0c584fd27d2fb91d2630c734d16660c429ae31f9aa3047
|
| 3 |
+
size 1208233224
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_77.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aef15dc4473a8204991d8bf0195cdee90ed7a3698b68e516affa9f5e6f165624
|
| 3 |
+
size 1208233140
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a1160be2fb0028955413ef16480650ca3b897a241d89361f69b03c4f8603351
|
| 3 |
+
size 1208233150
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e5939eed1317ac2ac32c5398bba3ef2ca4af9291614b895eddf197f31650f62
|
| 3 |
+
size 1208233224
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 24414,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 19531,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1152,
|
| 13 |
+
"seed": 0,
|
| 14 |
+
"activation_dim": 2304,
|
| 15 |
+
"dict_size": 65536,
|
| 16 |
+
"k": 40,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "google/gemma-2-2b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
|
| 21 |
+
"submodule_name": "resid_post_layer_12"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 2304,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 244,
|
| 27 |
+
"ctx_len": 1024,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 58.46125, "l1_loss": 389.56, "l0": 39.7816357421875, "frac_variance_explained": 0.8316796875, "cossim": 0.91072265625, "l2_ratio": 0.91125, "relative_reconstruction_bias": 0.999765625, "loss_original": 2.152490234375, "loss_reconstructed": 2.277099609375, "loss_zero": 12.4375, "frac_recovered": 0.98771484375, "frac_alive": 0.638336181640625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e717c3a46dc9e237e9da5e6803d4ef79780857449811093b582f672aae53773d
|
| 3 |
+
size 1208495858
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67f84f69cef26b4c5726711f5d1a510d558ad1e19a85f810a95f90027a1dd6a8
|
| 3 |
+
size 1208496206
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98503a6b837eb0b25ba71bbd77f9313882596b501c656d04e1d2d2f0c6313a38
|
| 3 |
+
size 1208496220
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62e45c75a75fb332bc7f1bc7c6a640c342eebfe674fb5cbadd518f7dcb23768f
|
| 3 |
+
size 1208496234
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b06e297f0918379995a9616a76d9b0198b36c7c48ea345e101ef8d49272a253
|
| 3 |
+
size 1208496312
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_77.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13d893392b42fde20c7d4ad4109b9f59dd8108d1bdc2c4b036524f3aeb23a8a1
|
| 3 |
+
size 1208496220
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c00055ab434376c8e9b48ecc83d17c56ea7d20f5dc831476da92904373ac7c9d
|
| 3 |
+
size 1208496234
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62afc7375bef72e86405be0ce869a3b6aa29d97d6ed0b0e164e667ae48624b3a
|
| 3 |
+
size 1208496312
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_10/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "LeverageScoreTrainer",
|
| 4 |
+
"dict_class": "LeverageScoreSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 24414,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 19531,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"leverage_update_freq": 10,
|
| 13 |
+
"leverage_sample_multiplier": 1.0,
|
| 14 |
+
"top_k_aux": 1152,
|
| 15 |
+
"seed": 0,
|
| 16 |
+
"activation_dim": 2304,
|
| 17 |
+
"dict_size": 65536,
|
| 18 |
+
"k": 320,
|
| 19 |
+
"device": "cuda:0",
|
| 20 |
+
"layer": 12,
|
| 21 |
+
"lm_name": "google/gemma-2-2b",
|
| 22 |
+
"wandb_name": "LeverageScoreTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_10",
|
| 23 |
+
"submodule_name": "resid_post_layer_12"
|
| 24 |
+
},
|
| 25 |
+
"buffer": {
|
| 26 |
+
"d_submodule": 2304,
|
| 27 |
+
"io": "out",
|
| 28 |
+
"n_ctxs": 244,
|
| 29 |
+
"ctx_len": 1024,
|
| 30 |
+
"refresh_batch_size": 4,
|
| 31 |
+
"out_batch_size": 2048,
|
| 32 |
+
"device": "cuda:0"
|
| 33 |
+
}
|
| 34 |
+
}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9bb1104ac1dc51d92b98cba92928a5a123d24ba4564200865418aeb6d239c85
|
| 3 |
+
size 1208495858
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fc26e2195476c71ada385341ae2220702ba6fbb6b57009c47c61ab09b19d54a
|
| 3 |
+
size 1208496206
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f37eed7710075c70ef96be5e1821fe27bafc32ee02bd1ce8996923a7f60f3056
|
| 3 |
+
size 1208496220
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb436c7988f378fc1c5ebdebe11b3028a2669ae209679d8da27e157be84ce5af
|
| 3 |
+
size 1208496234
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14e8d5ef1cc7042dcc8efe949184179545e691235de8150874904f41558c6e15
|
| 3 |
+
size 1208496312
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_77.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b00f1e08ccb75e4e6e384819f89bf6457b3b85e84bfe75f3f1035c2afaff2754
|
| 3 |
+
size 1208496220
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1da4962551d2f91d1ffe862d5ee1be5b2fa340ec0c42e1ee9fffc64fcf32e17
|
| 3 |
+
size 1208496234
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d7b7094ddb136f403a8c77fe696d657918fb5aa464c940270015c1607a24a1b
|
| 3 |
+
size 1208496312
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_11/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "LeverageScoreTrainer",
|
| 4 |
+
"dict_class": "LeverageScoreSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 24414,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 19531,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"leverage_update_freq": 10,
|
| 13 |
+
"leverage_sample_multiplier": 1.0,
|
| 14 |
+
"top_k_aux": 1152,
|
| 15 |
+
"seed": 0,
|
| 16 |
+
"activation_dim": 2304,
|
| 17 |
+
"dict_size": 65536,
|
| 18 |
+
"k": 640,
|
| 19 |
+
"device": "cuda:0",
|
| 20 |
+
"layer": 12,
|
| 21 |
+
"lm_name": "google/gemma-2-2b",
|
| 22 |
+
"wandb_name": "LeverageScoreTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_11",
|
| 23 |
+
"submodule_name": "resid_post_layer_12"
|
| 24 |
+
},
|
| 25 |
+
"buffer": {
|
| 26 |
+
"d_submodule": 2304,
|
| 27 |
+
"io": "out",
|
| 28 |
+
"n_ctxs": 244,
|
| 29 |
+
"ctx_len": 1024,
|
| 30 |
+
"refresh_batch_size": 4,
|
| 31 |
+
"out_batch_size": 2048,
|
| 32 |
+
"device": "cuda:0"
|
| 33 |
+
}
|
| 34 |
+
}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab674100c4cae418233d1365d0266aa0986b63d5fea5ad534a02443260d7c60a
|
| 3 |
+
size 1208232982
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79ffdfb53b16678d51f52d270dc2402ed7930058c28f95188dc908f8641b92fa
|
| 3 |
+
size 1208233130
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:383c8047722aefc792af52be080261533cfbbed7bf4f796c68427ea20b988447
|
| 3 |
+
size 1208233140
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_244.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9387e20addba9f25a31da8356a3072b01b34a7bcc1c05c4599a3493076455ebb
|
| 3 |
+
size 1208233150
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_2441.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df5ed1c13eacc666d2095b1ea8e49b51e77872f225c23fec75c3abc69eaf66ee
|
| 3 |
+
size 1208233224
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_77.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b60a0ac6a895feb7d447c292cf30a00fd127baea0bce61bd36d6f4c720ee352
|
| 3 |
+
size 1208233140
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_772.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc77851e35471e7c5e1b35ee9f33a0fd3808678dfc0654ad2d5244774f919d56
|
| 3 |
+
size 1208233150
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/checkpoints/ae_7720.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d15f43f4caa61425bf8aaedfbbdfbd5d8d4595dfab01c25b5ff881b4c6f3400c
|
| 3 |
+
size 1208233224
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_2/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 24414,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 19531,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1152,
|
| 13 |
+
"seed": 0,
|
| 14 |
+
"activation_dim": 2304,
|
| 15 |
+
"dict_size": 65536,
|
| 16 |
+
"k": 60,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "google/gemma-2-2b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
|
| 21 |
+
"submodule_name": "resid_post_layer_12"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 2304,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 244,
|
| 27 |
+
"ctx_len": 1024,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_3/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e95d9c74614843aba20afff247a86a694609fb0578f5382f9b7aa7448f59d73b
|
| 3 |
+
size 1208232982
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_3/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84a1bf3cdc4b4bf17608c6e281f6772ba57bfd1a321c193c777bb23fc6b9f513
|
| 3 |
+
size 1208233130
|
home/ubuntu/leverage-SAEs/leverageSAEs/checkpoints/._gemma_batch_leverage_google_gemma-2-2b_batch_top_k_leverage_score_2.5ratio/resid_post_layer_12/trainer_3/checkpoints/ae_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57860a2c054a85850dafe78ca473f1b47b336b78d59186831962bff77653fb1f
|
| 3 |
+
size 1208233140
|