Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json +29 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json +29 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json +31 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json +31 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json +32 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json +32 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json +32 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json +32 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json +32 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json +32 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json +29 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json +29 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json +29 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json +29 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json +31 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json +31 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json +1 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt +3 -0
- saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json +31 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98a7218b51e24617e6f77dcf389d76123924cca494f2b4d523f3510aa6ac6bcb
|
| 3 |
+
size 469843624
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainerAprilUpdate",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.012,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"steps": 244140,
|
| 12 |
+
"decay_start": 195312,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 31,
|
| 16 |
+
"lm_name": "google/gemma-2-9b",
|
| 17 |
+
"wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_0",
|
| 18 |
+
"submodule_name": "resid_post_layer_31"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 3584,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 122,
|
| 24 |
+
"ctx_len": 2048,
|
| 25 |
+
"refresh_batch_size": 4,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 170.82, "l1_loss": 1930.28, "l0": 583.6903955078125, "frac_variance_explained": 0.7142578125, "cossim": 0.89908203125, "l2_ratio": 0.88638671875, "relative_reconstruction_bias": 0.989296875, "frac_alive": 0.74481201171875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:157841a44ff8e2e23de041bb650b5f50f988da7ded3f1ef6006585cd0b9d55c8
|
| 3 |
+
size 469843624
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainerAprilUpdate",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.015,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"steps": 244140,
|
| 12 |
+
"decay_start": 195312,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 31,
|
| 16 |
+
"lm_name": "google/gemma-2-9b",
|
| 17 |
+
"wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_1",
|
| 18 |
+
"submodule_name": "resid_post_layer_31"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 3584,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 122,
|
| 24 |
+
"ctx_len": 2048,
|
| 25 |
+
"refresh_batch_size": 4,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 178.87, "l1_loss": 1581.2, "l0": 416.1990014648438, "frac_variance_explained": 0.6869921875, "cossim": 0.8888671875, "l2_ratio": 0.8763671875, "relative_reconstruction_bias": 0.98818359375, "frac_alive": 0.74310302734375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91ddfd838ff20aa0ccac54d003e582b984e2b0fccbfc0a641f900247fb348c3a
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 520,
|
| 16 |
+
"device": "cuda:0",
|
| 17 |
+
"layer": 31,
|
| 18 |
+
"lm_name": "google/gemma-2-9b",
|
| 19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_10",
|
| 20 |
+
"submodule_name": "resid_post_layer_31"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 4,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda:0"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 141.775, "l1_loss": 5995.04, "l0": 520.0, "frac_variance_explained": 0.798984375, "cossim": 0.9305078125, "l2_ratio": 0.93306640625, "relative_reconstruction_bias": 1.0034765625, "frac_alive": 0.98553466796875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcf0596070383f3d071e666fa2b0be0f1283783774443101515c57b98b296298
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 820,
|
| 16 |
+
"device": "cuda:0",
|
| 17 |
+
"layer": 31,
|
| 18 |
+
"lm_name": "google/gemma-2-9b",
|
| 19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_11",
|
| 20 |
+
"submodule_name": "resid_post_layer_31"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 4,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda:0"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 127.6625, "l1_loss": 8207.52, "l0": 819.96537109375, "frac_variance_explained": 0.83650390625, "cossim": 0.944140625, "l2_ratio": 0.94552734375, "relative_reconstruction_bias": 1.003046875, "frac_alive": 0.9775390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:063aaf424f9a8a9c12a748b5bd8fabb5b5d8ddaa6d3d761ccb1a62f524e6c777
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1792,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"activation_dim": 3584,
|
| 15 |
+
"dict_size": 16384,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 31,
|
| 19 |
+
"lm_name": "google/gemma-2-9b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_12",
|
| 21 |
+
"submodule_name": "resid_post_layer_31"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 3584,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 122,
|
| 27 |
+
"ctx_len": 2048,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 193.81, "l1_loss": 1294.76, "l0": 64.83081787109376, "frac_variance_explained": 0.636875, "cossim": 0.86908203125, "l2_ratio": 0.880234375, "relative_reconstruction_bias": 1.0128515625, "frac_alive": 0.80010986328125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c17443a8052cae19fcab904cc3179ab910280b23710e1adea9e1b2494959158
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1792,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"activation_dim": 3584,
|
| 15 |
+
"dict_size": 16384,
|
| 16 |
+
"k": 80,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 31,
|
| 19 |
+
"lm_name": "google/gemma-2-9b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_13",
|
| 21 |
+
"submodule_name": "resid_post_layer_31"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 3584,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 122,
|
| 27 |
+
"ctx_len": 2048,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 182.97, "l1_loss": 1810.48, "l0": 109.14227294921875, "frac_variance_explained": 0.6745703125, "cossim": 0.88337890625, "l2_ratio": 0.8923828125, "relative_reconstruction_bias": 1.010859375, "frac_alive": 0.8055419921875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bb2a5e191e5de65648bc66e7c4d9e6ecc745b59301d4bf1518c2576a0098e25
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1792,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"activation_dim": 3584,
|
| 15 |
+
"dict_size": 16384,
|
| 16 |
+
"k": 160,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 31,
|
| 19 |
+
"lm_name": "google/gemma-2-9b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_14",
|
| 21 |
+
"submodule_name": "resid_post_layer_31"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 3584,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 122,
|
| 27 |
+
"ctx_len": 2048,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 165.44, "l1_loss": 3043.84, "l0": 231.51020263671876, "frac_variance_explained": 0.731875, "cossim": 0.905234375, "l2_ratio": 0.91205078125, "relative_reconstruction_bias": 1.0090625, "frac_alive": 0.87921142578125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f72ffad12c39db2c87a1bd08cc4c30c6bb1e8866071cad6ddd164df1a4ac177
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1792,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"activation_dim": 3584,
|
| 15 |
+
"dict_size": 16384,
|
| 16 |
+
"k": 320,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 31,
|
| 19 |
+
"lm_name": "google/gemma-2-9b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_15",
|
| 21 |
+
"submodule_name": "resid_post_layer_31"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 3584,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 122,
|
| 27 |
+
"ctx_len": 2048,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 146.4, "l1_loss": 5059.04, "l0": 488.029560546875, "frac_variance_explained": 0.7890625, "cossim": 0.92640625, "l2_ratio": 0.9309375, "relative_reconstruction_bias": 1.0066796875, "frac_alive": 0.90704345703125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2de842e70901f5df27a3f6ce261254274cdecf174afcbb1eb0f9d89a694529b1
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1792,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"activation_dim": 3584,
|
| 15 |
+
"dict_size": 16384,
|
| 16 |
+
"k": 520,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 31,
|
| 19 |
+
"lm_name": "google/gemma-2-9b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_16",
|
| 21 |
+
"submodule_name": "resid_post_layer_31"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 3584,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 122,
|
| 27 |
+
"ctx_len": 2048,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 131.415, "l1_loss": 7060.32, "l0": 786.8075756835938, "frac_variance_explained": 0.830390625, "cossim": 0.94095703125, "l2_ratio": 0.9434765625, "relative_reconstruction_bias": 1.0042578125, "frac_alive": 0.9190673828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59f25bb554235264ec86315cde39858a286435e701e546c85479e543742a1315
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 1792,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"activation_dim": 3584,
|
| 15 |
+
"dict_size": 16384,
|
| 16 |
+
"k": 820,
|
| 17 |
+
"device": "cuda:0",
|
| 18 |
+
"layer": 31,
|
| 19 |
+
"lm_name": "google/gemma-2-9b",
|
| 20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_17",
|
| 21 |
+
"submodule_name": "resid_post_layer_31"
|
| 22 |
+
},
|
| 23 |
+
"buffer": {
|
| 24 |
+
"d_submodule": 3584,
|
| 25 |
+
"io": "out",
|
| 26 |
+
"n_ctxs": 122,
|
| 27 |
+
"ctx_len": 2048,
|
| 28 |
+
"refresh_batch_size": 4,
|
| 29 |
+
"out_batch_size": 2048,
|
| 30 |
+
"device": "cuda:0"
|
| 31 |
+
}
|
| 32 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 114.0575, "l1_loss": 10167.36, "l0": 1232.5474438476563, "frac_variance_explained": 0.87095703125, "cossim": 0.9550390625, "l2_ratio": 0.95412109375, "relative_reconstruction_bias": 1.0001953125, "frac_alive": 0.877197265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c239f541af061dac2d5ba272cbc7088fb462a775f4df194671b1a6deb168ed8
|
| 3 |
+
size 469843624
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainerAprilUpdate",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.02,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"steps": 244140,
|
| 12 |
+
"decay_start": 195312,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 31,
|
| 16 |
+
"lm_name": "google/gemma-2-9b",
|
| 17 |
+
"wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_2",
|
| 18 |
+
"submodule_name": "resid_post_layer_31"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 3584,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 122,
|
| 24 |
+
"ctx_len": 2048,
|
| 25 |
+
"refresh_batch_size": 4,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 188.35, "l1_loss": 1242.88, "l0": 272.23369873046875, "frac_variance_explained": 0.65490234375, "cossim": 0.8765625, "l2_ratio": 0.86462890625, "relative_reconstruction_bias": 0.987734375, "frac_alive": 0.7396240234375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37420d8aa8c835623db9886f7440d6597bfc95c63eec52464079e1a5575f8d2d
|
| 3 |
+
size 469843624
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainerAprilUpdate",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.03,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"steps": 244140,
|
| 12 |
+
"decay_start": 195312,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 31,
|
| 16 |
+
"lm_name": "google/gemma-2-9b",
|
| 17 |
+
"wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_3",
|
| 18 |
+
"submodule_name": "resid_post_layer_31"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 3584,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 122,
|
| 24 |
+
"ctx_len": 2048,
|
| 25 |
+
"refresh_batch_size": 4,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 200.405, "l1_loss": 910.36, "l0": 153.67116943359375, "frac_variance_explained": 0.61333984375, "cossim": 0.86, "l2_ratio": 0.84359375, "relative_reconstruction_bias": 0.98267578125, "frac_alive": 0.73577880859375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d5e105012c4222bd3a9901873e949b5469ee8e996899df4f882c1b0a6d1916f
|
| 3 |
+
size 469843624
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainerAprilUpdate",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.04,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"steps": 244140,
|
| 12 |
+
"decay_start": 195312,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 31,
|
| 16 |
+
"lm_name": "google/gemma-2-9b",
|
| 17 |
+
"wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_4",
|
| 18 |
+
"submodule_name": "resid_post_layer_31"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 3584,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 122,
|
| 24 |
+
"ctx_len": 2048,
|
| 25 |
+
"refresh_batch_size": 4,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 208.625, "l1_loss": 737.86, "l0": 103.07125, "frac_variance_explained": 0.58234375, "cossim": 0.84818359375, "l2_ratio": 0.83033203125, "relative_reconstruction_bias": 0.97833984375, "frac_alive": 0.73297119140625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a4cd5233ab7740c7f6a017bb39362bd92ced07c870cee7fcdd1a0fdbd9ed2e7
|
| 3 |
+
size 469843624
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainerAprilUpdate",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.06,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"steps": 244140,
|
| 12 |
+
"decay_start": 195312,
|
| 13 |
+
"seed": 3407,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 31,
|
| 16 |
+
"lm_name": "google/gemma-2-9b",
|
| 17 |
+
"wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_5",
|
| 18 |
+
"submodule_name": "resid_post_layer_31"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 3584,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 122,
|
| 24 |
+
"ctx_len": 2048,
|
| 25 |
+
"refresh_batch_size": 4,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 221.23, "l1_loss": 536.56, "l0": 56.86413818359375, "frac_variance_explained": 0.53671875, "cossim": 0.82921875, "l2_ratio": 0.80978515625, "relative_reconstruction_bias": 0.97203125, "frac_alive": 0.728759765625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa974e2a849fb648664a1012cfaa639ed5b1010eaf4f31c775319a1136d776b5
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 50,
|
| 16 |
+
"device": "cuda:0",
|
| 17 |
+
"layer": 31,
|
| 18 |
+
"lm_name": "google/gemma-2-9b",
|
| 19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_6",
|
| 20 |
+
"submodule_name": "resid_post_layer_31"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 4,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda:0"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 197.0, "l1_loss": 1147.16, "l0": 50.0, "frac_variance_explained": 0.62369140625, "cossim": 0.86560546875, "l2_ratio": 0.8712109375, "relative_reconstruction_bias": 1.00546875, "frac_alive": 0.83428955078125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21fa5cb38b1e74960e40b58eb417ce868b7c65111244523bda3b901d3c2ba371
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 80,
|
| 16 |
+
"device": "cuda:0",
|
| 17 |
+
"layer": 31,
|
| 18 |
+
"lm_name": "google/gemma-2-9b",
|
| 19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_7",
|
| 20 |
+
"submodule_name": "resid_post_layer_31"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 4,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda:0"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 186.14, "l1_loss": 1509.64, "l0": 80.0, "frac_variance_explained": 0.6618359375, "cossim": 0.8799609375, "l2_ratio": 0.88447265625, "relative_reconstruction_bias": 1.0038671875, "frac_alive": 0.912109375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:596fd8b134be06f70b707b8bad38732dca76fcf12856d505b088c0d0f617fdb2
|
| 3 |
+
size 469843990
|
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 160,
|
| 16 |
+
"device": "cuda:0",
|
| 17 |
+
"layer": 31,
|
| 18 |
+
"lm_name": "google/gemma-2-9b",
|
| 19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_8",
|
| 20 |
+
"submodule_name": "resid_post_layer_31"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 4,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda:0"
|
| 30 |
+
}
|
| 31 |
+
}
|