Upload folder using huggingface_hub
Browse files- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/config.json +29 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/ae.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/checkpoints/ae_0.pt +3 -0
- gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/config.json +29 -0
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08c118cf13cfad78d41869b23018638f3f3fc269a7ce7c256913faa8b843788
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 20,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2bfb728ae57634e7e5f527ecddf9bc156088de8df8515d49aab60adcf4793d4
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 20,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bae769118b245ff9edcba61bcef2dbd5cd5fb387a18481d503b5e16dc0d6ad2
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 125,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:820f6b4c0b1e66a4d530fb360986d63bdf509e9fd51e2ee38c116e4527eff946
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 125,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f688ed9340e158343c8798a903d557e934bbafe9e953c9bbc64470e7a90b81a9
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 313,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:305017b664e1888dfae76815b3a70729d93e0848904b1d2b6e5c8c81257bdf12
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 313,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0bbbc75e32f3ee7c1c81fad6c4a24eda0ed491d274d2262bed6f2b524c55b20
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 313,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87d876e42c61b8e7f3559e6ca9b8ce6a39d4ebdcd41b0ecefcebda465eb828b3
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 313,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54e3d003092c270ce14d3bf939a58f77e16cc2cfd53653c5363764d8487e2cba
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 20,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96d85b0bc0c1212246c0a983e79860cebc342801b3c3a19333e3e07873de8825
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 20,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e3608fcb6501dcd3da4563d06284f325ff9b53618b208ddc0183167b3cf55e9
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 50,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:950a47c06016d9220d7ae99a25aca902a94afc414b584abec2d68b7ad7b8c1d2
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 50,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eae1519cf06910c54cd71d99252d17d302d928e77032f2515151e74d9296f345
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 50,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58f04625f657547022d620bffbf9221f9abca58bb8550f37d62dd0025d58a46c
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 50,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 1.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11ca8109bf927c514e58d5870122da7cd802f1bb17dcd6c9f989fe563fc30d09
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 125,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 0.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bff051ff37ef1ddbf0645ab45aa203fbd9013ac1598e65b086e7e1040dc62f1
|
| 3 |
+
size 151038760
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/checkpoints/ae_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
|
| 3 |
+
size 151038776
|
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 48828,
|
| 7 |
+
"seed": 0,
|
| 8 |
+
"activation_dim": 2304,
|
| 9 |
+
"dict_size": 8192,
|
| 10 |
+
"k": 125,
|
| 11 |
+
"auxk_alpha": 0.03125,
|
| 12 |
+
"additivity_coeff": 0.0,
|
| 13 |
+
"intersection_coeff": 1.0,
|
| 14 |
+
"device": "cuda:0",
|
| 15 |
+
"layer": 4,
|
| 16 |
+
"lm_name": "google/gemma-2-2b",
|
| 17 |
+
"wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
|
| 18 |
+
"submodule_name": "resid_post_layer_4"
|
| 19 |
+
},
|
| 20 |
+
"buffer": {
|
| 21 |
+
"d_submodule": 2304,
|
| 22 |
+
"io": "out",
|
| 23 |
+
"n_ctxs": 8192,
|
| 24 |
+
"ctx_len": 128,
|
| 25 |
+
"refresh_batch_size": 32,
|
| 26 |
+
"out_batch_size": 2048,
|
| 27 |
+
"device": "cuda:0"
|
| 28 |
+
}
|
| 29 |
+
}
|