webcrg commited on
Commit
11276d1
·
verified ·
1 Parent(s): 935c823

Upload folder using huggingface_hub

Browse files
Files changed (48) hide show
  1. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/ae.pt +3 -0
  2. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/checkpoints/ae_0.pt +3 -0
  3. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/config.json +29 -0
  4. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/ae.pt +3 -0
  5. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/checkpoints/ae_0.pt +3 -0
  6. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/config.json +29 -0
  7. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/ae.pt +3 -0
  8. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/checkpoints/ae_0.pt +3 -0
  9. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/config.json +29 -0
  10. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/ae.pt +3 -0
  11. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/checkpoints/ae_0.pt +3 -0
  12. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/config.json +29 -0
  13. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/ae.pt +3 -0
  14. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/checkpoints/ae_0.pt +3 -0
  15. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/config.json +29 -0
  16. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/ae.pt +3 -0
  17. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/checkpoints/ae_0.pt +3 -0
  18. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/config.json +29 -0
  19. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/ae.pt +3 -0
  20. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/checkpoints/ae_0.pt +3 -0
  21. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/config.json +29 -0
  22. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/ae.pt +3 -0
  23. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/checkpoints/ae_0.pt +3 -0
  24. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/config.json +29 -0
  25. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/ae.pt +3 -0
  26. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/checkpoints/ae_0.pt +3 -0
  27. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/config.json +29 -0
  28. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/ae.pt +3 -0
  29. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/checkpoints/ae_0.pt +3 -0
  30. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/config.json +29 -0
  31. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/ae.pt +3 -0
  32. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/checkpoints/ae_0.pt +3 -0
  33. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/config.json +29 -0
  34. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/ae.pt +3 -0
  35. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/checkpoints/ae_0.pt +3 -0
  36. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/config.json +29 -0
  37. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/ae.pt +3 -0
  38. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/checkpoints/ae_0.pt +3 -0
  39. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/config.json +29 -0
  40. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/ae.pt +3 -0
  41. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/checkpoints/ae_0.pt +3 -0
  42. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/config.json +29 -0
  43. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/ae.pt +3 -0
  44. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/checkpoints/ae_0.pt +3 -0
  45. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/config.json +29 -0
  46. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/ae.pt +3 -0
  47. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/checkpoints/ae_0.pt +3 -0
  48. gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/config.json +29 -0
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08c118cf13cfad78d41869b23018638f3f3fc269a7ce7c256913faa8b843788
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_0/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 20,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2bfb728ae57634e7e5f527ecddf9bc156088de8df8515d49aab60adcf4793d4
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_1/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 20,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bae769118b245ff9edcba61bcef2dbd5cd5fb387a18481d503b5e16dc0d6ad2
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_10/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 125,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820f6b4c0b1e66a4d530fb360986d63bdf509e9fd51e2ee38c116e4527eff946
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_11/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 125,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f688ed9340e158343c8798a903d557e934bbafe9e953c9bbc64470e7a90b81a9
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_12/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 313,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305017b664e1888dfae76815b3a70729d93e0848904b1d2b6e5c8c81257bdf12
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_13/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 313,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bbbc75e32f3ee7c1c81fad6c4a24eda0ed491d274d2262bed6f2b524c55b20
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_14/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 313,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d876e42c61b8e7f3559e6ca9b8ce6a39d4ebdcd41b0ecefcebda465eb828b3
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_15/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 313,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e3d003092c270ce14d3bf939a58f77e16cc2cfd53653c5363764d8487e2cba
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_2/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 20,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d85b0bc0c1212246c0a983e79860cebc342801b3c3a19333e3e07873de8825
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_3/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 20,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e3608fcb6501dcd3da4563d06284f325ff9b53618b208ddc0183167b3cf55e9
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_4/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 50,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950a47c06016d9220d7ae99a25aca902a94afc414b584abec2d68b7ad7b8c1d2
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_5/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 50,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eae1519cf06910c54cd71d99252d17d302d928e77032f2515151e74d9296f345
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_6/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 50,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f04625f657547022d620bffbf9221f9abca58bb8550f37d62dd0025d58a46c
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_7/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 50,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 1.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-1.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11ca8109bf927c514e58d5870122da7cd802f1bb17dcd6c9f989fe563fc30d09
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_8/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 125,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 0.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-0.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bff051ff37ef1ddbf0645ab45aa203fbd9013ac1598e65b086e7e1040dc62f1
3
+ size 151038760
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/checkpoints/ae_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25498784ccb4edde22c1a12360a194b39dca6725ac5cce5767c74735309782f
3
+ size 151038776
gemma-2-2b_layer-4_width-2pow13_date-1204/trainer_9/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopKAdditivity",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 48828,
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 8192,
10
+ "k": 125,
11
+ "auxk_alpha": 0.03125,
12
+ "additivity_coeff": 0.0,
13
+ "intersection_coeff": 1.0,
14
+ "device": "cuda:0",
15
+ "layer": 4,
16
+ "lm_name": "google/gemma-2-2b",
17
+ "wandb_name": "TopKTrainer_Additivity-google/gemma-2-2b-resid_post_layer_4-add_coeff-0.0-inters_coeff-1.0",
18
+ "submodule_name": "resid_post_layer_4"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 2304,
22
+ "io": "out",
23
+ "n_ctxs": 8192,
24
+ "ctx_len": 128,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }