AwesomeInterpretability commited on
Commit
647db18
·
verified ·
1 Parent(s): b5a5898

Upload folder using huggingface_hub

Browse files
Files changed (36) hide show
  1. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_0/ae.pt +3 -0
  2. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_0/config.json +28 -0
  3. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_0/eval_results.json +1 -0
  4. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_1/ae.pt +3 -0
  5. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_1/config.json +28 -0
  6. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_1/eval_results.json +1 -0
  7. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_10/ae.pt +3 -0
  8. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_10/config.json +29 -0
  9. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_10/eval_results.json +1 -0
  10. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_11/ae.pt +3 -0
  11. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_11/config.json +29 -0
  12. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_11/eval_results.json +1 -0
  13. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_2/ae.pt +3 -0
  14. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_2/config.json +28 -0
  15. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_2/eval_results.json +1 -0
  16. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_3/ae.pt +3 -0
  17. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_3/config.json +28 -0
  18. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_3/eval_results.json +1 -0
  19. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_4/ae.pt +3 -0
  20. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_4/config.json +28 -0
  21. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_4/eval_results.json +1 -0
  22. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_5/ae.pt +3 -0
  23. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_5/config.json +28 -0
  24. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_5/eval_results.json +1 -0
  25. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_6/ae.pt +3 -0
  26. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_6/config.json +29 -0
  27. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_6/eval_results.json +1 -0
  28. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_7/ae.pt +3 -0
  29. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_7/config.json +29 -0
  30. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_7/eval_results.json +1 -0
  31. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_8/ae.pt +3 -0
  32. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_8/config.json +29 -0
  33. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_8/eval_results.json +1 -0
  34. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_9/ae.pt +3 -0
  35. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_9/config.json +29 -0
  36. saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_9/eval_results.json +1 -0
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d84e4f04fdf8c73b480d1847bd3f3cafc2cca09ebad9737ca766960837a005
3
+ size 469975062
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_0/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda:0",
14
+ "layer": 31,
15
+ "lm_name": "google/gemma-2-9b",
16
+ "wandb_name": "GatedTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_0",
17
+ "submodule_name": "resid_post_layer_31"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 4,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 129.475, "l1_loss": 4698.88, "l0": 851.4930541992187, "frac_variance_explained": 0.83517578125, "cossim": 0.943671875, "l2_ratio": 0.95328125, "relative_reconstruction_bias": 1.010234375, "frac_alive": 0.99969482421875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37265778b96e4483b71ed2d37adf96bc758ef66a270fb5a56c4c61c97d1ecee1
3
+ size 469975062
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_1/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.018,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda:0",
14
+ "layer": 31,
15
+ "lm_name": "google/gemma-2-9b",
16
+ "wandb_name": "GatedTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_1",
17
+ "submodule_name": "resid_post_layer_31"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 4,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 147.325, "l1_loss": 3421.44, "l0": 485.14355224609375, "frac_variance_explained": 0.7877734375, "cossim": 0.92603515625, "l2_ratio": 0.93662109375, "relative_reconstruction_bias": 1.012109375, "frac_alive": 0.99957275390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8fb90b30ea011358f2cbda7f6cfaf7972d4d5b0ced637d6bf6b6bd2441cff43
3
+ size 469909279
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_10/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 31,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_10",
14
+ "submodule_name": "resid_post_layer_31",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 520
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_10/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 166.575, "l1_loss": 4647.2, "l0": 798.1393188476562, "frac_variance_explained": 0.7728125, "cossim": 0.9116796875, "l2_ratio": 1.0146875, "relative_reconstruction_bias": 1.075546875, "frac_alive": 0.9434814453125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f2a589d118244f34c0ffa5bad3d92ddaf3d270ee839d87d93981dcec6a6d5a7
3
+ size 469909279
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_11/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 31,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_11",
14
+ "submodule_name": "resid_post_layer_31",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 820
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_11/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 167.445, "l1_loss": 6110.56, "l0": 1219.2402807617189, "frac_variance_explained": 0.79876953125, "cossim": 0.91375, "l2_ratio": 1.0494921875, "relative_reconstruction_bias": 1.1087109375, "frac_alive": 0.916015625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6043d6b801086152c035a18d1414271ef9acb2760c37dff597c580f3dd5bf17f
3
+ size 469975062
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_2/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.024,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda:0",
14
+ "layer": 31,
15
+ "lm_name": "google/gemma-2-9b",
16
+ "wandb_name": "GatedTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_2",
17
+ "submodule_name": "resid_post_layer_31"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 4,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 158.52, "l1_loss": 2711.28, "l0": 317.59068603515624, "frac_variance_explained": 0.7526171875, "cossim": 0.91349609375, "l2_ratio": 0.9250390625, "relative_reconstruction_bias": 1.0134765625, "frac_alive": 0.9993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59427277211619ceb46505253b1d0072a4de7902ca54647750ce92397e12b9c1
3
+ size 469975062
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_3/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda:0",
14
+ "layer": 31,
15
+ "lm_name": "google/gemma-2-9b",
16
+ "wandb_name": "GatedTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_3",
17
+ "submodule_name": "resid_post_layer_31"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 4,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 176.195, "l1_loss": 1839.64, "l0": 153.8281005859375, "frac_variance_explained": 0.69720703125, "cossim": 0.89248046875, "l2_ratio": 0.90625, "relative_reconstruction_bias": 1.0146875, "frac_alive": 0.96002197265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f031c7d25bb319470a73720e45533e98cb9bde2f92bd1df665808dae17b13d2
3
+ size 469975062
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_4/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda:0",
14
+ "layer": 31,
15
+ "lm_name": "google/gemma-2-9b",
16
+ "wandb_name": "GatedTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_4",
17
+ "submodule_name": "resid_post_layer_31"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 4,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 191.84, "l1_loss": 1343.12, "l0": 83.0945849609375, "frac_variance_explained": 0.6426171875, "cossim": 0.87208984375, "l2_ratio": 0.88474609375, "relative_reconstruction_bias": 1.0132421875, "frac_alive": 0.7711181640625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c6c8644f3770e426bbe7bad66b7c7f6e778bbcf566af58235ff8f5009195cc
3
+ size 469975062
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_5/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.08,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda:0",
14
+ "layer": 31,
15
+ "lm_name": "google/gemma-2-9b",
16
+ "wandb_name": "GatedTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_5",
17
+ "submodule_name": "resid_post_layer_31"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 4,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 204.52, "l1_loss": 1045.64, "l0": 51.75321044921875, "frac_variance_explained": 0.59876953125, "cossim": 0.85453125, "l2_ratio": 0.86896484375, "relative_reconstruction_bias": 1.0145703125, "frac_alive": 0.599609375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a586808fd95eea30f0d3283914ab7e86789dcc81c4d3b1c89a954260920889a
3
+ size 469909279
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_6/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 31,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_6",
14
+ "submodule_name": "resid_post_layer_31",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 50
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_6/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 194.8, "l1_loss": 1277.8, "l0": 64.866396484375, "frac_variance_explained": 0.63359375, "cossim": 0.86810546875, "l2_ratio": 0.87791015625, "relative_reconstruction_bias": 1.0101171875, "frac_alive": 0.79437255859375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce55638139f00e54e806c21173a473e4689bb13e45b7660015c3389e2e5ddae5
3
+ size 469909279
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_7/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 31,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_7",
14
+ "submodule_name": "resid_post_layer_31",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 80
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_7/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 182.8, "l1_loss": 1683.64, "l0": 109.25167236328124, "frac_variance_explained": 0.674453125, "cossim": 0.8840625, "l2_ratio": 0.89279296875, "relative_reconstruction_bias": 1.0100390625, "frac_alive": 0.85064697265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2b533fdd20f00d2ffbda5944dc7044ba843ddcd57ac59c55b9ed2685782e48a
3
+ size 469909279
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_8/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 31,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_8",
14
+ "submodule_name": "resid_post_layer_31",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 160
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_8/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 164.39, "l1_loss": 2474.32, "l0": 236.04934814453125, "frac_variance_explained": 0.734453125, "cossim": 0.90666015625, "l2_ratio": 0.9155859375, "relative_reconstruction_bias": 1.011796875, "frac_alive": 0.93707275390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_9/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:980a3b3c2447255b02e8e1f30078972109d67a2a621d72b54ac94630bcae712c
3
+ size 469909279
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_9/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 31,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_9",
14
+ "submodule_name": "resid_post_layer_31",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 320
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
saes_google_gemma-2-9b_gated_jump_relu/resid_post_layer_31/trainer_9/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 143.28, "l1_loss": 3752.0, "l0": 521.1142944335937, "frac_variance_explained": 0.79845703125, "cossim": 0.929765625, "l2_ratio": 0.9350390625, "relative_reconstruction_bias": 1.0071875, "frac_alive": 0.989501953125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}