AwesomeInterpretability committed on Aug 5, 2025

Commit

2582ed9

verified ·

1 Parent(s): 895e244

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/config.json +32 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/config.json +32 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/config.json +29 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/config.json +32 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/config.json +32 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/eval_results.json +1 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/ae.pt +3 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/config.json +32 -0

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b71df4e2751d9da58c880b85e0b708c4a7b3bc84dc51f0aa460db5da4ab8de0
+size 469843624

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.012,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_0",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 89.09, "l1_loss": 1378.96, "l0": 1085.1874340820314, "frac_variance_explained": 0.77041015625, "cossim": 0.90994140625, "l2_ratio": 0.884453125, "relative_reconstruction_bias": 0.99240234375, "frac_alive": 0.764892578125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f6dfc6fccec855b0270832531a9092659f1076e701178c16051317d43059e38
+size 469843624

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.015,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_1",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 96.165, "l1_loss": 1069.92, "l0": 748.5599877929687, "frac_variance_explained": 0.7313671875, "cossim": 0.89447265625, "l2_ratio": 0.87078125, "relative_reconstruction_bias": 0.9985546875, "frac_alive": 0.76300048828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e2311b433e9c33d67c2e3b336d3634c49325690fefc0df64638e6358d4f97e55
+size 469843990

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 520,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_10",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 77.4025, "l1_loss": 3337.92, "l0": 709.4781958007812, "frac_variance_explained": 0.828125, "cossim": 0.92984375, "l2_ratio": 0.92970703125, "relative_reconstruction_bias": 1.0009375, "frac_alive": 0.85882568359375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c685f19dc9ea10c04c4bcba75596d4bc709b917f0620688ed6e2a74018f1788a
+size 469843990

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 820,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_11",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 66.6125, "l1_loss": 4846.72, "l0": 1101.4039184570313, "frac_variance_explained": 0.87087890625, "cossim": 0.9459375, "l2_ratio": 0.94115234375, "relative_reconstruction_bias": 0.99490234375, "frac_alive": 0.69659423828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd356475673932a2cdd2501c9007f13ea394fcd87d1c2e7e6c61b1b18a647512
+size 469909279

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_12",
+        "submodule_name": "resid_post_layer_20",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 50
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 113.1225, "l1_loss": 670.74, "l0": 62.107958984375, "frac_variance_explained": 0.63556640625, "cossim": 0.8490625, "l2_ratio": 0.85751953125, "relative_reconstruction_bias": 1.00828125, "frac_alive": 0.794921875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5105ec73026d976d765b82a928c2b97845f89cf91a77dc14f36a024825a9e4b5
+size 469909279

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_13",
+        "submodule_name": "resid_post_layer_20",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 80
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 106.7075, "l1_loss": 850.78, "l0": 105.3335107421875, "frac_variance_explained": 0.67470703125, "cossim": 0.86701171875, "l2_ratio": 0.8744921875, "relative_reconstruction_bias": 1.0080078125, "frac_alive": 0.87646484375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09a8e5a9e1ab58d0587b48cd9a49fb6b752ddd4fe3c5b6d67164a5f988bb5540
+size 469909279

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_14",
+        "submodule_name": "resid_post_layer_20",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 160
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 96.0775, "l1_loss": 1289.84, "l0": 228.18613525390626, "frac_variance_explained": 0.73494140625, "cossim": 0.89296875, "l2_ratio": 0.89806640625, "relative_reconstruction_bias": 1.007421875, "frac_alive": 0.9716796875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cef0eac303a5fbd346e1f6a37026541a71d41d5e61636dd738ea64d624c2806
+size 469909279

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_15",
+        "submodule_name": "resid_post_layer_20",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 320
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 83.7375, "l1_loss": 1982.44, "l0": 475.87285888671875, "frac_variance_explained": 0.79875, "cossim": 0.91837890625, "l2_ratio": 0.92162109375, "relative_reconstruction_bias": 1.0037109375, "frac_alive": 0.9964599609375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37230e38854bdb9718c7a88373e19fbc1a482e7201231d54e98370f3b0ba3a05
+size 469909279

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_16",
+        "submodule_name": "resid_post_layer_20",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 520
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 72.4525, "l1_loss": 2695.04, "l0": 763.3169091796875, "frac_variance_explained": 0.8498046875, "cossim": 0.93865234375, "l2_ratio": 0.93953125, "relative_reconstruction_bias": 1.0021484375, "frac_alive": 0.97613525390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46c50e326d6f0e8e8c75085f5c8abbbdfb1d4223ccf7aa5cd6d506f18ba98405
+size 469909279

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_17",
+        "submodule_name": "resid_post_layer_20",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 820
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 88.475, "l1_loss": 3319.2, "l0": 1094.02822265625, "frac_variance_explained": 0.82087890625, "cossim": 0.915390625, "l2_ratio": 1.0532421875, "relative_reconstruction_bias": 1.0951953125, "frac_alive": 0.8267822265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c13d63e813e9b1128c0b4f82b7787f4415f3e5240174d527113c47794c48c216
+size 469843624

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.02,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_2",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 103.8625, "l1_loss": 782.32, "l0": 452.529501953125, "frac_variance_explained": 0.68568359375, "cossim": 0.87564453125, "l2_ratio": 0.85255859375, "relative_reconstruction_bias": 1.00189453125, "frac_alive": 0.76043701171875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc6c7741e89ecb1f21ad5fc5d9fa2399190e5ff543b5f1e1d21176b91408b9c1
+size 469843624

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.03,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_3",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 112.78, "l1_loss": 524.97, "l0": 226.3606103515625, "frac_variance_explained": 0.6291796875, "cossim": 0.8526171875, "l2_ratio": 0.8326171875, "relative_reconstruction_bias": 1.01302734375, "frac_alive": 0.7562255859375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55839739e1dde5dfb250880c8737bdebe0b3502eca2f4fecd6330e8720a42a1e
+size 469843624

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.04,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_4",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 118.235, "l1_loss": 401.73, "l0": 140.89248291015625, "frac_variance_explained": 0.58724609375, "cossim": 0.83705078125, "l2_ratio": 0.8162890625, "relative_reconstruction_bias": 1.0190234375, "frac_alive": 0.75213623046875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aabc742dd6f851593f23b713d949249ef667211e71c7bbb2307600767b87b1e9
+size 469843624

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.06,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_5",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 125.69, "l1_loss": 276.76, "l0": 73.5276318359375, "frac_variance_explained": 0.53919921875, "cossim": 0.8143359375, "l2_ratio": 0.79361328125, "relative_reconstruction_bias": 1.0208203125, "frac_alive": 0.74652099609375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54d29287a996d1e1b1c8d5ddc34e70791f28ad8ea22a2f5b129adc553e499cd3
+size 469843990

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 50,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_6",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 113.03, "l1_loss": 643.72, "l0": 59.92653564453125, "frac_variance_explained": 0.63427734375, "cossim": 0.84900390625, "l2_ratio": 0.85697265625, "relative_reconstruction_bias": 1.0082421875, "frac_alive": 0.93328857421875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e527bcd86993559a7d07cc2d3a5b9ed83df35ac0f3652b0980566f878f633f0e
+size 469843990

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_7",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 106.885, "l1_loss": 847.28, "l0": 101.578388671875, "frac_variance_explained": 0.67421875, "cossim": 0.86607421875, "l2_ratio": 0.87265625, "relative_reconstruction_bias": 1.00671875, "frac_alive": 0.9200439453125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f648ea19eb580d53fa01f59aea7db3b93e1fbc4a264bae9328fc391a54d380a7
+size 469843990

trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 20,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_8",
+        "submodule_name": "resid_post_layer_20"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}