AwesomeInterpretability committed on Aug 28, 2025

Commit

0ed8675

verified ·

1 Parent(s): 647db18

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json +29 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json +29 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json +31 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json +31 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json +32 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json +32 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json +32 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json +32 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json +32 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json +32 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json +29 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json +29 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json +29 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json +29 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json +31 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json +31 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json +1 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt +3 -0
saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json +31 -0

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98a7218b51e24617e6f77dcf389d76123924cca494f2b4d523f3510aa6ac6bcb
+size 469843624

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.012,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_0",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 170.82, "l1_loss": 1930.28, "l0": 583.6903955078125, "frac_variance_explained": 0.7142578125, "cossim": 0.89908203125, "l2_ratio": 0.88638671875, "relative_reconstruction_bias": 0.989296875, "frac_alive": 0.74481201171875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:157841a44ff8e2e23de041bb650b5f50f988da7ded3f1ef6006585cd0b9d55c8
+size 469843624

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.015,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_1",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 178.87, "l1_loss": 1581.2, "l0": 416.1990014648438, "frac_variance_explained": 0.6869921875, "cossim": 0.8888671875, "l2_ratio": 0.8763671875, "relative_reconstruction_bias": 0.98818359375, "frac_alive": 0.74310302734375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91ddfd838ff20aa0ccac54d003e582b984e2b0fccbfc0a641f900247fb348c3a
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 520,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_10",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 141.775, "l1_loss": 5995.04, "l0": 520.0, "frac_variance_explained": 0.798984375, "cossim": 0.9305078125, "l2_ratio": 0.93306640625, "relative_reconstruction_bias": 1.0034765625, "frac_alive": 0.98553466796875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcf0596070383f3d071e666fa2b0be0f1283783774443101515c57b98b296298
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 820,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_11",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 127.6625, "l1_loss": 8207.52, "l0": 819.96537109375, "frac_variance_explained": 0.83650390625, "cossim": 0.944140625, "l2_ratio": 0.94552734375, "relative_reconstruction_bias": 1.003046875, "frac_alive": 0.9775390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:063aaf424f9a8a9c12a748b5bd8fabb5b5d8ddaa6d3d761ccb1a62f524e6c777
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 50,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_12",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 193.81, "l1_loss": 1294.76, "l0": 64.83081787109376, "frac_variance_explained": 0.636875, "cossim": 0.86908203125, "l2_ratio": 0.880234375, "relative_reconstruction_bias": 1.0128515625, "frac_alive": 0.80010986328125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c17443a8052cae19fcab904cc3179ab910280b23710e1adea9e1b2494959158
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_13",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 182.97, "l1_loss": 1810.48, "l0": 109.14227294921875, "frac_variance_explained": 0.6745703125, "cossim": 0.88337890625, "l2_ratio": 0.8923828125, "relative_reconstruction_bias": 1.010859375, "frac_alive": 0.8055419921875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bb2a5e191e5de65648bc66e7c4d9e6ecc745b59301d4bf1518c2576a0098e25
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_14",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 165.44, "l1_loss": 3043.84, "l0": 231.51020263671876, "frac_variance_explained": 0.731875, "cossim": 0.905234375, "l2_ratio": 0.91205078125, "relative_reconstruction_bias": 1.0090625, "frac_alive": 0.87921142578125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f72ffad12c39db2c87a1bd08cc4c30c6bb1e8866071cad6ddd164df1a4ac177
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 320,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_15",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 146.4, "l1_loss": 5059.04, "l0": 488.029560546875, "frac_variance_explained": 0.7890625, "cossim": 0.92640625, "l2_ratio": 0.9309375, "relative_reconstruction_bias": 1.0066796875, "frac_alive": 0.90704345703125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2de842e70901f5df27a3f6ce261254274cdecf174afcbb1eb0f9d89a694529b1
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 520,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_16",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 131.415, "l1_loss": 7060.32, "l0": 786.8075756835938, "frac_variance_explained": 0.830390625, "cossim": 0.94095703125, "l2_ratio": 0.9434765625, "relative_reconstruction_bias": 1.0042578125, "frac_alive": 0.9190673828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59f25bb554235264ec86315cde39858a286435e701e546c85479e543742a1315
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1792,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 820,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_17",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 114.0575, "l1_loss": 10167.36, "l0": 1232.5474438476563, "frac_variance_explained": 0.87095703125, "cossim": 0.9550390625, "l2_ratio": 0.95412109375, "relative_reconstruction_bias": 1.0001953125, "frac_alive": 0.877197265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c239f541af061dac2d5ba272cbc7088fb462a775f4df194671b1a6deb168ed8
+size 469843624

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.02,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_2",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 188.35, "l1_loss": 1242.88, "l0": 272.23369873046875, "frac_variance_explained": 0.65490234375, "cossim": 0.8765625, "l2_ratio": 0.86462890625, "relative_reconstruction_bias": 0.987734375, "frac_alive": 0.7396240234375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37420d8aa8c835623db9886f7440d6597bfc95c63eec52464079e1a5575f8d2d
+size 469843624

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.03,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_3",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 200.405, "l1_loss": 910.36, "l0": 153.67116943359375, "frac_variance_explained": 0.61333984375, "cossim": 0.86, "l2_ratio": 0.84359375, "relative_reconstruction_bias": 0.98267578125, "frac_alive": 0.73577880859375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d5e105012c4222bd3a9901873e949b5469ee8e996899df4f882c1b0a6d1916f
+size 469843624

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.04,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_4",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 208.625, "l1_loss": 737.86, "l0": 103.07125, "frac_variance_explained": 0.58234375, "cossim": 0.84818359375, "l2_ratio": 0.83033203125, "relative_reconstruction_bias": 0.97833984375, "frac_alive": 0.73297119140625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4cd5233ab7740c7f6a017bb39362bd92ced07c870cee7fcdd1a0fdbd9ed2e7
+size 469843624

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.06,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 3407,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_5",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 221.23, "l1_loss": 536.56, "l0": 56.86413818359375, "frac_variance_explained": 0.53671875, "cossim": 0.82921875, "l2_ratio": 0.80978515625, "relative_reconstruction_bias": 0.97203125, "frac_alive": 0.728759765625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa974e2a849fb648664a1012cfaa639ed5b1010eaf4f31c775319a1136d776b5
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 50,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_6",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 197.0, "l1_loss": 1147.16, "l0": 50.0, "frac_variance_explained": 0.62369140625, "cossim": 0.86560546875, "l2_ratio": 0.8712109375, "relative_reconstruction_bias": 1.00546875, "frac_alive": 0.83428955078125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21fa5cb38b1e74960e40b58eb417ce868b7c65111244523bda3b901d3c2ba371
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_7",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 186.14, "l1_loss": 1509.64, "l0": 80.0, "frac_variance_explained": 0.6618359375, "cossim": 0.8799609375, "l2_ratio": 0.88447265625, "relative_reconstruction_bias": 1.0038671875, "frac_alive": 0.912109375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:596fd8b134be06f70b707b8bad38732dca76fcf12856d505b088c0d0f617fdb2
+size 469843990

saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3407,
+        "activation_dim": 3584,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 31,
+        "lm_name": "google/gemma-2-9b",
+        "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_8",
+        "submodule_name": "resid_post_layer_31"
+    },
+    "buffer": {
+        "d_submodule": 3584,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}