diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a63dd72844352f728006abd3b497571f410eae28 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a7218b51e24617e6f77dcf389d76123924cca494f2b4d523f3510aa6ac6bcb +size 469843624 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2feaed6bc34f155902ba29e16e84754d562a9d3 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/config.json @@ -0,0 +1,29 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainerAprilUpdate", + "activation_dim": 3584, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "sparsity_warmup_steps": 5000, + "steps": 244140, + "decay_start": 195312, + "seed": 3407, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_0", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..79b668bbd71aff2c03a408d125bc5fb18a1fbefb --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 170.82, "l1_loss": 1930.28, "l0": 583.6903955078125, "frac_variance_explained": 0.7142578125, "cossim": 0.89908203125, "l2_ratio": 0.88638671875, "relative_reconstruction_bias": 0.989296875, "frac_alive": 0.74481201171875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..59fa113f0fdc2a256be4bb4691a44598ecf5e133 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157841a44ff8e2e23de041bb650b5f50f988da7ded3f1ef6006585cd0b9d55c8 +size 469843624 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e53a7b3136033a0f239f1b695b5c0bc275f4283 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/config.json @@ -0,0 +1,29 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainerAprilUpdate", + "activation_dim": 3584, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "sparsity_warmup_steps": 5000, + "steps": 244140, + "decay_start": 195312, + "seed": 3407, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_1", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c41cdb4b2d3148e153e0533a524c52c95fda4089 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 178.87, "l1_loss": 1581.2, "l0": 416.1990014648438, "frac_variance_explained": 0.6869921875, "cossim": 0.8888671875, "l2_ratio": 0.8763671875, "relative_reconstruction_bias": 0.98818359375, "frac_alive": 0.74310302734375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bf9998a5266327be0e06dda495eea65700e26fb --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ddfd838ff20aa0ccac54d003e582b984e2b0fccbfc0a641f900247fb348c3a +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ede5b6dddd125b0f85d088c49c147dfd7479b413 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/config.json @@ -0,0 +1,31 @@ +{ + "trainer": { + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 520, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_10", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..842293a877ed4eb834a49aad17aba8675671df72 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 141.775, "l1_loss": 5995.04, "l0": 520.0, "frac_variance_explained": 0.798984375, "cossim": 0.9305078125, "l2_ratio": 0.93306640625, "relative_reconstruction_bias": 1.0034765625, "frac_alive": 0.98553466796875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..76dcbd6b46ce99c584549b39ea5759afc8754bcf --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf0596070383f3d071e666fa2b0be0f1283783774443101515c57b98b296298 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f77e98593a1b5d86f025335addd8da2ee8e9dff9 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/config.json @@ -0,0 +1,31 @@ +{ + "trainer": { + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 820, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_11", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..171b53509f1f261794a43ba64583bce2e56b8ff4 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.6625, "l1_loss": 8207.52, "l0": 819.96537109375, "frac_variance_explained": 0.83650390625, "cossim": 0.944140625, "l2_ratio": 0.94552734375, "relative_reconstruction_bias": 1.003046875, "frac_alive": 0.9775390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..27937b21d281014de3eac6efd51ac6772ca6b62f --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:063aaf424f9a8a9c12a748b5bd8fabb5b5d8ddaa6d3d761ccb1a62f524e6c777 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..46f38d227ce4031844c6c1f0297b98f821ba94de --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/config.json @@ -0,0 +1,32 @@ +{ + "trainer": { + "trainer_class": "BatchTopKTrainer", + "dict_class": "BatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 1792, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 50, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_12", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e84eca9b8847143b4f131a1914a79c232fc192a4 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 193.81, "l1_loss": 1294.76, "l0": 64.83081787109376, "frac_variance_explained": 0.636875, "cossim": 0.86908203125, "l2_ratio": 0.880234375, "relative_reconstruction_bias": 1.0128515625, "frac_alive": 0.80010986328125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..24966697a7b4caeb902e3d8759e6c73f3346b220 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c17443a8052cae19fcab904cc3179ab910280b23710e1adea9e1b2494959158 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..09ad977bdf896322c49ff96f645567c615d55d5f --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/config.json @@ -0,0 +1,32 @@ +{ + "trainer": { + "trainer_class": "BatchTopKTrainer", + "dict_class": "BatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 1792, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 80, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_13", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eee86549399178fbd05810959ab94051d48f9b5b --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 182.97, "l1_loss": 1810.48, "l0": 109.14227294921875, "frac_variance_explained": 0.6745703125, "cossim": 0.88337890625, "l2_ratio": 0.8923828125, "relative_reconstruction_bias": 1.010859375, "frac_alive": 0.8055419921875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bc33dd005d191a0985ec419c1783ce4601f4b47 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bb2a5e191e5de65648bc66e7c4d9e6ecc745b59301d4bf1518c2576a0098e25 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5f146aca2de338b2c7b4ef591a6e438a8244276 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/config.json @@ -0,0 +1,32 @@ +{ + "trainer": { + "trainer_class": "BatchTopKTrainer", + "dict_class": "BatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 1792, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 160, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_14", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..efbd89a461994430a73387b33cc19dda18c0a566 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 165.44, "l1_loss": 3043.84, "l0": 231.51020263671876, "frac_variance_explained": 0.731875, "cossim": 0.905234375, "l2_ratio": 0.91205078125, "relative_reconstruction_bias": 1.0090625, "frac_alive": 0.87921142578125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4dafdb9a937b7b19417bd43799dd637c66f04754 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f72ffad12c39db2c87a1bd08cc4c30c6bb1e8866071cad6ddd164df1a4ac177 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce3ead59b8863eba05bcb887ee56b648a8e53296 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/config.json @@ -0,0 +1,32 @@ +{ + "trainer": { + "trainer_class": "BatchTopKTrainer", + "dict_class": "BatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 1792, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 320, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_15", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6466ca203a320c57476e876b36a29abda88e161f --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 146.4, "l1_loss": 5059.04, "l0": 488.029560546875, "frac_variance_explained": 0.7890625, "cossim": 0.92640625, "l2_ratio": 0.9309375, "relative_reconstruction_bias": 1.0066796875, "frac_alive": 0.90704345703125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc97880df8fcc1eb7eca218b76da33879e63a7ef --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de842e70901f5df27a3f6ce261254274cdecf174afcbb1eb0f9d89a694529b1 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d555ba69f351a6532f2afbf9070b1d429e6abd9c --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/config.json @@ -0,0 +1,32 @@ +{ + "trainer": { + "trainer_class": "BatchTopKTrainer", + "dict_class": "BatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 1792, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 520, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_16", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..faa5ba03c29d55d1ce239f4a455c42b72986383c --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.415, "l1_loss": 7060.32, "l0": 786.8075756835938, "frac_variance_explained": 0.830390625, "cossim": 0.94095703125, "l2_ratio": 0.9434765625, "relative_reconstruction_bias": 1.0042578125, "frac_alive": 0.9190673828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbded9673f64b7429566481888bec1adcf9fc612 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f25bb554235264ec86315cde39858a286435e701e546c85479e543742a1315 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88f3d4b3e100fae1864945385a760f6d81f0515 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/config.json @@ -0,0 +1,32 @@ +{ + "trainer": { + "trainer_class": "BatchTopKTrainer", + "dict_class": "BatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 1792, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 820, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_17", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b7195a550889a6ba9f0ceb6b13efb421ab7fc8cd --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 114.0575, "l1_loss": 10167.36, "l0": 1232.5474438476563, "frac_variance_explained": 0.87095703125, "cossim": 0.9550390625, "l2_ratio": 0.95412109375, "relative_reconstruction_bias": 1.0001953125, "frac_alive": 0.877197265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4283a8797275f68bca7ab5a4fd1e3b5ac3c50cc9 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c239f541af061dac2d5ba272cbc7088fb462a775f4df194671b1a6deb168ed8 +size 469843624 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83f88aa5b6b2f7e812e8a54e4dae22e45760d384 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/config.json @@ -0,0 +1,29 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainerAprilUpdate", + "activation_dim": 3584, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "sparsity_warmup_steps": 5000, + "steps": 244140, + "decay_start": 195312, + "seed": 3407, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_2", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b33aebcc4667d65c669c4fa479ccb5112c72c65d --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 188.35, "l1_loss": 1242.88, "l0": 272.23369873046875, "frac_variance_explained": 0.65490234375, "cossim": 0.8765625, "l2_ratio": 0.86462890625, "relative_reconstruction_bias": 0.987734375, "frac_alive": 0.7396240234375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..03ae6868ea813d4cf9608e3255bf67f5a2411d28 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37420d8aa8c835623db9886f7440d6597bfc95c63eec52464079e1a5575f8d2d +size 469843624 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d09b8174396dc1e61a2ba35f874df8bcf570ce8 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/config.json @@ -0,0 +1,29 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainerAprilUpdate", + "activation_dim": 3584, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "sparsity_warmup_steps": 5000, + "steps": 244140, + "decay_start": 195312, + "seed": 3407, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_3", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..20100f61ba4e86239860bcad973fd8345247c70f --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 200.405, "l1_loss": 910.36, "l0": 153.67116943359375, "frac_variance_explained": 0.61333984375, "cossim": 0.86, "l2_ratio": 0.84359375, "relative_reconstruction_bias": 0.98267578125, "frac_alive": 0.73577880859375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc90ba22811b8d60ffb1efa42414d3f68ccbad67 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5e105012c4222bd3a9901873e949b5469ee8e996899df4f882c1b0a6d1916f +size 469843624 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd52cddbe1b8c972805c424c7c310a507146e3e6 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/config.json @@ -0,0 +1,29 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainerAprilUpdate", + "activation_dim": 3584, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "sparsity_warmup_steps": 5000, + "steps": 244140, + "decay_start": 195312, + "seed": 3407, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_4", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1434413cbca14433e4cc3c0988d243d676acd940 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 208.625, "l1_loss": 737.86, "l0": 103.07125, "frac_variance_explained": 0.58234375, "cossim": 0.84818359375, "l2_ratio": 0.83033203125, "relative_reconstruction_bias": 0.97833984375, "frac_alive": 0.73297119140625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0063f668d88c414de0e9206be63363d2012ff2f6 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4cd5233ab7740c7f6a017bb39362bd92ced07c870cee7fcdd1a0fdbd9ed2e7 +size 469843624 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4a51ff89c2100246ea1b9dc6b8bb1349290478cd --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/config.json @@ -0,0 +1,29 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainerAprilUpdate", + "activation_dim": 3584, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "sparsity_warmup_steps": 5000, + "steps": 244140, + "decay_start": 195312, + "seed": 3407, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_31_trainer_5", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cb5d832082ef103c2209a7c133baadb5af988d7d --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 221.23, "l1_loss": 536.56, "l0": 56.86413818359375, "frac_variance_explained": 0.53671875, "cossim": 0.82921875, "l2_ratio": 0.80978515625, "relative_reconstruction_bias": 0.97203125, "frac_alive": 0.728759765625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..77203fd18377413f376ff30d15d57d75670d00c0 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa974e2a849fb648664a1012cfaa639ed5b1010eaf4f31c775319a1136d776b5 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a20a42c00f7af47c4dc513acfa0e2dd3c58d0e59 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/config.json @@ -0,0 +1,31 @@ +{ + "trainer": { + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 50, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_6", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f03ffdf9978f9b521dd3c52494baccbb8b20155d --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 197.0, "l1_loss": 1147.16, "l0": 50.0, "frac_variance_explained": 0.62369140625, "cossim": 0.86560546875, "l2_ratio": 0.8712109375, "relative_reconstruction_bias": 1.00546875, "frac_alive": 0.83428955078125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f7a6f68df18aebba36f3b8324928be29b428de4 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fa5cb38b1e74960e40b58eb417ce868b7c65111244523bda3b901d3c2ba371 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4eaaeb496ae5bfffe2587f3fba57d4711c08f9f0 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/config.json @@ -0,0 +1,31 @@ +{ + "trainer": { + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 80, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_7", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e3fb84a82474f80477e0f6267f8d9fd7c3c89103 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 186.14, "l1_loss": 1509.64, "l0": 80.0, "frac_variance_explained": 0.6618359375, "cossim": 0.8799609375, "l2_ratio": 0.88447265625, "relative_reconstruction_bias": 1.0038671875, "frac_alive": 0.912109375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..19cd9b3179b02b3f53d0cc1856f39060032e50bb --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596fd8b134be06f70b707b8bad38732dca76fcf12856d505b088c0d0f617fdb2 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bcbf3f63d7c7fb7595cd18f38c68a811f2b50701 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/config.json @@ -0,0 +1,31 @@ +{ + "trainer": { + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 160, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_8", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c8a5e9561e91783d6b638f37ed20d4b7a2907040 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 170.875, "l1_loss": 2775.92, "l0": 160.0, "frac_variance_explained": 0.71212890625, "cossim": 0.8984765625, "l2_ratio": 0.9022265625, "relative_reconstruction_bias": 1.0039453125, "frac_alive": 0.985595703125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/ae.pt b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4d7f09e172ef50988cd8665b4cdd26f66c40547 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a5ae03567a0c667cc0f3f59a2dfb1aee7aa87ddac45acabbdaf50492a05d49 +size 469843990 diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/config.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f95a9486407509d951e029b6c7a187a527c609a7 --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/config.json @@ -0,0 +1,31 @@ +{ + "trainer": { + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3407, + "activation_dim": 3584, + "dict_size": 16384, + "k": 320, + "device": "cuda:0", + "layer": 31, + "lm_name": "google/gemma-2-9b", + "wandb_name": "TopKTrainer-google/gemma-2-9b-resid_post_layer_31_trainer_9", + "submodule_name": "resid_post_layer_31" + }, + "buffer": { + "d_submodule": 3584, + "io": "out", + "n_ctxs": 122, + "ctx_len": 2048, + "refresh_batch_size": 4, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/eval_results.json b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf5e6b4b0723f4730ac2f1065501bf7d8898baf --- /dev/null +++ b/saes_google_gemma-2-9b_batch_top_k_top_k_standard_new/resid_post_layer_31/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 154.515, "l1_loss": 4466.88, "l0": 320.0, "frac_variance_explained": 0.76146484375, "cossim": 0.91712890625, "l2_ratio": 0.92029296875, "relative_reconstruction_bias": 1.003671875, "frac_alive": 0.98602294921875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}} \ No newline at end of file