diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..adbaaa2586ded7dbbeab2cc9de9acdfd7d1206cd
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-0_trainer_0",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 128,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..26e61966c8fcccacc9755da8bcce436e9f7a24ba
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 980,
+    "intrinsic_dim_0.9": 1364,
+    "intrinsic_dim_0.95": 1619,
+    "effective_rank": 1223.4310302734375,
+    "feature_sparsity": 0.9375,
+    "mean_correlation": 0.03588181361556053,
+    "max_correlation": 1.000002145767212,
+    "correlation_std": 0.06462709605693817,
+    "decoder_coactive_mean_sim": 0.0015496726846322417,
+    "decoder_coactive_max_sim": 0.4138309955596924,
+    "decoder_coactive_std_sim": 0.01725614070892334,
+    "encoder_coactive_mean_sim": 0.002769735874608159,
+    "encoder_coactive_max_sim": 0.3684731125831604,
+    "encoder_coactive_std_sim": 0.017463568598031998,
+    "decoder_per_sample_mean_sim": 0.0015496726846322417,
+    "decoder_per_sample_max_sim": 0.3042750656604767,
+    "encoder_per_sample_mean_sim": 0.002769735874608159,
+    "encoder_per_sample_max_sim": 0.2725919187068939,
+    "encoder_mean_correlation": 0.0021128507796674967,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.05275088548660278,
+    "decoder_mean_correlation": 0.0006922338507138193,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.05089571326971054
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..83d6e89b861167d3aa1753aebaf18ebc906000a3
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.7783051311969755, "l1_loss": 94.76761198043823, "l0": 128.0, "frac_variance_explained": 0.942155122756958, "cossim": 0.9667428568005562, "l2_ratio": 0.9664503745734692, "relative_reconstruction_bias": 0.9999962501227856, "loss_original": 5.4671875, "loss_reconstructed": 5.718359375, "loss_zero": 10.740625, "frac_recovered": 0.9515625, "frac_alive": 1.0}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..83bd35639cb8c9dafc98b47665756398cb261273
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-1_trainer_1",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 1,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 128,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..97c93b2df9cee12202b2f9cb07a9bb198de6dde8
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 979,
+    "intrinsic_dim_0.9": 1363,
+    "intrinsic_dim_0.95": 1619,
+    "effective_rank": 1231.33984375,
+    "feature_sparsity": 0.9375,
+    "mean_correlation": 0.03564154729247093,
+    "max_correlation": 1.0000020265579224,
+    "correlation_std": 0.06366480886936188,
+    "decoder_coactive_mean_sim": 0.0015298674115911126,
+    "decoder_coactive_max_sim": 0.3695344626903534,
+    "decoder_coactive_std_sim": 0.0173921138048172,
+    "encoder_coactive_mean_sim": 0.0028206498827785254,
+    "encoder_coactive_max_sim": 0.42045122385025024,
+    "encoder_coactive_std_sim": 0.017060182988643646,
+    "decoder_per_sample_mean_sim": 0.0015298674115911126,
+    "decoder_per_sample_max_sim": 0.2891332805156708,
+    "encoder_per_sample_mean_sim": 0.0028206498827785254,
+    "encoder_per_sample_max_sim": 0.24817219376564026,
+    "encoder_mean_correlation": 0.002107219770550728,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.052732452750205994,
+    "decoder_mean_correlation": 0.000679917458910495,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.05092431232333183
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a9fadda20d8c33efefca0d79216d380be71686e4
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.784668281674385, "l1_loss": 94.24630346298218, "l0": 128.0, "frac_variance_explained": 0.9389505088329315, "cossim": 0.9668197274208069, "l2_ratio": 0.9664119355380535, "relative_reconstruction_bias": 0.99992056787014, "loss_original": 5.4671875, "loss_reconstructed": 5.71640625, "loss_zero": 10.740625, "frac_recovered": 0.95185546875, "frac_alive": 1.0}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..abe82a211074c7f9b6beda8d88c96375a2cda636
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-2_trainer_2",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 2,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 128,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d649fa23961e1b972d54219cc5f9ce3a7d3b0247
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 968,
+    "intrinsic_dim_0.9": 1353,
+    "intrinsic_dim_0.95": 1611,
+    "effective_rank": 1223.5145263671875,
+    "feature_sparsity": 0.9375,
+    "mean_correlation": 0.035781797021627426,
+    "max_correlation": 1.000002145767212,
+    "correlation_std": 0.06626083701848984,
+    "decoder_coactive_mean_sim": 0.0015025029424577951,
+    "decoder_coactive_max_sim": 0.44835346937179565,
+    "decoder_coactive_std_sim": 0.017151132225990295,
+    "encoder_coactive_mean_sim": 0.002768551465123892,
+    "encoder_coactive_max_sim": 0.4386450946331024,
+    "encoder_coactive_std_sim": 0.017231818288564682,
+    "decoder_per_sample_mean_sim": 0.0015025028260424733,
+    "decoder_per_sample_max_sim": 0.2751252353191376,
+    "encoder_per_sample_mean_sim": 0.002768551232293248,
+    "encoder_per_sample_max_sim": 0.2641173303127289,
+    "encoder_mean_correlation": 0.0015855191741138697,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.05263438820838928,
+    "decoder_mean_correlation": 0.0006925205234438181,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.05096922814846039
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b7d315f23dad5d0aab578a5375b34901de39e52
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.7807009369134903, "l1_loss": 95.33445606231689, "l0": 128.0, "frac_variance_explained": 0.9413482956588268, "cossim": 0.9669262684881688, "l2_ratio": 0.9666900806128979, "relative_reconstruction_bias": 0.9999219357967377, "loss_original": 5.4671875, "loss_reconstructed": 5.70546875, "loss_zero": 10.740625, "frac_recovered": 0.953857421875, "frac_alive": 1.0}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c054c645b2719e74b4b527a24ebd295b55760fee
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-3_trainer_3",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 128,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..824e89d9b1fbb1be0b539dcbe313b40cebbafa11
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 972,
+    "intrinsic_dim_0.9": 1358,
+    "intrinsic_dim_0.95": 1616,
+    "effective_rank": 1217.7279052734375,
+    "feature_sparsity": 0.9375,
+    "mean_correlation": 0.036073531955480576,
+    "max_correlation": 1.0000025033950806,
+    "correlation_std": 0.06570165604352951,
+    "decoder_coactive_mean_sim": 0.0015081887831911445,
+    "decoder_coactive_max_sim": 0.30734848976135254,
+    "decoder_coactive_std_sim": 0.016810396686196327,
+    "encoder_coactive_mean_sim": 0.002697763964533806,
+    "encoder_coactive_max_sim": 0.36042797565460205,
+    "encoder_coactive_std_sim": 0.016821540892124176,
+    "decoder_per_sample_mean_sim": 0.0015081887831911445,
+    "decoder_per_sample_max_sim": 0.24087123572826385,
+    "encoder_per_sample_mean_sim": 0.0026977641973644495,
+    "encoder_per_sample_max_sim": 0.22100086510181427,
+    "encoder_mean_correlation": 0.001659764559008181,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.05284586176276207,
+    "decoder_mean_correlation": 0.0006780978292226791,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.05088222399353981
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d6aaec23b3258b179c1d82fde8a7bc8c3cf9958a
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.792155793309212, "l1_loss": 95.12483205795289, "l0": 128.0, "frac_variance_explained": 0.9385048314929009, "cossim": 0.9666961587965488, "l2_ratio": 0.9664709158241749, "relative_reconstruction_bias": 1.0001889944076539, "loss_original": 5.4671875, "loss_reconstructed": 5.7171875, "loss_zero": 10.740625, "frac_recovered": 0.95166015625, "frac_alive": 0.99951171875}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c74bab2191c20dd9eea5b597f9c86b8809c8133d
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-4_trainer_4",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 4,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 128,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e76649160386625680768439152ff7aa3b5fce8c
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 971,
+    "intrinsic_dim_0.9": 1355,
+    "intrinsic_dim_0.95": 1612,
+    "effective_rank": 1222.1011962890625,
+    "feature_sparsity": 0.9375,
+    "mean_correlation": 0.03547367453575134,
+    "max_correlation": 1.0000027418136597,
+    "correlation_std": 0.06494747847318649,
+    "decoder_coactive_mean_sim": 0.001463062479160726,
+    "decoder_coactive_max_sim": 0.3568807542324066,
+    "decoder_coactive_std_sim": 0.016684627160429955,
+    "encoder_coactive_mean_sim": 0.002751479623839259,
+    "encoder_coactive_max_sim": 0.47617244720458984,
+    "encoder_coactive_std_sim": 0.016827696934342384,
+    "decoder_per_sample_mean_sim": 0.0014630623627454042,
+    "decoder_per_sample_max_sim": 0.2273091822862625,
+    "encoder_per_sample_mean_sim": 0.002751479623839259,
+    "encoder_per_sample_max_sim": 0.2101120501756668,
+    "encoder_mean_correlation": 0.001956398133188486,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.053541772067546844,
+    "decoder_mean_correlation": 0.0007174276979640126,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.050983842462301254
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..89135c9bea6b92a0d27e62126eaab581a34d71ca
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.795499560236931, "l1_loss": 95.14364604949951, "l0": 128.0, "frac_variance_explained": 0.9360299751162529, "cossim": 0.9666673980653286, "l2_ratio": 0.9663952246308327, "relative_reconstruction_bias": 0.9999688774347305, "loss_original": 5.4671875, "loss_reconstructed": 5.721484375, "loss_zero": 10.740625, "frac_recovered": 0.9509765625, "frac_alive": 1.0}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a27e2f240b73869ff79b125b4ef3b9b554837199
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-0_trainer_0",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 256,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3c3ed9a592ea33b4af1f88a473cf024aeb9b1a6
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 480,
+    "intrinsic_dim_0.9": 696,
+    "intrinsic_dim_0.95": 998,
+    "effective_rank": 654.4144287109375,
+    "feature_sparsity": 0.875,
+    "mean_correlation": 0.04879017919301987,
+    "max_correlation": 1.0000032186508179,
+    "correlation_std": 0.1387787014245987,
+    "decoder_coactive_mean_sim": -0.000603697553742677,
+    "decoder_coactive_max_sim": 0.4850667119026184,
+    "decoder_coactive_std_sim": 0.011331773363053799,
+    "encoder_coactive_mean_sim": 0.004783302079886198,
+    "encoder_coactive_max_sim": 0.6346930861473083,
+    "encoder_coactive_std_sim": 0.020051371306180954,
+    "decoder_per_sample_mean_sim": -0.000603697553742677,
+    "decoder_per_sample_max_sim": 0.19871939718723297,
+    "encoder_per_sample_mean_sim": 0.004783302079886198,
+    "encoder_per_sample_max_sim": 0.6304284930229187,
+    "encoder_mean_correlation": 0.00369116198271513,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.06792980432510376,
+    "decoder_mean_correlation": 0.0015754885971546173,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.0512159988284111
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4a7e31eb750ca75a9d9a5f1cb345f2a674d04ac3
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 2.7219308167696, "l1_loss": 219.18922500610353, "l0": 256.0, "frac_variance_explained": 0.9695879392325878, "cossim": 0.9831567205488682, "l2_ratio": 0.983003368973732, "relative_reconstruction_bias": 0.9997936256229878, "loss_original": 5.4671875, "loss_reconstructed": 5.5921875, "loss_zero": 10.740625, "frac_recovered": 0.9759765625, "frac_alive": 0.97412109375}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..556c07c2c9e36fbdade81b3f4c534d3bfdf61c00
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-1_trainer_1",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 1,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 256,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..609a9212d19b86099a72044a3bd3b384494a6e78
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 486,
+    "intrinsic_dim_0.9": 680,
+    "intrinsic_dim_0.95": 972,
+    "effective_rank": 676.2651977539062,
+    "feature_sparsity": 0.875,
+    "mean_correlation": 0.0487031452357769,
+    "max_correlation": 1.0000033378601074,
+    "correlation_std": 0.1352577954530716,
+    "decoder_coactive_mean_sim": -0.000500466616358608,
+    "decoder_coactive_max_sim": 0.42623040080070496,
+    "decoder_coactive_std_sim": 0.011117835529148579,
+    "encoder_coactive_mean_sim": 0.0036506024189293385,
+    "encoder_coactive_max_sim": 0.46891945600509644,
+    "encoder_coactive_std_sim": 0.016733255237340927,
+    "decoder_per_sample_mean_sim": -0.000500466616358608,
+    "decoder_per_sample_max_sim": 0.2542282044887543,
+    "encoder_per_sample_mean_sim": 0.0036506024189293385,
+    "encoder_per_sample_max_sim": 0.44621554017066956,
+    "encoder_mean_correlation": 0.003571811132133007,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.06752442568540573,
+    "decoder_mean_correlation": 0.001791062531992793,
+    "decoder_max_correlation": 1.0000009536743164,
+    "decoder_correlation_std": 0.051166798919439316
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff161048aadd6b469e2a5af2b92616bde22d995e
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 2.659628000855446, "l1_loss": 204.22452449798584, "l0": 256.0, "frac_variance_explained": 0.9706387490034103, "cossim": 0.9838522009551525, "l2_ratio": 0.9836978435516357, "relative_reconstruction_bias": 0.99996752217412, "loss_original": 5.4671875, "loss_reconstructed": 5.5828125, "loss_zero": 10.740625, "frac_recovered": 0.97783203125, "frac_alive": 0.96240234375}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..42fc387c1015c62a0ca26aa4115b32310cb582bb
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-2_trainer_2",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 2,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 256,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ceb38d9a90dda51f3ea68f7437a5724be5ed33ab
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 481,
+    "intrinsic_dim_0.9": 688,
+    "intrinsic_dim_0.95": 983,
+    "effective_rank": 670.652587890625,
+    "feature_sparsity": 0.8750005960464478,
+    "mean_correlation": 0.04843044653534889,
+    "max_correlation": 1.0000027418136597,
+    "correlation_std": 0.13766011595726013,
+    "decoder_coactive_mean_sim": -0.0005655006971210241,
+    "decoder_coactive_max_sim": 0.45535174012184143,
+    "decoder_coactive_std_sim": 0.011256729252636433,
+    "encoder_coactive_mean_sim": 0.004165450111031532,
+    "encoder_coactive_max_sim": 0.5755204558372498,
+    "encoder_coactive_std_sim": 0.018040597438812256,
+    "decoder_per_sample_mean_sim": -0.0005655006389133632,
+    "decoder_per_sample_max_sim": 0.24903523921966553,
+    "encoder_per_sample_mean_sim": 0.004165449645370245,
+    "encoder_per_sample_max_sim": 0.5712661147117615,
+    "encoder_mean_correlation": 0.0021907533518970013,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.06807170808315277,
+    "decoder_mean_correlation": 0.001756084617227316,
+    "decoder_max_correlation": 1.0000009536743164,
+    "decoder_correlation_std": 0.051152873784303665
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1124ca4d3db0d1b7fef494dfa7bda6a3715a7e92
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 2.702136534452438, "l1_loss": 209.18400897979737, "l0": 255.9986328125, "frac_variance_explained": 0.9689409710466862, "cossim": 0.9832561373710632, "l2_ratio": 0.9830743968486786, "relative_reconstruction_bias": 0.9999133288860321, "loss_original": 5.4671875, "loss_reconstructed": 5.591015625, "loss_zero": 10.740625, "frac_recovered": 0.97607421875, "frac_alive": 0.97119140625}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b3410849f9025021b8d6522578ae167bfa5eb21
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-3_trainer_3",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 256,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..57067f473edb6434cedb2e8efde1ddc4befd99fa
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 472,
+    "intrinsic_dim_0.9": 677,
+    "intrinsic_dim_0.95": 968,
+    "effective_rank": 652.3516235351562,
+    "feature_sparsity": 0.875,
+    "mean_correlation": 0.04835033416748047,
+    "max_correlation": 1.0000038146972656,
+    "correlation_std": 0.13854721188545227,
+    "decoder_coactive_mean_sim": -0.0006331527838483453,
+    "decoder_coactive_max_sim": 0.47532302141189575,
+    "decoder_coactive_std_sim": 0.011337202973663807,
+    "encoder_coactive_mean_sim": 0.005108896177262068,
+    "encoder_coactive_max_sim": 0.5862697958946228,
+    "encoder_coactive_std_sim": 0.019558507949113846,
+    "decoder_per_sample_mean_sim": -0.0006331527838483453,
+    "decoder_per_sample_max_sim": 0.2255595475435257,
+    "encoder_per_sample_mean_sim": 0.005108896177262068,
+    "encoder_per_sample_max_sim": 0.5814019441604614,
+    "encoder_mean_correlation": 0.002764908829703927,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.07186086475849152,
+    "decoder_mean_correlation": 0.001473523210734129,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.05101215839385986
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..dae03e88f2dc7288ee755466f51ab5f4178e1ceb
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 2.7153001874685287, "l1_loss": 214.051033782959, "l0": 256.0, "frac_variance_explained": 0.9711439780890941, "cossim": 0.9831305019557476, "l2_ratio": 0.9829454332590103, "relative_reconstruction_bias": 0.9999813109636306, "loss_original": 5.4671875, "loss_reconstructed": 5.588671875, "loss_zero": 10.740625, "frac_recovered": 0.976611328125, "frac_alive": 0.96630859375}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..226878f8a4a0236fdcaa59684f231e24c6aa2831
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-4_trainer_4",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 4,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 256,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..09fc7c9ec8e9fce7019bbda5a99e4ba7c587a67b
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 481,
+    "intrinsic_dim_0.9": 691,
+    "intrinsic_dim_0.95": 987,
+    "effective_rank": 678.6663818359375,
+    "feature_sparsity": 0.8750003576278687,
+    "mean_correlation": 0.04904370754957199,
+    "max_correlation": 1.0000029802322388,
+    "correlation_std": 0.13809221982955933,
+    "decoder_coactive_mean_sim": -0.0006003740127198398,
+    "decoder_coactive_max_sim": 0.4896470904350281,
+    "decoder_coactive_std_sim": 0.01121507491916418,
+    "encoder_coactive_mean_sim": 0.004258359782397747,
+    "encoder_coactive_max_sim": 0.4985700845718384,
+    "encoder_coactive_std_sim": 0.01753879338502884,
+    "decoder_per_sample_mean_sim": -0.0006003740709275007,
+    "decoder_per_sample_max_sim": 0.2869977653026581,
+    "encoder_per_sample_mean_sim": 0.004258360248059034,
+    "encoder_per_sample_max_sim": 0.4956880509853363,
+    "encoder_mean_correlation": 0.0028513818979263306,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.06897676736116409,
+    "decoder_mean_correlation": 0.0016533236484974623,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.05112037807703018
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..36b4e258164235c3a6091cda2f162496aa9837d8
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 2.701796779036522, "l1_loss": 206.50695514678955, "l0": 255.9994140625, "frac_variance_explained": 0.9682310611009598, "cossim": 0.9833530187606812, "l2_ratio": 0.9831482082605362, "relative_reconstruction_bias": 0.9999046422541141, "loss_original": 5.4671875, "loss_reconstructed": 5.59140625, "loss_zero": 10.740625, "frac_recovered": 0.976123046875, "frac_alive": 0.97509765625}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..63fcba13dc437ce492a687ad36c7231d649d96f6
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-0_trainer_0",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a70c68f83d0ab01d7a5cffaaeb228325ea6c2cc
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1145,
+    "intrinsic_dim_0.9": 1494,
+    "intrinsic_dim_0.95": 1716,
+    "effective_rank": 1393.9248046875,
+    "feature_sparsity": 0.984375,
+    "mean_correlation": 0.009593969210982323,
+    "max_correlation": 1.0000017881393433,
+    "correlation_std": 0.034370556473731995,
+    "decoder_coactive_mean_sim": 0.012857094407081604,
+    "decoder_coactive_max_sim": 0.5831928253173828,
+    "decoder_coactive_std_sim": 0.04128008335828781,
+    "encoder_coactive_mean_sim": 0.013956918381154537,
+    "encoder_coactive_max_sim": 0.45717740058898926,
+    "encoder_coactive_std_sim": 0.03173547610640526,
+    "decoder_per_sample_mean_sim": 0.012857094407081604,
+    "decoder_per_sample_max_sim": 0.2939373552799225,
+    "encoder_per_sample_mean_sim": 0.013956919312477112,
+    "encoder_per_sample_max_sim": 0.19147595763206482,
+    "encoder_mean_correlation": 0.0025674644857645035,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.059210386127233505,
+    "decoder_mean_correlation": 0.004608687479048967,
+    "decoder_max_correlation": 1.0000009536743164,
+    "decoder_correlation_std": 0.057089705020189285
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e13ad8fbdd77b093e414a3d21de325c6b1bd2e63
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json
@@ -0,0 +1,14 @@
+{
+    "l2_loss": 5.521578305959702,
+    "l1_loss": 44.27927327156067,
+    "l0": 32.0,
+    "frac_variance_explained": 0.8718812368810177,
+    "cossim": 0.9284569166600705,
+    "l2_ratio": 0.9278947010636329,
+    "relative_reconstruction_bias": 1.0000686429440975,
+    "loss_original": 5.4671875,
+    "loss_reconstructed": 6.01953125,
+    "loss_zero": 10.740625,
+    "frac_recovered": 0.89345703125,
+    "frac_alive": 1.0
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eb63b7f1d6f4849a7cc66285320a36e652b9e3a9
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-1_trainer_1",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 1,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..15861e9c6d682d8c2b29c7d03157fb60867fe3bf
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1145,
+    "intrinsic_dim_0.9": 1493,
+    "intrinsic_dim_0.95": 1714,
+    "effective_rank": 1391.723876953125,
+    "feature_sparsity": 0.984375,
+    "mean_correlation": 0.009598391130566597,
+    "max_correlation": 1.0000014305114746,
+    "correlation_std": 0.03441373631358147,
+    "decoder_coactive_mean_sim": 0.013000169768929482,
+    "decoder_coactive_max_sim": 0.5189113616943359,
+    "decoder_coactive_std_sim": 0.04145849123597145,
+    "encoder_coactive_mean_sim": 0.013387808576226234,
+    "encoder_coactive_max_sim": 0.46838805079460144,
+    "encoder_coactive_std_sim": 0.03152437135577202,
+    "decoder_per_sample_mean_sim": 0.013000166974961758,
+    "decoder_per_sample_max_sim": 0.29648900032043457,
+    "encoder_per_sample_mean_sim": 0.013387808576226234,
+    "encoder_per_sample_max_sim": 0.1819629967212677,
+    "encoder_mean_correlation": 0.0035003535449504852,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.0581950768828392,
+    "decoder_mean_correlation": 0.004728738218545914,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.05676833167672157
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc3fb16ff15804d07ba83905db8f02252f847dd1
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json
@@ -0,0 +1,14 @@
+{
+    "l2_loss": 5.493244290351868,
+    "l1_loss": 43.91451568603516,
+    "l0": 32.0,
+    "frac_variance_explained": 0.8692285768687725,
+    "cossim": 0.9287988729774952,
+    "l2_ratio": 0.9284325882792472,
+    "relative_reconstruction_bias": 1.0001945979893208,
+    "loss_original": 5.4671875,
+    "loss_reconstructed": 6.02265625,
+    "loss_zero": 10.740625,
+    "frac_recovered": 0.892724609375,
+    "frac_alive": 1.0
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b837f7d1bb57b3158b4ab465602c137124e26046
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-2_trainer_2",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 2,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..29b5977f8e58e6106a48bdbdb376f1be5ae403b8
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1147,
+    "intrinsic_dim_0.9": 1497,
+    "intrinsic_dim_0.95": 1720,
+    "effective_rank": 1386.2728271484375,
+    "feature_sparsity": 0.984375,
+    "mean_correlation": 0.009658349677920341,
+    "max_correlation": 1.0000011920928955,
+    "correlation_std": 0.034718144685029984,
+    "decoder_coactive_mean_sim": 0.013472042046487331,
+    "decoder_coactive_max_sim": 0.6248090863227844,
+    "decoder_coactive_std_sim": 0.042402926832437515,
+    "encoder_coactive_mean_sim": 0.013234490528702736,
+    "encoder_coactive_max_sim": 0.48452436923980713,
+    "encoder_coactive_std_sim": 0.03160259500145912,
+    "decoder_per_sample_mean_sim": 0.013472042977809906,
+    "decoder_per_sample_max_sim": 0.295926034450531,
+    "encoder_per_sample_mean_sim": 0.013234490528702736,
+    "encoder_per_sample_max_sim": 0.1905398666858673,
+    "encoder_mean_correlation": 0.003325967350974679,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.057232871651649475,
+    "decoder_mean_correlation": 0.004680304788053036,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.057123977690935135
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9e478a111cc4e1203ee9ecbd214294e0644cb76
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json
@@ -0,0 +1,14 @@
+{
+    "l2_loss": 5.518608373403549,
+    "l1_loss": 43.638191032409665,
+    "l0": 32.0,
+    "frac_variance_explained": 0.8637508787214756,
+    "cossim": 0.9281533844769001,
+    "l2_ratio": 0.9278098031878471,
+    "relative_reconstruction_bias": 1.0003917694091797,
+    "loss_original": 5.4671875,
+    "loss_reconstructed": 6.020703125,
+    "loss_zero": 10.740625,
+    "frac_recovered": 0.892822265625,
+    "frac_alive": 1.0
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3e20a5277c2d3a2bdcf42d41a2ab9cc877654221
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-3_trainer_3",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3d18b526973ae1f3f97daf5d0e97290d3c2ec8e5
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1146,
+    "intrinsic_dim_0.9": 1496,
+    "intrinsic_dim_0.95": 1719,
+    "effective_rank": 1397.719970703125,
+    "feature_sparsity": 0.984375,
+    "mean_correlation": 0.009594940580427647,
+    "max_correlation": 1.0000015497207642,
+    "correlation_std": 0.034256912767887115,
+    "decoder_coactive_mean_sim": 0.013142098672688007,
+    "decoder_coactive_max_sim": 0.4960811734199524,
+    "decoder_coactive_std_sim": 0.0417519137263298,
+    "encoder_coactive_mean_sim": 0.013556867837905884,
+    "encoder_coactive_max_sim": 0.4828951954841614,
+    "encoder_coactive_std_sim": 0.0318557471036911,
+    "decoder_per_sample_mean_sim": 0.013142098672688007,
+    "decoder_per_sample_max_sim": 0.3047160804271698,
+    "encoder_per_sample_mean_sim": 0.013556867837905884,
+    "encoder_per_sample_max_sim": 0.1878674179315567,
+    "encoder_mean_correlation": 0.003432024270296097,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.058941394090652466,
+    "decoder_mean_correlation": 0.004725632257759571,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.05638653412461281
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..37bffbd97434e8406aa2628e70241a2be385ba00
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json
@@ -0,0 +1,14 @@
+{
+    "l2_loss": 5.497262263298035,
+    "l1_loss": 43.95940580368042,
+    "l0": 32.0,
+    "frac_variance_explained": 0.873967283219099,
+    "cossim": 0.9286889567971229,
+    "l2_ratio": 0.9278851471841335,
+    "relative_reconstruction_bias": 0.99990846067667,
+    "loss_original": 5.4671875,
+    "loss_reconstructed": 6.01796875,
+    "loss_zero": 10.740625,
+    "frac_recovered": 0.893505859375,
+    "frac_alive": 1.0
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b64b823442cb08a2f845f88bb0a6c6764b6164ca
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-4_trainer_4",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 4,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b31bd2ccf2e5fe86c204628aae0ad462a1cdae9a
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1143,
+    "intrinsic_dim_0.9": 1494,
+    "intrinsic_dim_0.95": 1717,
+    "effective_rank": 1369.8779296875,
+    "feature_sparsity": 0.984375,
+    "mean_correlation": 0.009484217502176762,
+    "max_correlation": 1.0000014305114746,
+    "correlation_std": 0.0345107764005661,
+    "decoder_coactive_mean_sim": 0.012512738816440105,
+    "decoder_coactive_max_sim": 0.6438488960266113,
+    "decoder_coactive_std_sim": 0.04079408198595047,
+    "encoder_coactive_mean_sim": 0.012626885436475277,
+    "encoder_coactive_max_sim": 0.5188671946525574,
+    "encoder_coactive_std_sim": 0.031891077756881714,
+    "decoder_per_sample_mean_sim": 0.01251273788511753,
+    "decoder_per_sample_max_sim": 0.2933551073074341,
+    "encoder_per_sample_mean_sim": 0.012626885436475277,
+    "encoder_per_sample_max_sim": 0.19583337008953094,
+    "encoder_mean_correlation": 0.003148031421005726,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.05692300200462341,
+    "decoder_mean_correlation": 0.004536854103207588,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.05627986416220665
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..70fa6c5d07a7305048d9cb06369731113f937e06
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json
@@ -0,0 +1,14 @@
+{
+    "l2_loss": 5.485749912261963,
+    "l1_loss": 44.28157253265381,
+    "l0": 32.0,
+    "frac_variance_explained": 0.867275919765234,
+    "cossim": 0.9295805610716343,
+    "l2_ratio": 0.9290113553404808,
+    "relative_reconstruction_bias": 1.0001047268509864,
+    "loss_original": 5.4671875,
+    "loss_reconstructed": 6.025390625,
+    "loss_zero": 10.740625,
+    "frac_recovered": 0.8919921875,
+    "frac_alive": 1.0
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..23d329d9972c47592cf4c8fb98e33510c4f69194
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json
@@ -0,0 +1,34 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-0_trainer_0",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 512,
+        "dict_size": 32768,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5e733c5fd5aa1883f5c9178bc2d915282b3a6509
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json
@@ -0,0 +1,34 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-1_trainer_1",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 1,
+        "activation_dim": 512,
+        "dict_size": 32768,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ec0e1fae0690463cf004713d4d6a626976eb934
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json
@@ -0,0 +1,34 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-2_trainer_2",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 2,
+        "activation_dim": 512,
+        "dict_size": 32768,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c5426e9b36f66be7795d20c74e3fb636892aca7
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json
@@ -0,0 +1,34 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-3_trainer_3",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3,
+        "activation_dim": 512,
+        "dict_size": 32768,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..67f7a2bf041023f6321ba4c09b9806e67617f016
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json
@@ -0,0 +1,34 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-4_trainer_4",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 4,
+        "activation_dim": 512,
+        "dict_size": 32768,
+        "k": 32,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c77b2e10a21b573b3193abd091af2e53eab9a5d
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-0_trainer_0",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 512,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..43319a64b2ddb3be81e4181604c7e82bf73d8648
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 346,
+    "intrinsic_dim_0.9": 437,
+    "intrinsic_dim_0.95": 489,
+    "effective_rank": 321.3466796875,
+    "feature_sparsity": 0.7501027584075928,
+    "mean_correlation": 0.11567842215299606,
+    "max_correlation": 1.000004768371582,
+    "correlation_std": 0.2895612120628357,
+    "decoder_coactive_mean_sim": -0.0009108898229897022,
+    "decoder_coactive_max_sim": 0.2942618727684021,
+    "decoder_coactive_std_sim": 0.01467378344386816,
+    "encoder_coactive_mean_sim": 0.04503903165459633,
+    "encoder_coactive_max_sim": 0.36706337332725525,
+    "encoder_coactive_std_sim": 0.05137478560209274,
+    "decoder_per_sample_mean_sim": -0.0009108897647820413,
+    "decoder_per_sample_max_sim": 0.10512230545282364,
+    "encoder_per_sample_mean_sim": 0.04503902792930603,
+    "encoder_per_sample_max_sim": 0.2558523714542389,
+    "encoder_mean_correlation": 0.004667331930249929,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.11842181533575058,
+    "decoder_mean_correlation": 0.00027410718030296266,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.04888004809617996
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1979f91f7f45af38cceff8d4d948f9c5ecb3b984
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/standard_eval_results.json
@@ -0,0 +1,14 @@
+{
+    "l2_loss": 0.1218978282995522,
+    "l1_loss": 803.34478225708,
+    "l0": 511.82421875,
+    "frac_variance_explained": 0.9998174428939819,
+    "cossim": 1.0000896871089935,
+    "l2_ratio": 0.9999499209225178,
+    "relative_reconstruction_bias": 1.0000311397016048,
+    "loss_original": 5.4671875,
+    "loss_reconstructed": 5.46796875,
+    "loss_zero": 10.740625,
+    "frac_recovered": 0.99990234375,
+    "frac_alive": 0.58154296875
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..546245429d200340a0d80bb495f39f43b0c5771a
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-1_trainer_1",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 1,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 512,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5c85a9ac8ee036821330ae794edc40625fbfdaca
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 342,
+    "intrinsic_dim_0.9": 433,
+    "intrinsic_dim_0.95": 486,
+    "effective_rank": 302.5154724121094,
+    "feature_sparsity": 0.7500362992286682,
+    "mean_correlation": 0.11686287820339203,
+    "max_correlation": 1.000004768371582,
+    "correlation_std": 0.29108867049217224,
+    "decoder_coactive_mean_sim": -0.0009243678650818765,
+    "decoder_coactive_max_sim": 0.21814467012882233,
+    "decoder_coactive_std_sim": 0.0147458016872406,
+    "encoder_coactive_mean_sim": 0.051302291452884674,
+    "encoder_coactive_max_sim": 0.4465314447879791,
+    "encoder_coactive_std_sim": 0.056279055774211884,
+    "decoder_per_sample_mean_sim": -0.0009243678650818765,
+    "decoder_per_sample_max_sim": 0.09590303897857666,
+    "encoder_per_sample_mean_sim": 0.051302291452884674,
+    "encoder_per_sample_max_sim": 0.2655414640903473,
+    "encoder_mean_correlation": 0.005481375381350517,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.12337620556354523,
+    "decoder_mean_correlation": 0.0002578080748207867,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.048913467675447464
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..9071cf61e6f366a11338af73136e2bf4929da25f
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 0.03437363065313548, "l1_loss": 898.7996040344239, "l0": 511.90654296875, "frac_variance_explained": 0.9998641557991504, "cossim": 1.0001215882599355, "l2_ratio": 0.9999546416103839, "relative_reconstruction_bias": 0.9999443709850311, "loss_original": 5.4671875, "loss_reconstructed": 5.466015625, "loss_zero": 10.740625, "frac_recovered": 1.000244140625, "frac_alive": 0.59033203125}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7372e329c335f0341358b584e56afcfc846005ab
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-2_trainer_2",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 2,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 512,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..76da297de144df840694d3ccb99e8bb29ec2f11e
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 342,
+    "intrinsic_dim_0.9": 434,
+    "intrinsic_dim_0.95": 487,
+    "effective_rank": 297.5074462890625,
+    "feature_sparsity": 0.7500617504119873,
+    "mean_correlation": 0.11699061840772629,
+    "max_correlation": 1.0000042915344238,
+    "correlation_std": 0.29189395904541016,
+    "decoder_coactive_mean_sim": -0.0009281504317186773,
+    "decoder_coactive_max_sim": 0.2474658489227295,
+    "decoder_coactive_std_sim": 0.014783051796257496,
+    "encoder_coactive_mean_sim": 0.05288249999284744,
+    "encoder_coactive_max_sim": 0.3966051936149597,
+    "encoder_coactive_std_sim": 0.057189274579286575,
+    "decoder_per_sample_mean_sim": -0.0009281504899263382,
+    "decoder_per_sample_max_sim": 0.10155373066663742,
+    "encoder_per_sample_mean_sim": 0.05288250371813774,
+    "encoder_per_sample_max_sim": 0.27083733677864075,
+    "encoder_mean_correlation": 0.003606635145843029,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.12434981763362885,
+    "decoder_mean_correlation": 0.00025882525369524956,
+    "decoder_max_correlation": 1.0000009536743164,
+    "decoder_correlation_std": 0.04886815324425697
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1b5e04b2ee6080c6ac77ba1c02d6a13a9cda7749
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 0.025140870863106103, "l1_loss": 931.2091484069824, "l0": 511.9453125, "frac_variance_explained": 0.9999042950570584, "cossim": 1.0001301787793637, "l2_ratio": 0.9999769635498523, "relative_reconstruction_bias": 1.000041725486517, "loss_original": 5.4671875, "loss_reconstructed": 5.472265625, "loss_zero": 10.740625, "frac_recovered": 0.99892578125, "frac_alive": 0.58544921875}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..32e95c5b43788cf409645d97d954e19276d63797
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-3_trainer_3",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 512,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d07863b1156c45a6fa4d45daa59e871c59ef19b2
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 345,
+    "intrinsic_dim_0.9": 434,
+    "intrinsic_dim_0.95": 486,
+    "effective_rank": 317.96185302734375,
+    "feature_sparsity": 0.7500829100608826,
+    "mean_correlation": 0.11769045889377594,
+    "max_correlation": 1.0000042915344238,
+    "correlation_std": 0.2899561822414398,
+    "decoder_coactive_mean_sim": -0.0009198148618452251,
+    "decoder_coactive_max_sim": 0.24994757771492004,
+    "decoder_coactive_std_sim": 0.014731859788298607,
+    "encoder_coactive_mean_sim": 0.04878106713294983,
+    "encoder_coactive_max_sim": 0.4278787076473236,
+    "encoder_coactive_std_sim": 0.054398663341999054,
+    "decoder_per_sample_mean_sim": -0.0009198148618452251,
+    "decoder_per_sample_max_sim": 0.10858472436666489,
+    "encoder_per_sample_mean_sim": 0.04878106713294983,
+    "encoder_per_sample_max_sim": 0.3049105703830719,
+    "encoder_mean_correlation": 0.004845459014177322,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.12293191999197006,
+    "decoder_mean_correlation": 0.0002943026483990252,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.04887963831424713
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e4f2664b14e645558e2b2e55b2f7e1593005f0da
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 0.11367939515039324, "l1_loss": 810.8659385681152, "l0": 511.80732421875, "frac_variance_explained": 0.9998415939509868, "cossim": 1.0000781424343586, "l2_ratio": 0.9999195456504821, "relative_reconstruction_bias": 0.999939326196909, "loss_original": 5.4671875, "loss_reconstructed": 5.4640625, "loss_zero": 10.740625, "frac_recovered": 1.000732421875, "frac_alive": 0.59228515625}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..36c4dd70fc6843ba931dde54752c4b4bc917c212
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-4_trainer_4",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 4,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 512,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1871aeb59267daca5eb261498b817353b93b89e8
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 343,
+    "intrinsic_dim_0.9": 434,
+    "intrinsic_dim_0.95": 486,
+    "effective_rank": 299.96270751953125,
+    "feature_sparsity": 0.7500380277633667,
+    "mean_correlation": 0.12012023478746414,
+    "max_correlation": 1.0000050067901611,
+    "correlation_std": 0.292927086353302,
+    "decoder_coactive_mean_sim": -0.000933852803427726,
+    "decoder_coactive_max_sim": 0.2453019917011261,
+    "decoder_coactive_std_sim": 0.014877861365675926,
+    "encoder_coactive_mean_sim": 0.05555467680096626,
+    "encoder_coactive_max_sim": 0.34842175245285034,
+    "encoder_coactive_std_sim": 0.058952681720256805,
+    "decoder_per_sample_mean_sim": -0.000933852803427726,
+    "decoder_per_sample_max_sim": 0.09431355446577072,
+    "encoder_per_sample_mean_sim": 0.05555467680096626,
+    "encoder_per_sample_max_sim": 0.24732723832130432,
+    "encoder_mean_correlation": 0.0046387407928705215,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.12495078891515732,
+    "decoder_mean_correlation": 0.00025382271269336343,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.048862189054489136
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..2907d41f5091e04ab39bcd0504bbaba859dcfba8
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 0.026735168404411525, "l1_loss": 929.9151672363281, "l0": 511.94140625, "frac_variance_explained": 0.9999176003038883, "cossim": 1.0001217171549797, "l2_ratio": 0.9999563246965408, "relative_reconstruction_bias": 0.9999580040574074, "loss_original": 5.4671875, "loss_reconstructed": 5.468359375, "loss_zero": 10.740625, "frac_recovered": 0.999853515625, "frac_alive": 0.60107421875}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..378262f03ce912949248d655785bdd2425c0f85e
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1101,
+    "intrinsic_dim_0.9": 1466,
+    "intrinsic_dim_0.95": 1701,
+    "effective_rank": 1336.6378173828125,
+    "feature_sparsity": 0.96875,
+    "mean_correlation": 0.018963707610964775,
+    "max_correlation": 1.0000014305114746,
+    "correlation_std": 0.044405288994312286,
+    "decoder_coactive_mean_sim": 0.005104673095047474,
+    "decoder_coactive_max_sim": 0.38417404890060425,
+    "decoder_coactive_std_sim": 0.02679547853767872,
+    "encoder_coactive_mean_sim": 0.00732428440824151,
+    "encoder_coactive_max_sim": 0.36569666862487793,
+    "encoder_coactive_std_sim": 0.02348160743713379,
+    "decoder_per_sample_mean_sim": 0.005104673095047474,
+    "decoder_per_sample_max_sim": 0.24980509281158447,
+    "encoder_per_sample_mean_sim": 0.00732428440824151,
+    "encoder_per_sample_max_sim": 0.17561672627925873,
+    "encoder_mean_correlation": 0.0016635311767458916,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.05483342707157135,
+    "decoder_mean_correlation": 0.0018550025997683406,
+    "decoder_max_correlation": 1.0000009536743164,
+    "decoder_correlation_std": 0.052489809691905975
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ef72ba2f051374defdf6d1ce3c09a60da80b2e01
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/seed-1_trainer_1",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 1,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 64,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..32d92203690c0c25a2d61b652537cd6941a9a833
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1096,
+    "intrinsic_dim_0.9": 1461,
+    "intrinsic_dim_0.95": 1697,
+    "effective_rank": 1342.979248046875,
+    "feature_sparsity": 0.96875,
+    "mean_correlation": 0.018925022333860397,
+    "max_correlation": 1.000002145767212,
+    "correlation_std": 0.04421866685152054,
+    "decoder_coactive_mean_sim": 0.005001583602279425,
+    "decoder_coactive_max_sim": 0.6809477806091309,
+    "decoder_coactive_std_sim": 0.026375532150268555,
+    "encoder_coactive_mean_sim": 0.006971509661525488,
+    "encoder_coactive_max_sim": 0.5061028003692627,
+    "encoder_coactive_std_sim": 0.0230117067694664,
+    "decoder_per_sample_mean_sim": 0.005001583602279425,
+    "decoder_per_sample_max_sim": 0.27040722966194153,
+    "encoder_per_sample_mean_sim": 0.006971509661525488,
+    "encoder_per_sample_max_sim": 0.20409739017486572,
+    "encoder_mean_correlation": 0.0018320512026548386,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.053943462669849396,
+    "decoder_mean_correlation": 0.0019005772192031145,
+    "decoder_max_correlation": 1.0000011920928955,
+    "decoder_correlation_std": 0.052223801612854004
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..830feecf8f5526b0df6d5aba13ccc2a4b995a59d
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.780432206392288, "l1_loss": 63.796514415740965, "l0": 64.0, "frac_variance_explained": 0.9086314626038074, "cossim": 0.9469698674976825, "l2_ratio": 0.9466174811124801, "relative_reconstruction_bias": 1.000212862342596, "loss_original": 5.4671875, "loss_reconstructed": 5.86328125, "loss_zero": 10.740625, "frac_recovered": 0.9236328125, "frac_alive": 1.0}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..77f51fa39225e7eb1fc12781a83caf8eb2502c9d
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/seed-2_trainer_2",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 2,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 64,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..9a09f7a582945552246bb69d44f089655f4bd06c
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1098,
+    "intrinsic_dim_0.9": 1462,
+    "intrinsic_dim_0.95": 1696,
+    "effective_rank": 1344.98193359375,
+    "feature_sparsity": 0.96875,
+    "mean_correlation": 0.018938545137643814,
+    "max_correlation": 1.0000015497207642,
+    "correlation_std": 0.04458320140838623,
+    "decoder_coactive_mean_sim": 0.005024466197937727,
+    "decoder_coactive_max_sim": 0.3917606472969055,
+    "decoder_coactive_std_sim": 0.02672816626727581,
+    "encoder_coactive_mean_sim": 0.0071235643699765205,
+    "encoder_coactive_max_sim": 0.382651150226593,
+    "encoder_coactive_std_sim": 0.023562893271446228,
+    "decoder_per_sample_mean_sim": 0.005024466197937727,
+    "decoder_per_sample_max_sim": 0.2612590789794922,
+    "encoder_per_sample_mean_sim": 0.0071235643699765205,
+    "encoder_per_sample_max_sim": 0.2195105403661728,
+    "encoder_mean_correlation": 0.0016356734558939934,
+    "encoder_max_correlation": 1.0000003576278687,
+    "encoder_correlation_std": 0.053993258625268936,
+    "decoder_mean_correlation": 0.0018265489488840103,
+    "decoder_max_correlation": 1.0000009536743164,
+    "decoder_correlation_std": 0.052157212048769
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..35ebc4d04ec72bdfc8079f1a4dc906fca526aa94
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.802043730020523, "l1_loss": 63.38813443183899, "l0": 64.0, "frac_variance_explained": 0.9043121233582496, "cossim": 0.9462784223258496, "l2_ratio": 0.9456560485064983, "relative_reconstruction_bias": 0.9999310210347175, "loss_original": 5.4671875, "loss_reconstructed": 5.8640625, "loss_zero": 10.740625, "frac_recovered": 0.923486328125, "frac_alive": 1.0}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2e3df419402f599f7d6881eb3103ea407423fedc
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/config.json
@@ -0,0 +1,46 @@
+{
+    "trainer": {
+        "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/seed-3_trainer_3",
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 14648,
+        "auxk_alpha": 0.03125,
+        "diversity_scale": 0.0,
+        "diversity_type": null,
+        "warmup_steps": 1000,
+        "decay_start": 1200,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 3,
+        "activation_dim": 512,
+        "dict_size": 2048,
+        "k": 64,
+        "device": "cuda:0",
+        "layer": 4,
+        "lm_name": "EleutherAI/pythia-70m-deduped",
+        "submodule_name": "resid_post_layer_4",
+        "random_mask_bit_ratio": 0.0,
+        "random_mask_bit_ratio_force_topk": false,
+        "random_mask_bit_original_scale": 1.0,
+        "random_mask_bit_pos": "input",
+        "random_mask_bit_reconstruction_target": "original",
+        "nonlinear_sae_loss_scale": 0.0,
+        "nonlinear_sae_input": "original",
+        "nonlinear_sae_target": "residual",
+        "nonlinear_use_encoder_mlp": true,
+        "nonlinear_use_decoder_mlp": false,
+        "nonlinear_topk_k": -1,
+        "nonlinear_block_gradients": false,
+        "meta_sae_loss_scale": 0.0
+    },
+    "buffer": {
+        "d_submodule": 512,
+        "io": "out",
+        "n_ctxs": 2048,
+        "ctx_len": 128,
+        "refresh_batch_size": 24,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ea0c51b8b307ac12dd1bd69ebaa83a2d07d51e7
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json
@@ -0,0 +1,26 @@
+{
+    "intrinsic_dim_0.8": 1095,
+    "intrinsic_dim_0.9": 1460,
+    "intrinsic_dim_0.95": 1694,
+    "effective_rank": 1335.848388671875,
+    "feature_sparsity": 0.96875,
+    "mean_correlation": 0.018848147243261337,
+    "max_correlation": 1.0000026226043701,
+    "correlation_std": 0.0433562807738781,
+    "decoder_coactive_mean_sim": 0.004982855170965195,
+    "decoder_coactive_max_sim": 0.3878358006477356,
+    "decoder_coactive_std_sim": 0.02627839706838131,
+    "encoder_coactive_mean_sim": 0.006902318447828293,
+    "encoder_coactive_max_sim": 0.36567115783691406,
+    "encoder_coactive_std_sim": 0.02317717857658863,
+    "decoder_per_sample_mean_sim": 0.004982855170965195,
+    "decoder_per_sample_max_sim": 0.24182987213134766,
+    "encoder_per_sample_mean_sim": 0.006902318447828293,
+    "encoder_per_sample_max_sim": 0.17159400880336761,
+    "encoder_mean_correlation": 0.0018277183407917619,
+    "encoder_max_correlation": 1.000000238418579,
+    "encoder_correlation_std": 0.05462810397148132,
+    "decoder_mean_correlation": 0.0019029118120670319,
+    "decoder_max_correlation": 1.000001072883606,
+    "decoder_correlation_std": 0.05232343450188637
+}
\ No newline at end of file
diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/standard_eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..2344f65d0cb5930c4bd6f1af237a6ac2c930ca3c
--- /dev/null
+++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/standard_eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.806892818212509, "l1_loss": 63.29269104003906, "l0": 64.0, "frac_variance_explained": 0.9039966031908989, "cossim": 0.946143351495266, "l2_ratio": 0.9456219218671322, "relative_reconstruction_bias": 1.0000569581985475, "loss_original": 5.4671875, "loss_reconstructed": 5.86484375, "loss_zero": 10.740625, "frac_recovered": 0.9232421875, "frac_alive": 1.0}
\ No newline at end of file