diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..adbaaa2586ded7dbbeab2cc9de9acdfd7d1206cd --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-0_trainer_0", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 0, + "activation_dim": 512, + "dict_size": 2048, + "k": 128, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..26e61966c8fcccacc9755da8bcce436e9f7a24ba --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 980, + "intrinsic_dim_0.9": 1364, + "intrinsic_dim_0.95": 1619, + "effective_rank": 1223.4310302734375, + "feature_sparsity": 0.9375, + "mean_correlation": 0.03588181361556053, + "max_correlation": 1.000002145767212, + "correlation_std": 0.06462709605693817, + "decoder_coactive_mean_sim": 0.0015496726846322417, + "decoder_coactive_max_sim": 0.4138309955596924, + "decoder_coactive_std_sim": 0.01725614070892334, + "encoder_coactive_mean_sim": 0.002769735874608159, + "encoder_coactive_max_sim": 0.3684731125831604, + "encoder_coactive_std_sim": 0.017463568598031998, + "decoder_per_sample_mean_sim": 0.0015496726846322417, + "decoder_per_sample_max_sim": 0.3042750656604767, + "encoder_per_sample_mean_sim": 0.002769735874608159, + "encoder_per_sample_max_sim": 0.2725919187068939, + "encoder_mean_correlation": 0.0021128507796674967, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.05275088548660278, + "decoder_mean_correlation": 0.0006922338507138193, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.05089571326971054 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..83d6e89b861167d3aa1753aebaf18ebc906000a3 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.7783051311969755, "l1_loss": 94.76761198043823, "l0": 128.0, "frac_variance_explained": 0.942155122756958, "cossim": 0.9667428568005562, "l2_ratio": 0.9664503745734692, "relative_reconstruction_bias": 0.9999962501227856, "loss_original": 5.4671875, "loss_reconstructed": 5.718359375, "loss_zero": 10.740625, "frac_recovered": 0.9515625, "frac_alive": 1.0} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83bd35639cb8c9dafc98b47665756398cb261273 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-1_trainer_1", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 1, + "activation_dim": 512, + "dict_size": 2048, + "k": 128, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..97c93b2df9cee12202b2f9cb07a9bb198de6dde8 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 979, + "intrinsic_dim_0.9": 1363, + "intrinsic_dim_0.95": 1619, + "effective_rank": 1231.33984375, + "feature_sparsity": 0.9375, + "mean_correlation": 0.03564154729247093, + "max_correlation": 1.0000020265579224, + "correlation_std": 0.06366480886936188, + "decoder_coactive_mean_sim": 0.0015298674115911126, + "decoder_coactive_max_sim": 0.3695344626903534, + "decoder_coactive_std_sim": 0.0173921138048172, + "encoder_coactive_mean_sim": 0.0028206498827785254, + "encoder_coactive_max_sim": 0.42045122385025024, + "encoder_coactive_std_sim": 0.017060182988643646, + "decoder_per_sample_mean_sim": 0.0015298674115911126, + "decoder_per_sample_max_sim": 0.2891332805156708, + "encoder_per_sample_mean_sim": 0.0028206498827785254, + "encoder_per_sample_max_sim": 0.24817219376564026, + "encoder_mean_correlation": 0.002107219770550728, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.052732452750205994, + "decoder_mean_correlation": 0.000679917458910495, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.05092431232333183 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a9fadda20d8c33efefca0d79216d380be71686e4 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.784668281674385, "l1_loss": 94.24630346298218, "l0": 128.0, "frac_variance_explained": 0.9389505088329315, "cossim": 0.9668197274208069, "l2_ratio": 0.9664119355380535, "relative_reconstruction_bias": 0.99992056787014, "loss_original": 5.4671875, "loss_reconstructed": 5.71640625, "loss_zero": 10.740625, "frac_recovered": 0.95185546875, "frac_alive": 1.0} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..abe82a211074c7f9b6beda8d88c96375a2cda636 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-2_trainer_2", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 2, + "activation_dim": 512, + "dict_size": 2048, + "k": 128, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d649fa23961e1b972d54219cc5f9ce3a7d3b0247 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 968, + "intrinsic_dim_0.9": 1353, + "intrinsic_dim_0.95": 1611, + "effective_rank": 1223.5145263671875, + "feature_sparsity": 0.9375, + "mean_correlation": 0.035781797021627426, + "max_correlation": 1.000002145767212, + "correlation_std": 0.06626083701848984, + "decoder_coactive_mean_sim": 0.0015025029424577951, + "decoder_coactive_max_sim": 0.44835346937179565, + "decoder_coactive_std_sim": 0.017151132225990295, + "encoder_coactive_mean_sim": 0.002768551465123892, + "encoder_coactive_max_sim": 0.4386450946331024, + "encoder_coactive_std_sim": 0.017231818288564682, + "decoder_per_sample_mean_sim": 0.0015025028260424733, + "decoder_per_sample_max_sim": 0.2751252353191376, + "encoder_per_sample_mean_sim": 0.002768551232293248, + "encoder_per_sample_max_sim": 0.2641173303127289, + "encoder_mean_correlation": 0.0015855191741138697, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.05263438820838928, + "decoder_mean_correlation": 0.0006925205234438181, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.05096922814846039 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b7d315f23dad5d0aab578a5375b34901de39e52 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.7807009369134903, "l1_loss": 95.33445606231689, "l0": 128.0, "frac_variance_explained": 0.9413482956588268, "cossim": 0.9669262684881688, "l2_ratio": 0.9666900806128979, "relative_reconstruction_bias": 0.9999219357967377, "loss_original": 5.4671875, "loss_reconstructed": 5.70546875, "loss_zero": 10.740625, "frac_recovered": 0.953857421875, "frac_alive": 1.0} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c054c645b2719e74b4b527a24ebd295b55760fee --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-3_trainer_3", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3, + "activation_dim": 512, + "dict_size": 2048, + "k": 128, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..824e89d9b1fbb1be0b539dcbe313b40cebbafa11 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 972, + "intrinsic_dim_0.9": 1358, + "intrinsic_dim_0.95": 1616, + "effective_rank": 1217.7279052734375, + "feature_sparsity": 0.9375, + "mean_correlation": 0.036073531955480576, + "max_correlation": 1.0000025033950806, + "correlation_std": 0.06570165604352951, + "decoder_coactive_mean_sim": 0.0015081887831911445, + "decoder_coactive_max_sim": 0.30734848976135254, + "decoder_coactive_std_sim": 0.016810396686196327, + "encoder_coactive_mean_sim": 0.002697763964533806, + "encoder_coactive_max_sim": 0.36042797565460205, + "encoder_coactive_std_sim": 0.016821540892124176, + "decoder_per_sample_mean_sim": 0.0015081887831911445, + "decoder_per_sample_max_sim": 0.24087123572826385, + "encoder_per_sample_mean_sim": 0.0026977641973644495, + "encoder_per_sample_max_sim": 0.22100086510181427, + "encoder_mean_correlation": 0.001659764559008181, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.05284586176276207, + "decoder_mean_correlation": 0.0006780978292226791, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.05088222399353981 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d6aaec23b3258b179c1d82fde8a7bc8c3cf9958a --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.792155793309212, "l1_loss": 95.12483205795289, "l0": 128.0, "frac_variance_explained": 0.9385048314929009, "cossim": 0.9666961587965488, "l2_ratio": 0.9664709158241749, "relative_reconstruction_bias": 1.0001889944076539, "loss_original": 5.4671875, "loss_reconstructed": 5.7171875, "loss_zero": 10.740625, "frac_recovered": 0.95166015625, "frac_alive": 0.99951171875} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c74bab2191c20dd9eea5b597f9c86b8809c8133d --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-4_trainer_4", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 4, + "activation_dim": 512, + "dict_size": 2048, + "k": 128, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e76649160386625680768439152ff7aa3b5fce8c --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 971, + "intrinsic_dim_0.9": 1355, + "intrinsic_dim_0.95": 1612, + "effective_rank": 1222.1011962890625, + "feature_sparsity": 0.9375, + "mean_correlation": 0.03547367453575134, + "max_correlation": 1.0000027418136597, + "correlation_std": 0.06494747847318649, + "decoder_coactive_mean_sim": 0.001463062479160726, + "decoder_coactive_max_sim": 0.3568807542324066, + "decoder_coactive_std_sim": 0.016684627160429955, + "encoder_coactive_mean_sim": 0.002751479623839259, + "encoder_coactive_max_sim": 0.47617244720458984, + "encoder_coactive_std_sim": 0.016827696934342384, + "decoder_per_sample_mean_sim": 0.0014630623627454042, + "decoder_per_sample_max_sim": 0.2273091822862625, + "encoder_per_sample_mean_sim": 0.002751479623839259, + "encoder_per_sample_max_sim": 0.2101120501756668, + "encoder_mean_correlation": 0.001956398133188486, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.053541772067546844, + "decoder_mean_correlation": 0.0007174276979640126, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.050983842462301254 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89135c9bea6b92a0d27e62126eaab581a34d71ca --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.795499560236931, "l1_loss": 95.14364604949951, "l0": 128.0, "frac_variance_explained": 0.9360299751162529, "cossim": 0.9666673980653286, "l2_ratio": 0.9663952246308327, "relative_reconstruction_bias": 0.9999688774347305, "loss_original": 5.4671875, "loss_reconstructed": 5.721484375, "loss_zero": 10.740625, "frac_recovered": 0.9509765625, "frac_alive": 1.0} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a27e2f240b73869ff79b125b4ef3b9b554837199 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-0_trainer_0", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 0, + "activation_dim": 512, + "dict_size": 2048, + "k": 256, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d3c3ed9a592ea33b4af1f88a473cf024aeb9b1a6 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 480, + "intrinsic_dim_0.9": 696, + "intrinsic_dim_0.95": 998, + "effective_rank": 654.4144287109375, + "feature_sparsity": 0.875, + "mean_correlation": 0.04879017919301987, + "max_correlation": 1.0000032186508179, + "correlation_std": 0.1387787014245987, + "decoder_coactive_mean_sim": -0.000603697553742677, + "decoder_coactive_max_sim": 0.4850667119026184, + "decoder_coactive_std_sim": 0.011331773363053799, + "encoder_coactive_mean_sim": 0.004783302079886198, + "encoder_coactive_max_sim": 0.6346930861473083, + "encoder_coactive_std_sim": 0.020051371306180954, + "decoder_per_sample_mean_sim": -0.000603697553742677, + "decoder_per_sample_max_sim": 0.19871939718723297, + "encoder_per_sample_mean_sim": 0.004783302079886198, + "encoder_per_sample_max_sim": 0.6304284930229187, + "encoder_mean_correlation": 0.00369116198271513, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.06792980432510376, + "decoder_mean_correlation": 0.0015754885971546173, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.0512159988284111 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4a7e31eb750ca75a9d9a5f1cb345f2a674d04ac3 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.7219308167696, "l1_loss": 219.18922500610353, "l0": 256.0, "frac_variance_explained": 0.9695879392325878, "cossim": 0.9831567205488682, "l2_ratio": 0.983003368973732, "relative_reconstruction_bias": 0.9997936256229878, "loss_original": 5.4671875, "loss_reconstructed": 5.5921875, "loss_zero": 10.740625, "frac_recovered": 0.9759765625, "frac_alive": 0.97412109375} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..556c07c2c9e36fbdade81b3f4c534d3bfdf61c00 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-1_trainer_1", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 1, + "activation_dim": 512, + "dict_size": 2048, + "k": 256, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..609a9212d19b86099a72044a3bd3b384494a6e78 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 486, + "intrinsic_dim_0.9": 680, + "intrinsic_dim_0.95": 972, + "effective_rank": 676.2651977539062, + "feature_sparsity": 0.875, + "mean_correlation": 0.0487031452357769, + "max_correlation": 1.0000033378601074, + "correlation_std": 0.1352577954530716, + "decoder_coactive_mean_sim": -0.000500466616358608, + "decoder_coactive_max_sim": 0.42623040080070496, + "decoder_coactive_std_sim": 0.011117835529148579, + "encoder_coactive_mean_sim": 0.0036506024189293385, + "encoder_coactive_max_sim": 0.46891945600509644, + "encoder_coactive_std_sim": 0.016733255237340927, + "decoder_per_sample_mean_sim": -0.000500466616358608, + "decoder_per_sample_max_sim": 0.2542282044887543, + "encoder_per_sample_mean_sim": 0.0036506024189293385, + "encoder_per_sample_max_sim": 0.44621554017066956, + "encoder_mean_correlation": 0.003571811132133007, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.06752442568540573, + "decoder_mean_correlation": 0.001791062531992793, + "decoder_max_correlation": 1.0000009536743164, + "decoder_correlation_std": 0.051166798919439316 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ff161048aadd6b469e2a5af2b92616bde22d995e --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.659628000855446, "l1_loss": 204.22452449798584, "l0": 256.0, "frac_variance_explained": 0.9706387490034103, "cossim": 0.9838522009551525, "l2_ratio": 0.9836978435516357, "relative_reconstruction_bias": 0.99996752217412, "loss_original": 5.4671875, "loss_reconstructed": 5.5828125, "loss_zero": 10.740625, "frac_recovered": 0.97783203125, "frac_alive": 0.96240234375} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..42fc387c1015c62a0ca26aa4115b32310cb582bb --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-2_trainer_2", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 2, + "activation_dim": 512, + "dict_size": 2048, + "k": 256, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ceb38d9a90dda51f3ea68f7437a5724be5ed33ab --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 481, + "intrinsic_dim_0.9": 688, + "intrinsic_dim_0.95": 983, + "effective_rank": 670.652587890625, + "feature_sparsity": 0.8750005960464478, + "mean_correlation": 0.04843044653534889, + "max_correlation": 1.0000027418136597, + "correlation_std": 0.13766011595726013, + "decoder_coactive_mean_sim": -0.0005655006971210241, + "decoder_coactive_max_sim": 0.45535174012184143, + "decoder_coactive_std_sim": 0.011256729252636433, + "encoder_coactive_mean_sim": 0.004165450111031532, + "encoder_coactive_max_sim": 0.5755204558372498, + "encoder_coactive_std_sim": 0.018040597438812256, + "decoder_per_sample_mean_sim": -0.0005655006389133632, + "decoder_per_sample_max_sim": 0.24903523921966553, + "encoder_per_sample_mean_sim": 0.004165449645370245, + "encoder_per_sample_max_sim": 0.5712661147117615, + "encoder_mean_correlation": 0.0021907533518970013, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.06807170808315277, + "decoder_mean_correlation": 0.001756084617227316, + "decoder_max_correlation": 1.0000009536743164, + "decoder_correlation_std": 0.051152873784303665 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1124ca4d3db0d1b7fef494dfa7bda6a3715a7e92 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.702136534452438, "l1_loss": 209.18400897979737, "l0": 255.9986328125, "frac_variance_explained": 0.9689409710466862, "cossim": 0.9832561373710632, "l2_ratio": 0.9830743968486786, "relative_reconstruction_bias": 0.9999133288860321, "loss_original": 5.4671875, "loss_reconstructed": 5.591015625, "loss_zero": 10.740625, "frac_recovered": 0.97607421875, "frac_alive": 0.97119140625} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b3410849f9025021b8d6522578ae167bfa5eb21 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-3_trainer_3", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3, + "activation_dim": 512, + "dict_size": 2048, + "k": 256, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..57067f473edb6434cedb2e8efde1ddc4befd99fa --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 472, + "intrinsic_dim_0.9": 677, + "intrinsic_dim_0.95": 968, + "effective_rank": 652.3516235351562, + "feature_sparsity": 0.875, + "mean_correlation": 0.04835033416748047, + "max_correlation": 1.0000038146972656, + "correlation_std": 0.13854721188545227, + "decoder_coactive_mean_sim": -0.0006331527838483453, + "decoder_coactive_max_sim": 0.47532302141189575, + "decoder_coactive_std_sim": 0.011337202973663807, + "encoder_coactive_mean_sim": 0.005108896177262068, + "encoder_coactive_max_sim": 0.5862697958946228, + "encoder_coactive_std_sim": 0.019558507949113846, + "decoder_per_sample_mean_sim": -0.0006331527838483453, + "decoder_per_sample_max_sim": 0.2255595475435257, + "encoder_per_sample_mean_sim": 0.005108896177262068, + "encoder_per_sample_max_sim": 0.5814019441604614, + "encoder_mean_correlation": 0.002764908829703927, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.07186086475849152, + "decoder_mean_correlation": 0.001473523210734129, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.05101215839385986 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dae03e88f2dc7288ee755466f51ab5f4178e1ceb --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.7153001874685287, "l1_loss": 214.051033782959, "l0": 256.0, "frac_variance_explained": 0.9711439780890941, "cossim": 0.9831305019557476, "l2_ratio": 0.9829454332590103, "relative_reconstruction_bias": 0.9999813109636306, "loss_original": 5.4671875, "loss_reconstructed": 5.588671875, "loss_zero": 10.740625, "frac_recovered": 0.976611328125, "frac_alive": 0.96630859375} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..226878f8a4a0236fdcaa59684f231e24c6aa2831 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-4_trainer_4", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 4, + "activation_dim": 512, + "dict_size": 2048, + "k": 256, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..09fc7c9ec8e9fce7019bbda5a99e4ba7c587a67b --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 481, + "intrinsic_dim_0.9": 691, + "intrinsic_dim_0.95": 987, + "effective_rank": 678.6663818359375, + "feature_sparsity": 0.8750003576278687, + "mean_correlation": 0.04904370754957199, + "max_correlation": 1.0000029802322388, + "correlation_std": 0.13809221982955933, + "decoder_coactive_mean_sim": -0.0006003740127198398, + "decoder_coactive_max_sim": 0.4896470904350281, + "decoder_coactive_std_sim": 0.01121507491916418, + "encoder_coactive_mean_sim": 0.004258359782397747, + "encoder_coactive_max_sim": 0.4985700845718384, + "encoder_coactive_std_sim": 0.01753879338502884, + "decoder_per_sample_mean_sim": -0.0006003740709275007, + "decoder_per_sample_max_sim": 0.2869977653026581, + "encoder_per_sample_mean_sim": 0.004258360248059034, + "encoder_per_sample_max_sim": 0.4956880509853363, + "encoder_mean_correlation": 0.0028513818979263306, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.06897676736116409, + "decoder_mean_correlation": 0.0016533236484974623, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.05112037807703018 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..36b4e258164235c3a6091cda2f162496aa9837d8 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.701796779036522, "l1_loss": 206.50695514678955, "l0": 255.9994140625, "frac_variance_explained": 0.9682310611009598, "cossim": 0.9833530187606812, "l2_ratio": 0.9831482082605362, "relative_reconstruction_bias": 0.9999046422541141, "loss_original": 5.4671875, "loss_reconstructed": 5.59140625, "loss_zero": 10.740625, "frac_recovered": 0.976123046875, "frac_alive": 0.97509765625} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..63fcba13dc437ce492a687ad36c7231d649d96f6 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-0_trainer_0", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 0, + "activation_dim": 512, + "dict_size": 2048, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8a70c68f83d0ab01d7a5cffaaeb228325ea6c2cc --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1145, + "intrinsic_dim_0.9": 1494, + "intrinsic_dim_0.95": 1716, + "effective_rank": 1393.9248046875, + "feature_sparsity": 0.984375, + "mean_correlation": 0.009593969210982323, + "max_correlation": 1.0000017881393433, + "correlation_std": 0.034370556473731995, + "decoder_coactive_mean_sim": 0.012857094407081604, + "decoder_coactive_max_sim": 0.5831928253173828, + "decoder_coactive_std_sim": 0.04128008335828781, + "encoder_coactive_mean_sim": 0.013956918381154537, + "encoder_coactive_max_sim": 0.45717740058898926, + "encoder_coactive_std_sim": 0.03173547610640526, + "decoder_per_sample_mean_sim": 0.012857094407081604, + "decoder_per_sample_max_sim": 0.2939373552799225, + "encoder_per_sample_mean_sim": 0.013956919312477112, + "encoder_per_sample_max_sim": 0.19147595763206482, + "encoder_mean_correlation": 0.0025674644857645035, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.059210386127233505, + "decoder_mean_correlation": 0.004608687479048967, + "decoder_max_correlation": 1.0000009536743164, + "decoder_correlation_std": 0.057089705020189285 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e13ad8fbdd77b093e414a3d21de325c6b1bd2e63 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json @@ -0,0 +1,14 @@ +{ + "l2_loss": 5.521578305959702, + "l1_loss": 44.27927327156067, + "l0": 32.0, + "frac_variance_explained": 0.8718812368810177, + "cossim": 0.9284569166600705, + "l2_ratio": 0.9278947010636329, + "relative_reconstruction_bias": 1.0000686429440975, + "loss_original": 5.4671875, + "loss_reconstructed": 6.01953125, + "loss_zero": 10.740625, + "frac_recovered": 0.89345703125, + "frac_alive": 1.0 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eb63b7f1d6f4849a7cc66285320a36e652b9e3a9 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-1_trainer_1", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 1, + "activation_dim": 512, + "dict_size": 2048, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..15861e9c6d682d8c2b29c7d03157fb60867fe3bf --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1145, + "intrinsic_dim_0.9": 1493, + "intrinsic_dim_0.95": 1714, + "effective_rank": 1391.723876953125, + "feature_sparsity": 0.984375, + "mean_correlation": 0.009598391130566597, + "max_correlation": 1.0000014305114746, + "correlation_std": 0.03441373631358147, + "decoder_coactive_mean_sim": 0.013000169768929482, + "decoder_coactive_max_sim": 0.5189113616943359, + "decoder_coactive_std_sim": 0.04145849123597145, + "encoder_coactive_mean_sim": 0.013387808576226234, + "encoder_coactive_max_sim": 0.46838805079460144, + "encoder_coactive_std_sim": 0.03152437135577202, + "decoder_per_sample_mean_sim": 0.013000166974961758, + "decoder_per_sample_max_sim": 0.29648900032043457, + "encoder_per_sample_mean_sim": 0.013387808576226234, + "encoder_per_sample_max_sim": 0.1819629967212677, + "encoder_mean_correlation": 0.0035003535449504852, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.0581950768828392, + "decoder_mean_correlation": 0.004728738218545914, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.05676833167672157 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bc3fb16ff15804d07ba83905db8f02252f847dd1 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json @@ -0,0 +1,14 @@ +{ + "l2_loss": 5.493244290351868, + "l1_loss": 43.91451568603516, + "l0": 32.0, + "frac_variance_explained": 0.8692285768687725, + "cossim": 0.9287988729774952, + "l2_ratio": 0.9284325882792472, + "relative_reconstruction_bias": 1.0001945979893208, + "loss_original": 5.4671875, + "loss_reconstructed": 6.02265625, + "loss_zero": 10.740625, + "frac_recovered": 0.892724609375, + "frac_alive": 1.0 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b837f7d1bb57b3158b4ab465602c137124e26046 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-2_trainer_2", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 2, + "activation_dim": 512, + "dict_size": 2048, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..29b5977f8e58e6106a48bdbdb376f1be5ae403b8 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1147, + "intrinsic_dim_0.9": 1497, + "intrinsic_dim_0.95": 1720, + "effective_rank": 1386.2728271484375, + "feature_sparsity": 0.984375, + "mean_correlation": 0.009658349677920341, + "max_correlation": 1.0000011920928955, + "correlation_std": 0.034718144685029984, + "decoder_coactive_mean_sim": 0.013472042046487331, + "decoder_coactive_max_sim": 0.6248090863227844, + "decoder_coactive_std_sim": 0.042402926832437515, + "encoder_coactive_mean_sim": 0.013234490528702736, + "encoder_coactive_max_sim": 0.48452436923980713, + "encoder_coactive_std_sim": 0.03160259500145912, + "decoder_per_sample_mean_sim": 0.013472042977809906, + "decoder_per_sample_max_sim": 0.295926034450531, + "encoder_per_sample_mean_sim": 0.013234490528702736, + "encoder_per_sample_max_sim": 0.1905398666858673, + "encoder_mean_correlation": 0.003325967350974679, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.057232871651649475, + "decoder_mean_correlation": 0.004680304788053036, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.057123977690935135 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e478a111cc4e1203ee9ecbd214294e0644cb76 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json @@ -0,0 +1,14 @@ +{ + "l2_loss": 5.518608373403549, + "l1_loss": 43.638191032409665, + "l0": 32.0, + "frac_variance_explained": 0.8637508787214756, + "cossim": 0.9281533844769001, + "l2_ratio": 0.9278098031878471, + "relative_reconstruction_bias": 1.0003917694091797, + "loss_original": 5.4671875, + "loss_reconstructed": 6.020703125, + "loss_zero": 10.740625, + "frac_recovered": 0.892822265625, + "frac_alive": 1.0 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e20a5277c2d3a2bdcf42d41a2ab9cc877654221 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-3_trainer_3", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3, + "activation_dim": 512, + "dict_size": 2048, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3d18b526973ae1f3f97daf5d0e97290d3c2ec8e5 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1146, + "intrinsic_dim_0.9": 1496, + "intrinsic_dim_0.95": 1719, + "effective_rank": 1397.719970703125, + "feature_sparsity": 0.984375, + "mean_correlation": 0.009594940580427647, + "max_correlation": 1.0000015497207642, + "correlation_std": 0.034256912767887115, + "decoder_coactive_mean_sim": 0.013142098672688007, + "decoder_coactive_max_sim": 0.4960811734199524, + "decoder_coactive_std_sim": 0.0417519137263298, + "encoder_coactive_mean_sim": 0.013556867837905884, + "encoder_coactive_max_sim": 0.4828951954841614, + "encoder_coactive_std_sim": 0.0318557471036911, + "decoder_per_sample_mean_sim": 0.013142098672688007, + "decoder_per_sample_max_sim": 0.3047160804271698, + "encoder_per_sample_mean_sim": 0.013556867837905884, + "encoder_per_sample_max_sim": 0.1878674179315567, + "encoder_mean_correlation": 0.003432024270296097, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.058941394090652466, + "decoder_mean_correlation": 0.004725632257759571, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.05638653412461281 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..37bffbd97434e8406aa2628e70241a2be385ba00 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json @@ -0,0 +1,14 @@ +{ + "l2_loss": 5.497262263298035, + "l1_loss": 43.95940580368042, + "l0": 32.0, + "frac_variance_explained": 0.873967283219099, + "cossim": 0.9286889567971229, + "l2_ratio": 0.9278851471841335, + "relative_reconstruction_bias": 0.99990846067667, + "loss_original": 5.4671875, + "loss_reconstructed": 6.01796875, + "loss_zero": 10.740625, + "frac_recovered": 0.893505859375, + "frac_alive": 1.0 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b64b823442cb08a2f845f88bb0a6c6764b6164ca --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-4_trainer_4", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 4, + "activation_dim": 512, + "dict_size": 2048, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b31bd2ccf2e5fe86c204628aae0ad462a1cdae9a --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1143, + "intrinsic_dim_0.9": 1494, + "intrinsic_dim_0.95": 1717, + "effective_rank": 1369.8779296875, + "feature_sparsity": 0.984375, + "mean_correlation": 0.009484217502176762, + "max_correlation": 1.0000014305114746, + "correlation_std": 0.0345107764005661, + "decoder_coactive_mean_sim": 0.012512738816440105, + "decoder_coactive_max_sim": 0.6438488960266113, + "decoder_coactive_std_sim": 0.04079408198595047, + "encoder_coactive_mean_sim": 0.012626885436475277, + "encoder_coactive_max_sim": 0.5188671946525574, + "encoder_coactive_std_sim": 0.031891077756881714, + "decoder_per_sample_mean_sim": 0.01251273788511753, + "decoder_per_sample_max_sim": 0.2933551073074341, + "encoder_per_sample_mean_sim": 0.012626885436475277, + "encoder_per_sample_max_sim": 0.19583337008953094, + "encoder_mean_correlation": 0.003148031421005726, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.05692300200462341, + "decoder_mean_correlation": 0.004536854103207588, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.05627986416220665 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..70fa6c5d07a7305048d9cb06369731113f937e06 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json @@ -0,0 +1,14 @@ +{ + "l2_loss": 5.485749912261963, + "l1_loss": 44.28157253265381, + "l0": 32.0, + "frac_variance_explained": 0.867275919765234, + "cossim": 0.9295805610716343, + "l2_ratio": 0.9290113553404808, + "relative_reconstruction_bias": 1.0001047268509864, + "loss_original": 5.4671875, + "loss_reconstructed": 6.025390625, + "loss_zero": 10.740625, + "frac_recovered": 0.8919921875, + "frac_alive": 1.0 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..23d329d9972c47592cf4c8fb98e33510c4f69194 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json @@ -0,0 +1,34 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-0_trainer_0", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 0, + "activation_dim": 512, + "dict_size": 32768, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5e733c5fd5aa1883f5c9178bc2d915282b3a6509 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json @@ -0,0 +1,34 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-1_trainer_1", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 1, + "activation_dim": 512, + "dict_size": 32768, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ec0e1fae0690463cf004713d4d6a626976eb934 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json @@ -0,0 +1,34 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-2_trainer_2", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 2, + "activation_dim": 512, + "dict_size": 32768, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c5426e9b36f66be7795d20c74e3fb636892aca7 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json @@ -0,0 +1,34 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-3_trainer_3", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3, + "activation_dim": 512, + "dict_size": 32768, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67f7a2bf041023f6321ba4c09b9806e67617f016 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json @@ -0,0 +1,34 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-4_trainer_4", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 4, + "activation_dim": 512, + "dict_size": 32768, + "k": 32, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c77b2e10a21b573b3193abd091af2e53eab9a5d --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-0_trainer_0", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 0, + "activation_dim": 512, + "dict_size": 2048, + "k": 512, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..43319a64b2ddb3be81e4181604c7e82bf73d8648 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 346, + "intrinsic_dim_0.9": 437, + "intrinsic_dim_0.95": 489, + "effective_rank": 321.3466796875, + "feature_sparsity": 0.7501027584075928, + "mean_correlation": 0.11567842215299606, + "max_correlation": 1.000004768371582, + "correlation_std": 0.2895612120628357, + "decoder_coactive_mean_sim": -0.0009108898229897022, + "decoder_coactive_max_sim": 0.2942618727684021, + "decoder_coactive_std_sim": 0.01467378344386816, + "encoder_coactive_mean_sim": 0.04503903165459633, + "encoder_coactive_max_sim": 0.36706337332725525, + "encoder_coactive_std_sim": 0.05137478560209274, + "decoder_per_sample_mean_sim": -0.0009108897647820413, + "decoder_per_sample_max_sim": 0.10512230545282364, + "encoder_per_sample_mean_sim": 0.04503902792930603, + "encoder_per_sample_max_sim": 0.2558523714542389, + "encoder_mean_correlation": 0.004667331930249929, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.11842181533575058, + "decoder_mean_correlation": 0.00027410718030296266, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.04888004809617996 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1979f91f7f45af38cceff8d4d948f9c5ecb3b984 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_0/standard_eval_results.json @@ -0,0 +1,14 @@ +{ + "l2_loss": 0.1218978282995522, + "l1_loss": 803.34478225708, + "l0": 511.82421875, + "frac_variance_explained": 0.9998174428939819, + "cossim": 1.0000896871089935, + "l2_ratio": 0.9999499209225178, + "relative_reconstruction_bias": 1.0000311397016048, + "loss_original": 5.4671875, + "loss_reconstructed": 5.46796875, + "loss_zero": 10.740625, + "frac_recovered": 0.99990234375, + "frac_alive": 0.58154296875 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..546245429d200340a0d80bb495f39f43b0c5771a --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-1_trainer_1", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 1, + "activation_dim": 512, + "dict_size": 2048, + "k": 512, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5c85a9ac8ee036821330ae794edc40625fbfdaca --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 342, + "intrinsic_dim_0.9": 433, + "intrinsic_dim_0.95": 486, + "effective_rank": 302.5154724121094, + "feature_sparsity": 0.7500362992286682, + "mean_correlation": 0.11686287820339203, + "max_correlation": 1.000004768371582, + "correlation_std": 0.29108867049217224, + "decoder_coactive_mean_sim": -0.0009243678650818765, + "decoder_coactive_max_sim": 0.21814467012882233, + "decoder_coactive_std_sim": 0.0147458016872406, + "encoder_coactive_mean_sim": 0.051302291452884674, + "encoder_coactive_max_sim": 0.4465314447879791, + "encoder_coactive_std_sim": 0.056279055774211884, + "decoder_per_sample_mean_sim": -0.0009243678650818765, + "decoder_per_sample_max_sim": 0.09590303897857666, + "encoder_per_sample_mean_sim": 0.051302291452884674, + "encoder_per_sample_max_sim": 0.2655414640903473, + "encoder_mean_correlation": 0.005481375381350517, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.12337620556354523, + "decoder_mean_correlation": 0.0002578080748207867, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.048913467675447464 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9071cf61e6f366a11338af73136e2bf4929da25f --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_1/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.03437363065313548, "l1_loss": 898.7996040344239, "l0": 511.90654296875, "frac_variance_explained": 0.9998641557991504, "cossim": 1.0001215882599355, "l2_ratio": 0.9999546416103839, "relative_reconstruction_bias": 0.9999443709850311, "loss_original": 5.4671875, "loss_reconstructed": 5.466015625, "loss_zero": 10.740625, "frac_recovered": 1.000244140625, "frac_alive": 0.59033203125} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7372e329c335f0341358b584e56afcfc846005ab --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-2_trainer_2", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 2, + "activation_dim": 512, + "dict_size": 2048, + "k": 512, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..76da297de144df840694d3ccb99e8bb29ec2f11e --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 342, + "intrinsic_dim_0.9": 434, + "intrinsic_dim_0.95": 487, + "effective_rank": 297.5074462890625, + "feature_sparsity": 0.7500617504119873, + "mean_correlation": 0.11699061840772629, + "max_correlation": 1.0000042915344238, + "correlation_std": 0.29189395904541016, + "decoder_coactive_mean_sim": -0.0009281504317186773, + "decoder_coactive_max_sim": 0.2474658489227295, + "decoder_coactive_std_sim": 0.014783051796257496, + "encoder_coactive_mean_sim": 0.05288249999284744, + "encoder_coactive_max_sim": 0.3966051936149597, + "encoder_coactive_std_sim": 0.057189274579286575, + "decoder_per_sample_mean_sim": -0.0009281504899263382, + "decoder_per_sample_max_sim": 0.10155373066663742, + "encoder_per_sample_mean_sim": 0.05288250371813774, + "encoder_per_sample_max_sim": 0.27083733677864075, + "encoder_mean_correlation": 0.003606635145843029, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.12434981763362885, + "decoder_mean_correlation": 0.00025882525369524956, + "decoder_max_correlation": 1.0000009536743164, + "decoder_correlation_std": 0.04886815324425697 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1b5e04b2ee6080c6ac77ba1c02d6a13a9cda7749 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_2/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.025140870863106103, "l1_loss": 931.2091484069824, "l0": 511.9453125, "frac_variance_explained": 0.9999042950570584, "cossim": 1.0001301787793637, "l2_ratio": 0.9999769635498523, "relative_reconstruction_bias": 1.000041725486517, "loss_original": 5.4671875, "loss_reconstructed": 5.472265625, "loss_zero": 10.740625, "frac_recovered": 0.99892578125, "frac_alive": 0.58544921875} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..32e95c5b43788cf409645d97d954e19276d63797 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-3_trainer_3", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3, + "activation_dim": 512, + "dict_size": 2048, + "k": 512, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d07863b1156c45a6fa4d45daa59e871c59ef19b2 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 345, + "intrinsic_dim_0.9": 434, + "intrinsic_dim_0.95": 486, + "effective_rank": 317.96185302734375, + "feature_sparsity": 0.7500829100608826, + "mean_correlation": 0.11769045889377594, + "max_correlation": 1.0000042915344238, + "correlation_std": 0.2899561822414398, + "decoder_coactive_mean_sim": -0.0009198148618452251, + "decoder_coactive_max_sim": 0.24994757771492004, + "decoder_coactive_std_sim": 0.014731859788298607, + "encoder_coactive_mean_sim": 0.04878106713294983, + "encoder_coactive_max_sim": 0.4278787076473236, + "encoder_coactive_std_sim": 0.054398663341999054, + "decoder_per_sample_mean_sim": -0.0009198148618452251, + "decoder_per_sample_max_sim": 0.10858472436666489, + "encoder_per_sample_mean_sim": 0.04878106713294983, + "encoder_per_sample_max_sim": 0.3049105703830719, + "encoder_mean_correlation": 0.004845459014177322, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.12293191999197006, + "decoder_mean_correlation": 0.0002943026483990252, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.04887963831424713 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e4f2664b14e645558e2b2e55b2f7e1593005f0da --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_3/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.11367939515039324, "l1_loss": 810.8659385681152, "l0": 511.80732421875, "frac_variance_explained": 0.9998415939509868, "cossim": 1.0000781424343586, "l2_ratio": 0.9999195456504821, "relative_reconstruction_bias": 0.999939326196909, "loss_original": 5.4671875, "loss_reconstructed": 5.4640625, "loss_zero": 10.740625, "frac_recovered": 1.000732421875, "frac_alive": 0.59228515625} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..36c4dd70fc6843ba931dde54752c4b4bc917c212 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/seed-4_trainer_4", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 4, + "activation_dim": 512, + "dict_size": 2048, + "k": 512, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1871aeb59267daca5eb261498b817353b93b89e8 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 343, + "intrinsic_dim_0.9": 434, + "intrinsic_dim_0.95": 486, + "effective_rank": 299.96270751953125, + "feature_sparsity": 0.7500380277633667, + "mean_correlation": 0.12012023478746414, + "max_correlation": 1.0000050067901611, + "correlation_std": 0.292927086353302, + "decoder_coactive_mean_sim": -0.000933852803427726, + "decoder_coactive_max_sim": 0.2453019917011261, + "decoder_coactive_std_sim": 0.014877861365675926, + "encoder_coactive_mean_sim": 0.05555467680096626, + "encoder_coactive_max_sim": 0.34842175245285034, + "encoder_coactive_std_sim": 0.058952681720256805, + "decoder_per_sample_mean_sim": -0.000933852803427726, + "decoder_per_sample_max_sim": 0.09431355446577072, + "encoder_per_sample_mean_sim": 0.05555467680096626, + "encoder_per_sample_max_sim": 0.24732723832130432, + "encoder_mean_correlation": 0.0046387407928705215, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.12495078891515732, + "decoder_mean_correlation": 0.00025382271269336343, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.048862189054489136 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2907d41f5091e04ab39bcd0504bbaba859dcfba8 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-512/dict_size-2048/trainer_4/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.026735168404411525, "l1_loss": 929.9151672363281, "l0": 511.94140625, "frac_variance_explained": 0.9999176003038883, "cossim": 1.0001217171549797, "l2_ratio": 0.9999563246965408, "relative_reconstruction_bias": 0.9999580040574074, "loss_original": 5.4671875, "loss_reconstructed": 5.468359375, "loss_zero": 10.740625, "frac_recovered": 0.999853515625, "frac_alive": 0.60107421875} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..378262f03ce912949248d655785bdd2425c0f85e --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1101, + "intrinsic_dim_0.9": 1466, + "intrinsic_dim_0.95": 1701, + "effective_rank": 1336.6378173828125, + "feature_sparsity": 0.96875, + "mean_correlation": 0.018963707610964775, + "max_correlation": 1.0000014305114746, + "correlation_std": 0.044405288994312286, + "decoder_coactive_mean_sim": 0.005104673095047474, + "decoder_coactive_max_sim": 0.38417404890060425, + "decoder_coactive_std_sim": 0.02679547853767872, + "encoder_coactive_mean_sim": 0.00732428440824151, + "encoder_coactive_max_sim": 0.36569666862487793, + "encoder_coactive_std_sim": 0.02348160743713379, + "decoder_per_sample_mean_sim": 0.005104673095047474, + "decoder_per_sample_max_sim": 0.24980509281158447, + "encoder_per_sample_mean_sim": 0.00732428440824151, + "encoder_per_sample_max_sim": 0.17561672627925873, + "encoder_mean_correlation": 0.0016635311767458916, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.05483342707157135, + "decoder_mean_correlation": 0.0018550025997683406, + "decoder_max_correlation": 1.0000009536743164, + "decoder_correlation_std": 0.052489809691905975 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef72ba2f051374defdf6d1ce3c09a60da80b2e01 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/seed-1_trainer_1", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 1, + "activation_dim": 512, + "dict_size": 2048, + "k": 64, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..32d92203690c0c25a2d61b652537cd6941a9a833 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1096, + "intrinsic_dim_0.9": 1461, + "intrinsic_dim_0.95": 1697, + "effective_rank": 1342.979248046875, + "feature_sparsity": 0.96875, + "mean_correlation": 0.018925022333860397, + "max_correlation": 1.000002145767212, + "correlation_std": 0.04421866685152054, + "decoder_coactive_mean_sim": 0.005001583602279425, + "decoder_coactive_max_sim": 0.6809477806091309, + "decoder_coactive_std_sim": 0.026375532150268555, + "encoder_coactive_mean_sim": 0.006971509661525488, + "encoder_coactive_max_sim": 0.5061028003692627, + "encoder_coactive_std_sim": 0.0230117067694664, + "decoder_per_sample_mean_sim": 0.005001583602279425, + "decoder_per_sample_max_sim": 0.27040722966194153, + "encoder_per_sample_mean_sim": 0.006971509661525488, + "encoder_per_sample_max_sim": 0.20409739017486572, + "encoder_mean_correlation": 0.0018320512026548386, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.053943462669849396, + "decoder_mean_correlation": 0.0019005772192031145, + "decoder_max_correlation": 1.0000011920928955, + "decoder_correlation_std": 0.052223801612854004 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..830feecf8f5526b0df6d5aba13ccc2a4b995a59d --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_1/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.780432206392288, "l1_loss": 63.796514415740965, "l0": 64.0, "frac_variance_explained": 0.9086314626038074, "cossim": 0.9469698674976825, "l2_ratio": 0.9466174811124801, "relative_reconstruction_bias": 1.000212862342596, "loss_original": 5.4671875, "loss_reconstructed": 5.86328125, "loss_zero": 10.740625, "frac_recovered": 0.9236328125, "frac_alive": 1.0} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77f51fa39225e7eb1fc12781a83caf8eb2502c9d --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/seed-2_trainer_2", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 2, + "activation_dim": 512, + "dict_size": 2048, + "k": 64, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9a09f7a582945552246bb69d44f089655f4bd06c --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1098, + "intrinsic_dim_0.9": 1462, + "intrinsic_dim_0.95": 1696, + "effective_rank": 1344.98193359375, + "feature_sparsity": 0.96875, + "mean_correlation": 0.018938545137643814, + "max_correlation": 1.0000015497207642, + "correlation_std": 0.04458320140838623, + "decoder_coactive_mean_sim": 0.005024466197937727, + "decoder_coactive_max_sim": 0.3917606472969055, + "decoder_coactive_std_sim": 0.02672816626727581, + "encoder_coactive_mean_sim": 0.0071235643699765205, + "encoder_coactive_max_sim": 0.382651150226593, + "encoder_coactive_std_sim": 0.023562893271446228, + "decoder_per_sample_mean_sim": 0.005024466197937727, + "decoder_per_sample_max_sim": 0.2612590789794922, + "encoder_per_sample_mean_sim": 0.0071235643699765205, + "encoder_per_sample_max_sim": 0.2195105403661728, + "encoder_mean_correlation": 0.0016356734558939934, + "encoder_max_correlation": 1.0000003576278687, + "encoder_correlation_std": 0.053993258625268936, + "decoder_mean_correlation": 0.0018265489488840103, + "decoder_max_correlation": 1.0000009536743164, + "decoder_correlation_std": 0.052157212048769 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..35ebc4d04ec72bdfc8079f1a4dc906fca526aa94 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_2/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.802043730020523, "l1_loss": 63.38813443183899, "l0": 64.0, "frac_variance_explained": 0.9043121233582496, "cossim": 0.9462784223258496, "l2_ratio": 0.9456560485064983, "relative_reconstruction_bias": 0.9999310210347175, "loss_original": 5.4671875, "loss_reconstructed": 5.8640625, "loss_zero": 10.740625, "frac_recovered": 0.923486328125, "frac_alive": 1.0} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/config.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2e3df419402f599f7d6881eb3103ea407423fedc --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/config.json @@ -0,0 +1,46 @@ +{ + "trainer": { + "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/seed-3_trainer_3", + "trainer_class": "TopKTrainer", + "dict_class": "AutoEncoderTopK", + "lr": 0.0003, + "steps": 14648, + "auxk_alpha": 0.03125, + "diversity_scale": 0.0, + "diversity_type": null, + "warmup_steps": 1000, + "decay_start": 1200, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "seed": 3, + "activation_dim": 512, + "dict_size": 2048, + "k": 64, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "submodule_name": "resid_post_layer_4", + "random_mask_bit_ratio": 0.0, + "random_mask_bit_ratio_force_topk": false, + "random_mask_bit_original_scale": 1.0, + "random_mask_bit_pos": "input", + "random_mask_bit_reconstruction_target": "original", + "nonlinear_sae_loss_scale": 0.0, + "nonlinear_sae_input": "original", + "nonlinear_sae_target": "residual", + "nonlinear_use_encoder_mlp": true, + "nonlinear_use_decoder_mlp": false, + "nonlinear_topk_k": -1, + "nonlinear_block_gradients": false, + "meta_sae_loss_scale": 0.0 + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1ea0c51b8b307ac12dd1bd69ebaa83a2d07d51e7 --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json @@ -0,0 +1,26 @@ +{ + "intrinsic_dim_0.8": 1095, + "intrinsic_dim_0.9": 1460, + "intrinsic_dim_0.95": 1694, + "effective_rank": 1335.848388671875, + "feature_sparsity": 0.96875, + "mean_correlation": 0.018848147243261337, + "max_correlation": 1.0000026226043701, + "correlation_std": 0.0433562807738781, + "decoder_coactive_mean_sim": 0.004982855170965195, + "decoder_coactive_max_sim": 0.3878358006477356, + "decoder_coactive_std_sim": 0.02627839706838131, + "encoder_coactive_mean_sim": 0.006902318447828293, + "encoder_coactive_max_sim": 0.36567115783691406, + "encoder_coactive_std_sim": 0.02317717857658863, + "decoder_per_sample_mean_sim": 0.004982855170965195, + "decoder_per_sample_max_sim": 0.24182987213134766, + "encoder_per_sample_mean_sim": 0.006902318447828293, + "encoder_per_sample_max_sim": 0.17159400880336761, + "encoder_mean_correlation": 0.0018277183407917619, + "encoder_max_correlation": 1.000000238418579, + "encoder_correlation_std": 0.05462810397148132, + "decoder_mean_correlation": 0.0019029118120670319, + "decoder_max_correlation": 1.000001072883606, + "decoder_correlation_std": 0.05232343450188637 +} \ No newline at end of file diff --git a/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/standard_eval_results.json b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/standard_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2344f65d0cb5930c4bd6f1af237a6ac2c930ca3c --- /dev/null +++ b/TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-64/dict_size-2048/trainer_3/standard_eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.806892818212509, "l1_loss": 63.29269104003906, "l0": 64.0, "frac_variance_explained": 0.9039966031908989, "cossim": 0.946143351495266, "l2_ratio": 0.9456219218671322, "relative_reconstruction_bias": 1.0000569581985475, "loss_original": 5.4671875, "loss_reconstructed": 5.86484375, "loss_zero": 10.740625, "frac_recovered": 0.9232421875, "frac_alive": 1.0} \ No newline at end of file