diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..fc9af4cb740e4f7e6a0af256bfe25c67d7278757 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/btk/cfg.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/btk/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4724bfee639cae6cd2e03fcc0f2bdf81ed64c237 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/btk/cfg.json @@ -0,0 +1 @@ +{"d_in": 2304, "metadata": {"sae_lens_version": "6.39.0", "sae_lens_training_version": "6.39.0", "model_name": "google/gemma-2-2b", "hook_name": "model.layers.12", "hook_head_index": null, "context_size": 1024, "prepend_bos": true, "seqpos_slice": [null], "model_from_pretrained_kwargs": {}, "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B", "model_class_name": "AutoModelForCausalLM", "exclude_special_tokens": true, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "d_sae": 32768, "device": "cuda", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "dtype": "float32", "architecture": "jumprelu"} \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/btk/sae_weights.safetensors b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/btk/sae_weights.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..310d0857aaf04f5b75862abe5eac54e092106419 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/btk/sae_weights.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6325c51465e114a4ccfa887dcad6d34570939b7ef13c50219000bf13cc560d2 +size 604251536 diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/cfg.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3837c6d5e8a2f38c87588837ac6e5b7e272178fc --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/cfg.json @@ -0,0 +1 @@ +{"d_in": 2304, "metadata": {"sae_lens_version": "6.39.0", "sae_lens_training_version": "6.39.0", "model_name": "gemma-2-2b", "hook_name": "blocks.12.hook_resid_post", "hook_head_index": null, "context_size": 1024, "prepend_bos": true, "seqpos_slice": [null], "model_from_pretrained_kwargs": {"center_writing_weights": false}, "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B", "model_class_name": "AutoModelForCausalLM", "exclude_special_tokens": true, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "d_sae": 32768, "device": "cuda", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "dtype": "float32", "architecture": "jumprelu"} \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/dashboard.zip b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/dashboard.zip new file mode 100644 index 0000000000000000000000000000000000000000..863a2a5c1eca79b3e3455d2983629fb33ccd9475 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/dashboard.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8aee97ea8dd08894edbffc87c0582fb206b2d4e62fb19604449635c69fd503 +size 6945976548 diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/runner_cfg.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/runner_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3e7d3c704dbca450cf9089dba715a37a00cf0993 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/runner_cfg.json @@ -0,0 +1 @@ +{"sae": {"d_in": 2304, "d_sae": 32768, "dtype": "float32", "device": "cpu", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.37.4", "sae_lens_training_version": "6.37.4"}, "decoder_init_norm": 0.1, "k": 150, "use_sparse_activations": false, "aux_loss_coefficient": 1.0, "rescale_acts_by_decoder_norm": true, "topk_threshold_lr": 0.01, "matryoshka_widths": [2048, 8192, 32768], "matryoshka_loss_multipliers": null, "skip_final_matryoshka_width": true, "include_outer_loss": true, "detach_matryoshka_losses": false, "normalize_reconstruction_losses_by_d_in": false, "normalize_losses_by_num_matryoshka_steps": false, "matryoshka_loss_probabilities": 1.0, "initial_matryoshka_loss_probabilities": null, "transition_matryoshka_loss_probabilities_duration": 0, "transition_matryoshka_loss_probabilities_start_step": 0, "pin_matryoshka_encoder": false, "pin_matryoshka_encoder_at_step": 0, "pin_loss_coefficient": 1.0, "use_frequency_sorted_matryoshka": false, "firing_frequency_ema_decay": 0.99, "use_floating_decoder": false, "floating_decoder_controls_b_dec": true, "use_matryoshka_aux_loss": true, "initial_k": null, "transition_k_duration_steps": null, "transition_k_start_step": 0, "btk_start_step": 61035, "weight_link_enc_min_cos_sim": 0.7, "weight_link_dec_min_cos_sim": 0.7, "weight_link_enc_max_norm_ratio": 1.1, "weight_link_dec_max_norm_ratio": 1.1, "weight_link_direction_coeff": 1.0, "weight_link_norm_coeff": 1.0, "weight_link_coeff": 1.0, "btk_loss_coeff": 1.0, "btk_k_ratio": 1.0, "architecture": "twin_xmatryoshka_batchtopk"}, "model_name": "google/gemma-2-2b", "model_class_name": "AutoModelForCausalLM", "hook_name": "model.layers.12", "hook_eval": "NOT_IN_USE", "hook_head_index": null, "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B", "dataset_trust_remote_code": true, "streaming": false, "is_dataset_tokenized": true, "use_chat_formatting": false, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "from_pretrained_path": null, "n_batches_in_buffer": 64, "training_tokens": 500000000, "store_batch_size_prompts": 12, "seqpos_slice": [null], "disable_concat_sequences": false, "sequence_separator_token": "bos", "activations_mixing_fraction": 1.0, "device": "cuda", "act_store_device": "cuda", "seed": 0, "dtype": "float32", "prepend_bos": true, "autocast": true, "autocast_lm": true, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "train_batch_size_tokens": 4096, "adam_beta1": 0.9, "adam_beta2": 0.999, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 24414, "n_restart_cycles": 1, "dead_feature_window": 1000, "feature_sampling_window": 2000, "dead_feature_threshold": 1e-08, "n_eval_batches": 10, "eval_batch_size_prompts": 6, "logger": {"log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "log_weights_to_wandb": true, "wandb_project": "sae-rethink", "wandb_id": null, "run_name": "twin-btk-mat-k-150-layer-12-2026-04-13T14:09:48", "wandb_entity": "chanind", "wandb_log_frequency": 10, "eval_every_n_wandb_logs": 100}, "n_checkpoints": 0, "checkpoint_path": "/vol/data/shared/checkpoints/1bdd9c609f4c734ebf649edf8d9d13c0bd3134b7d42bd968669bd05716199b8e", "save_final_checkpoint": false, "output_path": "/vol/data/saes/twin/gemma-2-2b-twin-btk-mat/k-150/seed-0/model.layers.12/w-32768/t-500M/l0-143.5", "resume_from_checkpoint": null, "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "6.37.4", "sae_lens_training_version": "6.37.4", "exclude_special_tokens": true, "n_batches_for_norm_estimate": 1000, "b_dec_init_method": "zeros", "vnorm_denoise_config": null} \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/100_news_fake_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/100_news_fake_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..48342a54479b631477db9660ee833bb778ec999f --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/100_news_fake_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9794742107838612, + "test_acc": 0.9794749403341289, + "test_auc": 0.9952809554014742, + "val_auc": 0.9824109824109826, + "k": 1, + "dataset": "100_news_fake", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 591 + ] + }, + { + "test_f1": 0.9937947494033413, + "test_acc": 0.9937947494033413, + "test_auc": 0.9997165656874968, + "val_auc": 0.9982839982839983, + "k": 16, + "dataset": "100_news_fake", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 591, + 1912, + 1873, + 1727, + 18788, + 17, + 12301, + 875, + 1809, + 7145, + 7768, + 4169, + 206, + 671, + 2511, + 1439 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/105_click_bait_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/105_click_bait_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..70d3aa48f007f74c0487dda9a3809695486b5ad2 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/105_click_bait_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9371352919954474, + "test_acc": 0.9371354504212573, + "test_auc": 0.9632147873362769, + "val_auc": 0.9470855725506409, + "k": 1, + "dataset": "105_click_bait", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 961 + ] + }, + { + "test_f1": 0.965648298646884, + "test_acc": 0.9656513285806869, + "test_auc": 0.9963407995806536, + "val_auc": 0.9979330301777595, + "k": 16, + "dataset": "105_click_bait", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 961, + 765, + 1912, + 1873, + 5667, + 915, + 1854, + 65, + 1147, + 1471, + 6221, + 2090, + 1146, + 5081, + 29991, + 1831 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/106_hate_hate_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/106_hate_hate_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..a58c5b31a2017f91396e9bfd0ebc70f10f33ea05 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/106_hate_hate_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5801399806457852, + "test_acc": 0.5849056603773585, + "test_auc": 0.5955164808680975, + "val_auc": 0.5542216886754702, + "k": 1, + "dataset": "106_hate_hate", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 473 + ] + }, + { + "test_f1": 0.6099633060242315, + "test_acc": 0.6125786163522012, + "test_auc": 0.6489218400488488, + "val_auc": 0.6390556222488996, + "k": 16, + "dataset": "106_hate_hate", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 473, + 4736, + 2680, + 197, + 1863, + 7180, + 1115, + 1954, + 6258, + 1446, + 828, + 1534, + 1834, + 1520, + 976, + 721 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/107_hate_offensive_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/107_hate_offensive_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..8acd4c386955ce29cb07442889dd6c28b3c4de43 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/107_hate_offensive_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6455622465734614, + "test_acc": 0.6616352201257861, + "test_auc": 0.6706750315683734, + "val_auc": 0.6894757903161265, + "k": 1, + "dataset": "107_hate_offensive", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 453 + ] + }, + { + "test_f1": 0.7225637496552629, + "test_acc": 0.7240251572327044, + "test_auc": 0.7802056425113337, + "val_auc": 0.7995198079231693, + "k": 16, + "dataset": "107_hate_offensive", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 453, + 7715, + 409, + 1346, + 7468, + 1956, + 3544, + 8954, + 30258, + 866, + 319, + 1186, + 1986, + 473, + 6488, + 1440 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/110_aimade_humangpt3_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/110_aimade_humangpt3_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..e286332d8d5b5f092fa2668d7f74ad5b96edfa30 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/110_aimade_humangpt3_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7854055454944832, + "test_acc": 0.7854088050314465, + "test_auc": 0.8212894629984235, + "val_auc": 0.852140856342537, + "k": 1, + "dataset": "110_aimade_humangpt3", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1912 + ] + }, + { + "test_f1": 0.7891522853666044, + "test_acc": 0.7891823899371069, + "test_auc": 0.8958062668917379, + "val_auc": 0.9181672669067628, + "k": 16, + "dataset": "110_aimade_humangpt3", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1912, + 1957, + 59, + 866, + 641, + 1146, + 306, + 1007, + 22764, + 915, + 1405, + 1658, + 831, + 7028, + 942, + 6044 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/113_movie_sent_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/113_movie_sent_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ab0642d05bf970ace16bbbcc765f76fcf999cf42 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/113_movie_sent_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8507001394550531, + "test_acc": 0.8508176100628931, + "test_auc": 0.8986679260262127, + "val_auc": 0.9447779111644659, + "k": 1, + "dataset": "113_movie_sent", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460 + ] + }, + { + "test_f1": 0.8716530308731567, + "test_acc": 0.8716981132075472, + "test_auc": 0.9356869703373741, + "val_auc": 0.957983193277311, + "k": 16, + "dataset": "113_movie_sent", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460, + 29254, + 1954, + 2407, + 1390, + 870, + 1801, + 1115, + 4108, + 2832, + 6941, + 575, + 1147, + 1540, + 4711, + 7014 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/114_nyc_borough_Manhattan_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/114_nyc_borough_Manhattan_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..0825369e9e68c0f89eafb8e824268bde75ab64c8 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/114_nyc_borough_Manhattan_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5421815750405149, + "test_acc": 0.5466666666666666, + "test_auc": 0.551883815262396, + "val_auc": 0.6560587515299877, + "k": 1, + "dataset": "114_nyc_borough_Manhattan", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 502 + ] + }, + { + "test_f1": 0.5909808502528386, + "test_acc": 0.5917948717948718, + "test_auc": 0.6476819436496449, + "val_auc": 0.7099143206854345, + "k": 16, + "dataset": "114_nyc_borough_Manhattan", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 502, + 3072, + 694, + 7591, + 928, + 2016, + 1694, + 1939, + 738, + 218, + 5467, + 5136, + 1719, + 1623, + 19604, + 260 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/115_nyc_borough_Brooklyn_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/115_nyc_borough_Brooklyn_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..5bc9df3f90be375993834ffce227bbc323e08684 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/115_nyc_borough_Brooklyn_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.47928499606596, + "test_acc": 0.5158974358974359, + "test_auc": 0.5161683273302589, + "val_auc": 0.5320277437780498, + "k": 1, + "dataset": "115_nyc_borough_Brooklyn", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5838 + ] + }, + { + "test_f1": 0.5248522938533454, + "test_acc": 0.5251282051282051, + "test_auc": 0.5273294173090517, + "val_auc": 0.6001631986944104, + "k": 16, + "dataset": "115_nyc_borough_Brooklyn", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5838, + 216, + 1626, + 1728, + 218, + 2016, + 3064, + 2799, + 849, + 126, + 2514, + 65, + 30082, + 1309, + 260, + 355 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/116_nyc_borough_Bronx_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/116_nyc_borough_Bronx_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..919f116e494b6700a01d32a79a7084c4cdb7d78c --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/116_nyc_borough_Bronx_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.4928310090563661, + "test_acc": 0.5312820512820513, + "test_auc": 0.5312931295654223, + "val_auc": 0.5430436556507547, + "k": 1, + "dataset": "116_nyc_borough_Bronx", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1493 + ] + }, + { + "test_f1": 0.5471738419200378, + "test_acc": 0.5517948717948717, + "test_auc": 0.5607306358770661, + "val_auc": 0.5915952672378622, + "k": 16, + "dataset": "116_nyc_borough_Bronx", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1493, + 2514, + 1787, + 30288, + 23429, + 12645, + 1576, + 1963, + 3205, + 248, + 755, + 4375, + 1498, + 30358, + 1152, + 65 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/117_us_state_FL_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/117_us_state_FL_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..483857da57407fd750c079aebbef1fcdad0c7e1e --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/117_us_state_FL_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8233894735728156, + "test_acc": 0.8282828282828283, + "test_auc": 0.8300000000000001, + "val_auc": 0.8183092948717948, + "k": 1, + "dataset": "117_us_state_FL", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 26915 + ] + }, + { + "test_f1": 0.8181447124304266, + "test_acc": 0.8181818181818182, + "test_auc": 0.9191836734693878, + "val_auc": 0.9579326923076923, + "k": 16, + "dataset": "117_us_state_FL", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 26915, + 390, + 6811, + 3518, + 1785, + 746, + 7052, + 1694, + 1284, + 1582, + 6732, + 2889, + 3586, + 3776, + 7777, + 6527 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/118_us_state_CA_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/118_us_state_CA_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1d0d2bfc891acbb0eacb83ab28d8ee34ed156472 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/118_us_state_CA_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.888410991636798, + "test_acc": 0.8888888888888888, + "test_auc": 0.8965306122448978, + "val_auc": 0.9397035256410257, + "k": 1, + "dataset": "118_us_state_CA", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4032 + ] + }, + { + "test_f1": 0.8785398998164956, + "test_acc": 0.8787878787878788, + "test_auc": 0.9583673469387755, + "val_auc": 0.9899839743589743, + "k": 16, + "dataset": "118_us_state_CA", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4032, + 746, + 7038, + 1131, + 126, + 4223, + 6732, + 7650, + 2757, + 1326, + 6525, + 538, + 1727, + 1429, + 7777, + 841 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/119_us_state_TX_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/119_us_state_TX_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..3649858dfd91f18008e32ee49a3b637e9b0ed2b7 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/119_us_state_TX_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8473644735326978, + "test_acc": 0.8484848484848485, + "test_auc": 0.8759183673469387, + "val_auc": 0.8052884615384616, + "k": 1, + "dataset": "119_us_state_TX", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 8171 + ] + }, + { + "test_f1": 0.8989280055456527, + "test_acc": 0.898989898989899, + "test_auc": 0.9775510204081633, + "val_auc": 0.9543269230769231, + "k": 16, + "dataset": "119_us_state_TX", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 8171, + 7038, + 746, + 557, + 5467, + 7052, + 6732, + 4223, + 1823, + 288, + 6932, + 7777, + 1585, + 178, + 5673, + 850 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/120_us_timezone_Chicago_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/120_us_timezone_Chicago_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..4cc7e697ce819266f36e81c1a571f05576f14f72 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/120_us_timezone_Chicago_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6291773460140262, + "test_acc": 0.6605981794538361, + "test_auc": 0.6660862101878756, + "val_auc": 0.7328, + "k": 1, + "dataset": "120_us_timezone_Chicago", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4032 + ] + }, + { + "test_f1": 0.7861743175870144, + "test_acc": 0.7863025574338969, + "test_auc": 0.8812839987914882, + "val_auc": 0.894, + "k": 16, + "dataset": "120_us_timezone_Chicago", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4032, + 8171, + 7038, + 746, + 5673, + 126, + 849, + 1582, + 5436, + 7052, + 18, + 373, + 441, + 288, + 425, + 1694 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/121_us_timezone_New_York_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/121_us_timezone_New_York_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..d083036cf8c8443b85fe374b8d02f00246b2aeef --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/121_us_timezone_New_York_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6029657527953877, + "test_acc": 0.6098829648894668, + "test_auc": 0.6332497095212399, + "val_auc": 0.6344000000000001, + "k": 1, + "dataset": "121_us_timezone_New_York", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5436 + ] + }, + { + "test_f1": 0.8203616024969126, + "test_acc": 0.8214130905938448, + "test_auc": 0.9213369989523224, + "val_auc": 0.9683999999999999, + "k": 16, + "dataset": "121_us_timezone_New_York", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5436, + 4032, + 7052, + 7777, + 8171, + 2799, + 6732, + 5673, + 4223, + 7970, + 1616, + 1131, + 6097, + 557, + 488, + 649 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/122_us_timezone_Los_Angeles_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/122_us_timezone_Los_Angeles_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..21f1658a74027ce3de7df47b6e52d3a81d267abd --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/122_us_timezone_Los_Angeles_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8317040063586488, + "test_acc": 0.8339835283918509, + "test_auc": 0.8464190319579247, + "val_auc": 0.8568, + "k": 1, + "dataset": "122_us_timezone_Los_Angeles", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4032 + ] + }, + { + "test_f1": 0.8269683091823924, + "test_acc": 0.8300823580407456, + "test_auc": 0.9195580514098554, + "val_auc": 0.9481999999999999, + "k": 16, + "dataset": "122_us_timezone_Los_Angeles", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4032, + 746, + 7038, + 7777, + 126, + 1429, + 4223, + 1582, + 832, + 7591, + 5353, + 2757, + 7052, + 23169, + 5673, + 3776 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/123_world_country_United_Kingdom_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/123_world_country_United_Kingdom_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..2326592f3434411f0c238f3be0bb2559efd9073d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/123_world_country_United_Kingdom_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8758700150817185, + "test_acc": 0.8758974358974358, + "test_auc": 0.9490271653145723, + "val_auc": 0.9640962872297021, + "k": 1, + "dataset": "123_world_country_United_Kingdom", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 3561 + ] + }, + { + "test_f1": 0.9620141214967085, + "test_acc": 0.9620512820512821, + "test_auc": 0.9911552832665702, + "val_auc": 0.9930640554875562, + "k": 16, + "dataset": "123_world_country_United_Kingdom", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 3561, + 3350, + 4099, + 5467, + 1293, + 288, + 26439, + 124, + 1585, + 6932, + 1582, + 1785, + 12764, + 57, + 14471, + 128 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/124_world_country_United_States_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/124_world_country_United_States_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..6f6a5b27bc5097ea606448f065105720954428f6 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/124_world_country_United_States_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7585132995164583, + "test_acc": 0.76, + "test_auc": 0.8036405560978894, + "val_auc": 0.864953080375357, + "k": 1, + "dataset": "124_world_country_United_States", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1238 + ] + }, + { + "test_f1": 0.9558948377016445, + "test_acc": 0.9558974358974359, + "test_auc": 0.9910837513044064, + "val_auc": 1.0, + "k": 16, + "dataset": "124_world_country_United_States", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1238, + 7161, + 22214, + 1293, + 10346, + 2846, + 1018, + 6763, + 841, + 1827, + 806, + 6894, + 4561, + 850, + 5467, + 12764 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/125_world_country_Italy_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/125_world_country_Italy_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..eef29dc4673d3b2a58865d4b815f98d60ba49eee --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/125_world_country_Italy_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8892027972027973, + "test_acc": 0.8892307692307693, + "test_auc": 0.9603670851987748, + "val_auc": 0.9661362709098327, + "k": 1, + "dataset": "125_world_country_Italy", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1238 + ] + }, + { + "test_f1": 0.9815384226975771, + "test_acc": 0.9815384615384616, + "test_auc": 0.9982874406705491, + "val_auc": 1.0, + "k": 16, + "dataset": "125_world_country_Italy", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1238, + 5855, + 23310, + 128, + 7760, + 726, + 806, + 52, + 441, + 896, + 218, + 1652, + 22214, + 1168, + 3561, + 18157 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/126_art_type_book_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/126_art_type_book_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..51e55ed8c5c97880779700e84f265078daac0eb8 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/126_art_type_book_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5810247630045569, + "test_acc": 0.6025140875596012, + "test_auc": 0.613088304039947, + "val_auc": 0.6372, + "k": 1, + "dataset": "126_art_type_book", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5925 + ] + }, + { + "test_f1": 0.7011243941203538, + "test_acc": 0.7013437364542696, + "test_auc": 0.7737858138140126, + "val_auc": 0.824, + "k": 16, + "dataset": "126_art_type_book", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5925, + 765, + 1627, + 1471, + 965, + 976, + 8182, + 1245, + 1974, + 850, + 9215, + 13167, + 6492, + 1956, + 750, + 455 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/127_art_type_song_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/127_art_type_song_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..f0844450009843cdb4b80f44d47063455a74fafe --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/127_art_type_song_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6203802143288761, + "test_acc": 0.6402254009536195, + "test_auc": 0.6498235332137849, + "val_auc": 0.7092, + "k": 1, + "dataset": "127_art_type_song", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1245 + ] + }, + { + "test_f1": 0.7596734443124059, + "test_acc": 0.7598612917208496, + "test_auc": 0.8486049503893843, + "val_auc": 0.8296, + "k": 16, + "dataset": "127_art_type_song", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1245, + 1301, + 8182, + 409, + 31360, + 1391, + 9215, + 755, + 31605, + 1627, + 1956, + 1253, + 3544, + 5925, + 726, + 759 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/128_art_type_movie_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/128_art_type_movie_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..13e411c17dc76f15448e99a68648e6145d47317d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/128_art_type_movie_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.580628661854354, + "test_acc": 0.6016471608149111, + "test_auc": 0.612021837389013, + "val_auc": 0.6571999999999999, + "k": 1, + "dataset": "128_art_type_movie", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1293 + ] + }, + { + "test_f1": 0.6699944893321388, + "test_acc": 0.6701343736454269, + "test_auc": 0.7443884614170554, + "val_auc": 0.8204, + "k": 16, + "dataset": "128_art_type_movie", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1293, + 806, + 976, + 338, + 1301, + 841, + 828, + 128, + 1245, + 409, + 1939, + 1133, + 31605, + 8182, + 1186, + 6777 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/129_arith_mc_A_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/129_arith_mc_A_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..d4615b08a68124a9491d88d04509522d96a67ce6 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/129_arith_mc_A_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5763080389618984, + "test_acc": 0.5884861407249466, + "test_auc": 0.6015093653391526, + "val_auc": 0.6288, + "k": 1, + "dataset": "129_arith_mc_A", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1752 + ] + }, + { + "test_f1": 0.7269895228760173, + "test_acc": 0.7270788912579957, + "test_auc": 0.8215675577377706, + "val_auc": 0.8056, + "k": 16, + "dataset": "129_arith_mc_A", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1752, + 66, + 1702, + 21988, + 3907, + 1529, + 298, + 1763, + 1236, + 1782, + 1900, + 7729, + 1416, + 5967, + 892, + 416 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/130_temp_cat_Frequency_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/130_temp_cat_Frequency_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ddf9458d8c4af9f72cdc3961081b8c6ebb5854c7 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/130_temp_cat_Frequency_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9393815708101421, + "test_acc": 0.9393939393939394, + "test_auc": 0.9648979591836735, + "val_auc": 0.9665861513687601, + "k": 1, + "dataset": "130_temp_cat_Frequency", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 803 + ] + }, + { + "test_f1": 1.0, + "test_acc": 1.0, + "test_auc": 1.0, + "val_auc": 1.0, + "k": 16, + "dataset": "130_temp_cat_Frequency", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 803, + 881, + 26786, + 1699, + 1143, + 12841, + 743, + 948, + 412, + 5740, + 2034, + 774, + 1779, + 7295, + 1043, + 2030 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/131_temp_cat_Typical Time_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/131_temp_cat_Typical Time_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..026819ca4a05ad85398c5cbdedb53045ad1ebc2e --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/131_temp_cat_Typical Time_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8382848088730442, + "test_acc": 0.8383838383838383, + "test_auc": 0.8828571428571429, + "val_auc": 0.8667471819645732, + "k": 1, + "dataset": "131_temp_cat_Typical Time", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 18126 + ] + }, + { + "test_f1": 0.9494846391855681, + "test_acc": 0.9494949494949495, + "test_auc": 0.9820408163265306, + "val_auc": 0.9661835748792271, + "k": 16, + "dataset": "131_temp_cat_Typical Time", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 18126, + 1546, + 1779, + 12841, + 907, + 1389, + 3926, + 264, + 1043, + 2598, + 4676, + 1699, + 1412, + 732, + 803, + 728 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/132_temp_cat_Event Ordering_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/132_temp_cat_Event Ordering_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..a212583c0dda34f7337b4e8ff791a03288b52765 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/132_temp_cat_Event Ordering_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9290757032692516, + "test_acc": 0.9292929292929293, + "test_auc": 0.9485714285714285, + "val_auc": 0.9690016103059581, + "k": 1, + "dataset": "132_temp_cat_Event Ordering", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1389 + ] + }, + { + "test_f1": 1.0, + "test_acc": 1.0, + "test_auc": 1.0, + "val_auc": 1.0, + "k": 16, + "dataset": "132_temp_cat_Event Ordering", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1389, + 3619, + 601, + 673, + 1404, + 18725, + 1412, + 19, + 59, + 2278, + 12841, + 1108, + 5858, + 112, + 1043, + 2393 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/133_context_type_Causality_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/133_context_type_Causality_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..a07286b2b54ed7de74d52b7431507fde62b222c8 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/133_context_type_Causality_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9288860795055486, + "test_acc": 0.9288888888888889, + "test_auc": 0.9521965865992414, + "val_auc": 0.9354707792207794, + "k": 1, + "dataset": "133_context_type_Causality", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1896 + ] + }, + { + "test_f1": 0.9377531840140536, + "test_acc": 0.9377777777777778, + "test_auc": 0.9766908975979771, + "val_auc": 0.9910714285714286, + "k": 16, + "dataset": "133_context_type_Causality", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1896, + 537, + 18725, + 16688, + 6065, + 2036, + 1089, + 975, + 2574, + 1816, + 995, + 3926, + 1136, + 1520, + 14471, + 110 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/134_context_type_Belief_states_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/134_context_type_Belief_states_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..84e3021ea5215c1ba82f41ce902264bd1e9347d5 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/134_context_type_Belief_states_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8311044388256777, + "test_acc": 0.8311111111111111, + "test_auc": 0.8823878002528445, + "val_auc": 0.8530844155844155, + "k": 1, + "dataset": "134_context_type_Belief_states", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1536 + ] + }, + { + "test_f1": 0.8710958341838806, + "test_acc": 0.8711111111111111, + "test_auc": 0.939198798988622, + "val_auc": 0.9068587662337662, + "k": 16, + "dataset": "134_context_type_Belief_states", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1536, + 950, + 1286, + 1164, + 1187, + 1907, + 609, + 3926, + 948, + 1343, + 7689, + 110, + 29973, + 19935, + 5137, + 1854 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/135_context_type_Event_duration_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/135_context_type_Event_duration_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..b79ed7bbbf59333c1a688038dff7c0843526aae6 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/135_context_type_Event_duration_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9244444444444444, + "test_acc": 0.9244444444444444, + "test_auc": 0.9511694058154236, + "val_auc": 0.9389204545454546, + "k": 1, + "dataset": "135_context_type_Event_duration", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1389 + ] + }, + { + "test_f1": 0.9733333333333334, + "test_acc": 0.9733333333333334, + "test_auc": 0.9822218710493045, + "val_auc": 0.9805194805194805, + "k": 16, + "dataset": "135_context_type_Event_duration", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1389, + 1043, + 1568, + 2278, + 12841, + 1779, + 296, + 288, + 1699, + 1045, + 918, + 743, + 1536, + 1896, + 7776, + 1452 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/136_glue_mnli_entailment_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/136_glue_mnli_entailment_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..b3995118635fc4f485d6097e7a6812de999e40f1 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/136_glue_mnli_entailment_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6931515798171085, + "test_acc": 0.693974859124404, + "test_auc": 0.7382805160526154, + "val_auc": 0.8124, + "k": 1, + "dataset": "136_glue_mnli_entailment", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 17441 + ] + }, + { + "test_f1": 0.7576863748010236, + "test_acc": 0.7589943649761595, + "test_auc": 0.8276367429702637, + "val_auc": 0.8808, + "k": 16, + "dataset": "136_glue_mnli_entailment", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 17441, + 6, + 1149, + 2993, + 3478, + 6259, + 5197, + 484, + 23968, + 870, + 1673, + 1573, + 3798, + 772, + 3733, + 7742 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/137_glue_mnli_neutral_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/137_glue_mnli_neutral_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..6218420f06595765ad6b7c93f7ff742494ddff65 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/137_glue_mnli_neutral_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5918740790861167, + "test_acc": 0.5925444299956654, + "test_auc": 0.6224384883981355, + "val_auc": 0.6424, + "k": 1, + "dataset": "137_glue_mnli_neutral", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 831 + ] + }, + { + "test_f1": 0.6823735884160967, + "test_acc": 0.682704811443433, + "test_auc": 0.7453504609330494, + "val_auc": 0.8089999999999999, + "k": 16, + "dataset": "137_glue_mnli_neutral", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 831, + 6, + 17441, + 870, + 1942, + 9, + 3478, + 1149, + 5631, + 5475, + 496, + 3358, + 2993, + 641, + 1573, + 772 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/138_glue_mnli_contradiction_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/138_glue_mnli_contradiction_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..207fb773c536587edf29486ad078fd5130c1cdae --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/138_glue_mnli_contradiction_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6031938982411725, + "test_acc": 0.6168183788469874, + "test_auc": 0.6442134977550839, + "val_auc": 0.6803999999999999, + "k": 1, + "dataset": "138_glue_mnli_contradiction", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 870 + ] + }, + { + "test_f1": 0.7530309590465332, + "test_acc": 0.7542262678803641, + "test_auc": 0.8377493871010896, + "val_auc": 0.7992, + "k": 16, + "dataset": "138_glue_mnli_contradiction", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 870, + 1149, + 5631, + 6, + 1989, + 17441, + 1986, + 1204, + 3478, + 1460, + 1865, + 422, + 484, + 575, + 1563, + 6259 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/139_news_class_Politics_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/139_news_class_Politics_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..e1a214bffa18b99d911f7cfa1c1f4d572d2a8324 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/139_news_class_Politics_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8053932859952562, + "test_acc": 0.8067796610169492, + "test_auc": 0.834229627913647, + "val_auc": 0.8015297906602254, + "k": 1, + "dataset": "139_news_class_Politics", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 386 + ] + }, + { + "test_f1": 0.8764820911771912, + "test_acc": 0.8766101694915254, + "test_auc": 0.9417325787911881, + "val_auc": 0.9448470209339774, + "k": 16, + "dataset": "139_news_class_Politics", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 386, + 1293, + 7619, + 1605, + 31430, + 6186, + 591, + 15791, + 642, + 351, + 1147, + 656, + 4454, + 15549, + 51, + 6365 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/140_news_class_Technology_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/140_news_class_Technology_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1b753e482bd50958c006ebd244974ba4b9d4edb2 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/140_news_class_Technology_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8301655639119235, + "test_acc": 0.8325423728813559, + "test_auc": 0.8414146929800369, + "val_auc": 0.8047504025764896, + "k": 1, + "dataset": "140_news_class_Technology", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 642 + ] + }, + { + "test_f1": 0.8604195503375026, + "test_acc": 0.8610169491525423, + "test_auc": 0.9290373704279784, + "val_auc": 0.8776167471819646, + "k": 16, + "dataset": "140_news_class_Technology", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 642, + 234, + 2803, + 1756, + 351, + 2019, + 607, + 20009, + 179, + 1605, + 492, + 1685, + 735, + 386, + 6365, + 2054 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/141_news_class_Entertainment_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/141_news_class_Entertainment_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..4f36db34c53a3d170a8182dde3bca569f893a936 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/141_news_class_Entertainment_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6359536817229112, + "test_acc": 0.64, + "test_auc": 0.6656977492434355, + "val_auc": 0.6843800322061192, + "k": 1, + "dataset": "141_news_class_Entertainment", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4454 + ] + }, + { + "test_f1": 0.8054156793246283, + "test_acc": 0.8054237288135593, + "test_auc": 0.9003770872172765, + "val_auc": 0.9376006441223833, + "k": 16, + "dataset": "141_news_class_Entertainment", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 4454, + 921, + 1795, + 386, + 591, + 1065, + 6962, + 351, + 15549, + 1164, + 1535, + 15791, + 28150, + 7619, + 310, + 462 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/142_cancer_cat_Thyroid_Cancer_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/142_cancer_cat_Thyroid_Cancer_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..db6a45afc0d375da102c639c42cbdf1f9c3afd98 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/142_cancer_cat_Thyroid_Cancer_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6062019613040022, + "test_acc": 0.6122448979591837, + "test_auc": 0.6179620563035496, + "val_auc": 0.5438175270108043, + "k": 1, + "dataset": "142_cancer_cat_Thyroid_Cancer", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 21381 + ] + }, + { + "test_f1": 0.6520499049822358, + "test_acc": 0.6530612244897959, + "test_auc": 0.7547939616483068, + "val_auc": 0.7302921168467388, + "k": 16, + "dataset": "142_cancer_cat_Thyroid_Cancer", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 21381, + 6438, + 1051, + 58, + 1246, + 1336, + 1912, + 945, + 6526, + 364, + 1398, + 1959, + 405, + 883, + 1440, + 1707 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/143_cancer_cat_Lung_Cancer_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/143_cancer_cat_Lung_Cancer_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..5f5fcf3956194aadc3fbfc6f3524ce70ac02d203 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/143_cancer_cat_Lung_Cancer_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7118675852178012, + "test_acc": 0.7230320699708455, + "test_auc": 0.7396980824153406, + "val_auc": 0.7294917967186876, + "k": 1, + "dataset": "143_cancer_cat_Lung_Cancer", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 822 + ] + }, + { + "test_f1": 0.8539667068820793, + "test_acc": 0.8542274052478134, + "test_auc": 0.926016591867265, + "val_auc": 0.9059623849539816, + "k": 16, + "dataset": "143_cancer_cat_Lung_Cancer", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 822, + 1246, + 405, + 6526, + 21381, + 307, + 611, + 945, + 1152, + 1912, + 1336, + 565, + 872, + 268, + 792, + 1051 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/144_cancer_cat_Colon_Cancer_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/144_cancer_cat_Colon_Cancer_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..23bdb11fd28e74ee6247c0664f1177774ec9fea5 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/144_cancer_cat_Colon_Cancer_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6616882630922509, + "test_acc": 0.6676384839650146, + "test_auc": 0.6858595131238949, + "val_auc": 0.6846738695478191, + "k": 1, + "dataset": "144_cancer_cat_Colon_Cancer", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 822 + ] + }, + { + "test_f1": 0.8278830114955611, + "test_acc": 0.8279883381924198, + "test_auc": 0.8875628994968041, + "val_auc": 0.912765106042417, + "k": 16, + "dataset": "144_cancer_cat_Colon_Cancer", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 822, + 1152, + 1090, + 1707, + 11181, + 565, + 307, + 611, + 6526, + 1246, + 405, + 550, + 4173, + 65, + 1750, + 1756 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/145_disease_class_digestive system diseases_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/145_disease_class_digestive system diseases_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..9b64dba6666f61b542438e0c5cc2b595e3e8861b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/145_disease_class_digestive system diseases_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.3389600705036947, + "test_acc": 0.5050505050505051, + "test_auc": 0.5, + "val_auc": 0.5, + "k": 1, + "dataset": "145_disease_class_digestive system diseases", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 777 + ] + }, + { + "test_f1": 0.5246703106692887, + "test_acc": 0.5252525252525253, + "test_auc": 0.5612244897959183, + "val_auc": 0.7002020202020203, + "k": 16, + "dataset": "145_disease_class_digestive system diseases", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 777, + 4978, + 2258, + 828, + 7169, + 24524, + 16234, + 209, + 1997, + 1064, + 3833, + 158, + 935, + 496, + 5312, + 6273 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/146_disease_class_cardiovascular diseases_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/146_disease_class_cardiovascular diseases_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ab8808855a84d8980e0643a2995e888a459bc33b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/146_disease_class_cardiovascular diseases_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.509923546960584, + "test_acc": 0.5252525252525253, + "test_auc": 0.506938775510204, + "val_auc": 0.5915151515151514, + "k": 1, + "dataset": "146_disease_class_cardiovascular diseases", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 828 + ] + }, + { + "test_f1": 0.5858585858585859, + "test_acc": 0.5858585858585859, + "test_auc": 0.6140816326530611, + "val_auc": 0.6012121212121211, + "k": 16, + "dataset": "146_disease_class_cardiovascular diseases", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 828, + 653, + 759, + 4711, + 1567, + 3833, + 713, + 29134, + 6832, + 20539, + 7, + 1957, + 16181, + 6446, + 12389, + 286 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/147_disease_class_nervous system diseases_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/147_disease_class_nervous system diseases_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..fac11923930e84e71974d4a313ad401b5fbee4ea --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/147_disease_class_nervous system diseases_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5636261691770523, + "test_acc": 0.5757575757575758, + "test_auc": 0.5740816326530612, + "val_auc": 0.5723232323232323, + "k": 1, + "dataset": "147_disease_class_nervous system diseases", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 713 + ] + }, + { + "test_f1": 0.5541916712129478, + "test_acc": 0.5555555555555556, + "test_auc": 0.5677551020408164, + "val_auc": 0.6056565656565656, + "k": 16, + "dataset": "147_disease_class_nervous system diseases", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 713, + 4821, + 24524, + 7677, + 562, + 120, + 24361, + 7768, + 24921, + 3449, + 65, + 733, + 31821, + 1957, + 563, + 1667 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/148_twt_emotion_worry_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/148_twt_emotion_worry_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..097c1f37218a49946ba8df5f70de4cb75db29c9d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/148_twt_emotion_worry_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.49158923230309076, + "test_acc": 0.528135593220339, + "test_auc": 0.5337778954451688, + "val_auc": 0.5571658615136876, + "k": 1, + "dataset": "148_twt_emotion_worry", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1714 + ] + }, + { + "test_f1": 0.5776271186440678, + "test_acc": 0.5776271186440678, + "test_auc": 0.6218353906741239, + "val_auc": 0.6151368760064412, + "k": 16, + "dataset": "148_twt_emotion_worry", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1714, + 1390, + 1188, + 288, + 1460, + 1801, + 286, + 1410, + 1661, + 566, + 1779, + 750, + 870, + 30258, + 1586, + 1245 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/149_twt_emotion_happiness_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/149_twt_emotion_happiness_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..78a019ba9a4445db71a220b882ec55711fa97357 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/149_twt_emotion_happiness_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5911363415229978, + "test_acc": 0.5925423728813559, + "test_auc": 0.6170349288296141, + "val_auc": 0.6751207729468599, + "k": 1, + "dataset": "149_twt_emotion_happiness", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 750 + ] + }, + { + "test_f1": 0.6687002654009802, + "test_acc": 0.6691525423728814, + "test_auc": 0.7358403841840317, + "val_auc": 0.7661030595813204, + "k": 16, + "dataset": "149_twt_emotion_happiness", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 750, + 1460, + 1661, + 1390, + 3731, + 7587, + 721, + 866, + 870, + 1801, + 1325, + 4934, + 1124, + 1188, + 7456, + 1809 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/150_twt_emotion_sadness_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/150_twt_emotion_sadness_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1fdf8023b567fe25a268f5a6cdec9940fcdba462 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/150_twt_emotion_sadness_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5920515426753724, + "test_acc": 0.6006779661016949, + "test_auc": 0.6100061407669707, + "val_auc": 0.6384863123993558, + "k": 1, + "dataset": "150_twt_emotion_sadness", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460 + ] + }, + { + "test_f1": 0.6126216842100116, + "test_acc": 0.6149152542372881, + "test_auc": 0.6556463800730274, + "val_auc": 0.7238325281803543, + "k": 16, + "dataset": "150_twt_emotion_sadness", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460, + 1536, + 1834, + 1188, + 1661, + 15253, + 7456, + 1718, + 866, + 28318, + 1801, + 870, + 1822, + 750, + 669, + 1873 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/151_it_tick_HR Support_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/151_it_tick_HR Support_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..52ddf53ac6735ff54ba067ced669bd5f96dead4d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/151_it_tick_HR Support_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5730537842260894, + "test_acc": 0.5732009925558312, + "test_auc": 0.587483375203192, + "val_auc": 0.5438175270108043, + "k": 1, + "dataset": "151_it_tick_HR Support", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 907 + ] + }, + { + "test_f1": 0.5975639435006681, + "test_acc": 0.598014888337469, + "test_auc": 0.665558346879464, + "val_auc": 0.6890756302521007, + "k": 16, + "dataset": "151_it_tick_HR Support", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 907, + 1506, + 4308, + 1025, + 2, + 28659, + 765, + 1279, + 434, + 2046, + 504, + 7954, + 1692, + 1098, + 32074, + 4803 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/152_it_tick_Hardware_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/152_it_tick_Hardware_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..7d682239b128a1161bae06f3e10a434340b44f8f --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/152_it_tick_Hardware_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.535510823364502, + "test_acc": 0.5632754342431762, + "test_auc": 0.5747376976503621, + "val_auc": 0.590236094437775, + "k": 1, + "dataset": "152_it_tick_Hardware", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1558 + ] + }, + { + "test_f1": 0.6499300640094215, + "test_acc": 0.6501240694789082, + "test_auc": 0.6886852864390917, + "val_auc": 0.7358943577430973, + "k": 16, + "dataset": "152_it_tick_Hardware", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1558, + 1006, + 16855, + 735, + 462, + 2023, + 557, + 966, + 907, + 1019, + 199, + 468, + 1336, + 1162, + 889, + 1231 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/153_it_tick_Administrative rights_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/153_it_tick_Administrative rights_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..50c72bec005856d0d24aad8bf37338073a31d2d1 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/153_it_tick_Administrative rights_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5524072271941481, + "test_acc": 0.5533498759305211, + "test_auc": 0.5775208117826708, + "val_auc": 0.5286114445778312, + "k": 1, + "dataset": "153_it_tick_Administrative rights", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 199 + ] + }, + { + "test_f1": 0.6667140747317708, + "test_acc": 0.6674937965260546, + "test_auc": 0.7428451800403921, + "val_auc": 0.7751100440176071, + "k": 16, + "dataset": "153_it_tick_Administrative rights", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 199, + 31041, + 735, + 1271, + 1558, + 7954, + 70, + 907, + 955, + 65, + 684, + 4200, + 792, + 1591, + 1231, + 1257 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/154_athlete_sport_football_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/154_athlete_sport_football_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..81af8b8957660e203ec780b1c13da753c0993b4b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/154_athlete_sport_football_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8268797586359806, + "test_acc": 0.8286479250334672, + "test_auc": 0.8547834439649611, + "val_auc": 0.848080808080808, + "k": 1, + "dataset": "154_athlete_sport_football", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 2172 + ] + }, + { + "test_f1": 0.8913969313223233, + "test_acc": 0.891566265060241, + "test_auc": 0.9621367435592321, + "val_auc": 0.9656565656565657, + "k": 16, + "dataset": "154_athlete_sport_football", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 2172, + 14623, + 6753, + 32618, + 660, + 738, + 425, + 1433, + 1061, + 1429, + 1131, + 3560, + 1631, + 2293, + 8296, + 453 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/155_athlete_sport_basketball_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/155_athlete_sport_basketball_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..c2072a0960739aa48e57ee000d767b50d4cbcf80 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/155_athlete_sport_basketball_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.881044004247915, + "test_acc": 0.8821954484605087, + "test_auc": 0.8881449728319307, + "val_auc": 0.8836363636363636, + "k": 1, + "dataset": "155_athlete_sport_basketball", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 14623 + ] + }, + { + "test_f1": 0.8857299073296271, + "test_acc": 0.8862115127175368, + "test_auc": 0.9719932330719273, + "val_auc": 0.9761616161616162, + "k": 16, + "dataset": "155_athlete_sport_basketball", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 14623, + 2172, + 6753, + 738, + 7468, + 1433, + 1131, + 7161, + 1694, + 32618, + 1908, + 660, + 2293, + 1151, + 1939, + 201 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/156_athlete_sport_baseball_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/156_athlete_sport_baseball_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..47483918835071ffdbee6118ae66c0e9ce832e7c --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/156_athlete_sport_baseball_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8785263905745833, + "test_acc": 0.8795180722891566, + "test_auc": 0.8920230534329256, + "val_auc": 0.8763636363636362, + "k": 1, + "dataset": "156_athlete_sport_baseball", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6753 + ] + }, + { + "test_f1": 0.9049528052503261, + "test_acc": 0.9049531459170014, + "test_auc": 0.9773623317228426, + "val_auc": 0.9727272727272727, + "k": 16, + "dataset": "156_athlete_sport_baseball", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6753, + 7468, + 2172, + 14623, + 7161, + 823, + 1, + 6237, + 1053, + 6732, + 201, + 15189, + 4840, + 394, + 19402, + 4127 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/157_amazon_5star_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/157_amazon_5star_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..2a2447bc42bad3d9875f01d5a33ab744fe4237ad --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/157_amazon_5star_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.704679934126535, + "test_acc": 0.7169230769230769, + "test_auc": 0.7285677618069815, + "val_auc": 0.6878824969400246, + "k": 1, + "dataset": "157_amazon_5star", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460 + ] + }, + { + "test_f1": 0.8173072085229978, + "test_acc": 0.8194871794871795, + "test_auc": 0.9027649207257549, + "val_auc": 0.9041207670338638, + "k": 16, + "dataset": "157_amazon_5star", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460, + 22772, + 29254, + 7623, + 870, + 567, + 25906, + 6114, + 19563, + 866, + 1115, + 1661, + 65, + 1325, + 684, + 653 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/158_code_C_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/158_code_C_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..2e8608252e2938c2b91c3420aceffb3d985df394 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/158_code_C_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8978758379965306, + "test_acc": 0.8981512605042017, + "test_auc": 0.911317665285521, + "val_auc": 0.911682055399438, + "k": 1, + "dataset": "158_code_C", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 88 + ] + }, + { + "test_f1": 0.9401457098217405, + "test_acc": 0.9401680672268907, + "test_auc": 0.9852882689401334, + "val_auc": 0.9831393014853472, + "k": 16, + "dataset": "158_code_C", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 88, + 655, + 1095, + 1275, + 958, + 1002, + 16842, + 1965, + 279, + 16569, + 15068, + 1479, + 1084, + 60, + 6596, + 7174 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/159_code_Python_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/159_code_Python_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..0c86cba491d0a0880c999c8e75b286a7925b85ed --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/159_code_Python_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7532817103463898, + "test_acc": 0.7596638655462185, + "test_auc": 0.7630404364709201, + "val_auc": 0.793255720594139, + "k": 1, + "dataset": "159_code_Python", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 838 + ] + }, + { + "test_f1": 0.8181508495748757, + "test_acc": 0.8181512605042017, + "test_auc": 0.9260716532529234, + "val_auc": 0.9514251304696909, + "k": 16, + "dataset": "159_code_Python", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 838, + 81, + 833, + 3, + 2723, + 2042, + 1540, + 961, + 875, + 2303, + 902, + 8004, + 1322, + 907, + 1414, + 1965 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/160_code_HTML_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/160_code_HTML_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..e7e127e77bae7c031e382a1fb00d06b59c8d05e6 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/160_code_HTML_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.809233980078518, + "test_acc": 0.8124369747899159, + "test_auc": 0.7999924073150096, + "val_auc": 0.7531112003211561, + "k": 1, + "dataset": "160_code_HTML", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 279 + ] + }, + { + "test_f1": 0.8348130796020846, + "test_acc": 0.8363025210084034, + "test_auc": 0.9318692105776949, + "val_auc": 0.9285427539140907, + "k": 16, + "dataset": "160_code_HTML", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 279, + 7502, + 27995, + 3700, + 315, + 4238, + 1095, + 6571, + 1247, + 6442, + 1002, + 20048, + 9762, + 2924, + 14774, + 151 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/161_agnews_0_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/161_agnews_0_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..aae2d863f7d37b86b2cc2cf1fabdb872891dd1d9 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/161_agnews_0_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.972082381470149, + "test_acc": 0.9721008403361344, + "test_auc": 0.9730513916306919, + "val_auc": 0.9811320754716981, + "k": 1, + "dataset": "161_agnews_0", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 591 + ] + }, + { + "test_f1": 0.9973109237620901, + "test_acc": 0.9973109243697479, + "test_auc": 0.9997367417257812, + "val_auc": 1.0, + "k": 16, + "dataset": "161_agnews_0", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 591, + 915, + 2010, + 18788, + 1147, + 1912, + 34, + 1727, + 3894, + 28150, + 1957, + 1734, + 2125, + 1414, + 5892, + 831 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/162_agnews_1_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/162_agnews_1_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..965865f642358022f54c53e6d6ab5ab0478beed3 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/162_agnews_1_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9163062183266419, + "test_acc": 0.9166386554621849, + "test_auc": 0.9367079654496678, + "val_auc": 0.930349257326375, + "k": 1, + "dataset": "162_agnews_1", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 915 + ] + }, + { + "test_f1": 0.9979831864411416, + "test_acc": 0.9979831932773109, + "test_auc": 0.999674825187467, + "val_auc": 1.0, + "k": 16, + "dataset": "162_agnews_1", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 915, + 591, + 1912, + 2010, + 351, + 34, + 2125, + 1957, + 18788, + 9, + 1414, + 462, + 1809, + 831, + 1734, + 1727 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/163_agnews_2_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/163_agnews_2_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ea53af0d9cd26f6fb5e7736aa7a50af85848d7ed --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/163_agnews_2_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9132652446731683, + "test_acc": 0.9136134453781513, + "test_auc": 0.933103699806929, + "val_auc": 0.9504215174628663, + "k": 1, + "dataset": "163_agnews_2", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 915 + ] + }, + { + "test_f1": 0.993613293840133, + "test_acc": 0.9936134453781512, + "test_auc": 0.9992570015402303, + "val_auc": 0.9971898835808912, + "k": 16, + "dataset": "163_agnews_2", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 915, + 2010, + 591, + 1912, + 1727, + 34, + 3894, + 28150, + 1147, + 2125, + 18788, + 1957, + 1414, + 831, + 1809, + 1734 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/21_headline_istrump_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/21_headline_istrump_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..020b6bdacfbe596eae39b7ecf6755e46acf7a028 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/21_headline_istrump_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.738850471137791, + "test_acc": 0.7388679245283019, + "test_auc": 0.8103548315560196, + "val_auc": 0.8563425370148059, + "k": 1, + "dataset": "21_headline_istrump", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1863 + ] + }, + { + "test_f1": 0.8532413800382611, + "test_acc": 0.8533333333333334, + "test_auc": 0.926105703167166, + "val_auc": 0.9611844737895159, + "k": 16, + "dataset": "21_headline_istrump", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1863, + 17376, + 976, + 4403, + 656, + 29991, + 2099, + 5852, + 386, + 5467, + 1002, + 16114, + 1293, + 6, + 373, + 3647 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/22_headline_isobama_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/22_headline_isobama_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..42995a21601e832a19666e75c078a95daf35db20 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/22_headline_isobama_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.782270706750391, + "test_acc": 0.7828843106180665, + "test_auc": 0.822237291541089, + "val_auc": 0.8159263705482193, + "k": 1, + "dataset": "22_headline_isobama", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 8543 + ] + }, + { + "test_f1": 0.8319662542974042, + "test_acc": 0.8320126782884311, + "test_auc": 0.9174703636728953, + "val_auc": 0.9171668667466987, + "k": 16, + "dataset": "22_headline_isobama", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 8543, + 2099, + 369, + 656, + 16114, + 2037, + 6394, + 5549, + 133, + 4403, + 1293, + 15997, + 71, + 3647, + 31813, + 1002 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/23_headline_ischina_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/23_headline_ischina_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..4e72469e2a2bbe67622cc903e4c4c8ca11f418af --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/23_headline_ischina_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7022800542463803, + "test_acc": 0.7022809123649459, + "test_auc": 0.7127576784726066, + "val_auc": 0.7725442834138487, + "k": 1, + "dataset": "23_headline_ischina", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1293 + ] + }, + { + "test_f1": 0.7562877696214125, + "test_acc": 0.7563025210084033, + "test_auc": 0.8401125253643238, + "val_auc": 0.8707729468599034, + "k": 16, + "dataset": "23_headline_ischina", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1293, + 738, + 6504, + 7161, + 3205, + 565, + 948, + 29991, + 656, + 4552, + 5852, + 642, + 1563, + 8543, + 1146, + 5315 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/24_headline_isiran_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/24_headline_isiran_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..2500f77f8b4e956f3ea349a24e9874d1eb3789f5 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/24_headline_isiran_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7298799416200877, + "test_acc": 0.7300275482093664, + "test_auc": 0.765314795701536, + "val_auc": 0.7410964385754302, + "k": 1, + "dataset": "24_headline_isiran", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 386 + ] + }, + { + "test_f1": 0.7437627621884252, + "test_acc": 0.743801652892562, + "test_auc": 0.7920435917673487, + "val_auc": 0.8231292517006802, + "k": 16, + "dataset": "24_headline_isiran", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 386, + 738, + 4552, + 31813, + 4403, + 3718, + 2251, + 1293, + 7689, + 1183, + 662, + 32675, + 61, + 3373, + 1563, + 2090 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/26_headline_isfrontpage_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/26_headline_isfrontpage_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..45a74e52c3303aea8fd81e6802ce08c62d44b50b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/26_headline_isfrontpage_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.603462985929392, + "test_acc": 0.6035220125786164, + "test_auc": 0.6154197454480279, + "val_auc": 0.580232092837135, + "k": 1, + "dataset": "26_headline_isfrontpage", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 831 + ] + }, + { + "test_f1": 0.673453762012148, + "test_acc": 0.6737106918238994, + "test_auc": 0.7418444993058502, + "val_auc": 0.7615046018407362, + "k": 16, + "dataset": "26_headline_isfrontpage", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 831, + 1293, + 1863, + 1659, + 656, + 8543, + 738, + 4403, + 28150, + 976, + 31962, + 29991, + 386, + 1238, + 1440, + 6259 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/36_sciq_tf_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/36_sciq_tf_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..917820ac33baad73c2c08142455b533cb962e8e9 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/36_sciq_tf_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7190642158255687, + "test_acc": 0.7210062893081761, + "test_auc": 0.7790834083514677, + "val_auc": 0.8207282913165266, + "k": 1, + "dataset": "36_sciq_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1974 + ] + }, + { + "test_f1": 0.8449242711467113, + "test_acc": 0.8450314465408805, + "test_auc": 0.9197712697928891, + "val_auc": 0.9487795118047219, + "k": 16, + "dataset": "36_sciq_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1974, + 410, + 1507, + 1397, + 613, + 906, + 714, + 1257, + 413, + 765, + 2043, + 1243, + 872, + 484, + 237, + 7682 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/41_truthqa_tf_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/41_truthqa_tf_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..79adfcae0e0f4139de69a9b5fee176f099b518ba --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/41_truthqa_tf_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6254987276959685, + "test_acc": 0.6262626262626263, + "test_auc": 0.663061224489796, + "val_auc": 0.6172219991971095, + "k": 1, + "dataset": "41_truthqa_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1563 + ] + }, + { + "test_f1": 0.630474176985805, + "test_acc": 0.6363636363636364, + "test_auc": 0.7159183673469387, + "val_auc": 0.7234042553191489, + "k": 16, + "dataset": "41_truthqa_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1563, + 1420, + 33, + 1840, + 2016, + 413, + 1068, + 774, + 1551, + 1409, + 1136, + 575, + 870, + 1586, + 9, + 687 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/42_temp_sense_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/42_temp_sense_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..f29f92a6aa4052bb5faf75477b39899fb5255d67 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/42_temp_sense_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6599699457250175, + "test_acc": 0.6622185154295246, + "test_auc": 0.6953283249860879, + "val_auc": 0.7916666666666666, + "k": 1, + "dataset": "42_temp_sense", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986 + ] + }, + { + "test_f1": 0.7710344323485822, + "test_acc": 0.7723102585487907, + "test_auc": 0.8621869782971618, + "val_auc": 0.9061996779388083, + "k": 16, + "dataset": "42_temp_sense", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986, + 1397, + 345, + 1243, + 6934, + 17403, + 1257, + 1030, + 995, + 1474, + 1611, + 2320, + 2016, + 1277, + 1815, + 3358 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/44_phys_tf_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/44_phys_tf_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..baa9c81039bdfba28827e6c972b8fd3faf6ea3bb --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/44_phys_tf_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.3334731051125547, + "test_acc": 0.500125786163522, + "test_auc": 0.5, + "val_auc": 0.5, + "k": 1, + "dataset": "44_phys_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986 + ] + }, + { + "test_f1": 0.6318228986741723, + "test_acc": 0.6327044025157232, + "test_auc": 0.7036246163442659, + "val_auc": 0.6814725890356143, + "k": 16, + "dataset": "44_phys_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986, + 954, + 1474, + 276, + 1397, + 5292, + 1115, + 169, + 1068, + 15499, + 294, + 65, + 434, + 265, + 1243, + 2090 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/47_reasoning_tf_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/47_reasoning_tf_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..bcb2110c411c09a240a2b7aa8b93c7a85b7ddbca --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/47_reasoning_tf_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5172893567567015, + "test_acc": 0.5421903052064632, + "test_auc": 0.5435721857719967, + "val_auc": 0.5716579686872741, + "k": 1, + "dataset": "47_reasoning_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 7681 + ] + }, + { + "test_f1": 0.5695019561394865, + "test_acc": 0.5697187312986236, + "test_auc": 0.5792976248460018, + "val_auc": 0.5279004415897229, + "k": 16, + "dataset": "47_reasoning_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 7681, + 5292, + 4575, + 265, + 276, + 6999, + 1662, + 4301, + 25606, + 3569, + 352, + 1412, + 1373, + 3432, + 1816, + 1460 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/48_cm_correct_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/48_cm_correct_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ea7b0d53fc28ac0755a0421461b55f805356a467 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/48_cm_correct_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5794197955120348, + "test_acc": 0.5816352201257862, + "test_auc": 0.6107257536158066, + "val_auc": 0.6234493797519007, + "k": 1, + "dataset": "48_cm_correct", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 294 + ] + }, + { + "test_f1": 0.5975437562831636, + "test_acc": 0.6022641509433962, + "test_auc": 0.6808918432588484, + "val_auc": 0.7442977190876351, + "k": 16, + "dataset": "48_cm_correct", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 294, + 1986, + 2090, + 1563, + 10279, + 473, + 721, + 6513, + 1237, + 5783, + 4388, + 598, + 1605, + 1301, + 6380, + 3880 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/49_cm_isshort_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/49_cm_isshort_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..b5b2ef9d7e96f5a0eff692c92a813516343fe680 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/49_cm_isshort_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9574693378224212, + "test_acc": 0.9574842767295597, + "test_auc": 0.9670959830447202, + "val_auc": 0.9557823129251701, + "k": 1, + "dataset": "49_cm_isshort", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 915 + ] + }, + { + "test_f1": 0.9994968551548519, + "test_acc": 0.9994968553459119, + "test_auc": 1.0, + "val_auc": 1.0, + "k": 16, + "dataset": "49_cm_isshort", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 915, + 1873, + 462, + 9, + 1704, + 1353, + 1277, + 1164, + 3205, + 1124, + 866, + 1809, + 17403, + 34, + 434, + 1957 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/50_deon_isvalid_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/50_deon_isvalid_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..78ab57e9a6d82f36279f8c85fccd25cdeaaee2b8 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/50_deon_isvalid_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6527497525691421, + "test_acc": 0.6528301886792452, + "test_auc": 0.7012097750063542, + "val_auc": 0.7446978791516606, + "k": 1, + "dataset": "50_deon_isvalid", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986 + ] + }, + { + "test_f1": 0.7211726289104828, + "test_acc": 0.7212578616352201, + "test_auc": 0.8000929583540497, + "val_auc": 0.8371348539415766, + "k": 16, + "dataset": "50_deon_isvalid", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986, + 2090, + 16234, + 721, + 1409, + 1756, + 294, + 1551, + 1779, + 942, + 30, + 1509, + 94, + 669, + 7434, + 2191 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/51_just_is_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/51_just_is_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..af7c5aa7f779bb38a9cc1aadcdc494c772d44411 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/51_just_is_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6730302257848467, + "test_acc": 0.6747169811320755, + "test_auc": 0.7274824082896979, + "val_auc": 0.7376950780312125, + "k": 1, + "dataset": "51_just_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986 + ] + }, + { + "test_f1": 0.7043593608928589, + "test_acc": 0.7051572327044026, + "test_auc": 0.7797385723500541, + "val_auc": 0.7707082833133254, + "k": 16, + "dataset": "51_just_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1986, + 2090, + 3385, + 592, + 1942, + 19, + 294, + 16234, + 1301, + 496, + 598, + 870, + 641, + 759, + 942, + 3985 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/52_virtue_is_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/52_virtue_is_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..87bdfefb13d4dd925922ebc4b61178cc1c3d5812 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/52_virtue_is_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7589588598731792, + "test_acc": 0.7590013140604468, + "test_auc": 0.8013166713910682, + "val_auc": 0.716, + "k": 1, + "dataset": "52_virtue_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6765 + ] + }, + { + "test_f1": 0.845457204835433, + "test_acc": 0.8454664914586071, + "test_auc": 0.9189078564864928, + "val_auc": 0.8912, + "k": 16, + "dataset": "52_virtue_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6765, + 1345, + 1673, + 300, + 1815, + 1533, + 375, + 612, + 3358, + 1676, + 1925, + 413, + 1704, + 2043, + 4846, + 449 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/54_cs_tf_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/54_cs_tf_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..a94bb75e2f33c0d0f11d42abd2e83840afb8ae6e --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/54_cs_tf_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5468317501056502, + "test_acc": 0.5527044025157233, + "test_auc": 0.5654922235982579, + "val_auc": 0.5294117647058824, + "k": 1, + "dataset": "54_cs_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1563 + ] + }, + { + "test_f1": 0.5664753909128919, + "test_acc": 0.5667924528301886, + "test_auc": 0.6138256311902619, + "val_auc": 0.7092837134853942, + "k": 16, + "dataset": "54_cs_tf", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1563, + 496, + 1912, + 59, + 1957, + 915, + 1958, + 831, + 2191, + 310, + 288, + 6273, + 961, + 6652, + 13138, + 1989 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/56_wikidatasex_or_gender_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/56_wikidatasex_or_gender_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..71d8216f1f81bb4c6785118612e1d8da57e62af9 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/56_wikidatasex_or_gender_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9939621602123847, + "test_acc": 0.9939622641509434, + "test_auc": 0.9971361890517743, + "val_auc": 1.0, + "k": 1, + "dataset": "56_wikidatasex_or_gender", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 319 + ] + }, + { + "test_f1": 0.9911946609355929, + "test_acc": 0.9911949685534591, + "test_auc": 0.9993438233831776, + "val_auc": 1.0, + "k": 16, + "dataset": "56_wikidatasex_or_gender", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 319, + 1276, + 69, + 1452, + 976, + 2185, + 832, + 505, + 23702, + 65, + 15963, + 660, + 1238, + 410, + 12952, + 2547 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/57_wikidatais_alive_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/57_wikidatais_alive_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..609d053bce5392fa238f4ac337e00c6ed458fe80 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/57_wikidatais_alive_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9008604751707702, + "test_acc": 0.9008805031446541, + "test_auc": 0.9536306920536809, + "val_auc": 0.9725890356142457, + "k": 1, + "dataset": "57_wikidatais_alive", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1276 + ] + }, + { + "test_f1": 0.9607543891196058, + "test_acc": 0.9607547169811321, + "test_auc": 0.9916390643812548, + "val_auc": 0.9891956782713085, + "k": 16, + "dataset": "57_wikidatais_alive", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1276, + 505, + 976, + 12952, + 338, + 5071, + 5304, + 3521, + 3351, + 2293, + 2185, + 15963, + 733, + 5912, + 1245, + 22570 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/58_wikidatapolitical_party_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/58_wikidatapolitical_party_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1c6b91bb79d60fc46f030db04f6dc508325489cf --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/58_wikidatapolitical_party_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8401219477591941, + "test_acc": 0.8420253164556962, + "test_auc": 0.8512535430228598, + "val_auc": 0.8455382152861145, + "k": 1, + "dataset": "58_wikidatapolitical_party", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 2937 + ] + }, + { + "test_f1": 0.8697814125575458, + "test_acc": 0.8698734177215189, + "test_auc": 0.9454641103577275, + "val_auc": 0.9607843137254902, + "k": 16, + "dataset": "58_wikidatapolitical_party", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 2937, + 69, + 1484, + 897, + 1272, + 3548, + 1694, + 2293, + 1101, + 5227, + 502, + 1756, + 538, + 759, + 1177, + 3535 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/59_wikidata_occupation_isjournalist_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/59_wikidata_occupation_isjournalist_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..7c9463d64a9958c17df7ca6f08627b30ced075ad --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/59_wikidata_occupation_isjournalist_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.801327413343747, + "test_acc": 0.803076923076923, + "test_auc": 0.8212584576025853, + "val_auc": 0.7862097103223173, + "k": 1, + "dataset": "59_wikidata_occupation_isjournalist", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 71 + ] + }, + { + "test_f1": 0.8649746285572041, + "test_acc": 0.8656410256410256, + "test_auc": 0.9088640724408389, + "val_auc": 0.8969808241534067, + "k": 16, + "dataset": "59_wikidata_occupation_isjournalist", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 71, + 965, + 823, + 431, + 223, + 23702, + 30341, + 850, + 133, + 15963, + 6237, + 1343, + 1555, + 1151, + 1276, + 2046 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/5_hist_fig_ismale_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/5_hist_fig_ismale_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..81d98cdabacabdb864710fe238aebc1b601592eb --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/5_hist_fig_ismale_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8863041475895898, + "test_acc": 0.8867924528301887, + "test_auc": 0.9056046140962535, + "val_auc": 0.9597839135654261, + "k": 1, + "dataset": "5_hist_fig_ismale", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 319 + ] + }, + { + "test_f1": 0.9325771570023317, + "test_acc": 0.9325786163522013, + "test_auc": 0.9826110664996522, + "val_auc": 0.9915966386554622, + "k": 16, + "dataset": "5_hist_fig_ismale", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 319, + 1297, + 738, + 6302, + 444, + 15175, + 2523, + 3919, + 13184, + 1776, + 1691, + 2016, + 5200, + 1551, + 22570, + 984 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/60_wikidata_occupation_isathlete_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/60_wikidata_occupation_isathlete_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ae0bc92e4b9ecbb75ec11d5ed37ed4e565294776 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/60_wikidata_occupation_isathlete_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8620654996578955, + "test_acc": 0.8635897435897436, + "test_auc": 0.8745266267209748, + "val_auc": 0.9004487964096288, + "k": 1, + "dataset": "60_wikidata_occupation_isathlete", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 351 + ] + }, + { + "test_f1": 0.899111507588698, + "test_acc": 0.8994871794871795, + "test_auc": 0.9351057831487528, + "val_auc": 0.9261525907792738, + "k": 16, + "dataset": "60_wikidata_occupation_isathlete", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 351, + 179, + 2187, + 18577, + 223, + 1151, + 2016, + 431, + 1429, + 1420, + 12966, + 338, + 6761, + 832, + 1860, + 1245 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/61_wikidata_occupation_isactor_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/61_wikidata_occupation_isactor_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..ae56a0b3118d30a64fb7c0d0b75b240c3144bbbc --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/61_wikidata_occupation_isactor_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8829838667358565, + "test_acc": 0.8830769230769231, + "test_auc": 0.9215588918436732, + "val_auc": 0.9412484700122399, + "k": 1, + "dataset": "61_wikidata_occupation_isactor", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 338 + ] + }, + { + "test_f1": 0.8787951006380599, + "test_acc": 0.8789743589743589, + "test_auc": 0.9215588918436732, + "val_auc": 0.9412484700122399, + "k": 16, + "dataset": "61_wikidata_occupation_isactor", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 338, + 5912, + 1520, + 223, + 1339, + 1527, + 2293, + 1585, + 3351, + 5071, + 492, + 129, + 1420, + 1652, + 824, + 1397 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/62_wikidata_occupation_ispolitician_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/62_wikidata_occupation_ispolitician_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..277e30b86be6fc421f77ec1811a83a4ff7b91d09 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/62_wikidata_occupation_ispolitician_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8102104243413148, + "test_acc": 0.8123076923076923, + "test_auc": 0.8436753122159761, + "val_auc": 0.8270093839249286, + "k": 1, + "dataset": "62_wikidata_occupation_ispolitician", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 656 + ] + }, + { + "test_f1": 0.8901753111658276, + "test_acc": 0.8902564102564102, + "test_auc": 0.9506176995320968, + "val_auc": 0.930232558139535, + "k": 16, + "dataset": "62_wikidata_occupation_ispolitician", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 656, + 369, + 338, + 1776, + 386, + 1358, + 1756, + 1276, + 223, + 505, + 828, + 22209, + 410, + 759, + 4872, + 1238 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/63_wikidata_occupation_issinger_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/63_wikidata_occupation_issinger_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d6d850e16ddff240971df93b8a06e90f668059 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/63_wikidata_occupation_issinger_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8404513428581768, + "test_acc": 0.841025641025641, + "test_auc": 0.8823930386777528, + "val_auc": 0.8816809465524276, + "k": 1, + "dataset": "63_wikidata_occupation_issinger", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1245 + ] + }, + { + "test_f1": 0.8683787450850086, + "test_acc": 0.8687179487179487, + "test_auc": 0.9091375770020533, + "val_auc": 0.9432884536923705, + "k": 16, + "dataset": "63_wikidata_occupation_issinger", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1245, + 19109, + 3558, + 985, + 502, + 660, + 5505, + 1756, + 17938, + 3351, + 750, + 2016, + 1652, + 1540, + 1694, + 1346 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/64_wikidata_occupation_isresearcher_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/64_wikidata_occupation_isresearcher_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1c0bd7d0d76ee0f9d758cf57c54abedea63b6c --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/64_wikidata_occupation_isresearcher_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8641711229946524, + "test_acc": 0.8641711229946524, + "test_auc": 0.9208166328080676, + "val_auc": 0.9084848484848485, + "k": 1, + "dataset": "64_wikidata_occupation_isresearcher", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1652 + ] + }, + { + "test_f1": 0.909074684257991, + "test_acc": 0.9090909090909091, + "test_auc": 0.9584957630996176, + "val_auc": 0.9591919191919192, + "k": 16, + "dataset": "64_wikidata_occupation_isresearcher", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1652, + 1420, + 5071, + 201, + 413, + 1939, + 832, + 850, + 65, + 964, + 2016, + 1974, + 1452, + 223, + 5311, + 502 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/65_high-school_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/65_high-school_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..c36fd724277495c742ccfe91e675878ad02fe50d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/65_high-school_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9770112901514615, + "test_acc": 0.9770114942528736, + "test_auc": 0.9952395286541479, + "val_auc": 1.0, + "k": 1, + "dataset": "65_high-school", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 25246 + ] + }, + { + "test_f1": 0.9857470661786898, + "test_acc": 0.9857471264367816, + "test_auc": 0.9958652389198549, + "val_auc": 1.0, + "k": 16, + "dataset": "65_high-school", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 25246, + 426, + 3595, + 886, + 6124, + 527, + 248, + 1662, + 1585, + 857, + 5628, + 1696, + 201, + 260, + 52, + 1515 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/66_living-room_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/66_living-room_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba39247dbb58d1fb40afa3910db457a64ba9784 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/66_living-room_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9612968664671548, + "test_acc": 0.9612995699952221, + "test_auc": 0.9770755376830095, + "val_auc": 0.9987129987129987, + "k": 1, + "dataset": "66_living-room", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 10285 + ] + }, + { + "test_f1": 0.9875760742367136, + "test_acc": 0.9875776397515528, + "test_auc": 0.9979272472930946, + "val_auc": 1.0, + "k": 16, + "dataset": "66_living-room", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 10285, + 1448, + 5556, + 1230, + 1662, + 1519, + 750, + 841, + 1579, + 479, + 299, + 2512, + 201, + 514, + 4285, + 1470 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/67_social-security_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/67_social-security_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1b274b5790cc8939bb675b5d47c5fc574f9ed9b8 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/67_social-security_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9977011465095805, + "test_acc": 0.9977011494252873, + "test_auc": 0.9999551856161047, + "val_auc": 1.0, + "k": 1, + "dataset": "67_social-security", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 29843 + ] + }, + { + "test_f1": 0.9977011397062403, + "test_acc": 0.9977011494252873, + "test_auc": 0.9997953758320255, + "val_auc": 1.0, + "k": 16, + "dataset": "67_social-security", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 29843, + 248, + 153, + 6721, + 638, + 832, + 1203, + 3401, + 897, + 6079, + 886, + 1025, + 424, + 1514, + 413, + 983 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/68_credit-card_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/68_credit-card_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..8713fd6ab86d99833a4b8ebc048d96e058f61a66 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/68_credit-card_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8959196358306792, + "test_acc": 0.8960919540229885, + "test_auc": 0.9622430360138534, + "val_auc": 0.9486607142857143, + "k": 1, + "dataset": "68_credit-card", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 23639 + ] + }, + { + "test_f1": 0.9655169352280335, + "test_acc": 0.9655172413793104, + "test_auc": 0.9913981749553546, + "val_auc": 0.966314935064935, + "k": 16, + "dataset": "68_credit-card", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 23639, + 26, + 1578, + 10873, + 201, + 1980, + 4786, + 1500, + 660, + 3598, + 248, + 398, + 69, + 642, + 5682, + 832 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/69_blood-pressure_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/69_blood-pressure_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..643325456999b079270246ab56984bf57258467c --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/69_blood-pressure_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9972413793103448, + "test_acc": 0.9972413793103448, + "test_auc": 0.9998046769305698, + "val_auc": 0.997564935064935, + "k": 1, + "dataset": "69_blood-pressure", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 12767 + ] + }, + { + "test_f1": 0.9977011494252873, + "test_acc": 0.9977011494252873, + "test_auc": 0.9998985334704259, + "val_auc": 0.997564935064935, + "k": 16, + "dataset": "69_blood-pressure", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 12767, + 6549, + 248, + 1503, + 1131, + 1280, + 1328, + 1855, + 992, + 1866, + 955, + 1925, + 6317, + 797, + 663, + 280 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/6_hist_fig_isamerican_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/6_hist_fig_isamerican_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..b95d6a215e44939703b59786c29f69a7a466fe22 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/6_hist_fig_isamerican_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8128281960601547, + "test_acc": 0.8133333333333334, + "test_auc": 0.8694057905561199, + "val_auc": 0.84593837535014, + "k": 1, + "dataset": "6_hist_fig_isamerican", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1238 + ] + }, + { + "test_f1": 0.9237587232172939, + "test_acc": 0.9237735849056604, + "test_auc": 0.9698349887953792, + "val_auc": 0.9963985594237695, + "k": 16, + "dataset": "6_hist_fig_isamerican", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1238, + 806, + 726, + 1974, + 245, + 441, + 5071, + 5311, + 1420, + 1293, + 6842, + 6224, + 1776, + 1471, + 823, + 413 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/70_prime-factors_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/70_prime-factors_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..3f066bed71c7ca0ab621bde0b11f614a84ba4afe --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/70_prime-factors_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9990804593813543, + "test_acc": 0.999080459770115, + "test_auc": 0.999985625574977, + "val_auc": 1.0, + "k": 1, + "dataset": "70_prime-factors", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 981 + ] + }, + { + "test_f1": 0.9986206861563154, + "test_acc": 0.9986206896551724, + "test_auc": 0.9988475093349207, + "val_auc": 1.0, + "k": 16, + "dataset": "70_prime-factors", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 981, + 1491, + 919, + 855, + 10282, + 2623, + 269, + 1608, + 442, + 1811, + 781, + 598, + 12222, + 7940, + 1133, + 257 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/71_social-media_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/71_social-media_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..8f300bbdc0a7821d756946e13fa9389b7eab822c --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/71_social-media_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9852864045778265, + "test_acc": 0.9852873563218391, + "test_auc": 0.9940938024243736, + "val_auc": 1.0, + "k": 1, + "dataset": "71_social-media", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5402 + ] + }, + { + "test_f1": 0.9912637362312928, + "test_acc": 0.991264367816092, + "test_auc": 0.9971657016072297, + "val_auc": 1.0, + "k": 16, + "dataset": "71_social-media", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 5402, + 2, + 18481, + 976, + 874, + 832, + 4217, + 6318, + 667, + 857, + 113, + 1578, + 841, + 201, + 1852, + 17 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/72_gene-expression_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/72_gene-expression_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..5531ca3456363d5381f0a44a71d155e81e361ac8 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/72_gene-expression_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8487289170387824, + "test_acc": 0.848735632183908, + "test_auc": 0.9145038794036475, + "val_auc": 0.9541396103896104, + "k": 1, + "dataset": "72_gene-expression", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1578 + ] + }, + { + "test_f1": 0.971032388556392, + "test_acc": 0.9710344827586207, + "test_auc": 0.9967074111153202, + "val_auc": 0.9955357142857143, + "k": 16, + "dataset": "72_gene-expression", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1578, + 201, + 1696, + 1264, + 1123, + 16152, + 1558, + 1284, + 4139, + 1562, + 1580, + 210, + 6675, + 828, + 1328, + 1270 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/73_control-group_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/73_control-group_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..e239b70b96b560ebde5a60ef9635fb3c2a132902 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/73_control-group_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9420508111706456, + "test_acc": 0.9420689655172414, + "test_auc": 0.9671341455165322, + "val_auc": 0.948051948051948, + "k": 1, + "dataset": "73_control-group", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 11316 + ] + }, + { + "test_f1": 0.9682752585054257, + "test_acc": 0.9682758620689655, + "test_auc": 0.9908206612911954, + "val_auc": 0.9983766233766234, + "k": 16, + "dataset": "73_control-group", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 11316, + 1726, + 2442, + 996, + 218, + 1735, + 7865, + 404, + 268, + 1490, + 182, + 1921, + 342, + 759, + 201, + 765 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/74_magnetic-field_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/74_magnetic-field_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..1d5d6b647073a10a5b4dfc9e6d953e8edea15f5a --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/74_magnetic-field_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9457371471227155, + "test_acc": 0.9457471264367816, + "test_auc": 0.9797887128632501, + "val_auc": 0.9797077922077922, + "k": 1, + "dataset": "74_magnetic-field", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 25400 + ] + }, + { + "test_f1": 0.9719477610328515, + "test_acc": 0.9719540229885058, + "test_auc": 0.9950433600303047, + "val_auc": 0.9931006493506493, + "k": 16, + "dataset": "74_magnetic-field", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 25400, + 111, + 1866, + 3372, + 527, + 69, + 1641, + 1045, + 426, + 980, + 1921, + 1754, + 1105, + 288, + 6681, + 663 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/75_cell-lines_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/75_cell-lines_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..83abab6b93201337f05e487f7fdde5b0aa640694 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/75_cell-lines_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9291943246361052, + "test_acc": 0.9291954022988506, + "test_auc": 0.9643108393311326, + "val_auc": 0.9683441558441558, + "k": 1, + "dataset": "75_cell-lines", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 342 + ] + }, + { + "test_f1": 0.9811461890968234, + "test_acc": 0.9811494252873563, + "test_auc": 0.9952471386438659, + "val_auc": 0.9902597402597403, + "k": 16, + "dataset": "75_cell-lines", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 342, + 24926, + 3133, + 152, + 683, + 480, + 509, + 1572, + 1604, + 759, + 57, + 810, + 1776, + 841, + 1756, + 1537 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/76_trial-court_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/76_trial-court_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..52ef44b0979b070fedb9340e66f86bc2703b919d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/76_trial-court_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9848262390376196, + "test_acc": 0.9848275862068966, + "test_auc": 0.9976814897992315, + "val_auc": 1.0, + "k": 1, + "dataset": "76_trial-court", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 16845 + ] + }, + { + "test_f1": 0.9908042439163941, + "test_acc": 0.9908045977011494, + "test_auc": 0.9986902362140808, + "val_auc": 1.0, + "k": 16, + "dataset": "76_trial-court", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 16845, + 1594, + 5193, + 126, + 1933, + 30935, + 201, + 486, + 1213, + 897, + 1080, + 4589, + 579, + 220, + 13, + 899 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/77_second-derivative_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/77_second-derivative_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..d8af2b209d1dfb92d157f9cc3769a973d8214a0b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/77_second-derivative_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9415824975743915, + "test_acc": 0.9416091954022988, + "test_auc": 0.9671958709886899, + "val_auc": 0.9780844155844156, + "k": 1, + "dataset": "77_second-derivative", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1013 + ] + }, + { + "test_f1": 0.9908045977011494, + "test_acc": 0.9908045977011494, + "test_auc": 0.998346941122355, + "val_auc": 1.0, + "k": 16, + "dataset": "77_second-derivative", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1013, + 789, + 465, + 6926, + 388, + 1640, + 1852, + 673, + 60, + 604, + 1085, + 1608, + 1557, + 615, + 6570, + 923 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/78_north-america_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/78_north-america_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..df10f808360f869bc28c78527439abbba8d4e10b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/78_north-america_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9793085077785721, + "test_acc": 0.9793103448275862, + "test_auc": 0.9879271740895069, + "val_auc": 1.0, + "k": 1, + "dataset": "78_north-america", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6870 + ] + }, + { + "test_f1": 0.9802284556813469, + "test_acc": 0.9802298850574712, + "test_auc": 0.9958441000595271, + "val_auc": 1.0, + "k": 16, + "dataset": "78_north-america", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6870, + 4857, + 6130, + 1162, + 1537, + 7280, + 1755, + 839, + 52, + 262, + 1578, + 1862, + 284, + 1018, + 1372, + 1484 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/79_human-rights_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/79_human-rights_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..2684791358aec56e72a50cf7ef53adfce529dc3b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/79_human-rights_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9305294814680884, + "test_acc": 0.9305747126436782, + "test_auc": 0.9387674860652633, + "val_auc": 0.9314123376623378, + "k": 1, + "dataset": "79_human-rights", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 7902 + ] + }, + { + "test_f1": 0.9816089154898974, + "test_acc": 0.9816091954022989, + "test_auc": 0.9983173467178961, + "val_auc": 0.9995941558441559, + "k": 16, + "dataset": "79_human-rights", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 7902, + 386, + 1683, + 426, + 1101, + 8734, + 201, + 1293, + 738, + 4404, + 410, + 1270, + 22828, + 1657, + 6073, + 7807 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/7_hist_fig_ispolitician_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/7_hist_fig_ispolitician_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..f79cd462a29da413a690d16336afbbe137454e56 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/7_hist_fig_ispolitician_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.5993847377428689, + "test_acc": 0.5994968553459119, + "test_auc": 0.6206267549939799, + "val_auc": 0.6410564225690276, + "k": 1, + "dataset": "7_hist_fig_ispolitician", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1551 + ] + }, + { + "test_f1": 0.6280235484505983, + "test_acc": 0.6281761006289308, + "test_auc": 0.6667029099610243, + "val_auc": 0.6522609043617447, + "k": 16, + "dataset": "7_hist_fig_ispolitician", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1551, + 656, + 444, + 223, + 6302, + 431, + 386, + 8332, + 7495, + 245, + 1776, + 1358, + 2016, + 841, + 1939, + 6224 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/80_side-effects_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/80_side-effects_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..69fa7f159ccba6577c99e42b15b76045b43fb686 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/80_side-effects_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9489390724069559, + "test_acc": 0.9489655172413793, + "test_auc": 0.9652164281075815, + "val_auc": 0.9732142857142858, + "k": 1, + "dataset": "80_side-effects", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 7261 + ] + }, + { + "test_f1": 0.9829883547124963, + "test_acc": 0.9829885057471265, + "test_auc": 0.9937065385031658, + "val_auc": 0.9995941558441559, + "k": 16, + "dataset": "80_side-effects", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 7261, + 18158, + 1801, + 479, + 286, + 1667, + 1714, + 201, + 1766, + 410, + 1718, + 1338, + 841, + 1364, + 1640, + 855 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/81_public-health_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/81_public-health_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..836a6d3ae67be18f02407a244d86876131b926ed --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/81_public-health_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9852873563218391, + "test_acc": 0.9852873563218391, + "test_auc": 0.9986052579955625, + "val_auc": 0.9995941558441559, + "k": 1, + "dataset": "81_public-health", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 14247 + ] + }, + { + "test_f1": 0.9917239804803086, + "test_acc": 0.9917241379310345, + "test_auc": 0.9996431760376644, + "val_auc": 1.0, + "k": 16, + "dataset": "81_public-health", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 14247, + 1772, + 324, + 1683, + 201, + 1189, + 3596, + 807, + 369, + 1745, + 1092, + 1320, + 1783, + 389, + 1855, + 860 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/82_federal-government_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/82_federal-government_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..3166c773230263847e1f5f413a56d2a8c2821aca --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/82_federal-government_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8786183803778608, + "test_acc": 0.8786206896551724, + "test_auc": 0.9119608745061963, + "val_auc": 0.9387175324675325, + "k": 1, + "dataset": "82_federal-government", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 527 + ] + }, + { + "test_f1": 0.9645956654889328, + "test_acc": 0.9645977011494253, + "test_auc": 0.9953925740029221, + "val_auc": 0.9963474025974026, + "k": 16, + "dataset": "82_federal-government", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 527, + 1018, + 11856, + 201, + 191, + 369, + 821, + 809, + 1544, + 143, + 182, + 1820, + 327, + 1942, + 2185, + 3299 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/83_third-party_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/83_third-party_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..43a372bf2afd948221d1bca41f045ff5890ec282 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/83_third-party_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9301144993393465, + "test_acc": 0.9301149425287356, + "test_auc": 0.9639311853996428, + "val_auc": 0.9598214285714285, + "k": 1, + "dataset": "83_third-party", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1177 + ] + }, + { + "test_f1": 0.9747088906304773, + "test_acc": 0.9747126436781609, + "test_auc": 0.9931366348287245, + "val_auc": 0.9797077922077922, + "k": 16, + "dataset": "83_third-party", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1177, + 25094, + 1162, + 324, + 626, + 1933, + 1955, + 1283, + 201, + 2897, + 619, + 825, + 1979, + 635, + 829, + 955 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/84_clinical-trials_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/84_clinical-trials_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..e56789ededa477a4dd2280c1d9b61c7ac32c5e71 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/84_clinical-trials_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8478050890819959, + "test_acc": 0.847816091954023, + "test_auc": 0.8928564180962174, + "val_auc": 0.9383116883116883, + "k": 1, + "dataset": "84_clinical-trials", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1065 + ] + }, + { + "test_f1": 0.9530582399514179, + "test_acc": 0.953103448275862, + "test_auc": 0.9832994547865144, + "val_auc": 0.989448051948052, + "k": 16, + "dataset": "84_clinical-trials", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1065, + 4385, + 410, + 1766, + 11648, + 201, + 31844, + 125, + 824, + 1490, + 1578, + 976, + 486, + 1398, + 380, + 199 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/85_mental-health_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/85_mental-health_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..d82a3368b28fb3cabf4b4c1cf85eb37641cfbec0 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/85_mental-health_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.9166558188639083, + "test_acc": 0.9167816091954023, + "test_auc": 0.9566797107527463, + "val_auc": 0.9683441558441558, + "k": 1, + "dataset": "85_mental-health", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 23004 + ] + }, + { + "test_f1": 0.9857466564109479, + "test_acc": 0.9857471264367816, + "test_auc": 0.9979140172628389, + "val_auc": 1.0, + "k": 16, + "dataset": "85_mental-health", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 23004, + 1828, + 201, + 2399, + 5332, + 955, + 248, + 857, + 841, + 1550, + 1683, + 976, + 317, + 1162, + 527, + 399 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/87_glue_cola_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/87_glue_cola_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..4777a100d5fe798bf3e7aa74ee8ce471b1247c4a --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/87_glue_cola_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6935389776352238, + "test_acc": 0.6968553459119496, + "test_auc": 0.7156207248523856, + "val_auc": 0.7138855542216886, + "k": 1, + "dataset": "87_glue_cola", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 714 + ] + }, + { + "test_f1": 0.713579483538159, + "test_acc": 0.7147169811320755, + "test_auc": 0.7799767907900346, + "val_auc": 0.7747098839535815, + "k": 16, + "dataset": "87_glue_cola", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 714, + 1790, + 326, + 30086, + 907, + 21398, + 7282, + 6044, + 28659, + 237, + 1301, + 902, + 872, + 511, + 59, + 3205 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/89_glue_mrpc_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/89_glue_mrpc_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..8b7acb7ee2f5f6d7ecba39d4ecba88d83813f29d --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/89_glue_mrpc_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6722472880246534, + "test_acc": 0.6749816581071166, + "test_auc": 0.732061269221991, + "val_auc": 0.7648237179487178, + "k": 1, + "dataset": "89_glue_mrpc", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 3478 + ] + }, + { + "test_f1": 0.7042618276571507, + "test_acc": 0.7043286867204696, + "test_auc": 0.7697882620434845, + "val_auc": 0.8673878205128205, + "k": 16, + "dataset": "89_glue_mrpc", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 3478, + 17441, + 6, + 772, + 484, + 9, + 32031, + 7495, + 5586, + 452, + 942, + 359, + 23040, + 6718, + 422, + 2037 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/90_glue_qnli_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/90_glue_qnli_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..dab262dfc27a2bd01ffaa3dc1626a7447e57995b --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/90_glue_qnli_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.6801092402333448, + "test_acc": 0.6953459119496855, + "test_auc": 0.7020064524033987, + "val_auc": 0.633453381352541, + "k": 1, + "dataset": "90_glue_qnli", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6363 + ] + }, + { + "test_f1": 0.7582353214773718, + "test_acc": 0.7582389937106918, + "test_auc": 0.8389767391465046, + "val_auc": 0.8155262104841936, + "k": 16, + "dataset": "90_glue_qnli", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 6363, + 6010, + 1489, + 511, + 28659, + 496, + 31202, + 2673, + 17403, + 42, + 653, + 19738, + 3419, + 2042, + 611, + 11275 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/91_glue_qqp_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/91_glue_qqp_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..fc8edd44cce9bdded164dee238b610fca55dceb0 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/91_glue_qqp_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.7635206655984828, + "test_acc": 0.7635220125786164, + "test_auc": 0.7815728037069928, + "val_auc": 0.7310924369747899, + "k": 1, + "dataset": "91_glue_qqp", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1149 + ] + }, + { + "test_f1": 0.8015905805890556, + "test_acc": 0.8017610062893081, + "test_auc": 0.8747673509603167, + "val_auc": 0.8599439775910365, + "k": 16, + "dataset": "91_glue_qqp", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1149, + 23968, + 3066, + 1565, + 127, + 20242, + 2258, + 6996, + 17441, + 2042, + 21897, + 11358, + 7574, + 2457, + 6, + 1309 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/92_glue_sst2_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/92_glue_sst2_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb5ea5d6c2385c6c34316fa2945884d7ed8f6d1 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/92_glue_sst2_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8369896198402058, + "test_acc": 0.8372327044025157, + "test_auc": 0.8675128273415026, + "val_auc": 0.9187675070028011, + "k": 1, + "dataset": "92_glue_sst2", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460 + ] + }, + { + "test_f1": 0.8784419444926684, + "test_acc": 0.8784905660377359, + "test_auc": 0.9446445912515861, + "val_auc": 0.9729891956782712, + "k": 16, + "dataset": "92_glue_sst2", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 1460, + 1954, + 1115, + 2407, + 1390, + 431, + 750, + 1986, + 29254, + 4108, + 3124, + 1863, + 1420, + 286, + 294, + 4736 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/94_ai_gen_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/94_ai_gen_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..716571bf195b5af9c4dfa5d277eacdb590866313 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/94_ai_gen_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8980006621030812, + "test_acc": 0.8983647798742138, + "test_auc": 0.9145656525970114, + "val_auc": 0.9171668667466988, + "k": 1, + "dataset": "94_ai_gen", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 19333 + ] + }, + { + "test_f1": 0.9345865499790268, + "test_acc": 0.9345911949685535, + "test_auc": 0.9813095989120428, + "val_auc": 0.9951980792316926, + "k": 16, + "dataset": "94_ai_gen", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 19333, + 1006, + 373, + 1661, + 1700, + 875, + 40, + 866, + 1575, + 4821, + 326, + 19935, + 32410, + 65, + 984, + 1271 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/95_toxic_is_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/95_toxic_is_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..b4fd135d34bb46e8ff62a5c36d2b8a7a70d102bc --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/95_toxic_is_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8059046292218132, + "test_acc": 0.8080808080808081, + "test_auc": 0.8193877551020409, + "val_auc": 0.8032852564102564, + "k": 1, + "dataset": "95_toxic_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 453 + ] + }, + { + "test_f1": 0.8382848088730442, + "test_acc": 0.8383838383838383, + "test_auc": 0.900408163265306, + "val_auc": 0.9042467948717949, + "k": 16, + "dataset": "95_toxic_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 453, + 462, + 1986, + 750, + 294, + 473, + 1346, + 457, + 1277, + 1187, + 866, + 21398, + 2755, + 1090, + 1146, + 1391 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/96_spam_is_blocks.12.hook_resid_post_l1.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/96_spam_is_blocks.12.hook_resid_post_l1.json new file mode 100644 index 0000000000000000000000000000000000000000..854e25f82f98a790e0ff243c17c8c1ec66862c78 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_probes/sae_probes_gemma-2-2b/normal_setting/96_spam_is_blocks.12.hook_resid_post_l1.json @@ -0,0 +1,45 @@ +[ + { + "test_f1": 0.8911392088121264, + "test_acc": 0.8912579957356077, + "test_auc": 0.9370794689943627, + "val_auc": 0.9119999999999999, + "k": 1, + "dataset": "96_spam_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 671 + ] + }, + { + "test_f1": 0.9701484394424185, + "test_acc": 0.9701492537313433, + "test_auc": 0.9879614475359155, + "val_auc": 0.9972, + "k": 16, + "dataset": "96_spam_is", + "hook_name": "blocks.12.hook_resid_post", + "reg_type": "l1", + "binarize": false, + "indices": [ + 671, + 13755, + 1146, + 3914, + 28318, + 362, + 1277, + 6023, + 7942, + 1635, + 6862, + 22927, + 684, + 14626, + 721, + 462 + ] + } +] \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_stats.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..e885f45b9936bc5a49edc0d57be5a5b62ff0a803 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_stats.json @@ -0,0 +1 @@ +{"l0": 143.4744873046875, "width": 32768, "hook_name": "model.layers.12", "tokens_trained": 500000000, "all_metrics": {"model_performance_preservation": {"ce_loss_score": 0.99384634692864, "ce_loss_with_ablation": 12.452933311462402, "ce_loss_with_sae": 1.874442219734192, "ce_loss_without_sae": 1.8089427947998047}, "reconstruction_quality": {"explained_variance": 0.8779053688049316, "explained_variance_legacy": 0.8117672204971313, "mse": 0.41642075777053833, "cossim": 0.9340218901634216}, "shrinkage": {"l2_norm_in": 142.23941040039062, "l2_norm_out": 134.5427703857422, "l2_ratio": 0.9444009065628052, "relative_reconstruction_bias": 1.0117323398590088}, "sparsity": {"l0": 143.4744873046875, "l1": 951.1505737304688}, "token_stats": {"total_tokens_eval_reconstruction": 61440, "total_tokens_eval_sparsity_variance": 61440}}} \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_weights.safetensors b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_weights.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6212cf2ba1c111e5c035049e6b09cc1c356d819 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sae_weights.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05dc8a56c3a4b83767493affa173b5188182544f42f816f3994cfa4f57cf1c21 +size 604251536 diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/saebench_autointerp_custom_sae_eval_results.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/saebench_autointerp_custom_sae_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..059dfff6f359df0e4ad36cb272fd2404043868a4 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/saebench_autointerp_custom_sae_eval_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dad9e63af591a4e7f0be8753a9e34695a35c9c492781febfb6068049353b4b9 +size 27779446 diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/scr/saebench_scr_custom_sae_eval_results.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/scr/saebench_scr_custom_sae_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ef82333faff10a681f4c0728697c21c5b6c9c62 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/scr/saebench_scr_custom_sae_eval_results.json @@ -0,0 +1,326 @@ +{ + "eval_type_id": "scr", + "eval_config": { + "random_seed": 42, + "dataset_names": [ + "LabHC/bias_in_bios_class_set1", + "canrager/amazon_reviews_mcauley_1and5" + ], + "perform_scr": true, + "early_stopping_patience": 20, + "train_set_size": 4000, + "test_set_size": 1000, + "context_length": 128, + "probe_train_batch_size": 16, + "probe_test_batch_size": 500, + "probe_epochs": 20, + "probe_lr": 0.001, + "probe_l1_penalty": 0.001, + "sae_batch_size": 32, + "llm_batch_size": 128, + "llm_dtype": "float32", + "lower_vram_usage": false, + "model_name": "gemma-2-2b", + "n_values": [ + 2, + 5, + 10 + ], + "column1_vals_lookup": { + "LabHC/bias_in_bios_class_set1": [ + [ + "professor", + "nurse" + ], + [ + "architect", + "journalist" + ], + [ + "surgeon", + "psychologist" + ], + [ + "attorney", + "teacher" + ] + ], + "canrager/amazon_reviews_mcauley_1and5": [ + [ + "Books", + "CDs_and_Vinyl" + ], + [ + "Software", + "Electronics" + ], + [ + "Pet_Supplies", + "Office_Products" + ], + [ + "Industrial_and_Scientific", + "Toys_and_Games" + ] + ] + } + }, + "eval_id": "d1076d24-28e3-4317-88f9-01561dc61acd", + "datetime_epoch_millis": 1776108003704, + "eval_result_metrics": { + "scr_metrics": { + "scr_dir1_threshold_2": 0.22315586101016013, + "scr_metric_threshold_2": 0.17985678581811984, + "scr_dir2_threshold_2": 0.1870203182973137, + "scr_dir1_threshold_5": 0.3704278751349361, + "scr_metric_threshold_5": 0.2387328993493279, + "scr_dir2_threshold_5": 0.24401017191569593, + "scr_dir1_threshold_10": 0.3810325417277716, + "scr_metric_threshold_10": 0.2808346051682663, + "scr_dir2_threshold_10": 0.2946276353833737, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + } + }, + "eval_result_details": [ + { + "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results", + "scr_dir1_threshold_2": 0.5512825313325533, + "scr_metric_threshold_2": 0.07512948567373162, + "scr_dir2_threshold_2": 0.07512948567373162, + "scr_dir1_threshold_5": 0.6282053731288537, + "scr_metric_threshold_5": 0.17616583431207444, + "scr_dir2_threshold_5": 0.17616583431207444, + "scr_dir1_threshold_10": 0.5512825313325533, + "scr_metric_threshold_10": 0.1968911085011348, + "scr_dir2_threshold_10": 0.1968911085011348, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results", + "scr_dir1_threshold_2": 0.24242422874090183, + "scr_metric_threshold_2": 0.2782875058100854, + "scr_dir2_threshold_2": 0.2782875058100854, + "scr_dir1_threshold_5": 0.4924241158533419, + "scr_metric_threshold_5": 0.37308877810486407, + "scr_dir2_threshold_5": 0.37308877810486407, + "scr_dir1_threshold_10": 0.5681820542194435, + "scr_metric_threshold_10": 0.37308877810486407, + "scr_dir2_threshold_10": 0.37308877810486407, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results", + "scr_dir1_threshold_2": 0.3749997671693945, + "scr_metric_threshold_2": 0.06532668057741017, + "scr_dir2_threshold_2": 0.06532668057741017, + "scr_dir1_threshold_5": 0.5, + "scr_metric_threshold_5": 0.10050255094361776, + "scr_dir2_threshold_5": 0.10050255094361776, + "scr_dir1_threshold_10": 0.5625001164153027, + "scr_metric_threshold_10": 0.16834176159853043, + "scr_dir2_threshold_10": 0.16834176159853043, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results", + "scr_dir1_threshold_2": 0.24832216912919208, + "scr_metric_threshold_2": 0.04777065348328394, + "scr_dir2_threshold_2": 0.04777065348328394, + "scr_dir1_threshold_5": 0.7046977664257812, + "scr_metric_threshold_5": -0.012738878893616343, + "scr_dir2_threshold_5": -0.012738878893616343, + "scr_dir1_threshold_10": 0.5167783087080793, + "scr_metric_threshold_10": 0.03184710232218929, + "scr_dir2_threshold_10": 0.03184710232218929, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results", + "scr_dir1_threshold_2": 0.03684225881529179, + "scr_metric_threshold_2": 0.5039683206086106, + "scr_dir2_threshold_2": 0.5039683206086106, + "scr_dir1_threshold_5": 0.057894933322792726, + "scr_metric_threshold_5": 0.5912697183136902, + "scr_dir2_threshold_5": 0.5912697183136902, + "scr_dir1_threshold_10": 0.08421069803000372, + "scr_metric_threshold_10": 0.6349206536925663, + "scr_dir2_threshold_10": 0.6349206536925663, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results", + "scr_dir1_threshold_2": 0.16393429808190266, + "scr_metric_threshold_2": 0.13358787395939287, + "scr_dir2_threshold_2": 0.13358787395939287, + "scr_dir1_threshold_5": 0.2732241558450018, + "scr_metric_threshold_5": 0.17938926174649328, + "scr_dir2_threshold_5": 0.17938926174649328, + "scr_dir1_threshold_10": 0.3606557163469838, + "scr_metric_threshold_10": 0.23282436583052582, + "scr_dir2_threshold_10": 0.23282436583052582, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results", + "scr_dir1_threshold_2": 0.11162783863611286, + "scr_metric_threshold_2": 0.2789699702565123, + "scr_dir2_threshold_2": 0.2789699702565123, + "scr_dir1_threshold_5": 0.19534878692091323, + "scr_metric_threshold_5": 0.3905580606846963, + "scr_dir2_threshold_5": 0.3905580606846963, + "scr_dir1_threshold_10": 0.3023255233702845, + "scr_metric_threshold_10": 0.506437685896798, + "scr_dir2_threshold_10": 0.506437685896798, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + }, + { + "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results", + "scr_dir1_threshold_2": 0.055813796175931865, + "scr_metric_threshold_2": 0.055813796175931865, + "scr_dir2_threshold_2": 0.1131220560094828, + "scr_dir1_threshold_5": 0.11162786958280348, + "scr_metric_threshold_5": 0.11162786958280348, + "scr_dir2_threshold_5": 0.15384605011374777, + "scr_dir1_threshold_10": 0.10232538539952167, + "scr_metric_threshold_10": 0.10232538539952167, + "scr_dir2_threshold_10": 0.21266962712038115, + "scr_dir1_threshold_20": null, + "scr_metric_threshold_20": null, + "scr_dir2_threshold_20": null, + "scr_dir1_threshold_50": null, + "scr_metric_threshold_50": null, + "scr_dir2_threshold_50": null, + "scr_dir1_threshold_100": null, + "scr_metric_threshold_100": null, + "scr_dir2_threshold_100": null, + "scr_dir1_threshold_500": null, + "scr_metric_threshold_500": null, + "scr_dir2_threshold_500": null + } + ], + "sae_bench_commit_hash": "Unknown", + "sae_lens_id": "custom_sae", + "sae_lens_release_id": "saebench_scr", + "sae_lens_version": "6.39.0", + "sae_cfg_dict": { + "d_in": 2304, + "d_sae": 32768, + "dtype": "float32", + "device": "cuda", + "apply_b_dec_to_input": true, + "normalize_activations": "none", + "reshape_activations": "none", + "metadata": { + "sae_lens_version": "6.39.0", + "sae_lens_training_version": "6.39.0", + "model_name": "gemma-2-2b", + "hook_name": "blocks.12.hook_resid_post", + "hook_head_index": null, + "context_size": 1024, + "prepend_bos": true, + "seqpos_slice": [ + null + ], + "model_from_pretrained_kwargs": { + "center_writing_weights": false + }, + "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B", + "model_class_name": "AutoModelForCausalLM", + "exclude_special_tokens": true, + "sequence_separator_token": "bos", + "disable_concat_sequences": false, + "hook_layer": 12, + "dataset_trust_remote_code": null + }, + "architecture": "jumprelu" + }, + "eval_result_unstructured": null +} \ No newline at end of file diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sparsity.safetensors b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sparsity.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..427f29199aef561e09c239a90fb037abb342afe3 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/sparsity.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd94599e63edae674a0fa0e7ff4654cf2588702f650b334b5eec6dcc3f12be79 +size 131152 diff --git a/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/tpp/saebench_tpp_custom_sae_eval_results.json b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/tpp/saebench_tpp_custom_sae_eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..976a32d0b5849e51124f539108c3c2c152dc7389 --- /dev/null +++ b/gemma-2-2b-twin-btk-mat/layer-12/k-150/w-32768/t-500M/enc-0.7-dec-0.7/seed-0/tpp/saebench_tpp_custom_sae_eval_results.json @@ -0,0 +1,297 @@ +{ + "eval_type_id": "tpp", + "eval_config": { + "random_seed": 42, + "dataset_names": [ + "LabHC/bias_in_bios_class_set1", + "canrager/amazon_reviews_mcauley_1and5" + ], + "perform_scr": false, + "early_stopping_patience": 20, + "train_set_size": 4000, + "test_set_size": 1000, + "context_length": 128, + "probe_train_batch_size": 16, + "probe_test_batch_size": 500, + "probe_epochs": 20, + "probe_lr": 0.001, + "probe_l1_penalty": 0.001, + "sae_batch_size": 32, + "llm_batch_size": 128, + "llm_dtype": "float32", + "lower_vram_usage": false, + "model_name": "gemma-2-2b", + "n_values": [ + 2, + 5, + 10 + ], + "column1_vals_lookup": { + "LabHC/bias_in_bios_class_set1": [ + [ + "professor", + "nurse" + ], + [ + "architect", + "journalist" + ], + [ + "surgeon", + "psychologist" + ], + [ + "attorney", + "teacher" + ] + ], + "canrager/amazon_reviews_mcauley_1and5": [ + [ + "Books", + "CDs_and_Vinyl" + ], + [ + "Software", + "Electronics" + ], + [ + "Pet_Supplies", + "Office_Products" + ], + [ + "Industrial_and_Scientific", + "Toys_and_Games" + ] + ] + } + }, + "eval_id": "9610ce7f-13a6-4bf5-8dc3-50ded7b5a538", + "datetime_epoch_millis": 1776108172770, + "eval_result_metrics": { + "tpp_metrics": { + "tpp_threshold_2_total_metric": 0.03802499920129776, + "tpp_threshold_2_intended_diff_only": 0.042600005865097046, + "tpp_threshold_2_unintended_diff_only": 0.004575006663799286, + "tpp_threshold_5_total_metric": 0.10620001703500748, + "tpp_threshold_5_intended_diff_only": 0.130100017786026, + "tpp_threshold_5_unintended_diff_only": 0.023900000751018526, + "tpp_threshold_10_total_metric": 0.16375000178813937, + "tpp_threshold_10_intended_diff_only": 0.20880000591278075, + "tpp_threshold_10_unintended_diff_only": 0.04505000412464142, + "tpp_threshold_20_total_metric": null, + "tpp_threshold_20_intended_diff_only": null, + "tpp_threshold_20_unintended_diff_only": null, + "tpp_threshold_50_total_metric": null, + "tpp_threshold_50_intended_diff_only": null, + "tpp_threshold_50_unintended_diff_only": null, + "tpp_threshold_100_total_metric": null, + "tpp_threshold_100_intended_diff_only": null, + "tpp_threshold_100_unintended_diff_only": null, + "tpp_threshold_500_total_metric": null, + "tpp_threshold_500_intended_diff_only": null, + "tpp_threshold_500_unintended_diff_only": null + } + }, + "eval_result_details": [ + { + "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", + "tpp_threshold_2_total_metric": 0.06175000667572021, + "tpp_threshold_2_intended_diff_only": 0.06880002021789551, + "tpp_threshold_2_unintended_diff_only": 0.007050013542175293, + "tpp_threshold_5_total_metric": 0.17065001428127288, + "tpp_threshold_5_intended_diff_only": 0.21200002431869508, + "tpp_threshold_5_unintended_diff_only": 0.04135001003742218, + "tpp_threshold_10_total_metric": 0.254750007390976, + "tpp_threshold_10_intended_diff_only": 0.3350000143051147, + "tpp_threshold_10_unintended_diff_only": 0.08025000691413879, + "tpp_threshold_20_total_metric": null, + "tpp_threshold_20_intended_diff_only": null, + "tpp_threshold_20_unintended_diff_only": null, + "tpp_threshold_50_total_metric": null, + "tpp_threshold_50_intended_diff_only": null, + "tpp_threshold_50_unintended_diff_only": null, + "tpp_threshold_100_total_metric": null, + "tpp_threshold_100_intended_diff_only": null, + "tpp_threshold_100_unintended_diff_only": null, + "tpp_threshold_500_total_metric": null, + "tpp_threshold_500_intended_diff_only": null, + "tpp_threshold_500_unintended_diff_only": null + }, + { + "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", + "tpp_threshold_2_total_metric": 0.014299991726875304, + "tpp_threshold_2_intended_diff_only": 0.016399991512298585, + "tpp_threshold_2_unintended_diff_only": 0.002099999785423279, + "tpp_threshold_5_total_metric": 0.041750019788742064, + "tpp_threshold_5_intended_diff_only": 0.048200011253356934, + "tpp_threshold_5_unintended_diff_only": 0.006449991464614868, + "tpp_threshold_10_total_metric": 0.07274999618530273, + "tpp_threshold_10_intended_diff_only": 0.08259999752044678, + "tpp_threshold_10_unintended_diff_only": 0.009850001335144043, + "tpp_threshold_20_total_metric": null, + "tpp_threshold_20_intended_diff_only": null, + "tpp_threshold_20_unintended_diff_only": null, + "tpp_threshold_50_total_metric": null, + "tpp_threshold_50_intended_diff_only": null, + "tpp_threshold_50_unintended_diff_only": null, + "tpp_threshold_100_total_metric": null, + "tpp_threshold_100_intended_diff_only": null, + "tpp_threshold_100_unintended_diff_only": null, + "tpp_threshold_500_total_metric": null, + "tpp_threshold_500_intended_diff_only": null, + "tpp_threshold_500_unintended_diff_only": null + } + ], + "sae_bench_commit_hash": "Unknown", + "sae_lens_id": "custom_sae", + "sae_lens_release_id": "saebench_tpp", + "sae_lens_version": "6.39.0", + "sae_cfg_dict": { + "d_in": 2304, + "d_sae": 32768, + "dtype": "float32", + "device": "cuda", + "apply_b_dec_to_input": true, + "normalize_activations": "none", + "reshape_activations": "none", + "metadata": { + "sae_lens_version": "6.39.0", + "sae_lens_training_version": "6.39.0", + "model_name": "gemma-2-2b", + "hook_name": "blocks.12.hook_resid_post", + "hook_head_index": null, + "context_size": 1024, + "prepend_bos": true, + "seqpos_slice": [ + null + ], + "model_from_pretrained_kwargs": { + "center_writing_weights": false + }, + "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B", + "model_class_name": "AutoModelForCausalLM", + "exclude_special_tokens": true, + "sequence_separator_token": "bos", + "disable_concat_sequences": false, + "hook_layer": 12, + "dataset_trust_remote_code": null + }, + "architecture": "jumprelu" + }, + "eval_result_unstructured": { + "LabHC/bias_in_bios_class_set1": { + "0": { + "tpp_threshold_2_total_metric": 0.19624997675418854, + "tpp_threshold_2_intended_diff_only": 0.22699999809265137, + "tpp_threshold_2_unintended_diff_only": 0.03075002133846283, + "tpp_threshold_5_total_metric": 0.2357499599456787, + "tpp_threshold_5_intended_diff_only": 0.2929999828338623, + "tpp_threshold_5_unintended_diff_only": 0.057250022888183594, + "tpp_threshold_10_total_metric": 0.34424997866153717, + "tpp_threshold_10_intended_diff_only": 0.4129999876022339, + "tpp_threshold_10_unintended_diff_only": 0.06875000894069672 + }, + "1": { + "tpp_threshold_2_total_metric": 0.0007500201463699341, + "tpp_threshold_2_intended_diff_only": 0.0020000338554382324, + "tpp_threshold_2_unintended_diff_only": 0.0012500137090682983, + "tpp_threshold_5_total_metric": 0.08400003612041473, + "tpp_threshold_5_intended_diff_only": 0.11300003528594971, + "tpp_threshold_5_unintended_diff_only": 0.028999999165534973, + "tpp_threshold_10_total_metric": 0.13225002586841583, + "tpp_threshold_10_intended_diff_only": 0.1640000343322754, + "tpp_threshold_10_unintended_diff_only": 0.03175000846385956 + }, + "2": { + "tpp_threshold_2_total_metric": 0.015000030398368835, + "tpp_threshold_2_intended_diff_only": 0.01100003719329834, + "tpp_threshold_2_unintended_diff_only": -0.003999993205070496, + "tpp_threshold_5_total_metric": 0.18375004827976227, + "tpp_threshold_5_intended_diff_only": 0.2500000596046448, + "tpp_threshold_5_unintended_diff_only": 0.06625001132488251, + "tpp_threshold_10_total_metric": 0.1807500123977661, + "tpp_threshold_10_intended_diff_only": 0.30000001192092896, + "tpp_threshold_10_unintended_diff_only": 0.11924999952316284 + }, + "6": { + "tpp_threshold_2_total_metric": 0.07650001347064972, + "tpp_threshold_2_intended_diff_only": 0.08100003004074097, + "tpp_threshold_2_unintended_diff_only": 0.0045000165700912476, + "tpp_threshold_5_total_metric": 0.20750001072883606, + "tpp_threshold_5_intended_diff_only": 0.23900002241134644, + "tpp_threshold_5_unintended_diff_only": 0.031500011682510376, + "tpp_threshold_10_total_metric": 0.3517500013113022, + "tpp_threshold_10_intended_diff_only": 0.453000009059906, + "tpp_threshold_10_unintended_diff_only": 0.10125000774860382 + }, + "9": { + "tpp_threshold_2_total_metric": 0.020249992609024048, + "tpp_threshold_2_intended_diff_only": 0.023000001907348633, + "tpp_threshold_2_unintended_diff_only": 0.002750009298324585, + "tpp_threshold_5_total_metric": 0.14225001633167267, + "tpp_threshold_5_intended_diff_only": 0.16500002145767212, + "tpp_threshold_5_unintended_diff_only": 0.02275000512599945, + "tpp_threshold_10_total_metric": 0.26475001871585846, + "tpp_threshold_10_intended_diff_only": 0.3450000286102295, + "tpp_threshold_10_unintended_diff_only": 0.08025000989437103 + } + }, + "canrager/amazon_reviews_mcauley_1and5": { + "1": { + "tpp_threshold_2_total_metric": 0.007250010967254639, + "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, + "tpp_threshold_2_unintended_diff_only": -0.0032500028610229492, + "tpp_threshold_5_total_metric": 0.028749987483024597, + "tpp_threshold_5_intended_diff_only": 0.02899998426437378, + "tpp_threshold_5_unintended_diff_only": 0.00024999678134918213, + "tpp_threshold_10_total_metric": 0.04524996876716614, + "tpp_threshold_10_intended_diff_only": 0.05699998140335083, + "tpp_threshold_10_unintended_diff_only": 0.011750012636184692 + }, + "2": { + "tpp_threshold_2_total_metric": 0.010750025510787964, + "tpp_threshold_2_intended_diff_only": 0.017000019550323486, + "tpp_threshold_2_unintended_diff_only": 0.0062499940395355225, + "tpp_threshold_5_total_metric": 0.0140000581741333, + "tpp_threshold_5_intended_diff_only": 0.025000035762786865, + "tpp_threshold_5_unintended_diff_only": 0.010999977588653564, + "tpp_threshold_10_total_metric": 0.06050004065036774, + "tpp_threshold_10_intended_diff_only": 0.06700003147125244, + "tpp_threshold_10_unintended_diff_only": 0.006499990820884705 + }, + "3": { + "tpp_threshold_2_total_metric": 0.014999985694885254, + "tpp_threshold_2_intended_diff_only": 0.010999977588653564, + "tpp_threshold_2_unintended_diff_only": -0.0040000081062316895, + "tpp_threshold_5_total_metric": 0.025250032544136047, + "tpp_threshold_5_intended_diff_only": 0.026000022888183594, + "tpp_threshold_5_unintended_diff_only": 0.0007499903440475464, + "tpp_threshold_10_total_metric": 0.024250000715255737, + "tpp_threshold_10_intended_diff_only": 0.03299999237060547, + "tpp_threshold_10_unintended_diff_only": 0.008749991655349731 + }, + "5": { + "tpp_threshold_2_total_metric": -0.0005000680685043335, + "tpp_threshold_2_intended_diff_only": 0.008999943733215332, + "tpp_threshold_2_unintended_diff_only": 0.009500011801719666, + "tpp_threshold_5_total_metric": 0.018249988555908203, + "tpp_threshold_5_intended_diff_only": 0.023000001907348633, + "tpp_threshold_5_unintended_diff_only": 0.00475001335144043, + "tpp_threshold_10_total_metric": 0.037499964237213135, + "tpp_threshold_10_intended_diff_only": 0.04399996995925903, + "tpp_threshold_10_unintended_diff_only": 0.0065000057220458984 + }, + "6": { + "tpp_threshold_2_total_metric": 0.039000004529953, + "tpp_threshold_2_intended_diff_only": 0.04100000858306885, + "tpp_threshold_2_unintended_diff_only": 0.0020000040531158447, + "tpp_threshold_5_total_metric": 0.12250003218650818, + "tpp_threshold_5_intended_diff_only": 0.1380000114440918, + "tpp_threshold_5_unintended_diff_only": 0.015499979257583618, + "tpp_threshold_10_total_metric": 0.19625000655651093, + "tpp_threshold_10_intended_diff_only": 0.2120000123977661, + "tpp_threshold_10_unintended_diff_only": 0.015750005841255188 + } + } + } +} \ No newline at end of file