bsr-sae-65k / cfg.json
{"d_in": 2304, "d_sae": 65536, "dtype": "bfloat16", "device": "cuda", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.11.0", "sae_lens_training_version": "6.11.0", "dataset_path": "monology/pile-uncopyrighted", "hook_name": "blocks.12.hook_resid_post", "model_name": "google/gemma-2-2b", "model_class_name": "HookedTransformer", "hook_head_index": null, "context_size": 1024, "seqpos_slice": [null], "model_from_pretrained_kwargs": {"center_writing_weights": false}, "prepend_bos": true, "exclude_special_tokens": false, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "decoder_init_norm": 0.1, "block_size": 64, "degree": 9, "bsr_tile_size": 64, "learn_b_penalty": true, "discrete_threshold": 1e-05, "top_k": 8, "l1_coefficient": 1.0, "base_l1_coef": 1.0, "ema_alpha": 0.001, "gamma_b_enc": 0.9, "architecture": "bsr"}