{"d_in": 2304, "d_sae": 16384, "dtype": "bfloat16", "device": "cuda:0", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.30.1", "sae_lens_training_version": "6.30.1", "dataset_path": "monology/pile-uncopyrighted", "hook_name": "blocks.12.hook_resid_post", "model_name": "google/gemma-2-2b", "model_class_name": "HookedTransformer", "hook_head_index": null, "context_size": 1024, "seqpos_slice": [null], "model_from_pretrained_kwargs": {"center_writing_weights": false}, "prepend_bos": true, "exclude_special_tokens": false, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "decoder_init_norm": 0.1, "block_size": 32, "degree": 128, "bsr_tile_size": 32, "lambda_init_value": 0.1, "learn_b_penalty": false, "jumprelu_init_threshold": 0.005, "jumprelu_bandwidth": 0.05, "dag_coefficient": 0.1, "bits_per_value": 4.0, "baseline_sparsity_coefficient": 0.2772579191793165, "tanh_alpha": 4.0, "baseline_sparsity_mode": "step", "scale_sparsity_by_decoder_norm": true, "sync_metrics_across_ranks": true, "use_conditional_ema_bound": true, "conditional_ema_gamma": 0.943, "threshold_ema_alpha": 0.01, "threshold_ema_init_mean": 0.0, "threshold_ema_init_std": 1.0, "pre_act_loss_coefficient": null, "use_muon": true, "muon_lr": 0.001, "muon_momentum": 0.95, "muon_nesterov": true, "muon_ns_steps": 5, "muon_weight_decay": 0.0, "use_norm_adapted_optimizers": false, "norm_adapted_lr_enc": 0.02, "norm_adapted_lr_dec": 0.02, "norm_adapted_beta": 0.95, "norm_adapted_eps": 1e-08, "use_cascade_penalty": true, "cascade_penalty_mode": "symmetric", "b_penalty_l0_threshold": 1000.0, "b_penalty_l0_bandwidth": 10.0, "b_penalty_min": 2.302585, "b_penalty_c": 0.04, "n_batches_for_norm_estimate": 10, "architecture": "psae"}