aemack committed on
Commit
620c3e1
·
verified ·
1 Parent(s): 2cffa5a

Upload folder

Browse files
.ipynb_checkpoints/cfg-checkpoint.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"d_in": 2304, "d_sae": 16384, "dtype": "bfloat16", "device": "cuda:0", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.11.0", "sae_lens_training_version": "6.11.0", "dataset_path": "monology/pile-uncopyrighted", "hook_name": "blocks.12.hook_resid_post", "model_name": "google/gemma-2-2b", "model_class_name": "HookedTransformer", "hook_head_index": null, "context_size": 1024, "seqpos_slice": [null], "model_from_pretrained_kwargs": {"center_writing_weights": false}, "prepend_bos": true, "exclude_special_tokens": false, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "decoder_init_norm": 0.1, "block_size": 64, "degree": 4, "bsr_tile_size": 64, "lambda_init_value": 0.1, "learn_b_penalty": false, "jumprelu_init_threshold": 0.005, "jumprelu_bandwidth": 0.05, "dag_coefficient": 0.1, "bits_per_value": 4.0, "baseline_sparsity_coefficient": 0.2772579191793165, "tanh_alpha": 4.0, "baseline_sparsity_mode": "step", "scale_sparsity_by_decoder_norm": true, "sync_metrics_across_ranks": true, "use_conditional_ema_bound": true, "conditional_ema_gamma": 0.943, "threshold_ema_alpha": 0.01, "threshold_ema_init_mean": 0.0, "threshold_ema_init_std": 1.0, "pre_act_loss_coefficient": null, "use_muon": true, "muon_lr": 0.001, "muon_momentum": 0.95, "muon_nesterov": true, "muon_ns_steps": 5, "muon_weight_decay": 0.0, "use_norm_adapted_optimizers": false, "norm_adapted_lr_enc": 0.02, "norm_adapted_lr_dec": 0.02, "norm_adapted_beta": 0.95, "norm_adapted_eps": 1e-08, "use_cascade_penalty": true, "b_penalty_l0_threshold": 1000.0, "b_penalty_l0_bandwidth": 10.0, "b_penalty_min": 2.302585, "b_penalty_c": 0.1, "n_batches_for_norm_estimate": 10, "architecture": "psae"}
cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"d_in": 2304, "d_sae": 16384, "dtype": "bfloat16", "device": "cuda:0", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.11.0", "sae_lens_training_version": "6.11.0", "dataset_path": "monology/pile-uncopyrighted", "hook_name": "blocks.12.hook_resid_post", "model_name": "google/gemma-2-2b", "model_class_name": "HookedTransformer", "hook_head_index": null, "context_size": 1024, "seqpos_slice": [null], "model_from_pretrained_kwargs": {"center_writing_weights": false}, "prepend_bos": true, "exclude_special_tokens": false, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "decoder_init_norm": 0.1, "block_size": 64, "degree": 4, "bsr_tile_size": 64, "lambda_init_value": 0.1, "learn_b_penalty": false, "jumprelu_init_threshold": 0.005, "jumprelu_bandwidth": 0.05, "dag_coefficient": 0.1, "bits_per_value": 4.0, "baseline_sparsity_coefficient": 0.2772579191793165, "tanh_alpha": 4.0, "baseline_sparsity_mode": "step", "scale_sparsity_by_decoder_norm": true, "sync_metrics_across_ranks": true, "use_conditional_ema_bound": true, "conditional_ema_gamma": 0.943, "threshold_ema_alpha": 0.01, "threshold_ema_init_mean": 0.0, "threshold_ema_init_std": 1.0, "pre_act_loss_coefficient": null, "use_muon": true, "muon_lr": 0.001, "muon_momentum": 0.95, "muon_nesterov": true, "muon_ns_steps": 5, "muon_weight_decay": 0.0, "use_norm_adapted_optimizers": false, "norm_adapted_lr_enc": 0.02, "norm_adapted_lr_dec": 0.02, "norm_adapted_beta": 0.95, "norm_adapted_eps": 1e-08, "use_cascade_penalty": true, "b_penalty_l0_threshold": 1000.0, "b_penalty_l0_bandwidth": 10.0, "b_penalty_min": 2.302585, "b_penalty_c": 0.1, "n_batches_for_norm_estimate": 10, "architecture": "psae"}
sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca1adc51f6df4bd1dc4f27675ba73ae12a8bd7fee3b1aca9d756e2914026eac
3
+ size 177784480