michaelwaves committed on
Commit
b20c580
·
verified ·
1 Parent(s): 24bed25

Add files using upload-large-folder tool

Browse files
Files changed (3) hide show
  1. cfg.json +14 -0
  2. runner_cfg.json +1 -0
  3. sae_weights.safetensors +3 -0
cfg.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_in": 1024,
3
+ "d_sae": 16384,
4
+ "dtype": "float32",
5
+ "device": "cuda",
6
+ "jumprelu_init_threshold": 0.01,
7
+ "jumprelu_bandwidth": 0.05,
8
+ "jumprelu_sparsity_loss_mode": "step",
9
+ "l0_coefficient": 1.0,
10
+ "l0_warm_up_steps": 0,
11
+ "pre_act_loss_coefficient": null,
12
+ "jumprelu_tanh_scale": 4.0,
13
+ "decoder_init_norm": 0.1
14
+ }
runner_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"d_in": 1024, "d_sae": 16384, "dtype": "float32", "device": "cpu", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.35.0", "sae_lens_training_version": "6.35.0"}, "decoder_init_norm": 0.1, "l1_coefficient": 5, "lp_norm": 1.0, "l1_warm_up_steps": 1500, "architecture": "standard"}, "model_name": "Qwen/Qwen3-0.6B", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_mlp_out", "hook_eval": "NOT_IN_USE", "hook_head_index": null, "dataset_path": "michaelwaves/code-text", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 256, "use_cached_activations": false, "cached_activations_path": null, "from_pretrained_path": null, "n_batches_in_buffer": 64, "training_tokens": 122880000, "store_batch_size_prompts": 16, "seqpos_slice": [null], "disable_concat_sequences": false, "sequence_separator_token": "bos", "activations_mixing_fraction": 0.5, "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "train_batch_size_tokens": 4096, "adam_beta1": 0.9, "adam_beta2": 0.999, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 6000, "n_restart_cycles": 1, "dead_feature_window": 1000, "feature_sampling_window": 2000, "dead_feature_threshold": 1e-08, "n_eval_batches": 10, "eval_batch_size_prompts": null, "logger": {"log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "log_weights_to_wandb": true, "wandb_project": "sae_lens_tutorial", "wandb_id": null, "run_name": "standard-16384-LR-5e-05-Tokens-1.229e+08", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 20}, "n_checkpoints": 0, "checkpoint_path": "checkpoints/cg88sv58", 
"save_final_checkpoint": false, "output_path": "output", "resume_from_checkpoint": null, "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"center_writing_weights": false}, "sae_lens_version": "6.35.0", "sae_lens_training_version": "6.35.0", "exclude_special_tokens": false}
sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03a4ed15179da3e939cfd47c2089fb5864a79c3b19e481cbc396f91a4912ce1d
3
+ size 134353296