Add files using upload-large-folder tool
Browse files- cfg.json +14 -0
- runner_cfg.json +1 -0
- sae_weights.safetensors +3 -0
cfg.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"d_in": 1024,
|
| 3 |
+
"d_sae": 16384,
|
| 4 |
+
"dtype": "float32",
|
| 5 |
+
"device": "cuda",
|
| 6 |
+
"jumprelu_init_threshold": 0.01,
|
| 7 |
+
"jumprelu_bandwidth": 0.05,
|
| 8 |
+
"jumprelu_sparsity_loss_mode": "step",
|
| 9 |
+
"l0_coefficient": 1.0,
|
| 10 |
+
"l0_warm_up_steps": 0,
|
| 11 |
+
"pre_act_loss_coefficient": null,
|
| 12 |
+
"jumprelu_tanh_scale": 4.0,
|
| 13 |
+
"decoder_init_norm": 0.1
|
| 14 |
+
}
|
runner_cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"d_in": 1024, "d_sae": 16384, "dtype": "float32", "device": "cpu", "apply_b_dec_to_input": true, "normalize_activations": "none", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.35.0", "sae_lens_training_version": "6.35.0"}, "decoder_init_norm": 0.1, "l1_coefficient": 5, "lp_norm": 1.0, "l1_warm_up_steps": 1500, "architecture": "standard"}, "model_name": "Qwen/Qwen3-0.6B", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_mlp_out", "hook_eval": "NOT_IN_USE", "hook_head_index": null, "dataset_path": "michaelwaves/code-text", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 256, "use_cached_activations": false, "cached_activations_path": null, "from_pretrained_path": null, "n_batches_in_buffer": 64, "training_tokens": 122880000, "store_batch_size_prompts": 16, "seqpos_slice": [null], "disable_concat_sequences": false, "sequence_separator_token": "bos", "activations_mixing_fraction": 0.5, "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "train_batch_size_tokens": 4096, "adam_beta1": 0.9, "adam_beta2": 0.999, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 6000, "n_restart_cycles": 1, "dead_feature_window": 1000, "feature_sampling_window": 2000, "dead_feature_threshold": 1e-08, "n_eval_batches": 10, "eval_batch_size_prompts": null, "logger": {"log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "log_weights_to_wandb": true, "wandb_project": "sae_lens_tutorial", "wandb_id": null, "run_name": "standard-16384-LR-5e-05-Tokens-1.229e+08", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 20}, "n_checkpoints": 0, "checkpoint_path": "checkpoints/cg88sv58", "save_final_checkpoint": false, "output_path": "output", "resume_from_checkpoint": null, "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"center_writing_weights": false}, "sae_lens_version": "6.35.0", "sae_lens_training_version": "6.35.0", "exclude_special_tokens": false}
|
sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03a4ed15179da3e939cfd47c2089fb5864a79c3b19e481cbc396f91a4912ce1d
|
| 3 |
+
size 134353296
|