| act_frequency_n_tokens: 500000 |
| batch_size: 16 |
| collect_act_frequency_every_n_samples: 40000 |
| collect_output_metrics_every_n_samples: 0 |
| cooldown_samples: 0 |
| effective_batch_size: 16 |
| eval_data: |
| column_name: input_ids |
| dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2 |
| is_tokenized: true |
| n_ctx: 1024 |
| seed: 0 |
| split: train |
| streaming: true |
| tokenizer_name: gpt2 |
| eval_every_n_samples: 20000 |
| eval_n_samples: 500 |
| log_every_n_grad_steps: 20 |
| loss: |
| in_to_orig: null |
| logits_kl: |
| coeff: 1.0 |
| out_to_in: |
| coeff: 0.0 |
| out_to_orig: null |
| sparsity: |
| coeff: 0.5 |
| p_norm: 1.0 |
| lr: 0.0005 |
| lr_schedule: cosine |
| max_grad_norm: 10.0 |
| min_lr_factor: 0.1 |
| n_samples: 400000 |
| saes: |
| dict_size_to_input_ratio: 60.0 |
| k: null |
| pretrained_sae_paths: null |
| retrain_saes: false |
| sae_positions: |
| - blocks.2.hook_resid_pre |
| type_of_sparsifier: sae |
| save_dir: /data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out |
| save_every_n_samples: null |
| seed: 0 |
| tlens_model_name: gpt2-small |
| tlens_model_path: null |
| train_data: |
| column_name: input_ids |
| dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2 |
| is_tokenized: true |
| n_ctx: 1024 |
| seed: 0 |
| split: train |
| streaming: true |
| tokenizer_name: gpt2 |
| wandb_project: gpt2-e2e_play |
| wandb_run_name: null |
| wandb_run_name_prefix: '' |
| warmup_samples: 20000 |
|
|