ojuschugh1 committed on
Commit
358e874
·
verified ·
1 Parent(s): 6b47ce6

Upload 2 files

Browse files
downstream_similar_lpcoeff_30_l0_layer_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169407e13bd5751ae8b7f971851d156b23d4d24f939c63f9d87e08e4fcdfb9ff
3
+ size 283305416
downstream_similar_lpcoeff_30_l0_layer_6.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ act_frequency_n_tokens: 500000
2
+ batch_size: 4
3
+ collect_act_frequency_every_n_samples: 40000
4
+ cooldown_samples: 0
5
+ effective_batch_size: 16
6
+ eval_data:
7
+ column_name: input_ids
8
+ dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
9
+ is_tokenized: true
10
+ n_ctx: 1024
11
+ seed: null
12
+ split: train
13
+ streaming: true
14
+ tokenizer_name: gpt2
15
+ eval_every_n_samples: 40000
16
+ eval_n_samples: 500
17
+ log_every_n_grad_steps: 20
18
+ loss:
19
+ in_to_orig:
20
+ hook_positions:
21
+ - blocks.7.hook_resid_pre
22
+ - blocks.8.hook_resid_pre
23
+ - blocks.9.hook_resid_pre
24
+ - blocks.10.hook_resid_pre
25
+ - blocks.11.hook_resid_pre
26
+ total_coeff: 2.0
27
+ logits_kl:
28
+ coeff: 0.65
29
+ out_to_in:
30
+ coeff: 0.0
31
+ out_to_orig: null
32
+ sparsity:
33
+ coeff: 30.0
34
+ p_norm: 1.0
35
+ lr: 0.0005
36
+ lr_schedule: cosine
37
+ max_grad_norm: 10.0
38
+ min_lr_factor: 0.1
39
+ n_samples: 400000
40
+ saes:
41
+ dict_size_to_input_ratio: 60.0
42
+ k: null
43
+ pretrained_sae_paths: null
44
+ retrain_saes: false
45
+ sae_positions:
46
+ - blocks.6.hook_resid_pre
47
+ type_of_sparsifier: sae
48
+ save_dir: /data/dan_braun/e2e_sae/e2e_sae/scripts/train_tlens_saes/out
49
+ save_every_n_samples: null
50
+ seed: 0
51
+ tlens_model_name: gpt2-small
52
+ tlens_model_path: null
53
+ train_data:
54
+ column_name: input_ids
55
+ dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
56
+ is_tokenized: true
57
+ n_ctx: 1024
58
+ seed: null
59
+ split: train
60
+ streaming: true
61
+ tokenizer_name: gpt2
62
+ wandb_project: gpt2-e2e
63
+ wandb_run_name: null
64
+ wandb_run_name_prefix: recon_
65
+ warmup_samples: 20000