eoinf committed on
Commit
b7dc0dc
·
verified ·
1 Parent(s): e87e50d

Initial commit

Browse files
Files changed (32) hide show
  1. autumn-lion-33/hyperparameters.yaml +75 -0
  2. dashing-mountain-34/hyperparameters.yaml +75 -0
  3. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_11239424.pt +3 -0
  4. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_14987264.pt +3 -0
  5. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_18735104.pt +3 -0
  6. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_20475904_log_feature_sparsity.pt +3 -0
  7. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_22482944.pt +3 -0
  8. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_26230784.pt +3 -0
  9. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_29978624.pt +3 -0
  10. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_33726464.pt +3 -0
  11. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_3743744.pt +3 -0
  12. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_37474304.pt +3 -0
  13. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_40955904_log_feature_sparsity.pt +3 -0
  14. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_41222144.pt +3 -0
  15. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_44969984.pt +3 -0
  16. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_48717824.pt +3 -0
  17. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_52465664.pt +3 -0
  18. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_56213504.pt +3 -0
  19. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_59961344.pt +3 -0
  20. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_61435904_log_feature_sparsity.pt +3 -0
  21. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_63709184.pt +3 -0
  22. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_67457024.pt +3 -0
  23. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_71204864.pt +3 -0
  24. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_7491584.pt +3 -0
  25. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_74952704.pt +3 -0
  26. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_78700544.pt +3 -0
  27. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_81915904_log_feature_sparsity.pt +3 -0
  28. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_82448384.pt +3 -0
  29. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_86196224.pt +3 -0
  30. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_89944064.pt +3 -0
  31. dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_93691904.pt +3 -0
  32. fast-darkness-32/hyperparameters.yaml +75 -0
autumn-lion-33/hyperparameters.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ./outputs/checkpoints
6
+ clip_grad_norm: true
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 4096
10
+ d_out: null
11
+ dataset: skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.5
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ fine_tune_dataset: false
22
+ finetuning_steps: !!python/tuple
23
+ - 1000
24
+ flatten_activations_over_layer: false
25
+ flatten_activations_over_layer_output: false
26
+ from_pretrained_path: null
27
+ hook_point: blocks.10.hook_resid_pre
28
+ hook_point_head_index: null
29
+ hook_point_head_index_output: null
30
+ hook_point_layer: 10
31
+ hook_point_layer_output: null
32
+ hook_point_output: null
33
+ initial_decoder_norm: 0.1
34
+ initialise_encoder_to_decoder_transpose: false
35
+ is_dataset_tokenized: false
36
+ l0_coefficient: 0
37
+ l0_warmup: false
38
+ l0_warmup_steps: 1000
39
+ l1_coefficient: 5
40
+ l1_warmup: true
41
+ l1_warmup_steps: 5000
42
+ log_to_wandb: true
43
+ loop_dataset: false
44
+ lr: 0.0001
45
+ lr_scheduler_name: constant_with_warmup
46
+ lr_warm_up_steps: 1000
47
+ max_resample_step: 100000
48
+ max_sparsity_target: 1
49
+ min_sparsity_for_resample: 0
50
+ min_sparsity_target: 0
51
+ model_name: meta-llama/Llama-2-7b-hf
52
+ mse_loss_coefficient: 1
53
+ mse_loss_type: standard
54
+ multiple_runs: false
55
+ n_batches_in_store_buffer: 128
56
+ n_checkpoints: 80
57
+ n_running_sparsity: 500
58
+ n_starting_steps: null
59
+ normalise_initial_decoder_weights: false
60
+ normalise_w_dec: false
61
+ resample_batches: 128
62
+ resample_frequency: 25000
63
+ scale_input_norm: false
64
+ seed: 42
65
+ sparse_loss_coefficient: 0
66
+ sparsity_log_frequency: 5000
67
+ store_batch_size: 8
68
+ subtract_b_dec_from_inputs: false
69
+ total_training_steps: 73242
70
+ train_batch_size: 4096
71
+ use_cached_activations: false
72
+ use_gated_sparse_autoencoder: false
73
+ wandb_log_frequency: 10
74
+ wandb_project: test_gemma_2b
75
+ weight_l1_by_decoder_norms: true
dashing-mountain-34/hyperparameters.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ./outputs/checkpoints
6
+ clip_grad_norm: true
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 4096
10
+ d_out: null
11
+ dataset: skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.5
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ fine_tune_dataset: false
22
+ finetuning_steps: !!python/tuple
23
+ - 1000
24
+ flatten_activations_over_layer: false
25
+ flatten_activations_over_layer_output: false
26
+ from_pretrained_path: null
27
+ hook_point: blocks.10.hook_resid_pre
28
+ hook_point_head_index: null
29
+ hook_point_head_index_output: null
30
+ hook_point_layer: 10
31
+ hook_point_layer_output: null
32
+ hook_point_output: null
33
+ initial_decoder_norm: 0.1
34
+ initialise_encoder_to_decoder_transpose: false
35
+ is_dataset_tokenized: false
36
+ l0_coefficient: 0
37
+ l0_warmup: false
38
+ l0_warmup_steps: 1000
39
+ l1_coefficient: 5
40
+ l1_warmup: true
41
+ l1_warmup_steps: 5000
42
+ log_to_wandb: true
43
+ loop_dataset: false
44
+ lr: 0.0001
45
+ lr_scheduler_name: constant_with_warmup
46
+ lr_warm_up_steps: 1000
47
+ max_resample_step: 100000
48
+ max_sparsity_target: 1
49
+ min_sparsity_for_resample: 0
50
+ min_sparsity_target: 0
51
+ model_name: meta-llama/Llama-2-7b-hf
52
+ mse_loss_coefficient: 1
53
+ mse_loss_type: standard
54
+ multiple_runs: false
55
+ n_batches_in_store_buffer: 128
56
+ n_checkpoints: 80
57
+ n_running_sparsity: 500
58
+ n_starting_steps: null
59
+ normalise_initial_decoder_weights: false
60
+ normalise_w_dec: false
61
+ resample_batches: 128
62
+ resample_frequency: 25000
63
+ scale_input_norm: false
64
+ seed: 42
65
+ sparse_loss_coefficient: 0
66
+ sparsity_log_frequency: 5000
67
+ store_batch_size: 8
68
+ subtract_b_dec_from_inputs: false
69
+ total_training_steps: 73242
70
+ train_batch_size: 4096
71
+ use_cached_activations: false
72
+ use_gated_sparse_autoencoder: false
73
+ wandb_log_frequency: 10
74
+ wandb_project: test_gemma_2b
75
+ weight_l1_by_decoder_norms: true
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_11239424.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:230b44cf52c88bdbf3d69f98ec75c0a010ce32818e3421140ee2a8a66d8193e5
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_14987264.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8df6232c90a5a061e009e1095899561d1d4287e05390210a1b9544d3e6ae983c
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_18735104.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34c480b14f472162b1485f4e2e43e417ef39b997eca39f691c44ccad68178c9
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_20475904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ff75215c33a8eae7374908c94f2547cb35fdf61946bbd33de8a681f35a464d
3
+ size 132912
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_22482944.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55e5f26471ef49d647e5ac0d45da999c2e807e803b2ba3d50690e9d2b18d03ed
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_26230784.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12e2115c3c3cc578e3289a245711e6bdbe7449fdbdf419aef15672c9c6ddd9e4
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_29978624.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8552a3a8203dcd03c1cc06845de0380232dec5142c8585e2d907ae3445925348
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_33726464.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f5718364fdf8117440ec32be6e0dd128c9a29811f1dece2fa57c571acfa5eb3
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_3743744.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d40ea401aa1e34cc0782cac2eaa190c6a28244a29fed363484cf30c52c0cb6
3
+ size 1073894272
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_37474304.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35b7d2cda0468a71c9fd9ece696df1a04199d77b88d8744a4beb40678661330
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_40955904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1efe686262533e2d6e7292f38e33a7422b1de0a218181277ac2126548417aa
3
+ size 132912
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_41222144.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1998aec6b69b80d534f68bd82f03bf062f116122a470181d5a95bde862bc25fa
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_44969984.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0a3fd8061f462e3d8a4576311c4fa294108eeae22981203304dd39aca0ee5a
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_48717824.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640d1dc8ffbbc2239084829baa5788b3f570712590fb485dab440ffc1cf5546f
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_52465664.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c056e3dbaf24304da12b499121f7f497d5161567fd710caadf1a2871dba2320
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_56213504.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52da0773143ae11aff4ea823015419133a0fbe668cda164bc6a522c43c155e04
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_59961344.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1647c3b25232e81073c97c2f22214d639e826f8d7102f24b9c66a6d14d84fd8
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_61435904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65a6f2d3e5720f8a4e01fc2bad2e6ae0cb7578f0c311ff900644b9e48a7201b1
3
+ size 132912
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_63709184.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb30d28e25f12ce35882b6caf14011bc9a4d10ff228bb4a83d46edc3f8fcdf95
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_67457024.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148cc87a1f19cb76d96ca85b2705d4ebe03ab7f3e0f41e7d85548d411f0ecd81
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_71204864.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be7cd70eeac61eeaa3daebd90721fa25fc04ebfd1485b6dad229466833311acb
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_7491584.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e8b2b8e4ff95ede51fe14a59b3d994fe059529412902099fbece762bc48048b
3
+ size 1073894272
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_74952704.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b3a5b2217f08fb751f5a6119a6fb927854f18390cab171af1a9aad47a3adfa
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_78700544.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74bb245e92ebf8ad48cb837c253de30e8c48cb502dcac77dfa965ab2a737d4c5
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_81915904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7f185de31ec497514b0d05e14f1d15e6bd1171980ae8e8b33926af3b0cc0f2a
3
+ size 132912
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_82448384.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09918568a889a289547476a4ce4a092df48c02fb64bd5ad8b7e6a83ce399be3e
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_86196224.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bece16884e003d46ad79d9cfd399a7acddda6f605a9c945c8bf0e9bc88e2a09
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_89944064.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30d508ce2aa3e25c649b177f610109524d93880c485df524e72352dd2256cb5b
3
+ size 1073894344
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_93691904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a823e8f48e8d381a965c29c2274c4f0137ddbb36afadb7b9beff49ffbc46a315
3
+ size 1073894344
fast-darkness-32/hyperparameters.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ./outputs/checkpoints
6
+ clip_grad_norm: true
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 4096
10
+ d_out: null
11
+ dataset: skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.5
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ fine_tune_dataset: false
22
+ finetuning_steps: !!python/tuple
23
+ - 1000
24
+ flatten_activations_over_layer: false
25
+ flatten_activations_over_layer_output: false
26
+ from_pretrained_path: null
27
+ hook_point: blocks.10.hook_resid_pre
28
+ hook_point_head_index: null
29
+ hook_point_head_index_output: null
30
+ hook_point_layer: 10
31
+ hook_point_layer_output: null
32
+ hook_point_output: null
33
+ initial_decoder_norm: 0.1
34
+ initialise_encoder_to_decoder_transpose: false
35
+ is_dataset_tokenized: false
36
+ l0_coefficient: 0
37
+ l0_warmup: false
38
+ l0_warmup_steps: 1000
39
+ l1_coefficient: 5
40
+ l1_warmup: true
41
+ l1_warmup_steps: 5000
42
+ log_to_wandb: true
43
+ loop_dataset: false
44
+ lr: 0.0001
45
+ lr_scheduler_name: constant_with_warmup
46
+ lr_warm_up_steps: 1000
47
+ max_resample_step: 100000
48
+ max_sparsity_target: 1
49
+ min_sparsity_for_resample: 0
50
+ min_sparsity_target: 0
51
+ model_name: meta-llama/Llama-2-7b-hf
52
+ mse_loss_coefficient: 1
53
+ mse_loss_type: standard
54
+ multiple_runs: false
55
+ n_batches_in_store_buffer: 128
56
+ n_checkpoints: 80
57
+ n_running_sparsity: 500
58
+ n_starting_steps: null
59
+ normalise_initial_decoder_weights: false
60
+ normalise_w_dec: false
61
+ resample_batches: 128
62
+ resample_frequency: 25000
63
+ scale_input_norm: false
64
+ seed: 42
65
+ sparse_loss_coefficient: 0
66
+ sparsity_log_frequency: 5000
67
+ store_batch_size: 8
68
+ subtract_b_dec_from_inputs: false
69
+ total_training_steps: 73242
70
+ train_batch_size: 4096
71
+ use_cached_activations: false
72
+ use_gated_sparse_autoencoder: false
73
+ wandb_log_frequency: 10
74
+ wandb_project: test_gemma_2b
75
+ weight_l1_by_decoder_norms: true