eoinf committed on
Commit
5b6159c
·
verified ·
1 Parent(s): 2f9f45f

Trains Llama-2-7b resid-pre-16 SAEs on up to 200M tokens of OpenWebText

Browse files
lunar-fire-36_llama/hyperparameters.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ./outputs/checkpoints
6
+ clip_grad_norm: true
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 4096
10
+ d_out: null
11
+ dataset: skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.5
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ fine_tune_dataset: false
22
+ finetuning_steps: !!python/tuple
23
+ - 1000
24
+ flatten_activations_over_layer: false
25
+ flatten_activations_over_layer_output: false
26
+ from_pretrained_path: null
27
+ hook_point: blocks.16.hook_resid_pre
28
+ hook_point_head_index: null
29
+ hook_point_head_index_output: null
30
+ hook_point_layer: 16
31
+ hook_point_layer_output: null
32
+ hook_point_output: null
33
+ initial_decoder_norm: 0.1
34
+ initialise_encoder_to_decoder_transpose: false
35
+ is_dataset_tokenized: false
36
+ l0_coefficient: 0
37
+ l0_warmup: false
38
+ l0_warmup_steps: 1000
39
+ l1_coefficient: 5
40
+ l1_warmup: true
41
+ l1_warmup_steps: 5000
42
+ log_to_wandb: true
43
+ loop_dataset: false
44
+ lr: 0.0001
45
+ lr_scheduler_name: constant_with_warmup
46
+ lr_warm_up_steps: 1000
47
+ max_resample_step: 100000
48
+ max_sparsity_target: 1
49
+ min_sparsity_for_resample: 0
50
+ min_sparsity_target: 0
51
+ model_name: meta-llama/Llama-2-7b-hf
52
+ mse_loss_coefficient: 1
53
+ mse_loss_type: standard
54
+ multiple_runs: false
55
+ n_batches_in_store_buffer: 128
56
+ n_checkpoints: 80
57
+ n_running_sparsity: 500
58
+ n_starting_steps: null
59
+ normalise_initial_decoder_weights: false
60
+ normalise_w_dec: false
61
+ resample_batches: 128
62
+ resample_frequency: 25000
63
+ scale_input_norm: false
64
+ seed: 42
65
+ sparse_loss_coefficient: 0
66
+ sparsity_log_frequency: 5000
67
+ store_batch_size: 8
68
+ subtract_b_dec_from_inputs: false
69
+ total_training_steps: 73242
70
+ train_batch_size: 4096
71
+ use_cached_activations: false
72
+ use_gated_sparse_autoencoder: false
73
+ wandb_log_frequency: 10
74
+ wandb_project: test_pythia-mlp
75
+ weight_l1_by_decoder_norms: true
lunar-fire-36_llama/sparse_autoencoder/Llama-2-7b-hf_blocks.16.hook_mlp_out_s32768_100M_tokens_openwebtext.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc4edf63c86d477956d054b87f0980666fac2db42f3b0e4cec7828e480e30145
3
+ size 1073894352
lunar-fire-36_llama/sparse_autoencoder/Llama-2-7b-hf_blocks.16.hook_mlp_out_s32768_150M_tokens_openwebtext.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77b86136732e028bea093921a33ca2648404d1f0670b9f77f56558154905d388
3
+ size 1073894352
lunar-fire-36_llama/sparse_autoencoder/Llama-2-7b-hf_blocks.16.hook_mlp_out_s32768_200M_tokens_openwebtext.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c4316f423fba61eb21718ccfb4d06d35c19ef2b95ed996d10899769436f252
3
+ size 1073894352
lunar-fire-36_llama/sparse_autoencoder/Llama-2-7b-hf_blocks.16.hook_mlp_out_s32768_50M_tokens_openwebtext.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be6c2e053e06599c4a22f432dd13c1752e42629f17f60bf845343fb0afc9f105
3
+ size 1073894344