Initial commit
Browse files- autumn-lion-33/hyperparameters.yaml +75 -0
- dashing-mountain-34/hyperparameters.yaml +75 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_11239424.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_14987264.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_18735104.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_20475904_log_feature_sparsity.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_22482944.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_26230784.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_29978624.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_33726464.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_3743744.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_37474304.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_40955904_log_feature_sparsity.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_41222144.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_44969984.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_48717824.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_52465664.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_56213504.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_59961344.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_61435904_log_feature_sparsity.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_63709184.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_67457024.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_71204864.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_7491584.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_74952704.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_78700544.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_81915904_log_feature_sparsity.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_82448384.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_86196224.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_89944064.pt +3 -0
- dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_93691904.pt +3 -0
- fast-darkness-32/hyperparameters.yaml +75 -0
autumn-lion-33/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ./outputs/checkpoints
|
| 6 |
+
clip_grad_norm: true
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 4096
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset: skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.5
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
fine_tune_dataset: false
|
| 22 |
+
finetuning_steps: !!python/tuple
|
| 23 |
+
- 1000
|
| 24 |
+
flatten_activations_over_layer: false
|
| 25 |
+
flatten_activations_over_layer_output: false
|
| 26 |
+
from_pretrained_path: null
|
| 27 |
+
hook_point: blocks.10.hook_resid_pre
|
| 28 |
+
hook_point_head_index: null
|
| 29 |
+
hook_point_head_index_output: null
|
| 30 |
+
hook_point_layer: 10
|
| 31 |
+
hook_point_layer_output: null
|
| 32 |
+
hook_point_output: null
|
| 33 |
+
initial_decoder_norm: 0.1
|
| 34 |
+
initialise_encoder_to_decoder_transpose: false
|
| 35 |
+
is_dataset_tokenized: false
|
| 36 |
+
l0_coefficient: 0
|
| 37 |
+
l0_warmup: false
|
| 38 |
+
l0_warmup_steps: 1000
|
| 39 |
+
l1_coefficient: 5
|
| 40 |
+
l1_warmup: true
|
| 41 |
+
l1_warmup_steps: 5000
|
| 42 |
+
log_to_wandb: true
|
| 43 |
+
loop_dataset: false
|
| 44 |
+
lr: 0.0001
|
| 45 |
+
lr_scheduler_name: constant_with_warmup
|
| 46 |
+
lr_warm_up_steps: 1000
|
| 47 |
+
max_resample_step: 100000
|
| 48 |
+
max_sparsity_target: 1
|
| 49 |
+
min_sparsity_for_resample: 0
|
| 50 |
+
min_sparsity_target: 0
|
| 51 |
+
model_name: meta-llama/Llama-2-7b-hf
|
| 52 |
+
mse_loss_coefficient: 1
|
| 53 |
+
mse_loss_type: standard
|
| 54 |
+
multiple_runs: false
|
| 55 |
+
n_batches_in_store_buffer: 128
|
| 56 |
+
n_checkpoints: 80
|
| 57 |
+
n_running_sparsity: 500
|
| 58 |
+
n_starting_steps: null
|
| 59 |
+
normalise_initial_decoder_weights: false
|
| 60 |
+
normalise_w_dec: false
|
| 61 |
+
resample_batches: 128
|
| 62 |
+
resample_frequency: 25000
|
| 63 |
+
scale_input_norm: false
|
| 64 |
+
seed: 42
|
| 65 |
+
sparse_loss_coefficient: 0
|
| 66 |
+
sparsity_log_frequency: 5000
|
| 67 |
+
store_batch_size: 8
|
| 68 |
+
subtract_b_dec_from_inputs: false
|
| 69 |
+
total_training_steps: 73242
|
| 70 |
+
train_batch_size: 4096
|
| 71 |
+
use_cached_activations: false
|
| 72 |
+
use_gated_sparse_autoencoder: false
|
| 73 |
+
wandb_log_frequency: 10
|
| 74 |
+
wandb_project: test_gemma_2b
|
| 75 |
+
weight_l1_by_decoder_norms: true
|
dashing-mountain-34/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ./outputs/checkpoints
|
| 6 |
+
clip_grad_norm: true
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 4096
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset: skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.5
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
fine_tune_dataset: false
|
| 22 |
+
finetuning_steps: !!python/tuple
|
| 23 |
+
- 1000
|
| 24 |
+
flatten_activations_over_layer: false
|
| 25 |
+
flatten_activations_over_layer_output: false
|
| 26 |
+
from_pretrained_path: null
|
| 27 |
+
hook_point: blocks.10.hook_resid_pre
|
| 28 |
+
hook_point_head_index: null
|
| 29 |
+
hook_point_head_index_output: null
|
| 30 |
+
hook_point_layer: 10
|
| 31 |
+
hook_point_layer_output: null
|
| 32 |
+
hook_point_output: null
|
| 33 |
+
initial_decoder_norm: 0.1
|
| 34 |
+
initialise_encoder_to_decoder_transpose: false
|
| 35 |
+
is_dataset_tokenized: false
|
| 36 |
+
l0_coefficient: 0
|
| 37 |
+
l0_warmup: false
|
| 38 |
+
l0_warmup_steps: 1000
|
| 39 |
+
l1_coefficient: 5
|
| 40 |
+
l1_warmup: true
|
| 41 |
+
l1_warmup_steps: 5000
|
| 42 |
+
log_to_wandb: true
|
| 43 |
+
loop_dataset: false
|
| 44 |
+
lr: 0.0001
|
| 45 |
+
lr_scheduler_name: constant_with_warmup
|
| 46 |
+
lr_warm_up_steps: 1000
|
| 47 |
+
max_resample_step: 100000
|
| 48 |
+
max_sparsity_target: 1
|
| 49 |
+
min_sparsity_for_resample: 0
|
| 50 |
+
min_sparsity_target: 0
|
| 51 |
+
model_name: meta-llama/Llama-2-7b-hf
|
| 52 |
+
mse_loss_coefficient: 1
|
| 53 |
+
mse_loss_type: standard
|
| 54 |
+
multiple_runs: false
|
| 55 |
+
n_batches_in_store_buffer: 128
|
| 56 |
+
n_checkpoints: 80
|
| 57 |
+
n_running_sparsity: 500
|
| 58 |
+
n_starting_steps: null
|
| 59 |
+
normalise_initial_decoder_weights: false
|
| 60 |
+
normalise_w_dec: false
|
| 61 |
+
resample_batches: 128
|
| 62 |
+
resample_frequency: 25000
|
| 63 |
+
scale_input_norm: false
|
| 64 |
+
seed: 42
|
| 65 |
+
sparse_loss_coefficient: 0
|
| 66 |
+
sparsity_log_frequency: 5000
|
| 67 |
+
store_batch_size: 8
|
| 68 |
+
subtract_b_dec_from_inputs: false
|
| 69 |
+
total_training_steps: 73242
|
| 70 |
+
train_batch_size: 4096
|
| 71 |
+
use_cached_activations: false
|
| 72 |
+
use_gated_sparse_autoencoder: false
|
| 73 |
+
wandb_log_frequency: 10
|
| 74 |
+
wandb_project: test_gemma_2b
|
| 75 |
+
weight_l1_by_decoder_norms: true
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_11239424.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:230b44cf52c88bdbf3d69f98ec75c0a010ce32818e3421140ee2a8a66d8193e5
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_14987264.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8df6232c90a5a061e009e1095899561d1d4287e05390210a1b9544d3e6ae983c
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_18735104.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34c480b14f472162b1485f4e2e43e417ef39b997eca39f691c44ccad68178c9
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_20475904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9ff75215c33a8eae7374908c94f2547cb35fdf61946bbd33de8a681f35a464d
|
| 3 |
+
size 132912
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_22482944.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55e5f26471ef49d647e5ac0d45da999c2e807e803b2ba3d50690e9d2b18d03ed
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_26230784.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12e2115c3c3cc578e3289a245711e6bdbe7449fdbdf419aef15672c9c6ddd9e4
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_29978624.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8552a3a8203dcd03c1cc06845de0380232dec5142c8585e2d907ae3445925348
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_33726464.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f5718364fdf8117440ec32be6e0dd128c9a29811f1dece2fa57c571acfa5eb3
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_3743744.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75d40ea401aa1e34cc0782cac2eaa190c6a28244a29fed363484cf30c52c0cb6
|
| 3 |
+
size 1073894272
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_37474304.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d35b7d2cda0468a71c9fd9ece696df1a04199d77b88d8744a4beb40678661330
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_40955904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da1efe686262533e2d6e7292f38e33a7422b1de0a218181277ac2126548417aa
|
| 3 |
+
size 132912
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_41222144.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1998aec6b69b80d534f68bd82f03bf062f116122a470181d5a95bde862bc25fa
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_44969984.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae0a3fd8061f462e3d8a4576311c4fa294108eeae22981203304dd39aca0ee5a
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_48717824.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:640d1dc8ffbbc2239084829baa5788b3f570712590fb485dab440ffc1cf5546f
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_52465664.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c056e3dbaf24304da12b499121f7f497d5161567fd710caadf1a2871dba2320
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_56213504.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52da0773143ae11aff4ea823015419133a0fbe668cda164bc6a522c43c155e04
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_59961344.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1647c3b25232e81073c97c2f22214d639e826f8d7102f24b9c66a6d14d84fd8
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_61435904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65a6f2d3e5720f8a4e01fc2bad2e6ae0cb7578f0c311ff900644b9e48a7201b1
|
| 3 |
+
size 132912
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_63709184.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb30d28e25f12ce35882b6caf14011bc9a4d10ff228bb4a83d46edc3f8fcdf95
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_67457024.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:148cc87a1f19cb76d96ca85b2705d4ebe03ab7f3e0f41e7d85548d411f0ecd81
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_71204864.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be7cd70eeac61eeaa3daebd90721fa25fc04ebfd1485b6dad229466833311acb
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_7491584.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e8b2b8e4ff95ede51fe14a59b3d994fe059529412902099fbece762bc48048b
|
| 3 |
+
size 1073894272
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_74952704.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08b3a5b2217f08fb751f5a6119a6fb927854f18390cab171af1a9aad47a3adfa
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_78700544.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74bb245e92ebf8ad48cb837c253de30e8c48cb502dcac77dfa965ab2a737d4c5
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_81915904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7f185de31ec497514b0d05e14f1d15e6bd1171980ae8e8b33926af3b0cc0f2a
|
| 3 |
+
size 132912
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_82448384.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09918568a889a289547476a4ce4a092df48c02fb64bd5ad8b7e6a83ce399be3e
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_86196224.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bece16884e003d46ad79d9cfd399a7acddda6f605a9c945c8bf0e9bc88e2a09
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_89944064.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30d508ce2aa3e25c649b177f610109524d93880c485df524e72352dd2256cb5b
|
| 3 |
+
size 1073894344
|
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_93691904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a823e8f48e8d381a965c29c2274c4f0137ddbb36afadb7b9beff49ffbc46a315
|
| 3 |
+
size 1073894344
|
fast-darkness-32/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ./outputs/checkpoints
|
| 6 |
+
clip_grad_norm: true
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 4096
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset: skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.5
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
fine_tune_dataset: false
|
| 22 |
+
finetuning_steps: !!python/tuple
|
| 23 |
+
- 1000
|
| 24 |
+
flatten_activations_over_layer: false
|
| 25 |
+
flatten_activations_over_layer_output: false
|
| 26 |
+
from_pretrained_path: null
|
| 27 |
+
hook_point: blocks.10.hook_resid_pre
|
| 28 |
+
hook_point_head_index: null
|
| 29 |
+
hook_point_head_index_output: null
|
| 30 |
+
hook_point_layer: 10
|
| 31 |
+
hook_point_layer_output: null
|
| 32 |
+
hook_point_output: null
|
| 33 |
+
initial_decoder_norm: 0.1
|
| 34 |
+
initialise_encoder_to_decoder_transpose: false
|
| 35 |
+
is_dataset_tokenized: false
|
| 36 |
+
l0_coefficient: 0
|
| 37 |
+
l0_warmup: false
|
| 38 |
+
l0_warmup_steps: 1000
|
| 39 |
+
l1_coefficient: 5
|
| 40 |
+
l1_warmup: true
|
| 41 |
+
l1_warmup_steps: 5000
|
| 42 |
+
log_to_wandb: true
|
| 43 |
+
loop_dataset: false
|
| 44 |
+
lr: 0.0001
|
| 45 |
+
lr_scheduler_name: constant_with_warmup
|
| 46 |
+
lr_warm_up_steps: 1000
|
| 47 |
+
max_resample_step: 100000
|
| 48 |
+
max_sparsity_target: 1
|
| 49 |
+
min_sparsity_for_resample: 0
|
| 50 |
+
min_sparsity_target: 0
|
| 51 |
+
model_name: meta-llama/Llama-2-7b-hf
|
| 52 |
+
mse_loss_coefficient: 1
|
| 53 |
+
mse_loss_type: standard
|
| 54 |
+
multiple_runs: false
|
| 55 |
+
n_batches_in_store_buffer: 128
|
| 56 |
+
n_checkpoints: 80
|
| 57 |
+
n_running_sparsity: 500
|
| 58 |
+
n_starting_steps: null
|
| 59 |
+
normalise_initial_decoder_weights: false
|
| 60 |
+
normalise_w_dec: false
|
| 61 |
+
resample_batches: 128
|
| 62 |
+
resample_frequency: 25000
|
| 63 |
+
scale_input_norm: false
|
| 64 |
+
seed: 42
|
| 65 |
+
sparse_loss_coefficient: 0
|
| 66 |
+
sparsity_log_frequency: 5000
|
| 67 |
+
store_batch_size: 8
|
| 68 |
+
subtract_b_dec_from_inputs: false
|
| 69 |
+
total_training_steps: 73242
|
| 70 |
+
train_batch_size: 4096
|
| 71 |
+
use_cached_activations: false
|
| 72 |
+
use_gated_sparse_autoencoder: false
|
| 73 |
+
wandb_log_frequency: 10
|
| 74 |
+
wandb_project: test_gemma_2b
|
| 75 |
+
weight_l1_by_decoder_norms: true
|