eoinf committed on May 28, 2024

Commit

b7dc0dc

verified ·

1 Parent(s): e87e50d

Initial commit

Browse files

Files changed (32) hide show

autumn-lion-33/hyperparameters.yaml +75 -0
dashing-mountain-34/hyperparameters.yaml +75 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_11239424.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_14987264.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_18735104.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_20475904_log_feature_sparsity.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_22482944.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_26230784.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_29978624.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_33726464.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_3743744.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_37474304.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_40955904_log_feature_sparsity.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_41222144.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_44969984.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_48717824.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_52465664.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_56213504.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_59961344.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_61435904_log_feature_sparsity.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_63709184.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_67457024.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_71204864.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_7491584.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_74952704.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_78700544.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_81915904_log_feature_sparsity.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_82448384.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_86196224.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_89944064.pt +3 -0
dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_93691904.pt +3 -0
fast-darkness-32/hyperparameters.yaml +75 -0

autumn-lion-33/hyperparameters.yaml ADDED Viewed

	@@ -0,0 +1,75 @@

+adam_beta1: 0.9
+adam_beta2: 0.999
+b_dec_init_method: zeros
+cached_activations_path: null
+checkpoint_path: ./outputs/checkpoints
+clip_grad_norm: true
+context_size: 256
+custom_loss: null
+d_in: 4096
+d_out: null
+dataset: skylion007/openwebtext
+dense_loss_coefficient: 0
+device: cuda
+different_output: false
+dtype: float32
+epsilon_l0_approx: 0.5
+eval_frequency: 500
+expansion_factor: 8
+feature_reinit_scale: 0.2
+feature_resampling_method: null
+fine_tune_dataset: false
+finetuning_steps: !!python/tuple
+- 1000
+flatten_activations_over_layer: false
+flatten_activations_over_layer_output: false
+from_pretrained_path: null
+hook_point: blocks.10.hook_resid_pre
+hook_point_head_index: null
+hook_point_head_index_output: null
+hook_point_layer: 10
+hook_point_layer_output: null
+hook_point_output: null
+initial_decoder_norm: 0.1
+initialise_encoder_to_decoder_transpose: false
+is_dataset_tokenized: false
+l0_coefficient: 0
+l0_warmup: false
+l0_warmup_steps: 1000
+l1_coefficient: 5
+l1_warmup: true
+l1_warmup_steps: 5000
+log_to_wandb: true
+loop_dataset: false
+lr: 0.0001
+lr_scheduler_name: constant_with_warmup
+lr_warm_up_steps: 1000
+max_resample_step: 100000
+max_sparsity_target: 1
+min_sparsity_for_resample: 0
+min_sparsity_target: 0
+model_name: meta-llama/Llama-2-7b-hf
+mse_loss_coefficient: 1
+mse_loss_type: standard
+multiple_runs: false
+n_batches_in_store_buffer: 128
+n_checkpoints: 80
+n_running_sparsity: 500
+n_starting_steps: null
+normalise_initial_decoder_weights: false
+normalise_w_dec: false
+resample_batches: 128
+resample_frequency: 25000
+scale_input_norm: false
+seed: 42
+sparse_loss_coefficient: 0
+sparsity_log_frequency: 5000
+store_batch_size: 8
+subtract_b_dec_from_inputs: false
+total_training_steps: 73242
+train_batch_size: 4096
+use_cached_activations: false
+use_gated_sparse_autoencoder: false
+wandb_log_frequency: 10
+wandb_project: test_gemma_2b
+weight_l1_by_decoder_norms: true

dashing-mountain-34/hyperparameters.yaml ADDED Viewed

	@@ -0,0 +1,75 @@

+adam_beta1: 0.9
+adam_beta2: 0.999
+b_dec_init_method: zeros
+cached_activations_path: null
+checkpoint_path: ./outputs/checkpoints
+clip_grad_norm: true
+context_size: 256
+custom_loss: null
+d_in: 4096
+d_out: null
+dataset: skylion007/openwebtext
+dense_loss_coefficient: 0
+device: cuda
+different_output: false
+dtype: float32
+epsilon_l0_approx: 0.5
+eval_frequency: 500
+expansion_factor: 8
+feature_reinit_scale: 0.2
+feature_resampling_method: null
+fine_tune_dataset: false
+finetuning_steps: !!python/tuple
+- 1000
+flatten_activations_over_layer: false
+flatten_activations_over_layer_output: false
+from_pretrained_path: null
+hook_point: blocks.10.hook_resid_pre
+hook_point_head_index: null
+hook_point_head_index_output: null
+hook_point_layer: 10
+hook_point_layer_output: null
+hook_point_output: null
+initial_decoder_norm: 0.1
+initialise_encoder_to_decoder_transpose: false
+is_dataset_tokenized: false
+l0_coefficient: 0
+l0_warmup: false
+l0_warmup_steps: 1000
+l1_coefficient: 5
+l1_warmup: true
+l1_warmup_steps: 5000
+log_to_wandb: true
+loop_dataset: false
+lr: 0.0001
+lr_scheduler_name: constant_with_warmup
+lr_warm_up_steps: 1000
+max_resample_step: 100000
+max_sparsity_target: 1
+min_sparsity_for_resample: 0
+min_sparsity_target: 0
+model_name: meta-llama/Llama-2-7b-hf
+mse_loss_coefficient: 1
+mse_loss_type: standard
+multiple_runs: false
+n_batches_in_store_buffer: 128
+n_checkpoints: 80
+n_running_sparsity: 500
+n_starting_steps: null
+normalise_initial_decoder_weights: false
+normalise_w_dec: false
+resample_batches: 128
+resample_frequency: 25000
+scale_input_norm: false
+seed: 42
+sparse_loss_coefficient: 0
+sparsity_log_frequency: 5000
+store_batch_size: 8
+subtract_b_dec_from_inputs: false
+total_training_steps: 73242
+train_batch_size: 4096
+use_cached_activations: false
+use_gated_sparse_autoencoder: false
+wandb_log_frequency: 10
+wandb_project: test_gemma_2b
+weight_l1_by_decoder_norms: true

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_11239424.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:230b44cf52c88bdbf3d69f98ec75c0a010ce32818e3421140ee2a8a66d8193e5
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_14987264.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8df6232c90a5a061e009e1095899561d1d4287e05390210a1b9544d3e6ae983c
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_18735104.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f34c480b14f472162b1485f4e2e43e417ef39b997eca39f691c44ccad68178c9
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_20475904_log_feature_sparsity.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9ff75215c33a8eae7374908c94f2547cb35fdf61946bbd33de8a681f35a464d
+size 132912

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_22482944.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55e5f26471ef49d647e5ac0d45da999c2e807e803b2ba3d50690e9d2b18d03ed
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_26230784.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12e2115c3c3cc578e3289a245711e6bdbe7449fdbdf419aef15672c9c6ddd9e4
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_29978624.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8552a3a8203dcd03c1cc06845de0380232dec5142c8585e2d907ae3445925348
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_33726464.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f5718364fdf8117440ec32be6e0dd128c9a29811f1dece2fa57c571acfa5eb3
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_3743744.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75d40ea401aa1e34cc0782cac2eaa190c6a28244a29fed363484cf30c52c0cb6
+size 1073894272

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_37474304.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d35b7d2cda0468a71c9fd9ece696df1a04199d77b88d8744a4beb40678661330
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_40955904_log_feature_sparsity.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da1efe686262533e2d6e7292f38e33a7422b1de0a218181277ac2126548417aa
+size 132912

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_41222144.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1998aec6b69b80d534f68bd82f03bf062f116122a470181d5a95bde862bc25fa
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_44969984.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae0a3fd8061f462e3d8a4576311c4fa294108eeae22981203304dd39aca0ee5a
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_48717824.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:640d1dc8ffbbc2239084829baa5788b3f570712590fb485dab440ffc1cf5546f
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_52465664.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c056e3dbaf24304da12b499121f7f497d5161567fd710caadf1a2871dba2320
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_56213504.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52da0773143ae11aff4ea823015419133a0fbe668cda164bc6a522c43c155e04
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_59961344.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1647c3b25232e81073c97c2f22214d639e826f8d7102f24b9c66a6d14d84fd8
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_61435904_log_feature_sparsity.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65a6f2d3e5720f8a4e01fc2bad2e6ae0cb7578f0c311ff900644b9e48a7201b1
+size 132912

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_63709184.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb30d28e25f12ce35882b6caf14011bc9a4d10ff228bb4a83d46edc3f8fcdf95
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_67457024.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:148cc87a1f19cb76d96ca85b2705d4ebe03ab7f3e0f41e7d85548d411f0ecd81
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_71204864.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be7cd70eeac61eeaa3daebd90721fa25fc04ebfd1485b6dad229466833311acb
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_7491584.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e8b2b8e4ff95ede51fe14a59b3d994fe059529412902099fbece762bc48048b
+size 1073894272

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_74952704.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08b3a5b2217f08fb751f5a6119a6fb927854f18390cab171af1a9aad47a3adfa
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_78700544.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74bb245e92ebf8ad48cb837c253de30e8c48cb502dcac77dfa965ab2a737d4c5
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_81915904_log_feature_sparsity.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7f185de31ec497514b0d05e14f1d15e6bd1171980ae8e8b33926af3b0cc0f2a
+size 132912

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_82448384.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09918568a889a289547476a4ce4a092df48c02fb64bd5ad8b7e6a83ce399be3e
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_86196224.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bece16884e003d46ad79d9cfd399a7acddda6f605a9c945c8bf0e9bc88e2a09
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_89944064.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30d508ce2aa3e25c649b177f610109524d93880c485df524e72352dd2256cb5b
+size 1073894344

dashing-mountain-34/sparse_autoencoder_meta-llama/Llama-2-7b-hf_blocks.10.hook_resid_pre_s32768_93691904.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a823e8f48e8d381a965c29c2274c4f0137ddbb36afadb7b9beff49ffbc46a315
+size 1073894344

fast-darkness-32/hyperparameters.yaml ADDED Viewed

	@@ -0,0 +1,75 @@

+adam_beta1: 0.9
+adam_beta2: 0.999
+b_dec_init_method: zeros
+cached_activations_path: null
+checkpoint_path: ./outputs/checkpoints
+clip_grad_norm: true
+context_size: 256
+custom_loss: null
+d_in: 4096
+d_out: null
+dataset: skylion007/openwebtext
+dense_loss_coefficient: 0
+device: cuda
+different_output: false
+dtype: float32
+epsilon_l0_approx: 0.5
+eval_frequency: 500
+expansion_factor: 8
+feature_reinit_scale: 0.2
+feature_resampling_method: null
+fine_tune_dataset: false
+finetuning_steps: !!python/tuple
+- 1000
+flatten_activations_over_layer: false
+flatten_activations_over_layer_output: false
+from_pretrained_path: null
+hook_point: blocks.10.hook_resid_pre
+hook_point_head_index: null
+hook_point_head_index_output: null
+hook_point_layer: 10
+hook_point_layer_output: null
+hook_point_output: null
+initial_decoder_norm: 0.1
+initialise_encoder_to_decoder_transpose: false
+is_dataset_tokenized: false
+l0_coefficient: 0
+l0_warmup: false
+l0_warmup_steps: 1000
+l1_coefficient: 5
+l1_warmup: true
+l1_warmup_steps: 5000
+log_to_wandb: true
+loop_dataset: false
+lr: 0.0001
+lr_scheduler_name: constant_with_warmup
+lr_warm_up_steps: 1000
+max_resample_step: 100000
+max_sparsity_target: 1
+min_sparsity_for_resample: 0
+min_sparsity_target: 0
+model_name: meta-llama/Llama-2-7b-hf
+mse_loss_coefficient: 1
+mse_loss_type: standard
+multiple_runs: false
+n_batches_in_store_buffer: 128
+n_checkpoints: 80
+n_running_sparsity: 500
+n_starting_steps: null
+normalise_initial_decoder_weights: false
+normalise_w_dec: false
+resample_batches: 128
+resample_frequency: 25000
+scale_input_norm: false
+seed: 42
+sparse_loss_coefficient: 0
+sparsity_log_frequency: 5000
+store_batch_size: 8
+subtract_b_dec_from_inputs: false
+total_training_steps: 73242
+train_batch_size: 4096
+use_cached_activations: false
+use_gated_sparse_autoencoder: false
+wandb_log_frequency: 10
+wandb_project: test_gemma_2b
+weight_l1_by_decoder_norms: true