jbloom commited on
Commit
fbcb980
·
verified ·
1 Parent(s): ab4df0d

Upload 27 files

Browse files
Files changed (27) hide show
  1. similar_l0/e2e/blocks.10.hook_resid_pre/cfg.json +1 -0
  2. similar_l0/e2e/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  3. similar_l0/e2e/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  4. similar_l0/e2e/blocks.2.hook_resid_pre/cfg.json +1 -0
  5. similar_l0/e2e/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  6. similar_l0/e2e/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  7. similar_l0/e2e/blocks.6.hook_resid_pre/cfg.json +1 -0
  8. similar_l0/e2e/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  9. similar_l0/e2e/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
  10. similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/cfg.json +1 -0
  11. similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  12. similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  13. similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/cfg.json +1 -0
  14. similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  15. similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  16. similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/cfg.json +1 -0
  17. similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  18. similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
  19. similar_l0/local/blocks.10.hook_resid_pre/cfg.json +1 -0
  20. similar_l0/local/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  21. similar_l0/local/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  22. similar_l0/local/blocks.2.hook_resid_pre/cfg.json +1 -0
  23. similar_l0/local/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  24. similar_l0/local/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  25. similar_l0/local/blocks.6.hook_resid_pre/cfg.json +1 -0
  26. similar_l0/local/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  27. similar_l0/local/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
similar_l0/e2e/blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "8crnit9h/sq1mxo7k", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/e2e/blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3e4ca935edee4bd34adb3fae116cab0101be7c7c78b785cd7e3cc89ea6c5e3
3
+ size 283487640
similar_l0/e2e/blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dc6e2f4618bbe0cf9ad743715f3ec41023a82df0a33e4e26896451c7d2fabd3
3
+ size 184400
similar_l0/e2e/blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "bst0prdd/m6m60438", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/e2e/blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1a9f2c9ec074c70c2eb4991564d70b747db9cfa45e57fc7f4a163e2b7a098f
3
+ size 283487640
similar_l0/e2e/blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e21525fa7f97c81c9b43aa0d6423b63c9f27776bfd2d95d133ec06c611e80e
3
+ size 184400
similar_l0/e2e/blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "tvj2owza/xvkgrq33", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/e2e/blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66cc21cd3a0654f949b54a84896f13531fb49cf362debf773e7a581e28857350
3
+ size 283487640
similar_l0/e2e/blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c3358ae88dbb41971162df7c7e482d79d1e65e493ebe60c0ed9d0d7be92b17
3
+ size 184400
similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "cvj5um2h/w1wxgb12", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e6ec2e7c1b256c788e2e05653af13600d1fe311f0c82e854f504a44f950b11b
3
+ size 283487640
similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7fe6e7f46e27139e39336bad73cd680e137d74bbcb794e17e5e4df7674a597a
3
+ size 184400
similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "e26jflpq/f0pn20cr", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1a9f2c9ec074c70c2eb4991564d70b747db9cfa45e57fc7f4a163e2b7a098f
3
+ size 283487640
similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:052c5a002f97d69311c24e84235253ba2d659680d206403fd0858beb3532fda8
3
+ size 184400
similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "2lzle2f0/t5h4g1b3", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66cc21cd3a0654f949b54a84896f13531fb49cf362debf773e7a581e28857350
3
+ size 283487640
similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b704a53f7745863882c86f10255df7fa510ffd69f93c562e7714e8ecd0d27df
3
+ size 184400
similar_l0/local/blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "5vmpdgaz/01yl6kmm", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/local/blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:832f97c0ec34dec8e3dd809aabd4eb563e3a1feee1268f2ff762e9b210770da4
3
+ size 283487640
similar_l0/local/blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea27bfea10017fd5c5e2b695c8a8e3e25b6dbc0a736cfb8ee8ce4c70ad577f2a
3
+ size 184400
similar_l0/local/blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "6vtk4k51/yv02gptp", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/local/blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:810be7bbe520724e75a3a67ac6c794f926f7d00a483f4dcb29e33eb50a90d176
3
+ size 283487640
similar_l0/local/blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b5f56594a67518be9dc68aa991de5e6b73c605331278947a1ce0fe56db66a0f
3
+ size 184400
similar_l0/local/blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "jup3glm9/jjy96cw8", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_l0/local/blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bd3d612497583034f7d76499d070906dd192849a64e780f15345546441276d9
3
+ size 283487640
similar_l0/local/blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fd6cc946a63906050bc4ea745a74180c695d0a4367b4988b28632068633391
3
+ size 184400