jbloom commited on
Commit
ab4df0d
·
verified ·
1 Parent(s): 7504f5a

Upload 27 files

Browse files
Files changed (27) hide show
  1. similar_ce_loss/e2e/blocks.10.hook_resid_pre/cfg.json +1 -0
  2. similar_ce_loss/e2e/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  3. similar_ce_loss/e2e/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  4. similar_ce_loss/e2e/blocks.2.hook_resid_pre/cfg.json +1 -0
  5. similar_ce_loss/e2e/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  6. similar_ce_loss/e2e/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  7. similar_ce_loss/e2e/blocks.6.hook_resid_pre/cfg.json +1 -0
  8. similar_ce_loss/e2e/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  9. similar_ce_loss/e2e/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
  10. similar_ce_loss/e2e_future_recon/blocks.10.hook_resid_pre/cfg.json +1 -0
  11. similar_ce_loss/e2e_future_recon/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  12. similar_ce_loss/e2e_future_recon/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  13. similar_ce_loss/e2e_future_recon/blocks.2.hook_resid_pre/cfg.json +1 -0
  14. similar_ce_loss/e2e_future_recon/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  15. similar_ce_loss/e2e_future_recon/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  16. similar_ce_loss/e2e_future_recon/blocks.6.hook_resid_pre/cfg.json +1 -0
  17. similar_ce_loss/e2e_future_recon/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  18. similar_ce_loss/e2e_future_recon/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
  19. similar_ce_loss/local/blocks.10.hook_resid_pre/cfg.json +1 -0
  20. similar_ce_loss/local/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  21. similar_ce_loss/local/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  22. similar_ce_loss/local/blocks.2.hook_resid_pre/cfg.json +1 -0
  23. similar_ce_loss/local/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  24. similar_ce_loss/local/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  25. similar_ce_loss/local/blocks.6.hook_resid_pre/cfg.json +1 -0
  26. similar_ce_loss/local/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  27. similar_ce_loss/local/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
similar_ce_loss/e2e/blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "8crnit9h/lrkvecno", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/e2e/blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3e4ca935edee4bd34adb3fae116cab0101be7c7c78b785cd7e3cc89ea6c5e3
3
+ size 283487640
similar_ce_loss/e2e/blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e0026fb81af935b9d8dc23311cf5fee8f805aedbbf907a3647c120246ea244
3
+ size 184400
similar_ce_loss/e2e/blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "ovhfts9n/gzyh3c36", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/e2e/blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510997fc90dfeb030934666c8f3684834917f790d95cfb1ff171c04a53ad9c2c
3
+ size 283487640
similar_ce_loss/e2e/blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2549adf6befaf2d3cf26f8c4de04a6259f1abe0b78c5f4d1c6f0c1dd29a8499
3
+ size 184400
similar_ce_loss/e2e/blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "zgdpkafo/vdhbr7l5", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/e2e/blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7ae5881e1574f8e6d1f5a17502893ba3cc05bdd0c78b7cf671ac6978abe1479
3
+ size 283487640
similar_ce_loss/e2e/blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb8589298f4fcabc4e82401c042e2ac641b994d0a29181186a104a8deaaf260
3
+ size 184400
similar_ce_loss/e2e_future_recon/blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "cvj5um2h/rop2d6mb", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/e2e_future_recon/blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e6ec2e7c1b256c788e2e05653af13600d1fe311f0c82e854f504a44f950b11b
3
+ size 283487640
similar_ce_loss/e2e_future_recon/blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dcced92d84fbce7e12c9e2f7232ec5651cf9c4b6316310729328b087c2d04d6
3
+ size 184400
similar_ce_loss/e2e_future_recon/blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "visi12en/kmk2b4re", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/e2e_future_recon/blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc62ac98c10bc513fef99087a6412f3aa2b0b74b9c54406b5f9b2a0aa2afb296
3
+ size 283487640
similar_ce_loss/e2e_future_recon/blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01a91187f18e97698d505ebb8ca6268efb6a4348e6d121085869d4985d0b58f
3
+ size 184400
similar_ce_loss/e2e_future_recon/blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "2lzle2f0/mlznmn9a", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/e2e_future_recon/blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a74c2c86899a9c2d64692de9526892991bfd1bce777992dbb2822d6e91b919
3
+ size 283487640
similar_ce_loss/e2e_future_recon/blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a99516877c88826ff6da945d53e7ee45c3d531b63c686a421200ab6dccbfe1
3
+ size 184400
similar_ce_loss/local/blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "m2hntlav/b473p2g3", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/local/blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecc1c563065435c4aae11c6419e33bc9401d12f409e22b20c28cce4846d0963
3
+ size 283487640
similar_ce_loss/local/blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63183693df6bec60f549dd6dabbe6ffecd9a2031673020ef872a644d3e2b2790
3
+ size 184400
similar_ce_loss/local/blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "ue3lz0n7/o9axu6et", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/local/blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45dbb9966dea058ad3fbe151b2e36e4d291eb937340cda2bbbf47978d0aa56a4
3
+ size 283487640
similar_ce_loss/local/blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0e98525b61b5227a7bd7045fc78e6592afb23509488d2ae0da93fb50398feca
3
+ size 184400
similar_ce_loss/local/blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "1jy3m5j0/n646m3bg", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
similar_ce_loss/local/blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d708046d00bf5158a2bd664d97a827cb135795467fd0feb1069de07287bd26d
3
+ size 283487640
similar_ce_loss/local/blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16505950b571e2dfdbb25c22dc5decc7bc683f616216b68d096bacce0ce9fccc
3
+ size 184400