Upload 27 files
Browse files- similar_l0/e2e/blocks.10.hook_resid_pre/cfg.json +1 -0
- similar_l0/e2e/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/e2e/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/e2e/blocks.2.hook_resid_pre/cfg.json +1 -0
- similar_l0/e2e/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/e2e/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/e2e/blocks.6.hook_resid_pre/cfg.json +1 -0
- similar_l0/e2e/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/e2e/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/cfg.json +1 -0
- similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/cfg.json +1 -0
- similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/cfg.json +1 -0
- similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/local/blocks.10.hook_resid_pre/cfg.json +1 -0
- similar_l0/local/blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/local/blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/local/blocks.2.hook_resid_pre/cfg.json +1 -0
- similar_l0/local/blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/local/blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
- similar_l0/local/blocks.6.hook_resid_pre/cfg.json +1 -0
- similar_l0/local/blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
- similar_l0/local/blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
similar_l0/e2e/blocks.10.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "8crnit9h/sq1mxo7k", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/e2e/blocks.10.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f3e4ca935edee4bd34adb3fae116cab0101be7c7c78b785cd7e3cc89ea6c5e3
|
| 3 |
+
size 283487640
|
similar_l0/e2e/blocks.10.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dc6e2f4618bbe0cf9ad743715f3ec41023a82df0a33e4e26896451c7d2fabd3
|
| 3 |
+
size 184400
|
similar_l0/e2e/blocks.2.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "bst0prdd/m6m60438", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/e2e/blocks.2.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c1a9f2c9ec074c70c2eb4991564d70b747db9cfa45e57fc7f4a163e2b7a098f
|
| 3 |
+
size 283487640
|
similar_l0/e2e/blocks.2.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7e21525fa7f97c81c9b43aa0d6423b63c9f27776bfd2d95d133ec06c611e80e
|
| 3 |
+
size 184400
|
similar_l0/e2e/blocks.6.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "tvj2owza/xvkgrq33", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/e2e/blocks.6.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66cc21cd3a0654f949b54a84896f13531fb49cf362debf773e7a581e28857350
|
| 3 |
+
size 283487640
|
similar_l0/e2e/blocks.6.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37c3358ae88dbb41971162df7c7e482d79d1e65e493ebe60c0ed9d0d7be92b17
|
| 3 |
+
size 184400
|
similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "cvj5um2h/w1wxgb12", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e6ec2e7c1b256c788e2e05653af13600d1fe311f0c82e854f504a44f950b11b
|
| 3 |
+
size 283487640
|
similar_l0/e2e_future_recon/blocks.10.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7fe6e7f46e27139e39336bad73cd680e137d74bbcb794e17e5e4df7674a597a
|
| 3 |
+
size 184400
|
similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "e26jflpq/f0pn20cr", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c1a9f2c9ec074c70c2eb4991564d70b747db9cfa45e57fc7f4a163e2b7a098f
|
| 3 |
+
size 283487640
|
similar_l0/e2e_future_recon/blocks.2.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:052c5a002f97d69311c24e84235253ba2d659680d206403fd0858beb3532fda8
|
| 3 |
+
size 184400
|
similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "2lzle2f0/t5h4g1b3", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66cc21cd3a0654f949b54a84896f13531fb49cf362debf773e7a581e28857350
|
| 3 |
+
size 283487640
|
similar_l0/e2e_future_recon/blocks.6.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b704a53f7745863882c86f10255df7fa510ffd69f93c562e7714e8ecd0d27df
|
| 3 |
+
size 184400
|
similar_l0/local/blocks.10.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "5vmpdgaz/01yl6kmm", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/local/blocks.10.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:832f97c0ec34dec8e3dd809aabd4eb563e3a1feee1268f2ff762e9b210770da4
|
| 3 |
+
size 283487640
|
similar_l0/local/blocks.10.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea27bfea10017fd5c5e2b695c8a8e3e25b6dbc0a736cfb8ee8ce4c70ad577f2a
|
| 3 |
+
size 184400
|
similar_l0/local/blocks.2.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "6vtk4k51/yv02gptp", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/local/blocks.2.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:810be7bbe520724e75a3a67ac6c794f926f7d00a483f4dcb29e33eb50a90d176
|
| 3 |
+
size 283487640
|
similar_l0/local/blocks.2.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b5f56594a67518be9dc68aa991de5e6b73c605331278947a1ce0fe56db66a0f
|
| 3 |
+
size 184400
|
similar_l0/local/blocks.6.hook_resid_pre/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "jup3glm9/jjy96cw8", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
|
similar_l0/local/blocks.6.hook_resid_pre/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bd3d612497583034f7d76499d070906dd192849a64e780f15345546441276d9
|
| 3 |
+
size 283487640
|
similar_l0/local/blocks.6.hook_resid_pre/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24fd6cc946a63906050bc4ea745a74180c695d0a4367b4988b28632068633391
|
| 3 |
+
size 184400
|