Abdullah
committed on
Uploading saes for ['transformer.h.0.attn', 'transformer.h.0.mlp', 'transformer.h.1.attn', 'transformer.h.1.mlp'] and sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256
Browse files
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/config.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/lr_scheduler.pt +3 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/model_config.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/optimizer.pt +3 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/state.pt +3 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/cfg.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/metrics.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/cfg.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/metrics.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/cfg.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/metrics.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/cfg.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/metrics.json +1 -0
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/sae.safetensors +3 -0
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256", "wandb_log_frequency": 1}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81ec4a68d15406a4705b860cc8ccb0cda3b0bf3d809162715b62225325b749eb
|
| 3 |
+
size 1483
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/model_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "withmartian/sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1", "dataset_name": "withmartian/cs4_dataset_synonyms", "model_abbrev": "saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True"}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e8ac7bb7bd2c9806b08499bad1eb2e67823120e2ed8d5f49a0a9aba2d612f2e
|
| 3 |
+
size 1007171903
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9664eb80003ed847bda69230d7d0246d0f29aa961e9c2dde10112fb898dd9272
|
| 3 |
+
size 493769
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.00533854216337204, "fvu": 0.008540037088096142}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc1a0601d4d4c286ea7aaf6bed5f77dec68ac610cabb07cb39f410a375200639
|
| 3 |
+
size 125894984
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0, "fvu": 0.02065226063132286}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:308157e312372ccf9cf099fc30a98f8f8e3730762283d98aa83c0e4e005df334
|
| 3 |
+
size 125894984
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0745442733168602, "fvu": 0.013256488367915154}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3ef1f0fe2543b9726b20b8035b6e761b973973b8d0bb6ca73f3f85865994cb3
|
| 3 |
+
size 125894984
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0, "fvu": 0.023422980681061745}
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10a64d5c5852126c79dc802e142b06d344aecfc1fef0543b0d53cc74350391c2
|
| 3 |
+
size 125894984
|