Abdullah
committed on
Uploading saes for ['transformer.h.0.attn', 'transformer.h.0.mlp', 'transformer.h.1.attn', 'transformer.h.1.mlp'] and sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256
Browse files
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/config.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/model_config.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/state.pt +3 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/sae.safetensors +3 -0
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81ec4a68d15406a4705b860cc8ccb0cda3b0bf3d809162715b62225325b749eb
|
| 3 |
+
size 1483
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/model_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1", "dataset_name": "withmartian/cs4_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True"}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27c08febe8aaa44e0c8ec1cf87a44957e8ff6d34dd152fbf6f62eec876e65cf9
|
| 3 |
+
size 1007171903
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de2173e3849ec5139924417c0ed5e321018dbe06b6ffa510b6488e2c3cb50a93
|
| 3 |
+
size 493769
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.004947917070239782, "fvu": 0.008074119687080383}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d6c378f731f23108d97f975a41be00668644d59444d6adcb7fba60da1aaca27
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0, "fvu": 0.019652528688311577}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd347a7e925ef9de8e06b4b9b4b120f4d58e749b580bb28e6b8283fca2decb8f
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0729166716337204, "fvu": 0.013302471488714218}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d67bba14ae54a043626560bed9eba208ede788a9384130f3fd14c99636d1367
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0, "fvu": 0.02290498837828636}
|
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f991a58558dfc9078b189842c6a423905b03525bb6835400c11177514f225b2
|
| 3 |
+
size 125894984
|