Upload SAEs from experiment 4.3
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/config.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/model_config.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/state.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/config.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/model_config.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/state.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/config.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/model_config.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/state.pt +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.mlp/metrics.json +1 -0
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afafb111606b64199b463308d054f28acf71d5cca5174e9fd9fce0b0ab0a717c
|
| 3 |
+
size 1076
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/model_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs3_experiment_3.10", "dataset_name": "withmartian/cs3_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs3_experiment_3.10_syn=True"}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05bc737c2f583a7284067f762d22f5dc411999b81dc3d208e7366c5aa0cfab12
|
| 3 |
+
size 255767204
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c556a21b6a0c8685b667ed3d31ea9263f623a5f9294cfa25ef9b7535f142138
|
| 3 |
+
size 493312
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.18613281846046448, "fvu": 0.0024563544429838657}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fd936b6c30c1ce6ea74d0cd9327e147cfc20375ed42db7c7234eed4794283ac
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.016500595957040787, "dead_pct": 0.00032552084303461015}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.0.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c304bced17a8af8c3b2ffc00fd849a1de346b67cbb6b0d76de57169d707435bc
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.018058784306049347, "dead_pct": 0.20774739980697632}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53833a29f9fbb82ea8e6ff8d993a9bb2ae98a713a068a1678a6c5b71fdb9f04c
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0014973959187045693, "fvu": 0.026534151285886765}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=128/transformer.h.1.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a447e5988964f955ef0755905f556fd220dd6b8b93baf3b7d21407cb2ba0730
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a19ec7a7d99a9517726d577d84860d99635fa1571577af8b6d7ba1c019ae8379
|
| 3 |
+
size 1076
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/model_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs3_experiment_3.10", "dataset_name": "withmartian/cs3_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs3_experiment_3.10_syn=True"}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d4d59e4af27494d7a55d71e3027cce99c6490154de521e57d113db32b90e221
|
| 3 |
+
size 68243108
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3642f8e60a9d07d644099d3adcc60d2dedcb4afe7375d80861cda0cfd29ee212
|
| 3 |
+
size 132864
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.644287109375, "fvu": 0.017582185566425323}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26ab021fd45966fe30335feff1fd87662d938d6b72a4aa94e1e931752961e750
|
| 3 |
+
size 33575240
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.233642578125, "fvu": 0.03958441689610481}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.0.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07dcceee56acab72bf1a7a053e3f237485dd5adc1faa3b9e5344d425314f1e40
|
| 3 |
+
size 33575240
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.695556640625, "fvu": 0.0602182000875473}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d54af82c1e5d6ef611afc3f0e4625ac4da93868716073389f56b1946a8fecb6c
|
| 3 |
+
size 33575240
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.3828125, "fvu": 0.07253600656986237}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=16/transformer.h.1.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc095dd6554a039e301b8a348a3d35c174e46616bd9461b855d56d890ff602a2
|
| 3 |
+
size 33575240
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd74b3fabf7af5e0d248711a1fa05fe261368df7b69f594226dfd4b050dd08b9
|
| 3 |
+
size 1076
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/model_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs3_experiment_3.10", "dataset_name": "withmartian/cs3_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs3_experiment_3.10_syn=True"}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3af4b7f60df38a89e653cbb346214ec92efe3238f1f83c0506f30f2dcbe08bc1
|
| 3 |
+
size 255767204
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:714abfbb80f4016e75d93e8f7ece32c6f752f64f5640a66e19e998afbd3dfbe9
|
| 3 |
+
size 493312
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.22682292759418488, "fvu": 0.0019393906695768237}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cf80c780554996ca1a41445ae88263fdd6a0367bf5f02e2b4bdd6c6339a56e4
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.011782245710492134, "dead_pct": 0.0002604166802484542}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.0.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32a79c0e4fe0116b797176be12f787629731e7fe0b3aca7f30b27f8528a1ff55
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.attn/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.attn/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.008456540293991566, "dead_pct": 0.3470052182674408}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.attn/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f27c8e1fb20b9b7f0a2f7beb9eac3fba227ef5328b9811d8331bf6f07524d222
|
| 3 |
+
size 125894984
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs3_experiment_3.10_syn=True/k=256/transformer.h.1.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.018147287890315056, "dead_pct": 0.0001302083401242271}
|