Upload SAEs from experiment 4.3
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/config.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/lr_scheduler.pt +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.post_attention_layernorm/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.mlp/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.mlp/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.post_attention_layernorm/cfg.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.post_attention_layernorm/metrics.json +1 -0
- saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.post_attention_layernorm/sae.safetensors +3 -0
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["model.layers.5.mlp", "model.layers.5.post_attention_layernorm", "model.layers.6.mlp", "model.layers.6.post_attention_layernorm", "model.layers.7.mlp", "model.layers.7.post_attention_layernorm", "model.layers.8.mlp", "model.layers.8.post_attention_layernorm", "model.layers.9.mlp", "model.layers.9.post_attention_layernorm", "model.layers.10.mlp", "model.layers.10.post_attention_layernorm", "model.layers.11.mlp", "model.layers.11.post_attention_layernorm", "model.layers.12.mlp", "model.layers.12.post_attention_layernorm", "model.layers.13.mlp", "model.layers.13.post_attention_layernorm", "model.layers.14.mlp", "model.layers.14.post_attention_layernorm", "model.layers.15.mlp", "model.layers.15.post_attention_layernorm", "model.layers.16.mlp", "model.layers.16.post_attention_layernorm", "model.layers.17.mlp", "model.layers.17.post_attention_layernorm", "model.layers.18.mlp", "model.layers.18.post_attention_layernorm", "model.layers.19.mlp", "model.layers.19.post_attention_layernorm"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128", "wandb_log_frequency": 1}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/lr_scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c1a4ccded57e94e581be291e5bc725b8c8e883138bcad563998f929ce54ece4
|
| 3 |
+
size 1652
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.011143377050757408, "dead_pct": 0.00758928619325161}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26b70e6030838b0a5139b6d3ad6b6ce3a919ee94dd20ecbca513028fa96adfcc
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.004017857369035482, "fvu": 0.008762086741626263}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.10.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d87920aa769096bb31889dcd100d7760890cb9c65ba7fa49ed4bf74e6ebe798f
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.012227058410644531, "dead_pct": 0.0096726194024086}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00530f95e7f09543bedf83233ad4adeba0d0adf97a619d8ebed51aa1128077da
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.00513392873108387, "fvu": 0.010888863354921341}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.11.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7e6a4919d9b0aeb08d3314734ab3ec46a8978780fd60c0dab53c1acaa1b7749
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.013765087351202965, "dead_pct": 0.007663690950721502}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e16c225c2d47b5a6615914d8a4e62c7d863373a0fb078c0cc7ff0b1c6d22e20f
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.010856058448553085, "dead_pct": 0.01004464365541935}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.12.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b266fcccbc2a12baeb5e9c14f7434f4c725e4eee7a7c5a23df51d6a6e256f7b
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.015192780643701553, "dead_pct": 0.0055059525184333324}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe702ea2a7d8e08337c980bb9eee72d9149c8911166f237721e364b18d250035
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.01202225312590599, "dead_pct": 0.01004464365541935}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.13.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c9c78a5bf72a2681a3c85b450b37d11287976c167a19536b8207101ef6105bd
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.01987934298813343, "dead_pct": 0.0012648809934034944}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6e3f2d1a8870029e1f3a890637a34c7f65336744156a07a48b7a6c1de1c9117
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.00907738134264946, "fvu": 0.014239244163036346}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.14.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c71198823012d8575bd5aa4ab8d92ccd37da5b9b265a06498777e0b5624ce0ce
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0005208333604969084, "fvu": 0.023401465266942978}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:082673861e33fabb4d467840ba2e8598256763dfc348f4647e2760a37cddebbe
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.014060208573937416, "dead_pct": 0.004092262126505375}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.15.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:924e63b29c8a439afdd9f18892fbfe0899b1e0d110307cf54a6043b60afadf41
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.016603531315922737, "dead_pct": 0.00022321430151350796}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14b234d5e11026e3beafd482b1ae7b14571996abc94e5d2b72f92ea83cc74308
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.013535319827497005, "dead_pct": 0.0027529762592166662}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.16.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdd4138f7126c21b3c2c52502c86a18c7b14d5ce75cf66d5a73d2e75b63e7656
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.mlp/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.mlp/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dead_pct": 0.0014136905083432794, "fvu": 0.011707901023328304}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.mlp/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d31d0403c99aeacbb92a2b74e829a0edf56793ff0136d6c6f7f4ace994546c8
|
| 3 |
+
size 96395592
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.post_attention_layernorm/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 896}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.post_attention_layernorm/metrics.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"fvu": 0.011492160148918629, "dead_pct": 0.002306547714397311}
|
saes_sql_interp_bm2_cs1_experiment_4.3_syn=True/k=128/model.layers.17.post_attention_layernorm/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:526f3402b3c6843b76fe8e83f1159fe894f13254d252872ff8ae0be67c7ac82b
|
| 3 |
+
size 96395592
|