Abdullah committed on
Commit
fb4a8b2
·
verified ·
1 Parent(s): efc87b5

Uploading saes for ['transformer.h.0.attn', 'transformer.h.0.mlp', 'transformer.h.1.attn', 'transformer.h.1.mlp'] and sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32

Browse files
Files changed (17) hide show
  1. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/config.json +1 -0
  2. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/lr_scheduler.pt +3 -0
  3. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/model_config.json +1 -0
  4. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/optimizer.pt +3 -0
  5. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/state.pt +3 -0
  6. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/cfg.json +1 -0
  7. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/metrics.json +1 -0
  8. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/sae.safetensors +3 -0
  9. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/cfg.json +1 -0
  10. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/metrics.json +1 -0
  11. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/sae.safetensors +3 -0
  12. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/cfg.json +1 -0
  13. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/metrics.json +1 -0
  14. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/sae.safetensors +3 -0
  15. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/cfg.json +1 -0
  16. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/metrics.json +1 -0
  17. saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/sae.safetensors +3 -0
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32", "wandb_log_frequency": 1}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ec4a68d15406a4705b860cc8ccb0cda3b0bf3d809162715b62225325b749eb
3
+ size 1483
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/model_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_name": "withmartian/sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1", "dataset_name": "withmartian/cs4_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True"}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e18c87e3620a5ac0630b3adbf54139d6f9a9e3dfd67c7cfd908c3e199c54982
3
+ size 1007171903
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215cf7251b6a504602ce7cdc58ea7c94ae3004e0f47ccc52a5cb4c53fcff9bf4
3
+ size 493769
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dead_pct": 0.44589847326278687, "fvu": 0.032526373863220215}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc0023c16fded8019ac91847a938e03c85fc915c7a3b71cd2e2d16a8a584e5a4
3
+ size 125894984
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dead_pct": 0.0800781324505806, "fvu": 0.047936663031578064}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0315d691c50eb489a2a37cde089e4f2e471fca9b39f045401d2d300a47c6bb52
3
+ size 125894984
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dead_pct": 0.5221354365348816, "fvu": 0.04926695302128792}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4254e0232a874579e761fe9b7142ed048a0deffe08b04dc313c5e4d783bae73
3
+ size 125894984
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dead_pct": 0.1442057341337204, "fvu": 0.05900321900844574}
saes_sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f270bf9e9db37ca7ae63880af1da5faf64c112c2b85c1198f74296c99a1147f8
3
+ size 125894984