Abdullah
commited on
Delete saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True
Browse files- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/config.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/lr_scheduler.pt +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/model_config.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/optimizer.pt +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/state.pt +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/config.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/lr_scheduler.pt +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/model_config.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/optimizer.pt +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/state.pt +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/sae.safetensors +0 -3
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/cfg.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/metrics.json +0 -1
- saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/sae.safetensors +0 -3
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/config.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256", "wandb_log_frequency": 1}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/lr_scheduler.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:81ec4a68d15406a4705b860cc8ccb0cda3b0bf3d809162715b62225325b749eb
|
| 3 |
-
size 1483
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/model_config.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"model_name": "withmartian/sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1", "dataset_name": "withmartian/cs5_dataset_synonyms", "model_abbrev": "saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True"}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/optimizer.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:70ba185d15c4266961c443120fd44653a290fd0daa91a72ae84e6189b611173e
|
| 3 |
-
size 1007171903
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/state.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:53ad8da566ec46c2e7d8b4d63924ed1915bd5178e078270f57872c5896e24ff4
|
| 3 |
-
size 493769
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.004882812965661287, "fvu": 0.008325805887579918}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.attn/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:df97c5d6668086e484cdfa5d265334cfae49e09d402050fd290cca89f62d2177
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.0, "fvu": 0.017450330778956413}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.0.mlp/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cf130148630f3b36d1d9018438d6b19e3a234dfe8616713ad5b7eddebac6969c
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.07109375298023224, "fvu": 0.012197243049740791}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.attn/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:72efb21d8889eded524e33a71c8cb4f0335ffd02de87f8999d465f6475058e59
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.0, "fvu": 0.021102389320731163}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=256/transformer.h.1.mlp/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:2fac135e9f7c8d17fc88afe8e3d8900f172fb7c6097b8e5e9255a40943e0b311
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/config.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32", "wandb_log_frequency": 1}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/lr_scheduler.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:81ec4a68d15406a4705b860cc8ccb0cda3b0bf3d809162715b62225325b749eb
|
| 3 |
-
size 1483
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/model_config.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"model_name": "withmartian/sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1", "dataset_name": "withmartian/cs5_dataset_synonyms", "model_abbrev": "saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True"}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/optimizer.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ac45e9c96ef07c06f7caf5e08e5f381186af424482410a6c5ff310264b2821e6
|
| 3 |
-
size 1007171903
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/state.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:1a21791269f0132fad465c7879f5c7465f6bca1ab53b1f601f3bcaff84edfa0b
|
| 3 |
-
size 493769
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.4452474117279053, "fvu": 0.030083920806646347}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.attn/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:17150a5b9107576b97ea47372bd40fd143eb8c2c0f6fcc72675a228d2f168252
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.07851562649011612, "fvu": 0.05118158459663391}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.0.mlp/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:dbbc7c87cba60665bed538de3452297c9d0a7458e140edd82ca19de63f861f28
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.5203125476837158, "fvu": 0.052874982357025146}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.attn/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:95ced1033fd1128018871f32569f83f15f8b69f4c92f2682a2d2b0395a0b86dd
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/cfg.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/metrics.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"dead_pct": 0.14557293057441711, "fvu": 0.061192333698272705}
|
|
|
|
|
|
saes_sft_sql_interp_TinyStories-2Layers-33M_cs4_dataset_synonyms_experiment_1.1_syn=True/k=32/transformer.h.1.mlp/sae.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:6af71d10ebbe039a91f4c8899d7de0bf27742c76cdcee7b6a7b71219b7ba46ab
|
| 3 |
-
size 125894984
|
|
|
|
|
|
|
|
|
|
|
|