Upload SAE Llama3.2 artifacts
Browse files- config.json +1 -0
- layers.12/cfg.json +1 -0
- layers.12/sae.safetensors +3 -0
- layers.16/cfg.json +1 -0
- layers.16/sae.safetensors +3 -0
- layers.20/cfg.json +1 -0
- layers.20/sae.safetensors +3 -0
- layers.24/cfg.json +1 -0
- layers.24/sae.safetensors +3 -0
- optimizer_0.pt +3 -0
- rank_0_state.pt +3 -0
- state.pt +3 -0
config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 1, "grad_acc_steps": 4, "micro_acc_steps": 1, "loss_fn": "fvu", "optimizer": "signum", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "exclude_tokens": [], "hookpoints": ["layers.12", "layers.16", "layers.20", "layers.24"], "init_seeds": [0], "layers": [12, 16, 20, 24], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "save_best": false, "finetune": null, "log_to_wandb": false, "run_name": null, "wandb_log_frequency": 1, "save_dir": "artifacts/sae_llama32_3b", "model": "meta-llama/Llama-3.2-3B-Instruct", "dataset": "roneneldan/TinyStories", "split": "train", "ctx_len": 1024, "hf_token": null, "revision": null, "load_in_8bit": true, "max_examples": 20000, "resume": false, "text_column": "text", "shuffle_seed": 42, "data_preprocessing_num_proc": 16}
|
layers.12/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 3072}
|
layers.12/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfbb3d8dfa24d30da2613867a7b8334d668709c8c71ac16593d6f3ef8b8b230c
|
| 3 |
+
size 604090704
|
layers.16/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 3072}
|
layers.16/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f8076c17e12091d0ec206e12f73880aad645edf423001dc9f5db67cb49a8e20
|
| 3 |
+
size 604090704
|
layers.20/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 3072}
|
layers.20/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78a2c7b3f713e923f3cefa68a7d5c039d4d70fb498629cf48c3fb1be6f2de824
|
| 3 |
+
size 604090704
|
layers.24/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 3072}
|
layers.24/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a21bd3bb13a9b6f835155d941423c00002723ce479626ed436b757e0eab4300e
|
| 3 |
+
size 604090704
|
optimizer_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a801d075b1ea54e1bc5a58fa179778138dca21f004e00a21cb2bf4154cf3b2a
|
| 3 |
+
size 2416367435
|
rank_0_state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78c9712c3e403ff80d29d4d6595746b0eef9861ed0541a13da4fc5a7d1f948b4
|
| 3 |
+
size 789007
|
state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5addcbc4b1901c57dbe62c49df4b5a3a2a89b2f96409d93cdd0baebd3151369
|
| 3 |
+
size 1249
|