Upload folder using huggingface_hub
Browse files- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/config.json +28 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/config.json +28 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/config.json +31 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/config.json +31 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/config.json +28 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/config.json +28 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/config.json +28 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/config.json +28 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/config.json +31 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/config.json +31 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/config.json +31 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/eval_results.json +1 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/ae.pt +3 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/config.json +31 -0
- saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/eval_results.json +1 -0
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e194665ab0ab01e0dbba0257d197c349196d38431ef466161b0e18c8c022860
|
| 3 |
+
size 469975062
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "GatedAutoEncoder",
|
| 4 |
+
"trainer_class": "GatedSAETrainer",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.012,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"decay_start": 195312,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"device": "cuda",
|
| 14 |
+
"layer": 10,
|
| 15 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 16 |
+
"wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_0",
|
| 17 |
+
"submodule_name": "resid_post_layer_10"
|
| 18 |
+
},
|
| 19 |
+
"buffer": {
|
| 20 |
+
"d_submodule": 3584,
|
| 21 |
+
"io": "out",
|
| 22 |
+
"n_ctxs": 122,
|
| 23 |
+
"ctx_len": 2048,
|
| 24 |
+
"refresh_batch_size": 8,
|
| 25 |
+
"out_batch_size": 2048,
|
| 26 |
+
"device": "cuda"
|
| 27 |
+
}
|
| 28 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fc8b01942eb029e6d5ab1d4dd6b5717196e97e461226e7bf222b6648d2256fc
|
| 3 |
+
size 469975062
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "GatedAutoEncoder",
|
| 4 |
+
"trainer_class": "GatedSAETrainer",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.018,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"decay_start": 195312,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"device": "cuda",
|
| 14 |
+
"layer": 10,
|
| 15 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 16 |
+
"wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_1",
|
| 17 |
+
"submodule_name": "resid_post_layer_10"
|
| 18 |
+
},
|
| 19 |
+
"buffer": {
|
| 20 |
+
"d_submodule": 3584,
|
| 21 |
+
"io": "out",
|
| 22 |
+
"n_ctxs": 122,
|
| 23 |
+
"ctx_len": 2048,
|
| 24 |
+
"refresh_batch_size": 8,
|
| 25 |
+
"out_batch_size": 2048,
|
| 26 |
+
"device": "cuda"
|
| 27 |
+
}
|
| 28 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5734865ba376c04ad8fa700a60978556b2743d4ebea7969a3b2037880bf70de5
|
| 3 |
+
size 469843990
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 520,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 10,
|
| 18 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 19 |
+
"wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_10",
|
| 20 |
+
"submodule_name": "resid_post_layer_10"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 8,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d116fc3fdf485924febb5f1f28a037488b8cc5e07763026d00880e4bb47f481d
|
| 3 |
+
size 469843990
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 820,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 10,
|
| 18 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 19 |
+
"wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_11",
|
| 20 |
+
"submodule_name": "resid_post_layer_10"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 8,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73b4c7661d318aaa92245db9036f3cd12e000a2f53ee7177e5d176def40f7c4c
|
| 3 |
+
size 469975062
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "GatedAutoEncoder",
|
| 4 |
+
"trainer_class": "GatedSAETrainer",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.024,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"decay_start": 195312,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"device": "cuda",
|
| 14 |
+
"layer": 10,
|
| 15 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 16 |
+
"wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_2",
|
| 17 |
+
"submodule_name": "resid_post_layer_10"
|
| 18 |
+
},
|
| 19 |
+
"buffer": {
|
| 20 |
+
"d_submodule": 3584,
|
| 21 |
+
"io": "out",
|
| 22 |
+
"n_ctxs": 122,
|
| 23 |
+
"ctx_len": 2048,
|
| 24 |
+
"refresh_batch_size": 8,
|
| 25 |
+
"out_batch_size": 2048,
|
| 26 |
+
"device": "cuda"
|
| 27 |
+
}
|
| 28 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a65475230842ce2c280241d7a8cb2124ec4b7cdfdf20533810ae8c71553cc596
|
| 3 |
+
size 469975062
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "GatedAutoEncoder",
|
| 4 |
+
"trainer_class": "GatedSAETrainer",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.04,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"decay_start": 195312,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"device": "cuda",
|
| 14 |
+
"layer": 10,
|
| 15 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 16 |
+
"wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_3",
|
| 17 |
+
"submodule_name": "resid_post_layer_10"
|
| 18 |
+
},
|
| 19 |
+
"buffer": {
|
| 20 |
+
"d_submodule": 3584,
|
| 21 |
+
"io": "out",
|
| 22 |
+
"n_ctxs": 122,
|
| 23 |
+
"ctx_len": 2048,
|
| 24 |
+
"refresh_batch_size": 8,
|
| 25 |
+
"out_batch_size": 2048,
|
| 26 |
+
"device": "cuda"
|
| 27 |
+
}
|
| 28 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7627d04102202588f7c59ae6fac0db0ab9e6eb1f14b1ae927ec28eb1c0cbb762
|
| 3 |
+
size 469975062
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "GatedAutoEncoder",
|
| 4 |
+
"trainer_class": "GatedSAETrainer",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.06,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"decay_start": 195312,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"device": "cuda",
|
| 14 |
+
"layer": 10,
|
| 15 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 16 |
+
"wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_4",
|
| 17 |
+
"submodule_name": "resid_post_layer_10"
|
| 18 |
+
},
|
| 19 |
+
"buffer": {
|
| 20 |
+
"d_submodule": 3584,
|
| 21 |
+
"io": "out",
|
| 22 |
+
"n_ctxs": 122,
|
| 23 |
+
"ctx_len": 2048,
|
| 24 |
+
"refresh_batch_size": 8,
|
| 25 |
+
"out_batch_size": 2048,
|
| 26 |
+
"device": "cuda"
|
| 27 |
+
}
|
| 28 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2cb8f9cad0579a773fc0a48025a89fe47205445aad4da293da7b51f553735c3
|
| 3 |
+
size 469975062
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "GatedAutoEncoder",
|
| 4 |
+
"trainer_class": "GatedSAETrainer",
|
| 5 |
+
"activation_dim": 3584,
|
| 6 |
+
"dict_size": 16384,
|
| 7 |
+
"lr": 0.0003,
|
| 8 |
+
"l1_penalty": 0.08,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"sparsity_warmup_steps": 5000,
|
| 11 |
+
"decay_start": 195312,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"device": "cuda",
|
| 14 |
+
"layer": 10,
|
| 15 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 16 |
+
"wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_5",
|
| 17 |
+
"submodule_name": "resid_post_layer_10"
|
| 18 |
+
},
|
| 19 |
+
"buffer": {
|
| 20 |
+
"d_submodule": 3584,
|
| 21 |
+
"io": "out",
|
| 22 |
+
"n_ctxs": 122,
|
| 23 |
+
"ctx_len": 2048,
|
| 24 |
+
"refresh_batch_size": 8,
|
| 25 |
+
"out_batch_size": 2048,
|
| 26 |
+
"device": "cuda"
|
| 27 |
+
}
|
| 28 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:817f7e909f973725af379f1c57af2596b5595793726141bae8b48eee83a34c72
|
| 3 |
+
size 469843990
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 50,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 10,
|
| 18 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 19 |
+
"wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_6",
|
| 20 |
+
"submodule_name": "resid_post_layer_10"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 8,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ada562dfd95422f45268afa181b0db3627356234c443d0880d926120ed829886
|
| 3 |
+
size 469843990
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 80,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 10,
|
| 18 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 19 |
+
"wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_7",
|
| 20 |
+
"submodule_name": "resid_post_layer_10"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 8,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbc4ffeffa5ed600ad2ca933dd473b8375c48ad8dae2421fac74ce7154e8a3c2
|
| 3 |
+
size 469843990
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 160,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 10,
|
| 18 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 19 |
+
"wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_8",
|
| 20 |
+
"submodule_name": "resid_post_layer_10"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 8,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51722fe186f504360a1d44c5669f558c3a4b7b5c747c9c399ec5d27e5e6dcec0
|
| 3 |
+
size 469843990
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 244140,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 195312,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": 3407,
|
| 13 |
+
"activation_dim": 3584,
|
| 14 |
+
"dict_size": 16384,
|
| 15 |
+
"k": 320,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 10,
|
| 18 |
+
"lm_name": "Dream-org/Dream-v0-Base-7B",
|
| 19 |
+
"wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_9",
|
| 20 |
+
"submodule_name": "resid_post_layer_10"
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 3584,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 122,
|
| 26 |
+
"ctx_len": 2048,
|
| 27 |
+
"refresh_batch_size": 8,
|
| 28 |
+
"out_batch_size": 2048,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
|