AwesomeInterpretability committed on
Commit
c25c23b
·
verified ·
1 Parent(s): c12c001

Upload folder using huggingface_hub

Browse files
Files changed (36) hide show
  1. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/ae.pt +3 -0
  2. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/config.json +28 -0
  3. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/eval_results.json +1 -0
  4. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/ae.pt +3 -0
  5. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/config.json +28 -0
  6. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/eval_results.json +1 -0
  7. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/ae.pt +3 -0
  8. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/config.json +31 -0
  9. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/eval_results.json +1 -0
  10. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/ae.pt +3 -0
  11. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/config.json +31 -0
  12. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/eval_results.json +1 -0
  13. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/ae.pt +3 -0
  14. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/config.json +28 -0
  15. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/eval_results.json +1 -0
  16. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/ae.pt +3 -0
  17. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/config.json +28 -0
  18. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/eval_results.json +1 -0
  19. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/ae.pt +3 -0
  20. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/config.json +28 -0
  21. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/eval_results.json +1 -0
  22. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/ae.pt +3 -0
  23. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/config.json +28 -0
  24. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/eval_results.json +1 -0
  25. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/ae.pt +3 -0
  26. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/config.json +31 -0
  27. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/eval_results.json +1 -0
  28. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/ae.pt +3 -0
  29. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/config.json +31 -0
  30. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/eval_results.json +1 -0
  31. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/ae.pt +3 -0
  32. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/config.json +31 -0
  33. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/eval_results.json +1 -0
  34. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/ae.pt +3 -0
  35. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/config.json +31 -0
  36. saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/eval_results.json +1 -0
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e194665ab0ab01e0dbba0257d197c349196d38431ef466161b0e18c8c022860
3
+ size 469975062
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda",
14
+ "layer": 10,
15
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
16
+ "wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_0",
17
+ "submodule_name": "resid_post_layer_10"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 8,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda"
27
+ }
28
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc8b01942eb029e6d5ab1d4dd6b5717196e97e461226e7bf222b6648d2256fc
3
+ size 469975062
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.018,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda",
14
+ "layer": 10,
15
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
16
+ "wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_1",
17
+ "submodule_name": "resid_post_layer_10"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 8,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda"
27
+ }
28
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5734865ba376c04ad8fa700a60978556b2743d4ebea7969a3b2037880bf70de5
3
+ size 469843990
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3407,
13
+ "activation_dim": 3584,
14
+ "dict_size": 16384,
15
+ "k": 520,
16
+ "device": "cuda",
17
+ "layer": 10,
18
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
19
+ "wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_10",
20
+ "submodule_name": "resid_post_layer_10"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 3584,
24
+ "io": "out",
25
+ "n_ctxs": 122,
26
+ "ctx_len": 2048,
27
+ "refresh_batch_size": 8,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda"
30
+ }
31
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_10/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d116fc3fdf485924febb5f1f28a037488b8cc5e07763026d00880e4bb47f481d
3
+ size 469843990
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3407,
13
+ "activation_dim": 3584,
14
+ "dict_size": 16384,
15
+ "k": 820,
16
+ "device": "cuda",
17
+ "layer": 10,
18
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
19
+ "wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_11",
20
+ "submodule_name": "resid_post_layer_10"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 3584,
24
+ "io": "out",
25
+ "n_ctxs": 122,
26
+ "ctx_len": 2048,
27
+ "refresh_batch_size": 8,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda"
30
+ }
31
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_11/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b4c7661d318aaa92245db9036f3cd12e000a2f53ee7177e5d176def40f7c4c
3
+ size 469975062
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.024,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda",
14
+ "layer": 10,
15
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
16
+ "wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_2",
17
+ "submodule_name": "resid_post_layer_10"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 8,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda"
27
+ }
28
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a65475230842ce2c280241d7a8cb2124ec4b7cdfdf20533810ae8c71553cc596
3
+ size 469975062
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda",
14
+ "layer": 10,
15
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
16
+ "wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_3",
17
+ "submodule_name": "resid_post_layer_10"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 8,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda"
27
+ }
28
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7627d04102202588f7c59ae6fac0db0ab9e6eb1f14b1ae927ec28eb1c0cbb762
3
+ size 469975062
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda",
14
+ "layer": 10,
15
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
16
+ "wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_4",
17
+ "submodule_name": "resid_post_layer_10"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 8,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda"
27
+ }
28
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2cb8f9cad0579a773fc0a48025a89fe47205445aad4da293da7b51f553735c3
3
+ size 469975062
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.08,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 3407,
13
+ "device": "cuda",
14
+ "layer": 10,
15
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
16
+ "wandb_name": "GatedTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_5",
17
+ "submodule_name": "resid_post_layer_10"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 3584,
21
+ "io": "out",
22
+ "n_ctxs": 122,
23
+ "ctx_len": 2048,
24
+ "refresh_batch_size": 8,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda"
27
+ }
28
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817f7e909f973725af379f1c57af2596b5595793726141bae8b48eee83a34c72
3
+ size 469843990
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3407,
13
+ "activation_dim": 3584,
14
+ "dict_size": 16384,
15
+ "k": 50,
16
+ "device": "cuda",
17
+ "layer": 10,
18
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
19
+ "wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_6",
20
+ "submodule_name": "resid_post_layer_10"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 3584,
24
+ "io": "out",
25
+ "n_ctxs": 122,
26
+ "ctx_len": 2048,
27
+ "refresh_batch_size": 8,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda"
30
+ }
31
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_6/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ada562dfd95422f45268afa181b0db3627356234c443d0880d926120ed829886
3
+ size 469843990
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3407,
13
+ "activation_dim": 3584,
14
+ "dict_size": 16384,
15
+ "k": 80,
16
+ "device": "cuda",
17
+ "layer": 10,
18
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
19
+ "wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_7",
20
+ "submodule_name": "resid_post_layer_10"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 3584,
24
+ "io": "out",
25
+ "n_ctxs": 122,
26
+ "ctx_len": 2048,
27
+ "refresh_batch_size": 8,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda"
30
+ }
31
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_7/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc4ffeffa5ed600ad2ca933dd473b8375c48ad8dae2421fac74ce7154e8a3c2
3
+ size 469843990
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3407,
13
+ "activation_dim": 3584,
14
+ "dict_size": 16384,
15
+ "k": 160,
16
+ "device": "cuda",
17
+ "layer": 10,
18
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
19
+ "wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_8",
20
+ "submodule_name": "resid_post_layer_10"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 3584,
24
+ "io": "out",
25
+ "n_ctxs": 122,
26
+ "ctx_len": 2048,
27
+ "refresh_batch_size": 8,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda"
30
+ }
31
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_8/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51722fe186f504360a1d44c5669f558c3a4b7b5c747c9c399ec5d27e5e6dcec0
3
+ size 469843990
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3407,
13
+ "activation_dim": 3584,
14
+ "dict_size": 16384,
15
+ "k": 320,
16
+ "device": "cuda",
17
+ "layer": 10,
18
+ "lm_name": "Dream-org/Dream-v0-Base-7B",
19
+ "wandb_name": "TopKTrainer-Dream-org/Dream-v0-Base-7B-resid_post_layer_10_trainer_9",
20
+ "submodule_name": "resid_post_layer_10"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 3584,
24
+ "io": "out",
25
+ "n_ctxs": 122,
26
+ "ctx_len": 2048,
27
+ "refresh_batch_size": 8,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda"
30
+ }
31
+ }
saes_Dream-org_Dream-v0-Base-7B_gated_top_k/resid_post_layer_10/trainer_9/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"info": "Diffusion LM detected. AR-only CE/loss-recovered metrics are skipped.", "hyperparameters": {"n_inputs": 200, "context_length": 2048}}