AwesomeInterpretability committed on
Commit
2582ed9
·
verified ·
1 Parent(s): 895e244

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/ae.pt +3 -0
  2. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/config.json +29 -0
  3. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/eval_results.json +1 -0
  4. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/ae.pt +3 -0
  5. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/config.json +29 -0
  6. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/eval_results.json +1 -0
  7. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/ae.pt +3 -0
  8. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/config.json +32 -0
  9. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/eval_results.json +1 -0
  10. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/ae.pt +3 -0
  11. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/config.json +32 -0
  12. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/eval_results.json +1 -0
  13. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/ae.pt +3 -0
  14. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/config.json +29 -0
  15. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/eval_results.json +1 -0
  16. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/ae.pt +3 -0
  17. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/config.json +29 -0
  18. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/eval_results.json +1 -0
  19. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/ae.pt +3 -0
  20. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/config.json +29 -0
  21. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/eval_results.json +1 -0
  22. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/ae.pt +3 -0
  23. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/config.json +29 -0
  24. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/eval_results.json +1 -0
  25. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/ae.pt +3 -0
  26. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/config.json +29 -0
  27. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/eval_results.json +1 -0
  28. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/ae.pt +3 -0
  29. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/config.json +29 -0
  30. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/eval_results.json +1 -0
  31. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/ae.pt +3 -0
  32. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/config.json +29 -0
  33. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/eval_results.json +1 -0
  34. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/ae.pt +3 -0
  35. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/config.json +29 -0
  36. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/eval_results.json +1 -0
  37. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/ae.pt +3 -0
  38. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/config.json +29 -0
  39. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/eval_results.json +1 -0
  40. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/ae.pt +3 -0
  41. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/config.json +29 -0
  42. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/eval_results.json +1 -0
  43. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/ae.pt +3 -0
  44. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/config.json +32 -0
  45. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/eval_results.json +1 -0
  46. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/ae.pt +3 -0
  47. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/config.json +32 -0
  48. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/eval_results.json +1 -0
  49. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/ae.pt +3 -0
  50. trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/config.json +32 -0
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b71df4e2751d9da58c880b85e0b708c4a7b3bc84dc51f0aa460db5da4ab8de0
3
+ size 469843624
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "steps": 244140,
12
+ "decay_start": 195312,
13
+ "seed": 3407,
14
+ "device": "cuda:0",
15
+ "layer": 20,
16
+ "lm_name": "google/gemma-2-9b",
17
+ "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_0",
18
+ "submodule_name": "resid_post_layer_20"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 89.09, "l1_loss": 1378.96, "l0": 1085.1874340820314, "frac_variance_explained": 0.77041015625, "cossim": 0.90994140625, "l2_ratio": 0.884453125, "relative_reconstruction_bias": 0.99240234375, "frac_alive": 0.764892578125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f6dfc6fccec855b0270832531a9092659f1076e701178c16051317d43059e38
3
+ size 469843624
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "steps": 244140,
12
+ "decay_start": 195312,
13
+ "seed": 3407,
14
+ "device": "cuda:0",
15
+ "layer": 20,
16
+ "lm_name": "google/gemma-2-9b",
17
+ "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_1",
18
+ "submodule_name": "resid_post_layer_20"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 96.165, "l1_loss": 1069.92, "l0": 748.5599877929687, "frac_variance_explained": 0.7313671875, "cossim": 0.89447265625, "l2_ratio": 0.87078125, "relative_reconstruction_bias": 0.9985546875, "frac_alive": 0.76300048828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2311b433e9c33d67c2e3b336d3634c49325690fefc0df64638e6358d4f97e55
3
+ size 469843990
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1792,
13
+ "seed": 3407,
14
+ "activation_dim": 3584,
15
+ "dict_size": 16384,
16
+ "k": 520,
17
+ "device": "cuda:0",
18
+ "layer": 20,
19
+ "lm_name": "google/gemma-2-9b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_10",
21
+ "submodule_name": "resid_post_layer_20"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 3584,
25
+ "io": "out",
26
+ "n_ctxs": 122,
27
+ "ctx_len": 2048,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_10/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 77.4025, "l1_loss": 3337.92, "l0": 709.4781958007812, "frac_variance_explained": 0.828125, "cossim": 0.92984375, "l2_ratio": 0.92970703125, "relative_reconstruction_bias": 1.0009375, "frac_alive": 0.85882568359375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c685f19dc9ea10c04c4bcba75596d4bc709b917f0620688ed6e2a74018f1788a
3
+ size 469843990
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1792,
13
+ "seed": 3407,
14
+ "activation_dim": 3584,
15
+ "dict_size": 16384,
16
+ "k": 820,
17
+ "device": "cuda:0",
18
+ "layer": 20,
19
+ "lm_name": "google/gemma-2-9b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_11",
21
+ "submodule_name": "resid_post_layer_20"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 3584,
25
+ "io": "out",
26
+ "n_ctxs": 122,
27
+ "ctx_len": 2048,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_11/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 66.6125, "l1_loss": 4846.72, "l0": 1101.4039184570313, "frac_variance_explained": 0.87087890625, "cossim": 0.9459375, "l2_ratio": 0.94115234375, "relative_reconstruction_bias": 0.99490234375, "frac_alive": 0.69659423828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd356475673932a2cdd2501c9007f13ea394fcd87d1c2e7e6c61b1b18a647512
3
+ size 469909279
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 20,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_12",
14
+ "submodule_name": "resid_post_layer_20",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 50
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_12/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 113.1225, "l1_loss": 670.74, "l0": 62.107958984375, "frac_variance_explained": 0.63556640625, "cossim": 0.8490625, "l2_ratio": 0.85751953125, "relative_reconstruction_bias": 1.00828125, "frac_alive": 0.794921875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5105ec73026d976d765b82a928c2b97845f89cf91a77dc14f36a024825a9e4b5
3
+ size 469909279
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 20,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_13",
14
+ "submodule_name": "resid_post_layer_20",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 80
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_13/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 106.7075, "l1_loss": 850.78, "l0": 105.3335107421875, "frac_variance_explained": 0.67470703125, "cossim": 0.86701171875, "l2_ratio": 0.8744921875, "relative_reconstruction_bias": 1.0080078125, "frac_alive": 0.87646484375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a8e5a9e1ab58d0587b48cd9a49fb6b752ddd4fe3c5b6d67164a5f988bb5540
3
+ size 469909279
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 20,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_14",
14
+ "submodule_name": "resid_post_layer_20",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 160
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_14/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 96.0775, "l1_loss": 1289.84, "l0": 228.18613525390626, "frac_variance_explained": 0.73494140625, "cossim": 0.89296875, "l2_ratio": 0.89806640625, "relative_reconstruction_bias": 1.007421875, "frac_alive": 0.9716796875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cef0eac303a5fbd346e1f6a37026541a71d41d5e61636dd738ea64d624c2806
3
+ size 469909279
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 20,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_15",
14
+ "submodule_name": "resid_post_layer_20",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 320
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_15/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 83.7375, "l1_loss": 1982.44, "l0": 475.87285888671875, "frac_variance_explained": 0.79875, "cossim": 0.91837890625, "l2_ratio": 0.92162109375, "relative_reconstruction_bias": 1.0037109375, "frac_alive": 0.9964599609375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37230e38854bdb9718c7a88373e19fbc1a482e7201231d54e98370f3b0ba3a05
3
+ size 469909279
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 20,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_16",
14
+ "submodule_name": "resid_post_layer_20",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 520
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_16/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 72.4525, "l1_loss": 2695.04, "l0": 763.3169091796875, "frac_variance_explained": 0.8498046875, "cossim": 0.93865234375, "l2_ratio": 0.93953125, "relative_reconstruction_bias": 1.0021484375, "frac_alive": 0.97613525390625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c50e326d6f0e8e8c75085f5c8abbbdfb1d4223ccf7aa5cd6d506f18ba98405
3
+ size 469909279
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 3407,
8
+ "activation_dim": 3584,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 20,
12
+ "lm_name": "google/gemma-2-9b",
13
+ "wandb_name": "JumpReluTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_17",
14
+ "submodule_name": "resid_post_layer_20",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 820
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_17/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 88.475, "l1_loss": 3319.2, "l0": 1094.02822265625, "frac_variance_explained": 0.82087890625, "cossim": 0.915390625, "l2_ratio": 1.0532421875, "relative_reconstruction_bias": 1.0951953125, "frac_alive": 0.8267822265625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c13d63e813e9b1128c0b4f82b7787f4415f3e5240174d527113c47794c48c216
3
+ size 469843624
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "steps": 244140,
12
+ "decay_start": 195312,
13
+ "seed": 3407,
14
+ "device": "cuda:0",
15
+ "layer": 20,
16
+ "lm_name": "google/gemma-2-9b",
17
+ "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_2",
18
+ "submodule_name": "resid_post_layer_20"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 103.8625, "l1_loss": 782.32, "l0": 452.529501953125, "frac_variance_explained": 0.68568359375, "cossim": 0.87564453125, "l2_ratio": 0.85255859375, "relative_reconstruction_bias": 1.00189453125, "frac_alive": 0.76043701171875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6c7741e89ecb1f21ad5fc5d9fa2399190e5ff543b5f1e1d21176b91408b9c1
3
+ size 469843624
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "steps": 244140,
12
+ "decay_start": 195312,
13
+ "seed": 3407,
14
+ "device": "cuda:0",
15
+ "layer": 20,
16
+ "lm_name": "google/gemma-2-9b",
17
+ "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_3",
18
+ "submodule_name": "resid_post_layer_20"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 112.78, "l1_loss": 524.97, "l0": 226.3606103515625, "frac_variance_explained": 0.6291796875, "cossim": 0.8526171875, "l2_ratio": 0.8326171875, "relative_reconstruction_bias": 1.01302734375, "frac_alive": 0.7562255859375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55839739e1dde5dfb250880c8737bdebe0b3502eca2f4fecd6330e8720a42a1e
3
+ size 469843624
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "steps": 244140,
12
+ "decay_start": 195312,
13
+ "seed": 3407,
14
+ "device": "cuda:0",
15
+ "layer": 20,
16
+ "lm_name": "google/gemma-2-9b",
17
+ "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_4",
18
+ "submodule_name": "resid_post_layer_20"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 118.235, "l1_loss": 401.73, "l0": 140.89248291015625, "frac_variance_explained": 0.58724609375, "cossim": 0.83705078125, "l2_ratio": 0.8162890625, "relative_reconstruction_bias": 1.0190234375, "frac_alive": 0.75213623046875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aabc742dd6f851593f23b713d949249ef667211e71c7bbb2307600767b87b1e9
3
+ size 469843624
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainerAprilUpdate",
5
+ "activation_dim": 3584,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "steps": 244140,
12
+ "decay_start": 195312,
13
+ "seed": 3407,
14
+ "device": "cuda:0",
15
+ "layer": 20,
16
+ "lm_name": "google/gemma-2-9b",
17
+ "wandb_name": "StandardTrainerNew-google/gemma-2-9b-resid_post_layer_20_trainer_5",
18
+ "submodule_name": "resid_post_layer_20"
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 3584,
22
+ "io": "out",
23
+ "n_ctxs": 122,
24
+ "ctx_len": 2048,
25
+ "refresh_batch_size": 4,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 125.69, "l1_loss": 276.76, "l0": 73.5276318359375, "frac_variance_explained": 0.53919921875, "cossim": 0.8143359375, "l2_ratio": 0.79361328125, "relative_reconstruction_bias": 1.0208203125, "frac_alive": 0.74652099609375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d29287a996d1e1b1c8d5ddc34e70791f28ad8ea22a2f5b129adc553e499cd3
3
+ size 469843990
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1792,
13
+ "seed": 3407,
14
+ "activation_dim": 3584,
15
+ "dict_size": 16384,
16
+ "k": 50,
17
+ "device": "cuda:0",
18
+ "layer": 20,
19
+ "lm_name": "google/gemma-2-9b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_6",
21
+ "submodule_name": "resid_post_layer_20"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 3584,
25
+ "io": "out",
26
+ "n_ctxs": 122,
27
+ "ctx_len": 2048,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_6/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 113.03, "l1_loss": 643.72, "l0": 59.92653564453125, "frac_variance_explained": 0.63427734375, "cossim": 0.84900390625, "l2_ratio": 0.85697265625, "relative_reconstruction_bias": 1.0082421875, "frac_alive": 0.93328857421875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e527bcd86993559a7d07cc2d3a5b9ed83df35ac0f3652b0980566f878f633f0e
3
+ size 469843990
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1792,
13
+ "seed": 3407,
14
+ "activation_dim": 3584,
15
+ "dict_size": 16384,
16
+ "k": 80,
17
+ "device": "cuda:0",
18
+ "layer": 20,
19
+ "lm_name": "google/gemma-2-9b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_7",
21
+ "submodule_name": "resid_post_layer_20"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 3584,
25
+ "io": "out",
26
+ "n_ctxs": 122,
27
+ "ctx_len": 2048,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_7/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 106.885, "l1_loss": 847.28, "l0": 101.578388671875, "frac_variance_explained": 0.67421875, "cossim": 0.86607421875, "l2_ratio": 0.87265625, "relative_reconstruction_bias": 1.00671875, "frac_alive": 0.9200439453125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f648ea19eb580d53fa01f59aea7db3b93e1fbc4a264bae9328fc391a54d380a7
3
+ size 469843990
trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_8/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 1792,
13
+ "seed": 3407,
14
+ "activation_dim": 3584,
15
+ "dict_size": 16384,
16
+ "k": 160,
17
+ "device": "cuda:0",
18
+ "layer": 20,
19
+ "lm_name": "google/gemma-2-9b",
20
+ "wandb_name": "BatchTopKTrainer-google/gemma-2-9b-resid_post_layer_20_trainer_8",
21
+ "submodule_name": "resid_post_layer_20"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 3584,
25
+ "io": "out",
26
+ "n_ctxs": 122,
27
+ "ctx_len": 2048,
28
+ "refresh_batch_size": 4,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:0"
31
+ }
32
+ }