Upload trainer_0/config.json with huggingface_hub
Browse files
- trainer_0/config.json +5 -5
trainer_0/config.json
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
"trainer": {
|
| 3 |
"trainer_class": "TopKTrainer",
|
| 4 |
"dict_class": "AutoEncoderTopK",
|
| 5 |
-
"lr": 0.
|
| 6 |
-
"steps":
|
| 7 |
"auxk_alpha": 0.03125,
|
| 8 |
"warmup_steps": 0,
|
| 9 |
"decay_start": null,
|
|
@@ -11,7 +11,7 @@
|
|
| 11 |
"threshold_start_step": 1000,
|
| 12 |
"seed": null,
|
| 13 |
"activation_dim": 6144,
|
| 14 |
-
"dict_size":
|
| 15 |
"k": 128,
|
| 16 |
"device": "cuda:0",
|
| 17 |
"layer": 4,
|
|
@@ -23,9 +23,9 @@
|
|
| 23 |
"n_models": 12,
|
| 24 |
"d_submodule": 512,
|
| 25 |
"io": "out",
|
| 26 |
-
"n_ctxs":
|
| 27 |
"ctx_len": 128,
|
| 28 |
-
"refresh_batch_size":
|
| 29 |
"out_batch_size": 8192,
|
| 30 |
"device": "cuda:0",
|
| 31 |
"rescale_acts": true
|
|
|
|
| 2 |
"trainer": {
|
| 3 |
"trainer_class": "TopKTrainer",
|
| 4 |
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 0.000282842712474619,
|
| 6 |
+
"steps": 61035,
|
| 7 |
"auxk_alpha": 0.03125,
|
| 8 |
"warmup_steps": 0,
|
| 9 |
"decay_start": null,
|
|
|
|
| 11 |
"threshold_start_step": 1000,
|
| 12 |
"seed": null,
|
| 13 |
"activation_dim": 6144,
|
| 14 |
+
"dict_size": 8192,
|
| 15 |
"k": 128,
|
| 16 |
"device": "cuda:0",
|
| 17 |
"layer": 4,
|
|
|
|
| 23 |
"n_models": 12,
|
| 24 |
"d_submodule": 512,
|
| 25 |
"io": "out",
|
| 26 |
+
"n_ctxs": 1024,
|
| 27 |
"ctx_len": 128,
|
| 28 |
+
"refresh_batch_size": 512,
|
| 29 |
"out_batch_size": 8192,
|
| 30 |
"device": "cuda:0",
|
| 31 |
"rescale_acts": true
|