jacobcd52 committed on
Commit
ff79fea
·
verified ·
1 Parent(s): ae4578d

Upload trainer_0/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_0/config.json +5 -5
trainer_0/config.json CHANGED
@@ -2,8 +2,8 @@
2
  "trainer": {
3
  "trainer_class": "TopKTrainer",
4
  "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0002,
6
- "steps": 12,
7
  "auxk_alpha": 0.03125,
8
  "warmup_steps": 0,
9
  "decay_start": null,
@@ -11,7 +11,7 @@
11
  "threshold_start_step": 1000,
12
  "seed": null,
13
  "activation_dim": 6144,
14
- "dict_size": 16384,
15
  "k": 128,
16
  "device": "cuda:0",
17
  "layer": 4,
@@ -23,9 +23,9 @@
23
  "n_models": 12,
24
  "d_submodule": 512,
25
  "io": "out",
26
- "n_ctxs": 512,
27
  "ctx_len": 128,
28
- "refresh_batch_size": 256,
29
  "out_batch_size": 8192,
30
  "device": "cuda:0",
31
  "rescale_acts": true
 
2
  "trainer": {
3
  "trainer_class": "TopKTrainer",
4
  "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 61035,
7
  "auxk_alpha": 0.03125,
8
  "warmup_steps": 0,
9
  "decay_start": null,
 
11
  "threshold_start_step": 1000,
12
  "seed": null,
13
  "activation_dim": 6144,
14
+ "dict_size": 8192,
15
  "k": 128,
16
  "device": "cuda:0",
17
  "layer": 4,
 
23
  "n_models": 12,
24
  "d_submodule": 512,
25
  "io": "out",
26
+ "n_ctxs": 1024,
27
  "ctx_len": 128,
28
+ "refresh_batch_size": 512,
29
  "out_batch_size": 8192,
30
  "device": "cuda:0",
31
  "rescale_acts": true