jacobcd52 committed on
Commit
fbc92fc
·
verified ·
1 Parent(s): aa9e2d3

Upload trainer_0/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_0/config.json +5 -5
trainer_0/config.json CHANGED
@@ -2,7 +2,7 @@
2
  "trainer": {
3
  "trainer_class": "TopKTrainer",
4
  "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001414213562373095,
6
  "steps": 12,
7
  "auxk_alpha": 0.03125,
8
  "warmup_steps": 0,
@@ -10,8 +10,8 @@
10
  "threshold_beta": 0.999,
11
  "threshold_start_step": 1000,
12
  "seed": null,
13
- "activation_dim": 1024,
14
- "dict_size": 32768,
15
  "k": 128,
16
  "device": "cuda:2",
17
  "layer": 4,
@@ -20,8 +20,8 @@
20
  "submodule_name": null
21
  },
22
  "buffer": {
23
- "n_models": 2,
24
- "d_submodule": 512,
25
  "io": "out",
26
  "n_ctxs": 1024,
27
  "ctx_len": 128,
 
2
  "trainer": {
3
  "trainer_class": "TopKTrainer",
4
  "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.00010690449676496975,
6
  "steps": 12,
7
  "auxk_alpha": 0.03125,
8
  "warmup_steps": 0,
 
10
  "threshold_beta": 0.999,
11
  "threshold_start_step": 1000,
12
  "seed": null,
13
+ "activation_dim": 896,
14
+ "dict_size": 57344,
15
  "k": 128,
16
  "device": "cuda:2",
17
  "layer": 4,
 
20
  "submodule_name": null
21
  },
22
  "buffer": {
23
+ "n_models": 1,
24
+ "d_submodule": 896,
25
  "io": "out",
26
  "n_ctxs": 1024,
27
  "ctx_len": 128,