jacobcd52 committed on
Commit
c20a1bd
·
verified ·
1 Parent(s): 36e9df5

Upload trainer_0/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_0/config.json +8 -5
trainer_0/config.json CHANGED
@@ -7,15 +7,18 @@
7
  "lr": 1e-06,
8
  "l1_penalty": 0.01,
9
  "warmup_steps": 0,
10
- "sparsity_warmup_steps": 2000,
11
- "steps": 61035,
12
  "decay_start": null,
13
  "seed": null,
14
  "device": "cuda:2",
15
  "layer": 7,
16
  "lm_name": "blah",
17
  "wandb_name": "StandardTrainerAprilUpdate",
18
- "submodule_name": null
 
 
 
19
  },
20
  "buffer": {
21
  "n_models": 2,
@@ -23,8 +26,8 @@
23
  "io": "out",
24
  "n_ctxs": 512,
25
  "ctx_len": 256,
26
- "refresh_batch_size": 512,
27
- "out_batch_size": 8192,
28
  "device": "cuda:2",
29
  "rescale_acts": false
30
  }
 
7
  "lr": 1e-06,
8
  "l1_penalty": 0.01,
9
  "warmup_steps": 0,
10
+ "sparsity_warmup_steps": 0,
11
+ "steps": 30517,
12
  "decay_start": null,
13
  "seed": null,
14
  "device": "cuda:2",
15
  "layer": 7,
16
  "lm_name": "blah",
17
  "wandb_name": "StandardTrainerAprilUpdate",
18
+ "submodule_name": null,
19
+ "frac_features_shared": 0.1,
20
+ "shared_l1_penalty": 0.002,
21
+ "num_shared_features": 5734
22
  },
23
  "buffer": {
24
  "n_models": 2,
 
26
  "io": "out",
27
  "n_ctxs": 512,
28
  "ctx_len": 256,
29
+ "refresh_batch_size": 1024,
30
+ "out_batch_size": 16384,
31
  "device": "cuda:2",
32
  "rescale_acts": false
33
  }