snedelkoski committed on
Commit
bbee7d7
·
verified ·
1 Parent(s): f0681b6

Upload meta_000765.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. meta_000765.json +6 -6
meta_000765.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "step": 765,
3
- "val_bpb": 0.4475785077800831,
4
  "model_config": {
5
  "sequence_len": 2048,
6
  "vocab_size": 65536,
@@ -10,7 +10,7 @@
10
  "n_embd": 1280
11
  },
12
  "user_config": {
13
- "run": "nanochat-h100-diloco-base-mid-f",
14
  "max_iterations": -1,
15
  "dtype": "bfloat16",
16
  "max_seq_len": 2048,
@@ -23,8 +23,8 @@
23
  "eval_every": 150,
24
  "eval_tokens": 10485760,
25
  "total_batch_size": 524288,
26
- "use_diloco": 1,
27
- "base_diloco": 0,
28
  "diloco_H": 30,
29
  "diloco_outer_lr": 0.8,
30
  "diloco_outer_momentum": 0.9,
@@ -32,6 +32,6 @@
32
  },
33
  "device_batch_size": 32,
34
  "max_seq_len": 2048,
35
- "use_diloco": 1,
36
- "base_diloco": 0
37
  }
 
1
  {
2
  "step": 765,
3
+ "val_bpb": 0.4384375044226997,
4
  "model_config": {
5
  "sequence_len": 2048,
6
  "vocab_size": 65536,
 
10
  "n_embd": 1280
11
  },
12
  "user_config": {
13
+ "run": "nanochat-h100-diloco-base-mid-standard-f",
14
  "max_iterations": -1,
15
  "dtype": "bfloat16",
16
  "max_seq_len": 2048,
 
23
  "eval_every": 150,
24
  "eval_tokens": 10485760,
25
  "total_batch_size": 524288,
26
+ "use_diloco": 0,
27
+ "base_diloco": 1,
28
  "diloco_H": 30,
29
  "diloco_outer_lr": 0.8,
30
  "diloco_outer_momentum": 0.9,
 
32
  },
33
  "device_batch_size": 32,
34
  "max_seq_len": 2048,
35
+ "use_diloco": 0,
36
+ "base_diloco": 1
37
  }