bpbradle commited on
Commit
7c8e5b6
·
verified ·
1 Parent(s): b89c6f2

upload dec/12.5M/62.5Mtok/postprefixlm_pct100.json

Browse files
dec/12.5M/62.5Mtok/postprefixlm_pct100.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "dec",
3
+ "arch": {
4
+ "dim": 256,
5
+ "num_encoder_layers": 10,
6
+ "num_decoder_layers": 5
7
+ },
8
+ "arch_label": "12.5M",
9
+ "non_emb_params": 11804672,
10
+ "pretrain_ckpt": "pct100.pt",
11
+ "sft_objective": "prefixlm",
12
+ "sft_train_loss": 5.69834877858243,
13
+ "sft_train_time_s": 20.571524381637573,
14
+ "sft_n_steps": 95,
15
+ "sft_mfu_pct": 10.746167840152369,
16
+ "sft_tokens_target": 6245580,
17
+ "sft_tokens_actual": 6242304,
18
+ "sft_lr": 0.0002,
19
+ "sft_batch_size": 8,
20
+ "sft_grad_accum": 4,
21
+ "eval_loss_held_out": 5.680119342154884,
22
+ "eval_n_batches": 100,
23
+ "eval_n_valid_tokens": 1494358,
24
+ "eval_elapsed_s": 6.522653341293335,
25
+ "wallclock_s": 33.648579835891724
26
+ }