bpbradle commited on
Commit
4fb9c3d
·
verified ·
1 Parent(s): 68009d8

upload dd/25M/1Btok/postprefixlm_postprefixlm_pct100.json

Browse files
dd/25M/1Btok/postprefixlm_postprefixlm_pct100.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "dd",
3
+ "arch": {
4
+ "dim": 448,
5
+ "num_encoder_layers": 8,
6
+ "num_decoder_layers": 4
7
+ },
8
+ "arch_label": "25M",
9
+ "non_emb_params": 28913920,
10
+ "pretrain_ckpt": "postprefixlm_pct100.pt",
11
+ "sft_objective": "prefixlm",
12
+ "sft_train_loss": 3.6299335887785955,
13
+ "sft_train_time_s": 655.3580076694489,
14
+ "sft_n_steps": 1525,
15
+ "sft_mfu_pct": 3.5651847922978264,
16
+ "sft_tokens_target": 100000000,
17
+ "sft_tokens_actual": 99999744,
18
+ "sft_lr": 2e-05,
19
+ "sft_batch_size": 4,
20
+ "sft_grad_accum": 8,
21
+ "eval_loss_held_out": 3.724371613551832,
22
+ "eval_n_batches": 100,
23
+ "eval_n_valid_tokens": 749488,
24
+ "eval_elapsed_s": 10.915366649627686,
25
+ "wallclock_s": 671.0772655010223
26
+ }