File size: 584 Bytes
d1d73f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
{
  "vocab_size": 24000,
  "pad_id": 0,
  "eos_id": 3,
  "profile": "base",
  "max_seq_len": 256,
  "min_target_tokens": 48,
  "d_model": 512,
  "nhead": 8,
  "num_layers": 8,
  "dim_feedforward": 2048,
  "dropout": 0.1,
  "rope_base": 10000.0,
  "gradient_checkpointing": false,
  "batch_size": 192,
  "epochs": 4,
  "lr": 0.0003,
  "min_lr_ratio": 0.05,
  "warmup_steps": 2000,
  "weight_decay": 0.1,
  "grad_accum_steps": 2,
  "clip_grad_norm": 1.0,
  "label_smoothing": 0.05,
  "val_rows": 10000,
  "val_fraction": 0.01,
  "seed": 42,
  "amp": "bf16",
  "style_tag": "<natural>"
}