{
  "vocab_size": 24000,
  "pad_id": 0,
  "eos_id": 3,
  "profile": "base",
  "max_seq_len": 256,
  "min_target_tokens": 48,
  "d_model": 512,
  "nhead": 8,
  "num_layers": 8,
  "dim_feedforward": 2048,
  "dropout": 0.1,
  "rope_base": 10000.0,
  "gradient_checkpointing": false,
  "batch_size": 192,
  "epochs": 4,
  "lr": 0.0003,
  "min_lr_ratio": 0.05,
  "warmup_steps": 2000,
  "weight_decay": 0.1,
  "grad_accum_steps": 2,
  "clip_grad_norm": 1.0,
  "label_smoothing": 0.05,
  "val_rows": 10000,
  "val_fraction": 0.01,
  "seed": 42,
  "amp": "bf16",
  "style_tag": "<natural>"
}