{
  "model": {
    "vocab_size": 3072,
    "max_seq_len": 84,
    "d_model": 384,
    "n_layers": 6,
    "n_heads": 6,
    "ffn_hidden": 1152,
    "dropout": 0.1,
    "pad_id": 0,
    "bos_id": 1,
    "eos_id": 2
  },
  "train": {
    "batch_size": 32,
    "learning_rate": 0.0003,
    "min_lr": 3e-05,
    "weight_decay": 0.1,
    "warmup_steps": 400,
    "max_steps": 12000,
    "eval_interval": 400,
    "save_interval": 1000,
    "grad_clip": 1.0,
    "device": "auto",
    "seed": 42,
    "data_dir": "data",
    "output_dir": "checkpoints"
  }
}