{ "vocab_size": 24000, "pad_id": 0, "eos_id": 3, "profile": "base", "max_seq_len": 256, "min_target_tokens": 48, "d_model": 512, "nhead": 8, "num_layers": 8, "dim_feedforward": 2048, "dropout": 0.1, "rope_base": 10000.0, "gradient_checkpointing": false, "batch_size": 192, "epochs": 4, "lr": 0.0003, "min_lr_ratio": 0.05, "warmup_steps": 2000, "weight_decay": 0.1, "grad_accum_steps": 2, "clip_grad_norm": 1.0, "label_smoothing": 0.05, "val_rows": 10000, "val_fraction": 0.01, "seed": 42, "amp": "bf16", "style_tag": "" }