{
  "seed": 42,
  "dataset_path": "data/sft/processed",
  "output_dir": "outputs/sft",
  "checkpoint_dir": "checkpoints/sft",
  "init_from": "checkpoints/pretrain_stage2/last.pt",
  "resume_from": null,
  "seq_len": 2048,
  "micro_batch_size": 8,
  "grad_accum_steps": 16,
  "max_steps": 5000,
  "warmup_steps": 200,
  "learning_rate": 0.0005,
  "min_lr": 5e-05,
  "weight_decay": 0.01,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "precision": "bf16",
  "num_workers": 0,
  "log_interval": 10,
  "eval_interval": 100,
  "eval_batches": 50,
  "save_interval": 200,
  "compile_model": false
}