---
# Training configuration.
# NOTE(review): reconstructed from a pipe-mangled (markdown-table) dump in which
# all indentation was lost. `checkpoint` and `grpo` are restored as top-level
# siblings of `training` — confirm against the consuming loader's schema.

# Core optimizer / schedule hyperparameters.
training:
  batch_size: 2
  gradient_accumulation: 16  # effective batch size = batch_size * 16
  # Exponent floats are written with an explicit decimal point (3.0e-4, not
  # 3e-4): YAML 1.1 resolvers such as PyYAML treat a bare `3e-4` as a string.
  learning_rate: 3.0e-4
  min_lr: 3.0e-5             # floor for the LR schedule
  warmup_steps: 2000
  total_steps: 250000
  weight_decay: 0.1
  beta1: 0.9                 # Adam first-moment decay
  beta2: 0.95                # Adam second-moment decay
  grad_clip: 1.0             # gradient-norm clipping threshold

# Checkpoint retention and persistence policy.
checkpoint:
  save_every_steps: 5000
  keep_last_n: 3             # most recent checkpoints retained
  keep_best_n: 2             # best-metric checkpoints retained
  max_space_gb: 50.0         # disk budget for the checkpoint directory
  save_optimizer: true
  save_scheduler: true
  save_experts_only: false
  checkpoint_dir: "./outputs/checkpoints"
  resume_from: null          # path to a checkpoint to resume from, or null

# GRPO (Group Relative Policy Optimization) fine-tuning settings.
grpo:
  group_size: 8              # completions sampled per prompt group
  epsilon: 0.2               # clipping range for the policy ratio
  beta: 0.04                 # KL-penalty coefficient
  learning_rate: 1.0e-6      # decimal point required for float parsing (see above)