# Main configuration file for GRPO training

# Hydra-style defaults list: composes one option from each of the `dataset`,
# `model` and `training` config groups. `_self_` is listed last so that keys
# set in this file take precedence over same-named keys from those groups.
defaults:
  - dataset: default
  - model: qwen2.5-3b
  - training: default
  - _self_

# Output directory for model checkpoints and logs
output_dir: models/grpo

# Resume from checkpoint
# NOTE(review): presumably the SFT checkpoint is the starting point for GRPO
# and the GRPO entry resumes an interrupted GRPO run - confirm against the
# training script. `null` leaves the GRPO checkpoint unset.
resume_from_checkpoint_sft: models/sft/checkpoint-100
resume_from_checkpoint_grpo: null

# NOTE(review): presumably saves the model once before training starts and
# uploads results to the Hugging Face Hub repository named by `hf_repo_id` -
# confirm against the training script.
save_before_training: true
push_to_hub: true
hf_repo_id: "Dat1710/countdown-grpo-qwen2"

# Random seed for reproducibility
seed: 42