# Main configuration file for GRPO training

# Config groups composed into this file. The `_self_` entry is listed last;
# NOTE(review): under Hydra's defaults-list rules this means the values in
# this file override those from the groups above — confirm the loader is Hydra.
defaults:
  - dataset: default
  - model: qwen2.5-3b
  - training: default
  - _self_

# Output directory for model checkpoints and logs
output_dir: models/grpo

# Resume from checkpoint
# One path per training stage; `null` means no checkpoint path is set.
resume_from_checkpoint_sft: models/sft/checkpoint-100
resume_from_checkpoint_grpo: null

# NOTE(review): presumably saves the model once before training starts —
# confirm against the training script.
save_before_training: true

# NOTE(review): presumably uploads results to the repository named in
# `hf_repo_id` — confirm against the training script.
push_to_hub: true
hf_repo_id: "Dat1710/countdown-grpo-qwen2"

# Random seed for reproducibility
seed: 42