File size: 468 Bytes
00db46c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Main configuration file for GRPO training.
#
# The `defaults` list names the config-group options this file is composed
# from. The layout matches a Hydra defaults list.
# NOTE(review): confirm the loader is Hydra before relying on its merge rules.
# List order is significant for composition, so do not reorder entries casually.
defaults:
  # `dataset` group: select the option named `default`.
  - dataset: default
  # `model` group: select the option named `qwen2.5-3b`.
  - model: qwen2.5-3b
  # `training` group: select the option named `default`.
  - training: default
  # `_self_` is listed last; under Hydra's composition rules this means keys
  # defined in this file are merged after, and so override, the groups above.
  - _self_

# Where model checkpoints and logs are written
output_dir: 'models/grpo'

# Checkpoint paths to resume from (SFT and GRPO); null leaves that one unset
resume_from_checkpoint_sft: 'models/sft/checkpoint-100'
resume_from_checkpoint_grpo: null

# NOTE(review): presumably saves the model once before training starts;
# confirm against the training script
save_before_training: true

# Hub upload switch and target repository id
# NOTE(review): presumably a Hugging Face Hub repo; verify in the consumer
push_to_hub: true
hf_repo_id: 'Dat1710/countdown-grpo-qwen2'

# Random seed for reproducibility
seed: 42