Claude committed on
Commit
b1685a6
·
unverified ·
1 Parent(s): 384df8f

Increase training scale: more steps, episodes, and SFT epochs

Browse files

- num_training_steps: 15 → 30
- episodes_per_candidate: 5 → 8
- sft_epochs: 2 → 3
- eval_episodes: 10 → 15

https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V

Files changed (1) hide show
  1. config.yaml +4 -4
config.yaml CHANGED
@@ -20,13 +20,13 @@ grpo:
20
 
21
  # SFT warm start — prime the model on seed prompts before GRPO
22
  sft_warm_start: true # Enable SFT warm start phase
23
- sft_epochs: 2 # Epochs over seed prompts
24
  sft_lr: 1.0e-4 # Learning rate for SFT phase
25
 
26
  # GRPO training loop
27
- num_training_steps: 15 # Number of policy updates (GRPO iterations)
28
  num_candidates: 4 # Candidate prompts per step (GRPO group size, min=2)
29
- episodes_per_candidate: 5 # Customers each candidate talks to
30
  learning_rate: 2.0e-5 # Lower LR for stability at scale
31
  max_prompt_length: 512 # Max tokens for generated system prompt (hard cap during GRPO)
32
 
@@ -104,7 +104,7 @@ reward:
104
  report:
105
  enabled: true
106
  output_dir: "/workspace/output/reports"
107
- eval_episodes: 10 # Episodes per checkpoint evaluation
108
  example_customers: 5 # Example conversations in report
109
 
110
 
 
20
 
21
  # SFT warm start — prime the model on seed prompts before GRPO
22
  sft_warm_start: true # Enable SFT warm start phase
23
+ sft_epochs: 3 # Epochs over seed prompts
24
  sft_lr: 1.0e-4 # Learning rate for SFT phase
25
 
26
  # GRPO training loop
27
+ num_training_steps: 30 # Number of policy updates (GRPO iterations)
28
  num_candidates: 4 # Candidate prompts per step (GRPO group size, min=2)
29
+ episodes_per_candidate: 8 # Customers each candidate talks to
30
  learning_rate: 2.0e-5 # Lower LR for stability at scale
31
  max_prompt_length: 512 # Max tokens for generated system prompt (hard cap during GRPO)
32
 
 
104
  report:
105
  enabled: true
106
  output_dir: "/workspace/output/reports"
107
+ eval_episodes: 15 # Episodes per checkpoint evaluation
108
  example_customers: 5 # Example conversations in report
109
 
110