Spaces:
Running on T4
Running on T4
Claude committed on
Increase training scale: more steps, episodes, and SFT epochs
Browse files
- num_training_steps: 15 → 30
- episodes_per_candidate: 5 → 8
- sft_epochs: 2 → 3
- eval_episodes: 10 → 15
https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V
- config.yaml +4 -4
config.yaml
CHANGED
|
@@ -20,13 +20,13 @@ grpo:
|
|
| 20 |
|
| 21 |
# SFT warm start — prime the model on seed prompts before GRPO
|
| 22 |
sft_warm_start: true # Enable SFT warm start phase
|
| 23 |
-
sft_epochs: 2
|
| 24 |
sft_lr: 1.0e-4 # Learning rate for SFT phase
|
| 25 |
|
| 26 |
# GRPO training loop
|
| 27 |
-
num_training_steps: 15
|
| 28 |
num_candidates: 4 # Candidate prompts per step (GRPO group size, min=2)
|
| 29 |
-
episodes_per_candidate: 5
|
| 30 |
learning_rate: 2.0e-5 # Lower LR for stability at scale
|
| 31 |
max_prompt_length: 512 # Max tokens for generated system prompt (hard cap during GRPO)
|
| 32 |
|
|
@@ -104,7 +104,7 @@ reward:
|
|
| 104 |
report:
|
| 105 |
enabled: true
|
| 106 |
output_dir: "/workspace/output/reports"
|
| 107 |
-
eval_episodes: 10
|
| 108 |
example_customers: 5 # Example conversations in report
|
| 109 |
|
| 110 |
|
|
|
|
| 20 |
|
| 21 |
# SFT warm start — prime the model on seed prompts before GRPO
|
| 22 |
sft_warm_start: true # Enable SFT warm start phase
|
| 23 |
+
sft_epochs: 3 # Epochs over seed prompts
|
| 24 |
sft_lr: 1.0e-4 # Learning rate for SFT phase
|
| 25 |
|
| 26 |
# GRPO training loop
|
| 27 |
+
num_training_steps: 30 # Number of policy updates (GRPO iterations)
|
| 28 |
num_candidates: 4 # Candidate prompts per step (GRPO group size, min=2)
|
| 29 |
+
episodes_per_candidate: 8 # Customers each candidate talks to
|
| 30 |
learning_rate: 2.0e-5 # Lower LR for stability at scale
|
| 31 |
max_prompt_length: 512 # Max tokens for generated system prompt (hard cap during GRPO)
|
| 32 |
|
|
|
|
| 104 |
report:
|
| 105 |
enabled: true
|
| 106 |
output_dir: "/workspace/output/reports"
|
| 107 |
+
eval_episodes: 15 # Episodes per checkpoint evaluation
|
| 108 |
example_customers: 5 # Example conversations in report
|
| 109 |
|
| 110 |
|