Claude committed on
Commit
b1d7ca2
·
unverified ·
1 Parent(s): d831d96

Reduce training defaults for fast iteration: steps=10, episodes=7

Browse files

- steps: 50→10, episodes: 20→7 for faster training runs
- eval-episodes: 30→5, example-customers: 10→3 for faster reports
- Fix post-training eval: replace the hardcoded num_episodes=50 with the value of the --episodes flag

https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V

Files changed (1) hide show
  1. layer1/train.py +5 -5
layer1/train.py CHANGED
@@ -117,7 +117,7 @@ def run_train(args):
117
  print(best_prompt)
118
 
119
  # Evaluate the trained prompt
120
- result = evaluator.evaluate_prompt(best_prompt, num_episodes=50)
121
  print(f"\nEvaluation: mean_reward={result['mean_reward']:.1f}")
122
 
123
  if args.report:
@@ -158,8 +158,8 @@ def main():
158
  default="mock",
159
  help="Training mode: train (GPU), mock (CPU), eval (single prompt)",
160
  )
161
- parser.add_argument("--episodes", type=int, default=20, help="Episodes per evaluation")
162
- parser.add_argument("--steps", type=int, default=50, help="GRPO training steps (train mode)")
163
  parser.add_argument("--output", type=str, default=None, help="Save results to JSON")
164
  parser.add_argument("--output-dir", type=str, default="./grpo_output", help="Training output dir")
165
  parser.add_argument("--hf-token", type=str, default=None, help="HuggingFace API token")
@@ -174,9 +174,9 @@ def main():
174
  help="Directory for report output")
175
  parser.add_argument("--log-dir", type=str, default="./logs",
176
  help="Directory for training logs")
177
- parser.add_argument("--eval-episodes", type=int, default=30,
178
  help="Episodes per checkpoint for report evaluation")
179
- parser.add_argument("--example-customers", type=int, default=10,
180
  help="Number of example customers in report")
181
  args = parser.parse_args()
182
 
 
117
  print(best_prompt)
118
 
119
  # Evaluate the trained prompt
120
+ result = evaluator.evaluate_prompt(best_prompt, num_episodes=args.episodes)
121
  print(f"\nEvaluation: mean_reward={result['mean_reward']:.1f}")
122
 
123
  if args.report:
 
158
  default="mock",
159
  help="Training mode: train (GPU), mock (CPU), eval (single prompt)",
160
  )
161
+ parser.add_argument("--episodes", type=int, default=7, help="Episodes per evaluation")
162
+ parser.add_argument("--steps", type=int, default=10, help="GRPO training steps (train mode)")
163
  parser.add_argument("--output", type=str, default=None, help="Save results to JSON")
164
  parser.add_argument("--output-dir", type=str, default="./grpo_output", help="Training output dir")
165
  parser.add_argument("--hf-token", type=str, default=None, help="HuggingFace API token")
 
174
  help="Directory for report output")
175
  parser.add_argument("--log-dir", type=str, default="./logs",
176
  help="Directory for training logs")
177
+ parser.add_argument("--eval-episodes", type=int, default=5,
178
  help="Episodes per checkpoint for report evaluation")
179
+ parser.add_argument("--example-customers", type=int, default=3,
180
  help="Number of example customers in report")
181
  args = parser.parse_args()
182