Spaces:
Running on T4
Running on T4
Claude committed on
Reduce training defaults for fast iteration: steps=10, episodes=7
Browse files
- steps: 50→10, episodes: 20→7 for faster training runs
- eval-episodes: 30→5, example-customers: 10→3 for faster reports
- Fix hardcoded num_episodes=50 post-training eval to use --episodes flag
https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V
- layer1/train.py +5 -5
layer1/train.py
CHANGED
|
@@ -117,7 +117,7 @@ def run_train(args):
|
|
| 117 |
print(best_prompt)
|
| 118 |
|
| 119 |
# Evaluate the trained prompt
|
| 120 |
-
result = evaluator.evaluate_prompt(best_prompt, num_episodes=50)
|
| 121 |
print(f"\nEvaluation: mean_reward={result['mean_reward']:.1f}")
|
| 122 |
|
| 123 |
if args.report:
|
|
@@ -158,8 +158,8 @@ def main():
|
|
| 158 |
default="mock",
|
| 159 |
help="Training mode: train (GPU), mock (CPU), eval (single prompt)",
|
| 160 |
)
|
| 161 |
-
parser.add_argument("--episodes", type=int, default=20, help="Episodes per evaluation")
|
| 162 |
-
parser.add_argument("--steps", type=int, default=50, help="GRPO training steps (train mode)")
|
| 163 |
parser.add_argument("--output", type=str, default=None, help="Save results to JSON")
|
| 164 |
parser.add_argument("--output-dir", type=str, default="./grpo_output", help="Training output dir")
|
| 165 |
parser.add_argument("--hf-token", type=str, default=None, help="HuggingFace API token")
|
|
@@ -174,9 +174,9 @@ def main():
|
|
| 174 |
help="Directory for report output")
|
| 175 |
parser.add_argument("--log-dir", type=str, default="./logs",
|
| 176 |
help="Directory for training logs")
|
| 177 |
-
parser.add_argument("--eval-episodes", type=int, default=30,
|
| 178 |
help="Episodes per checkpoint for report evaluation")
|
| 179 |
-
parser.add_argument("--example-customers", type=int, default=10,
|
| 180 |
help="Number of example customers in report")
|
| 181 |
args = parser.parse_args()
|
| 182 |
|
|
|
|
| 117 |
print(best_prompt)
|
| 118 |
|
| 119 |
# Evaluate the trained prompt
|
| 120 |
+
result = evaluator.evaluate_prompt(best_prompt, num_episodes=args.episodes)
|
| 121 |
print(f"\nEvaluation: mean_reward={result['mean_reward']:.1f}")
|
| 122 |
|
| 123 |
if args.report:
|
|
|
|
| 158 |
default="mock",
|
| 159 |
help="Training mode: train (GPU), mock (CPU), eval (single prompt)",
|
| 160 |
)
|
| 161 |
+
parser.add_argument("--episodes", type=int, default=7, help="Episodes per evaluation")
|
| 162 |
+
parser.add_argument("--steps", type=int, default=10, help="GRPO training steps (train mode)")
|
| 163 |
parser.add_argument("--output", type=str, default=None, help="Save results to JSON")
|
| 164 |
parser.add_argument("--output-dir", type=str, default="./grpo_output", help="Training output dir")
|
| 165 |
parser.add_argument("--hf-token", type=str, default=None, help="HuggingFace API token")
|
|
|
|
| 174 |
help="Directory for report output")
|
| 175 |
parser.add_argument("--log-dir", type=str, default="./logs",
|
| 176 |
help="Directory for training logs")
|
| 177 |
+
parser.add_argument("--eval-episodes", type=int, default=5,
|
| 178 |
help="Episodes per checkpoint for report evaluation")
|
| 179 |
+
parser.add_argument("--example-customers", type=int, default=3,
|
| 180 |
help="Number of example customers in report")
|
| 181 |
args = parser.parse_args()
|
| 182 |
|