Spaces:
Running on T4
Running on T4
Claude committed on
Update docstrings to reflect LLM-only training pipeline
Browse files

MockPromptOptimizer and mock mode now clearly document that they
use real LLM inference (Llama 3.1 8B) on both agent and customer
sides — only the prompt selection is "mock" (hand-picked vs GRPO).
https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V
- layer1/grpo_trainer.py +4 -3
- layer1/train.py +1 -1
layer1/grpo_trainer.py
CHANGED
|
@@ -275,10 +275,11 @@ class GRPOPromptTrainer:
|
|
| 275 |
|
| 276 |
class MockPromptOptimizer:
|
| 277 |
"""
|
| 278 |
-
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
|
|
|
| 282 |
"""
|
| 283 |
|
| 284 |
CANDIDATE_PROMPTS = [
|
|
|
|
| 275 |
|
| 276 |
class MockPromptOptimizer:
|
| 277 |
"""
|
| 278 |
+
Evaluates hand-written candidate prompts using real LLM agent + customer.
|
| 279 |
|
| 280 |
+
Tests the pipeline end-to-end with actual Llama 3.1 8B on both sides.
|
| 281 |
+
The prompt selection is "mock" (hand-picked instead of GRPO-generated),
|
| 282 |
+
but evaluation uses real LLM inference to measure actual agent behavior.
|
| 283 |
"""
|
| 284 |
|
| 285 |
CANDIDATE_PROMPTS = [
|
layer1/train.py
CHANGED
|
@@ -5,7 +5,7 @@ Usage:
|
|
| 5 |
# Full GPU training (requires Colab/GPU + train deps)
|
| 6 |
python -m layer1.train --mode train --steps 10
|
| 7 |
|
| 8 |
-
#
|
| 9 |
python -m layer1.train --mode mock --episodes 20
|
| 10 |
|
| 11 |
# Evaluate a single prompt
|
|
|
|
| 5 |
# Full GPU training (requires Colab/GPU + train deps)
|
| 6 |
python -m layer1.train --mode train --steps 10
|
| 7 |
|
| 8 |
+
# Mock optimization (evaluates hand-written prompts via real LLM agent)
|
| 9 |
python -m layer1.train --mode mock --episodes 20
|
| 10 |
|
| 11 |
# Evaluate a single prompt
|