Spaces:
Sleeping
Sleeping
Add debug logging to diagnose GEPA candidate generation issue
Browse files- Added INFO level logging to make_reflective_dataset() to confirm if GEPA is calling it
- Added logging when evaluate() is called with capture_traces=True (training minibatch)
- Added GEPA version check
- Added logging to show GEPA's captured output
- This will help identify why GEPA isn't generating new candidates
- app.py +9 -0
- src/gepa_optimizer/core/optimizer.py +18 -1
- src/gepa_optimizer/core/universal_adapter.py +10 -3
app.py
CHANGED
|
@@ -351,9 +351,18 @@ def safe_optimize(seed_prompt, dataset, model, custom_model="", max_iterations=5
|
|
| 351 |
|
| 352 |
# Run optimization
|
| 353 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
logger.info(f"π Starting optimization with model: {final_model}")
|
| 355 |
logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
|
| 356 |
logger.info(f" Dataset size: {len(dataset)} examples")
|
|
|
|
| 357 |
|
| 358 |
result = quick_optimize_sync(
|
| 359 |
seed_prompt=seed_prompt,
|
|
|
|
| 351 |
|
| 352 |
# Run optimization
|
| 353 |
try:
|
| 354 |
+
# Check GEPA version for debugging
|
| 355 |
+
if BACKEND_AVAILABLE:
|
| 356 |
+
try:
|
| 357 |
+
import gepa
|
| 358 |
+
logger.info(f"π¦ GEPA library version: {getattr(gepa, '__version__', 'unknown')}")
|
| 359 |
+
except Exception as e:
|
| 360 |
+
logger.warning(f"Could not check GEPA version: {e}")
|
| 361 |
+
|
| 362 |
logger.info(f"π Starting optimization with model: {final_model}")
|
| 363 |
logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
|
| 364 |
logger.info(f" Dataset size: {len(dataset)} examples")
|
| 365 |
+
logger.info(f" π GEPA should call: evaluate(capture_traces=True) β make_reflective_dataset() β propose_new_texts()")
|
| 366 |
|
| 367 |
result = quick_optimize_sync(
|
| 368 |
seed_prompt=seed_prompt,
|
src/gepa_optimizer/core/optimizer.py
CHANGED
|
@@ -1008,9 +1008,26 @@ Output the improved prompt directly and only the prompt."""
|
|
| 1008 |
|
| 1009 |
def _run_gepa_with_logging(self, gepa_params: Dict[str, Any], output_buffer: io.StringIO) -> Any:
|
| 1010 |
"""Run GEPA optimization while capturing its output."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1011 |
# Capture GEPA's print statements and logging
|
| 1012 |
with redirect_stdout(output_buffer), redirect_stderr(output_buffer):
|
| 1013 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1014 |
|
| 1015 |
def _log_pareto_front_info(self, gepa_logs: str) -> int: # Return int instead of None
|
| 1016 |
"""Extract and log pareto front information from GEPA logs. Returns max iteration count."""
|
|
|
|
| 1008 |
|
| 1009 |
def _run_gepa_with_logging(self, gepa_params: Dict[str, Any], output_buffer: io.StringIO) -> Any:
|
| 1010 |
"""Run GEPA optimization while capturing its output."""
|
| 1011 |
+
self.logger.info("π Calling gepa.optimize() - GEPA should now:")
|
| 1012 |
+
self.logger.info(" 1. Evaluate seed on validation set")
|
| 1013 |
+
self.logger.info(" 2. For each iteration: evaluate on training minibatch (capture_traces=True)")
|
| 1014 |
+
self.logger.info(" 3. Call make_reflective_dataset() with trajectories")
|
| 1015 |
+
self.logger.info(" 4. Call propose_new_texts() or reflection_lm to generate new candidates")
|
| 1016 |
+
self.logger.info(" 5. Evaluate new candidates and update Pareto front")
|
| 1017 |
+
|
| 1018 |
# Capture GEPA's print statements and logging
|
| 1019 |
with redirect_stdout(output_buffer), redirect_stderr(output_buffer):
|
| 1020 |
+
result = gepa.optimize(**gepa_params)
|
| 1021 |
+
|
| 1022 |
+
# Log GEPA output for debugging
|
| 1023 |
+
gepa_output = output_buffer.getvalue()
|
| 1024 |
+
if gepa_output:
|
| 1025 |
+
self.logger.info("π GEPA Output (captured):")
|
| 1026 |
+
for line in gepa_output.split('\n')[:50]: # First 50 lines
|
| 1027 |
+
if line.strip():
|
| 1028 |
+
self.logger.info(f" GEPA: {line}")
|
| 1029 |
+
|
| 1030 |
+
return result
|
| 1031 |
|
| 1032 |
def _log_pareto_front_info(self, gepa_logs: str) -> int: # Return int instead of None
|
| 1033 |
"""Extract and log pareto front information from GEPA logs. Returns max iteration count."""
|
src/gepa_optimizer/core/universal_adapter.py
CHANGED
|
@@ -241,7 +241,8 @@ Output the improved prompt directly and only the prompt."""
|
|
| 241 |
# We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
|
| 242 |
if capture_traces:
|
| 243 |
dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
|
| 244 |
-
self.logger.
|
|
|
|
| 245 |
# If _is_baseline_evaluation is True, we KNOW this is the validation set
|
| 246 |
elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
|
| 247 |
dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
|
|
@@ -597,8 +598,14 @@ Output the improved prompt directly and only the prompt."""
|
|
| 597 |
π₯ NEW: If hybrid mode is enabled, this method ALSO generates hybrid candidates
|
| 598 |
(GEPA Reflection + LLEGO Operators) and stores them for GEPA to use.
|
| 599 |
"""
|
| 600 |
-
# π₯
|
| 601 |
-
self.logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
|
| 603 |
reflective_dataset = {}
|
| 604 |
system_prompt = candidate.get('system_prompt', '')
|
|
|
|
| 241 |
# We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
|
| 242 |
if capture_traces:
|
| 243 |
dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
|
| 244 |
+
self.logger.info(f"π― evaluate() called with capture_traces=True - this is a TRAINING MINIBATCH")
|
| 245 |
+
self.logger.info(f" Batch size: {len(batch)}, Will generate trajectories for reflection")
|
| 246 |
# If _is_baseline_evaluation is True, we KNOW this is the validation set
|
| 247 |
elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
|
| 248 |
dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
|
|
|
|
| 598 |
π₯ NEW: If hybrid mode is enabled, this method ALSO generates hybrid candidates
|
| 599 |
(GEPA Reflection + LLEGO Operators) and stores them for GEPA to use.
|
| 600 |
"""
|
| 601 |
+
# π₯ DEBUG: Log that this method is being called (CRITICAL for debugging)
|
| 602 |
+
self.logger.info(f"\n{'='*80}")
|
| 603 |
+
self.logger.info(f"π₯ make_reflective_dataset() CALLED BY GEPA")
|
| 604 |
+
self.logger.info(f"{'='*80}")
|
| 605 |
+
self.logger.info(f" Candidate prompt: {candidate.get('system_prompt', '')[:100]}...")
|
| 606 |
+
self.logger.info(f" Eval batch has trajectories: {eval_batch.trajectories is not None and len(eval_batch.trajectories) > 0 if eval_batch.trajectories else False}")
|
| 607 |
+
self.logger.info(f" Eval batch scores: {eval_batch.scores if eval_batch.scores else 'None'}")
|
| 608 |
+
self.logger.info(f" Components to update: {components_to_update}")
|
| 609 |
|
| 610 |
reflective_dataset = {}
|
| 611 |
system_prompt = candidate.get('system_prompt', '')
|