Suhasdev committed on
Commit
5b2a612
·
1 Parent(s): b4d947d

Add debug logging to diagnose GEPA candidate generation issue

Browse files

- Added INFO level logging to make_reflective_dataset() to confirm if GEPA is calling it
- Added logging when evaluate() is called with capture_traces=True (training minibatch)
- Added GEPA version check
- Added logging to show GEPA's captured output
- This will help identify why GEPA isn't generating new candidates

app.py CHANGED
@@ -351,9 +351,18 @@ def safe_optimize(seed_prompt, dataset, model, custom_model="", max_iterations=5
351
 
352
  # Run optimization
353
  try:
 
 
 
 
 
 
 
 
354
  logger.info(f"πŸš€ Starting optimization with model: {final_model}")
355
  logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
356
  logger.info(f" Dataset size: {len(dataset)} examples")
 
357
 
358
  result = quick_optimize_sync(
359
  seed_prompt=seed_prompt,
 
351
 
352
  # Run optimization
353
  try:
354
+ # Check GEPA version for debugging
355
+ if BACKEND_AVAILABLE:
356
+ try:
357
+ import gepa
358
+ logger.info(f"πŸ“¦ GEPA library version: {getattr(gepa, '__version__', 'unknown')}")
359
+ except Exception as e:
360
+ logger.warning(f"Could not check GEPA version: {e}")
361
+
362
  logger.info(f"πŸš€ Starting optimization with model: {final_model}")
363
  logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
364
  logger.info(f" Dataset size: {len(dataset)} examples")
365
+ logger.info(f" πŸ” GEPA should call: evaluate(capture_traces=True) β†’ make_reflective_dataset() β†’ propose_new_texts()")
366
 
367
  result = quick_optimize_sync(
368
  seed_prompt=seed_prompt,
src/gepa_optimizer/core/optimizer.py CHANGED
@@ -1008,9 +1008,26 @@ Output the improved prompt directly and only the prompt."""
1008
 
1009
  def _run_gepa_with_logging(self, gepa_params: Dict[str, Any], output_buffer: io.StringIO) -> Any:
1010
  """Run GEPA optimization while capturing its output."""
 
 
 
 
 
 
 
1011
  # Capture GEPA's print statements and logging
1012
  with redirect_stdout(output_buffer), redirect_stderr(output_buffer):
1013
- return gepa.optimize(**gepa_params)
 
 
 
 
 
 
 
 
 
 
1014
 
1015
  def _log_pareto_front_info(self, gepa_logs: str) -> int: # Return int instead of None
1016
  """Extract and log pareto front information from GEPA logs. Returns max iteration count."""
 
1008
 
1009
  def _run_gepa_with_logging(self, gepa_params: Dict[str, Any], output_buffer: io.StringIO) -> Any:
1010
  """Run GEPA optimization while capturing its output."""
1011
+ self.logger.info("πŸ”„ Calling gepa.optimize() - GEPA should now:")
1012
+ self.logger.info(" 1. Evaluate seed on validation set")
1013
+ self.logger.info(" 2. For each iteration: evaluate on training minibatch (capture_traces=True)")
1014
+ self.logger.info(" 3. Call make_reflective_dataset() with trajectories")
1015
+ self.logger.info(" 4. Call propose_new_texts() or reflection_lm to generate new candidates")
1016
+ self.logger.info(" 5. Evaluate new candidates and update Pareto front")
1017
+
1018
  # Capture GEPA's print statements and logging
1019
  with redirect_stdout(output_buffer), redirect_stderr(output_buffer):
1020
+ result = gepa.optimize(**gepa_params)
1021
+
1022
+ # Log GEPA output for debugging
1023
+ gepa_output = output_buffer.getvalue()
1024
+ if gepa_output:
1025
+ self.logger.info("πŸ“‹ GEPA Output (captured):")
1026
+ for line in gepa_output.split('\n')[:50]: # First 50 lines
1027
+ if line.strip():
1028
+ self.logger.info(f" GEPA: {line}")
1029
+
1030
+ return result
1031
 
1032
  def _log_pareto_front_info(self, gepa_logs: str) -> int: # Return int instead of None
1033
  """Extract and log pareto front information from GEPA logs. Returns max iteration count."""
src/gepa_optimizer/core/universal_adapter.py CHANGED
@@ -241,7 +241,8 @@ Output the improved prompt directly and only the prompt."""
241
  # We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
242
  if capture_traces:
243
  dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
244
- self.logger.debug(f"🎯 Forced dataset_type to 'dfeedback' (capture_traces=True)")
 
245
  # If _is_baseline_evaluation is True, we KNOW this is the validation set
246
  elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
247
  dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
@@ -597,8 +598,14 @@ Output the improved prompt directly and only the prompt."""
597
  πŸ”₯ NEW: If hybrid mode is enabled, this method ALSO generates hybrid candidates
598
  (GEPA Reflection + LLEGO Operators) and stores them for GEPA to use.
599
  """
600
- # πŸ”₯ REMOVED: Excessive diagnostic logs - moved to DEBUG level if needed
601
- self.logger.debug(f"make_reflective_dataset() called - generating feedback and hybrid candidates")
 
 
 
 
 
 
602
 
603
  reflective_dataset = {}
604
  system_prompt = candidate.get('system_prompt', '')
 
241
  # We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
242
  if capture_traces:
243
  dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
244
+ self.logger.info(f"🎯 evaluate() called with capture_traces=True - this is a TRAINING MINIBATCH")
245
+ self.logger.info(f" Batch size: {len(batch)}, Will generate trajectories for reflection")
246
  # If _is_baseline_evaluation is True, we KNOW this is the validation set
247
  elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
248
  dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
 
598
  πŸ”₯ NEW: If hybrid mode is enabled, this method ALSO generates hybrid candidates
599
  (GEPA Reflection + LLEGO Operators) and stores them for GEPA to use.
600
  """
601
+ # πŸ”₯ DEBUG: Log that this method is being called (CRITICAL for debugging)
602
+ self.logger.info(f"\n{'='*80}")
603
+ self.logger.info(f"πŸ”₯ make_reflective_dataset() CALLED BY GEPA")
604
+ self.logger.info(f"{'='*80}")
605
+ self.logger.info(f" Candidate prompt: {candidate.get('system_prompt', '')[:100]}...")
606
+ self.logger.info(f" Eval batch has trajectories: {eval_batch.trajectories is not None and len(eval_batch.trajectories) > 0 if eval_batch.trajectories else False}")
607
+ self.logger.info(f" Eval batch scores: {eval_batch.scores if eval_batch.scores else 'None'}")
608
+ self.logger.info(f" Components to update: {components_to_update}")
609
 
610
  reflective_dataset = {}
611
  system_prompt = candidate.get('system_prompt', '')