Suhasdev committed on
Commit
5b2a612
·
1 Parent(s): b4d947d

Add debug logging to diagnose GEPA candidate generation issue

Browse files

- Added INFO level logging to make_reflective_dataset() to confirm if GEPA is calling it
- Added logging when evaluate() is called with capture_traces=True (training minibatch)
- Added GEPA version check
- Added logging to show GEPA's captured output
- This will help identify why GEPA isn't generating new candidates

app.py CHANGED
@@ -351,9 +351,18 @@ def safe_optimize(seed_prompt, dataset, model, custom_model="", max_iterations=5
351
 
352
  # Run optimization
353
  try:
 
 
 
 
 
 
 
 
354
  logger.info(f"πŸš€ Starting optimization with model: {final_model}")
355
  logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
356
  logger.info(f" Dataset size: {len(dataset)} examples")
 
357
 
358
  result = quick_optimize_sync(
359
  seed_prompt=seed_prompt,
 
351
 
352
  # Run optimization
353
  try:
354
+ # Check GEPA version for debugging
355
+ if BACKEND_AVAILABLE:
356
+ try:
357
+ import gepa
358
+ logger.info(f"πŸ“¦ GEPA library version: {getattr(gepa, '__version__', 'unknown')}")
359
+ except Exception as e:
360
+ logger.warning(f"Could not check GEPA version: {e}")
361
+
362
  logger.info(f"πŸš€ Starting optimization with model: {final_model}")
363
  logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
364
  logger.info(f" Dataset size: {len(dataset)} examples")
365
+ logger.info(f" πŸ” GEPA should call: evaluate(capture_traces=True) β†’ make_reflective_dataset() β†’ propose_new_texts()")
366
 
367
  result = quick_optimize_sync(
368
  seed_prompt=seed_prompt,
src/gepa_optimizer/core/optimizer.py CHANGED
@@ -1008,9 +1008,26 @@ Output the improved prompt directly and only the prompt."""
1008
 
1009
  def _run_gepa_with_logging(self, gepa_params: Dict[str, Any], output_buffer: io.StringIO) -> Any:
1010
  """Run GEPA optimization while capturing its output."""
 
 
 
 
 
 
 
1011
  # Capture GEPA's print statements and logging
1012
  with redirect_stdout(output_buffer), redirect_stderr(output_buffer):
1013
- return gepa.optimize(**gepa_params)
 
 
 
 
 
 
 
 
 
 
1014
 
1015
  def _log_pareto_front_info(self, gepa_logs: str) -> int: # Return int instead of None
1016
  """Extract and log pareto front information from GEPA logs. Returns max iteration count."""
 
1008
 
1009
  def _run_gepa_with_logging(self, gepa_params: Dict[str, Any], output_buffer: io.StringIO) -> Any:
1010
  """Run GEPA optimization while capturing its output."""
1011
+ self.logger.info("πŸ”„ Calling gepa.optimize() - GEPA should now:")
1012
+ self.logger.info(" 1. Evaluate seed on validation set")
1013
+ self.logger.info(" 2. For each iteration: evaluate on training minibatch (capture_traces=True)")
1014
+ self.logger.info(" 3. Call make_reflective_dataset() with trajectories")
1015
+ self.logger.info(" 4. Call propose_new_texts() or reflection_lm to generate new candidates")
1016
+ self.logger.info(" 5. Evaluate new candidates and update Pareto front")
1017
+
1018
  # Capture GEPA's print statements and logging
1019
  with redirect_stdout(output_buffer), redirect_stderr(output_buffer):
1020
+ result = gepa.optimize(**gepa_params)
1021
+
1022
+ # Log GEPA output for debugging
1023
+ gepa_output = output_buffer.getvalue()
1024
+ if gepa_output:
1025
+ self.logger.info("πŸ“‹ GEPA Output (captured):")
1026
+ for line in gepa_output.split('\n')[:50]: # First 50 lines
1027
+ if line.strip():
1028
+ self.logger.info(f" GEPA: {line}")
1029
+
1030
+ return result
1031
 
1032
  def _log_pareto_front_info(self, gepa_logs: str) -> int: # Return int instead of None
1033
  """Extract and log pareto front information from GEPA logs. Returns max iteration count."""
src/gepa_optimizer/core/universal_adapter.py CHANGED
@@ -241,7 +241,8 @@ Output the improved prompt directly and only the prompt."""
241
  # We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
242
  if capture_traces:
243
  dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
244
- self.logger.debug(f"🎯 Forced dataset_type to 'dfeedback' (capture_traces=True)")
 
245
  # If _is_baseline_evaluation is True, we KNOW this is the validation set
246
  elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
247
  dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
@@ -597,8 +598,14 @@ Output the improved prompt directly and only the prompt."""
597
  πŸ”₯ NEW: If hybrid mode is enabled, this method ALSO generates hybrid candidates
598
  (GEPA Reflection + LLEGO Operators) and stores them for GEPA to use.
599
  """
600
- # πŸ”₯ REMOVED: Excessive diagnostic logs - moved to DEBUG level if needed
601
- self.logger.debug(f"make_reflective_dataset() called - generating feedback and hybrid candidates")
 
 
 
 
 
 
602
 
603
  reflective_dataset = {}
604
  system_prompt = candidate.get('system_prompt', '')
 
241
  # We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
242
  if capture_traces:
243
  dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
244
+ self.logger.info(f"🎯 evaluate() called with capture_traces=True - this is a TRAINING MINIBATCH")
245
+ self.logger.info(f" Batch size: {len(batch)}, Will generate trajectories for reflection")
246
  # If _is_baseline_evaluation is True, we KNOW this is the validation set
247
  elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
248
  dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
 
598
  πŸ”₯ NEW: If hybrid mode is enabled, this method ALSO generates hybrid candidates
599
  (GEPA Reflection + LLEGO Operators) and stores them for GEPA to use.
600
  """
601
+ # πŸ”₯ DEBUG: Log that this method is being called (CRITICAL for debugging)
602
+ self.logger.info(f"\n{'='*80}")
603
+ self.logger.info(f"πŸ”₯ make_reflective_dataset() CALLED BY GEPA")
604
+ self.logger.info(f"{'='*80}")
605
+ self.logger.info(f" Candidate prompt: {candidate.get('system_prompt', '')[:100]}...")
606
+ self.logger.info(f" Eval batch has trajectories: {eval_batch.trajectories is not None and len(eval_batch.trajectories) > 0 if eval_batch.trajectories else False}")
607
+ self.logger.info(f" Eval batch scores: {eval_batch.scores if eval_batch.scores else 'None'}")
608
+ self.logger.info(f" Components to update: {components_to_update}")
609
 
610
  reflective_dataset = {}
611
  system_prompt = candidate.get('system_prompt', '')