Spaces:
Sleeping
Sleeping
Fix: GEPA not generating candidates - training minibatch was incorrectly cached
ROOT CAUSE:
- When GEPA calls adapter.evaluate(capture_traces=True), it needs trajectories
for reflective mutation to generate new candidates
- The adapter was returning cached results with trajectories=None for training
minibatches because len(batch)=4 >= valset_size=3 was True
- GEPA checks 'if not trajectories: skip', so no new candidates were generated
FIX:
1. When capture_traces=True, ALWAYS treat as training minibatch (dfeedback)
- Never use cache when traces are needed
2. Changed valset detection from '>=' to '==' for exact match
3. Added 'and not capture_traces' check before using cache
src/gepa_optimizer/core/universal_adapter.py
CHANGED
|
@@ -236,13 +236,19 @@ Output the improved prompt directly and only the prompt."""
|
|
| 236 |
# Determine dataset type first (needed for cache check)
|
| 237 |
batch_size_threshold = self._config.batch_size if hasattr(self, '_config') and self._config else 8
|
| 238 |
|
| 239 |
-
# 🔥 CRITICAL FIX: If _is_baseline_evaluation is True, we KNOW this is the validation set
|
| 240 |
-
#
|
| 241 |
-
if hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
|
| 243 |
self.logger.debug(f"🎯 Forced dataset_type to 'dpareto' (baseline evaluation flag is True)")
|
| 244 |
-
elif hasattr(self, '_valset_size') and self._valset_size is not None and len(batch) >= self._valset_size:
|
| 245 |
-
|
|
|
|
| 246 |
elif len(batch) > batch_size_threshold * 1.5:
|
| 247 |
dataset_type = 'dpareto' # Much larger than batch = likely full valset
|
| 248 |
else:
|
|
@@ -250,7 +256,8 @@ Output the improved prompt directly and only the prompt."""
|
|
| 250 |
|
| 251 |
# 🔥 CRITICAL: Check cache to avoid re-evaluating same prompt on Dpareto
|
| 252 |
# This ensures seed prompt is evaluated ONLY ONCE
|
| 253 |
-
if dataset_type == 'dpareto':
|
|
|
|
| 254 |
normalized_prompt = system_prompt.strip().strip('"\'')
|
| 255 |
if normalized_prompt in self._dpareto_evaluated_candidates:
|
| 256 |
existing_score, existing_type, _ = self._dpareto_evaluated_candidates[normalized_prompt]
|
|
|
|
| 236 |
# Determine dataset type first (needed for cache check)
|
| 237 |
batch_size_threshold = self._config.batch_size if hasattr(self, '_config') and self._config else 8
|
| 238 |
|
| 239 |
+
# 🔥 CRITICAL FIX: If capture_traces=True, this is a TRAINING MINIBATCH for reflection
|
| 240 |
+
# GEPA calls with capture_traces=True when it needs trajectories for reflective mutation
|
| 241 |
+
# We must NEVER use cache in this case, otherwise trajectories=None breaks GEPA!
|
| 242 |
+
if capture_traces:
|
| 243 |
+
dataset_type = 'dfeedback' # Training minibatch - need fresh evaluation with trajectories
|
| 244 |
+
self.logger.debug(f"🎯 Forced dataset_type to 'dfeedback' (capture_traces=True)")
|
| 245 |
+
# If _is_baseline_evaluation is True, we KNOW this is the validation set
|
| 246 |
+
elif hasattr(self, '_is_baseline_evaluation') and self._is_baseline_evaluation:
|
| 247 |
dataset_type = 'dpareto' # Baseline is ALWAYS evaluated on validation set
|
| 248 |
self.logger.debug(f"🎯 Forced dataset_type to 'dpareto' (baseline evaluation flag is True)")
|
| 249 |
+
elif hasattr(self, '_valset_size') and self._valset_size is not None and len(batch) == self._valset_size:
|
| 250 |
+
# 🔥 FIX: Use == not >= to avoid misclassifying training minibatches as validation set
|
| 251 |
+
dataset_type = 'dpareto' # EXACT validation set size = Dpareto
|
| 252 |
elif len(batch) > batch_size_threshold * 1.5:
|
| 253 |
dataset_type = 'dpareto' # Much larger than batch = likely full valset
|
| 254 |
else:
|
|
|
|
| 256 |
|
| 257 |
# 🔥 CRITICAL: Check cache to avoid re-evaluating same prompt on Dpareto
|
| 258 |
# This ensures seed prompt is evaluated ONLY ONCE
|
| 259 |
+
# NOTE: Only applies when capture_traces=False (validation set evaluation)
|
| 260 |
+
if dataset_type == 'dpareto' and not capture_traces:
|
| 261 |
normalized_prompt = system_prompt.strip().strip('"\'')
|
| 262 |
if normalized_prompt in self._dpareto_evaluated_candidates:
|
| 263 |
existing_score, existing_type, _ = self._dpareto_evaluated_candidates[normalized_prompt]
|