Spaces:

Suhasdev
/

Universal-prompt-Optimizer

Sleeping

App Files Files Community

Universal-prompt-Optimizer / src /gepa_optimizer /utils /pareto_logger.py

Suhasdev

Fix duplicate seed prompts in Pareto front - add deduplication checks

74d4bea 3 months ago

raw

history blame contribute delete

24.8 kB

	"""
	Pareto Front Logger - Tracks candidate comparisons and Pareto front updates
	"""

	from typing import Dict, List, Optional
	from collections import defaultdict
	import logging

	# Module-level logger named after this module (via __name__); the
	# ParetoLogger class below emits all of its output through it.
	logger = logging.getLogger(__name__)

	class ParetoLogger:
	    """Track candidate evaluations and maintain a single-objective Pareto front.

	    Each candidate is a prompt with one scalar fitness score, so "A dominates
	    B" simply means ``score(A) > score(B)``; candidates with equal scores do
	    not dominate each other and may coexist in the front.

	    Seed prompts (type ``'seed'``) define the baseline ``f(S₀)``. They are put
	    into the front without a dominance check when logged individually and can
	    later be evicted by a better candidate. Every other candidate must score
	    strictly above the baseline before it is considered at all.
	    """

	    # Base candidate type -> (display label, notation) used in log output.
	    _TYPE_INFO = {
	        'seed': ('🌱 Seed Prompt', 'S₀'),
	        'gepa_reflection': ('📝 GEPA Reflection Candidate', 'Sᵣ'),
	        'llego_crossover': ('🔀 LLEGO Crossover Offspring', 'Oₓₒ'),
	        'llego_mutation': ('🎲 LLEGO Mutation Offspring', 'Oₘᵤₜ'),
	        'unknown': ('🔄 Unknown Candidate', 'S'),
	    }
	    # Numbered variants such as 'llego_crossover1' collapse onto these bases.
	    _LLEGO_BASE_TYPES = ('llego_crossover', 'llego_mutation')
	    _DEFAULT_NOTATION = 'S'
	    _PREVIEW_CHARS = 150
	    _SCORE_TIE_TOLERANCE = 1e-6

	    def __init__(self):
	        # Every logged evaluation: dicts with keys prompt/score/type/dataset.
	        self.candidates_evaluated = []
	        # Current front: dicts with keys prompt/score/type/notation, kept
	        # sorted by score, best first.
	        self.pareto_front = []
	        # f(S₀); None until a seed is logged on 'dpareto' or set_baseline()
	        # is called.
	        self.baseline_score = None

	    # ------------------------------------------------------------------
	    # Small shared helpers
	    # ------------------------------------------------------------------

	    @staticmethod
	    def _normalize_prompt(prompt) -> str:
	        """Return the prompt stripped of surrounding whitespace and quotes."""
	        return (prompt or '').strip().strip('"\'')

	    @classmethod
	    def _base_type(cls, candidate_type) -> str:
	        """Collapse numbered LLEGO variants onto their base type name."""
	        if not candidate_type:
	            return 'unknown'
	        for base in cls._LLEGO_BASE_TYPES:
	            if candidate_type.startswith(base):
	                return base
	        return candidate_type

	    @classmethod
	    def _notation_for(cls, candidate_type) -> str:
	        """Return the short notation (S₀, Sᵣ, Oₓₒ, Oₘᵤₜ or S) for a type."""
	        info = cls._TYPE_INFO.get(cls._base_type(candidate_type))
	        return info[1] if info else cls._DEFAULT_NOTATION

	    @classmethod
	    def _notation_of(cls, candidate: Dict) -> str:
	        """Return a candidate dict's own notation, deriving it from its type if absent."""
	        return candidate.get('notation') or cls._notation_for(candidate.get('type'))

	    def _front_summary(self) -> str:
	        """Render the front as ``P = {n1, n2, ...}`` using entry notations."""
	        notations = ', '.join(
	            entry.get('notation', self._DEFAULT_NOTATION) for entry in self.pareto_front
	        )
	        return 'P = {' + notations + '}'

	    def _prompt_in_front(self, prompt, only_type: Optional[str] = None) -> bool:
	        """Tell whether a prompt (after normalisation) is already in the front.

	        When ``only_type`` is given, only front entries of that type count.
	        """
	        wanted = self._normalize_prompt(prompt)
	        for entry in self.pareto_front:
	            if only_type is not None and entry.get('type') != only_type:
	                continue
	            if self._normalize_prompt(entry.get('prompt', '')) == wanted:
	                return True
	        return False

	    # ------------------------------------------------------------------
	    # Single-candidate path
	    # ------------------------------------------------------------------

	    def log_candidate_evaluation(self, prompt: str, score: float, candidate_type: str, dataset_type: str):
	        """Record one evaluation; if it was on 'dpareto', offer it to the front.

	        Args:
	            prompt: Candidate prompt text.
	            score: Fitness score obtained by the prompt.
	            candidate_type: e.g. 'seed', 'gepa_reflection', 'llego_crossover1'.
	            dataset_type: Name of the dataset split; only the exact value
	                'dpareto' triggers a Pareto front check.
	        """
	        self.candidates_evaluated.append({
	            'prompt': prompt,
	            'score': score,
	            'type': candidate_type,
	            'dataset': dataset_type,
	        })

	        if dataset_type == 'dpareto':
	            self._check_pareto_update(prompt, score, candidate_type)

	    def _check_pareto_update(self, prompt: str, score: float, candidate_type: str):
	        """Decide whether one candidate enters the Pareto front.

	        Rules, in order:
	        1. A seed is added as the baseline (once per distinct prompt).
	        2. Any other candidate needs ``score > baseline_score``; without a
	           baseline it is rejected.
	        3. Front entries with a strictly lower score are removed.
	        4. The candidate is added only if no remaining entry scores strictly
	           higher and its prompt is not already in the front.
	        """
	        cand_notation = self._notation_for(candidate_type)

	        logger.info("\n" + "═" * 80)
	        logger.info(f"📊 PARETO FRONT P ANALYSIS - Evaluating {cand_notation}")
	        logger.info("═" * 80)
	        logger.info(f"\n 📊 Evaluating: {cand_notation} with f({cand_notation}) = {score:.4f}")

	        if candidate_type == 'seed':
	            self._register_seed(prompt, score, cand_notation)
	            return

	        if not self._meets_baseline(score, cand_notation):
	            return

	        evicted = self._evict_dominated(score, cand_notation)

	        # Entries still in the front that beat the candidate. Checking this
	        # even after an eviction keeps a dominated candidate out of the front.
	        dominators = [entry for entry in self.pareto_front if entry['score'] > score]
	        if dominators:
	            for entry in dominators:
	                dom_notation = entry.get('notation', self._DEFAULT_NOTATION)
	                logger.info(f"\n ❌ {cand_notation} is DOMINATED by {dom_notation}:")
	                logger.info(f" f({dom_notation}) = {entry['score']:.4f}")
	                logger.info(f" f({cand_notation}) = {score:.4f}")
	                logger.info(f" f({dom_notation}) > f({cand_notation}) → DOMINATED")
	                logger.info(f" Difference: {score - entry['score']:.4f}")
	            logger.info(f"\n ❌ NOT ADDED to Pareto Front P (dominated)")
	            self._display_pareto_front()
	            return

	        # Same prompt already present: it can only be an equal-score entry
	        # here (lower ones were evicted, higher ones rejected us above).
	        if self._prompt_in_front(prompt):
	            logger.info(f"\n ⚠️ {cand_notation} is already in Pareto Front P (duplicate detected)")
	            logger.info(f" {self._front_summary()}")
	            self._display_pareto_front()
	            return

	        tied = [
	            entry.get('notation', self._DEFAULT_NOTATION)
	            for entry in self.pareto_front
	            if abs(entry['score'] - score) < self._SCORE_TIE_TOLERANCE
	        ]

	        self.pareto_front.append({
	            'prompt': prompt,
	            'score': score,
	            'type': candidate_type,
	            'notation': cand_notation,
	        })
	        self.pareto_front.sort(key=lambda entry: entry['score'], reverse=True)

	        if evicted:
	            logger.info(f"\n ✅ ADDED to Pareto Front P")
	        else:
	            logger.info(f"\n ✅ ADDED to Pareto Front P (non-dominated)")
	            if tied:
	                logger.info(f" f({cand_notation}) = {score:.4f} (same score as {', '.join(tied)})")
	            else:
	                logger.info(f" {cand_notation} is non-dominated → kept in P")
	        logger.info(f" {self._front_summary()}")

	        self._display_pareto_front()

	    def _register_seed(self, prompt: str, score: float, cand_notation: str):
	        """Add a seed prompt to the front as baseline, skipping exact repeats."""
	        if self._prompt_in_front(prompt, only_type='seed'):
	            logger.info(f"\n ⚠️ {cand_notation} is already in Pareto Front P (duplicate detected)")
	            logger.info(f" Skipping duplicate seed prompt addition")
	            logger.info(f" {self._front_summary()}")
	            return

	        logger.info(f"\n ✅ {cand_notation} is seed prompt - always added as baseline")

	        # Safety net: normally the caller has already set the baseline.
	        if self.baseline_score is None:
	            self.baseline_score = score
	            logger.info(f" 💡 Setting baseline: f(S₀) = {score:.4f}")

	        # Seeds bypass the dominance check on purpose.
	        self.pareto_front.append({
	            'prompt': prompt,
	            'score': score,
	            'type': 'seed',
	            'notation': cand_notation,
	        })
	        self.pareto_front.sort(key=lambda entry: entry['score'], reverse=True)

	        logger.info(f"\n ✅ ADDED to Pareto Front P (baseline)")
	        logger.info(f" {self._front_summary()}")
	        self._display_pareto_front()

	    def _meets_baseline(self, score: float, cand_notation: str) -> bool:
	        """Log and return whether a non-seed score is strictly above the baseline."""
	        baseline = self.baseline_score
	        if baseline is None:
	            logger.error(f"\n ❌ CRITICAL ERROR: Baseline score not set!")
	            logger.error(f" Cannot evaluate {cand_notation} without baseline f(S₀)")
	            logger.error(f" 💡 Seed prompt must be evaluated on Dpareto first")
	            logger.error(f" 💡 Rejecting candidate to maintain correctness")
	            return False

	        if score > baseline:
	            logger.info(f"\n ✅ {cand_notation} meets baseline requirement:")
	            logger.info(f" f(S₀) = {baseline:.4f} (baseline)")
	            logger.info(f" f({cand_notation}) = {score:.4f}")
	            logger.info(f" f({cand_notation}) > f(S₀) → Can be added to Pareto front")
	            logger.info(f" Improvement over baseline: +{score - baseline:.4f}")
	            return True

	        logger.info(f"\n ❌ {cand_notation} does NOT meet baseline requirement:")
	        logger.info(f" f(S₀) = {baseline:.4f} (baseline)")
	        logger.info(f" f({cand_notation}) = {score:.4f}")
	        logger.info(f" f({cand_notation}) ≤ f(S₀) → NOT ADDED to Pareto front")
	        logger.info(f" 💡 Only candidates better than baseline can enter Pareto front")
	        logger.info(f" 💡 Difference: {score - baseline:.4f} (needs to be > 0)")
	        return False

	    def _evict_dominated(self, score: float, cand_notation: str) -> List[Dict]:
	        """Remove front entries scoring strictly below ``score``; return them."""
	        survivors = []
	        evicted = []
	        for position, entry in enumerate(self.pareto_front, start=1):
	            entry_score = entry['score']
	            if score > entry_score:
	                entry_notation = entry.get('notation', self._DEFAULT_NOTATION)
	                logger.info(f"\n ✅ {cand_notation} DOMINATES P{position}:")
	                logger.info(f" f(P{position}) = {entry_score:.4f}")
	                logger.info(f" f({cand_notation}) = {score:.4f}")
	                logger.info(f" f({cand_notation}) > f({entry_notation}) → DOMINANCE")
	                logger.info(f" Improvement: +{score - entry_score:.4f}")
	                logger.info(f" ➡️ Removing {entry_notation} from Pareto front P (dominated by {cand_notation})")
	                evicted.append(entry)
	            else:
	                survivors.append(entry)
	        # Slice-assign so anyone holding a reference to the list sees the update.
	        self.pareto_front[:] = survivors
	        return evicted

	    # ------------------------------------------------------------------
	    # Reporting
	    # ------------------------------------------------------------------

	    def _display_pareto_front(self):
	        """Log the current Pareto front, one entry at a time, plus a notation legend."""
	        logger.info(f"\n📋 CURRENT PARETO FRONT P (Size: |P| = {len(self.pareto_front)}):")
	        logger.info("─" * 80)

	        if not self.pareto_front:
	            logger.info(" P = {} (Empty - no candidates added yet)")
	            logger.info(" 💡 NOTATION: P = Pareto front (non-dominated solutions)")
	            return

	        front_notations = [c.get('notation', self._DEFAULT_NOTATION) for c in self.pareto_front]
	        logger.info(f" {self._front_summary()}")

	        for candidate in self.pareto_front:
	            notation = candidate.get('notation', self._DEFAULT_NOTATION)
	            cand_type = candidate.get('type', 'unknown')
	            # Unrecognised types fall back to their raw name as the label.
	            type_label, type_notation = self._TYPE_INFO.get(
	                self._base_type(cand_type), (f'🔄 {cand_type}', notation)
	            )
	            # Prefer the entry's own notation unless it is just the generic 'S'.
	            display_notation = notation if notation != self._DEFAULT_NOTATION else type_notation

	            prompt_text = candidate['prompt']
	            ellipsis = '...' if len(prompt_text) > self._PREVIEW_CHARS else ''
	            logger.info(f"\n {display_notation}: {type_label}")
	            logger.info(f" f({display_notation}) = {candidate['score']:.4f}")
	            logger.info(
	                f" Prompt ({len(prompt_text)} chars): "
	                f"{prompt_text[:self._PREVIEW_CHARS]}{ellipsis}"
	            )

	        logger.info(f"\n 💡 NOTATION EXPLANATION:")
	        logger.info(f" P = Pareto front (set of non-dominated solutions)")
	        logger.info(f" S₀ = Seed prompt (baseline)")
	        logger.info(f" Sᵣ = GEPA Reflection candidate")
	        logger.info(f" Oₓₒ = LLEGO Crossover offspring (combines parents)")
	        logger.info(f" Oₘᵤₜ = LLEGO Mutation offspring (explores variations)")
	        logger.info(f" f({', '.join(front_notations[:3])}) = Fitness scores of candidates in Pareto front")
	        logger.info("─" * 80)

	    def set_baseline(self, score: float):
	        """Set the baseline score f(S₀) used to filter non-seed candidates.

	        Seed entries already in the front are annotated with the value under
	        the 'baseline_score' key. The front is sorted best-first, so the seed
	        is located by its type rather than assumed to sit at index 0.
	        """
	        self.baseline_score = score
	        for entry in self.pareto_front:
	            if entry.get('type') == 'seed':
	                entry['baseline_score'] = score

	    # ------------------------------------------------------------------
	    # Batch path
	    # ------------------------------------------------------------------

	    def batch_update_pareto_front(self, candidates_with_scores: List[Dict]) -> List[Dict]:
	        """Update the Pareto front with several candidates in one operation.

	        Steps:
	        0. Drop candidates whose normalised prompt repeats an earlier one in
	           the batch.
	        1. Keep seeds not yet in the front, and non-seeds with
	           ``score > baseline_score``.
	        2. Keep only the candidates not beaten by another batch member.
	        3. Remove front entries beaten by those candidates, then add each
	           candidate that is neither beaten by a remaining entry nor already
	           present in the front.

	        Args:
	            candidates_with_scores: Dicts with keys 'prompt' (str),
	                'score' (float), 'type' (str) and optionally 'notation'
	                (derived from the type when missing).

	        Returns:
	            The input dicts that were added to the front. Empty when the
	            input is empty, the baseline is unset, or nothing qualified.
	        """
	        if not candidates_with_scores:
	            return []

	        logger.info("\n" + "═" * 80)
	        logger.info(f"🔥 BATCH PARETO FRONT UPDATE - Processing {len(candidates_with_scores)} candidates")
	        logger.info("═" * 80)

	        # Step 0: de-duplicate the batch by prompt text (first occurrence wins).
	        seen_prompts = set()
	        unique = []
	        for cand in candidates_with_scores:
	            key = self._normalize_prompt(cand.get('prompt', ''))
	            if key in seen_prompts:
	                logger.info(f" ⚠️ Skipping duplicate candidate: {self._notation_of(cand)} (prompt already in batch)")
	                continue
	            seen_prompts.add(key)
	            unique.append(cand)

	        if len(unique) < len(candidates_with_scores):
	            logger.info(f" 📊 Deduplicated: {len(candidates_with_scores)} → {len(unique)} candidates")

	        # Step 1: baseline filter.
	        baseline = self.baseline_score
	        if baseline is None:
	            logger.error("❌ Baseline score not set - cannot perform batch update")
	            logger.error(" 💡 Seed prompt must be evaluated on Dpareto first")
	            return []

	        filtered = []
	        for cand in unique:
	            score = cand.get('score', 0.0)
	            notation = self._notation_of(cand)

	            if cand.get('type', 'unknown') == 'seed':
	                # Seeds skip the baseline comparison but must not repeat.
	                if self._prompt_in_front(cand.get('prompt', ''), only_type='seed'):
	                    logger.info(f" ⚠️ Seed prompt already in Pareto front - skipping duplicate")
	                else:
	                    filtered.append(cand)
	                continue

	            if score > baseline:
	                filtered.append(cand)
	                logger.info(f" ✅ {notation} passes baseline: f={score:.4f} > f(S₀)={baseline:.4f}")
	            else:
	                logger.info(f" ❌ {notation} fails baseline: f={score:.4f} ≤ f(S₀)={baseline:.4f}")

	        if not filtered:
	            logger.info(f"\n ❌ No candidates pass baseline filter (baseline: {baseline:.4f})")
	            logger.info(" 💡 All candidates are worse than or equal to seed prompt")
	            return []

	        logger.info(f"\n 📊 After baseline filter: {len(filtered)}/{len(unique)} candidates remain")

	        # Step 2: with one objective, a batch member is dominated exactly when
	        # it scores below the batch leader; ties with the leader survive.
	        ranked = sorted(filtered, key=lambda c: c.get('score', 0.0), reverse=True)
	        leader = ranked[0]
	        leader_score = leader.get('score', 0.0)
	        leader_notation = self._notation_of(leader)

	        non_dominated_batch = []
	        for cand in ranked:
	            cand_score = cand.get('score', 0.0)
	            cand_notation = self._notation_of(cand)
	            if leader_score > cand_score:
	                logger.info(
	                    f" ❌ {cand_notation} dominated by {leader_notation} in batch: "
	                    f"f({leader_notation})={leader_score:.4f} > f({cand_notation})={cand_score:.4f}"
	                )
	            else:
	                non_dominated_batch.append(cand)
	                logger.info(f" ✅ {cand_notation} is non-dominated in batch: f={cand_score:.4f}")

	        logger.info(f"\n 📊 After batch dominance check: {len(non_dominated_batch)}/{len(filtered)} non-dominated candidates")

	        # Step 3a: drop front entries beaten by any surviving batch member.
	        survivors = []
	        candidates_to_remove = []
	        for front_cand in self.pareto_front:
	            front_score = front_cand.get('score', 0.0)
	            beater = next(
	                (c for c in non_dominated_batch if c.get('score', 0.0) > front_score),
	                None,
	            )
	            if beater is None:
	                survivors.append(front_cand)
	                continue
	            front_notation = front_cand.get('notation', self._DEFAULT_NOTATION)
	            beater_notation = self._notation_of(beater)
	            candidates_to_remove.append(front_cand)
	            logger.info(
	                f" ➡️ {front_notation} will be removed (dominated by {beater_notation}): "
	                f"f({front_notation})={front_score:.4f} < f({beater_notation})={beater.get('score', 0.0):.4f}"
	            )
	        self.pareto_front[:] = survivors

	        # Step 3b: add batch members that the remaining front does not beat.
	        added_to_front = []
	        for new_cand in non_dominated_batch:
	            new_score = new_cand.get('score', 0.0)
	            new_type = new_cand.get('type', 'unknown')
	            new_prompt = new_cand.get('prompt', '')
	            new_notation = self._notation_of(new_cand)

	            blocker = next(
	                (f for f in self.pareto_front if f.get('score', 0.0) > new_score),
	                None,
	            )
	            if blocker is not None:
	                blocker_notation = blocker.get('notation', self._DEFAULT_NOTATION)
	                logger.info(
	                    f" ❌ {new_notation} dominated by existing {blocker_notation}: "
	                    f"f({blocker_notation})={blocker.get('score', 0.0):.4f} > f({new_notation})={new_score:.4f}"
	                )
	                continue

	            if self._prompt_in_front(new_prompt):
	                logger.info(f" ⚠️ {new_notation} already in Pareto front - skipping duplicate")
	                continue

	            self.pareto_front.append({
	                'prompt': new_prompt,
	                'score': new_score,
	                'type': new_type,
	                'notation': new_notation,
	            })
	            added_to_front.append(new_cand)

	            # Mirror the single-candidate path's evaluation record.
	            self.candidates_evaluated.append({
	                'prompt': new_prompt,
	                'score': new_score,
	                'type': new_type,
	                'dataset': 'dpareto',
	            })

	            logger.info(f" ✅ {new_notation} ADDED to Pareto front: f={new_score:.4f}")

	        self.pareto_front.sort(key=lambda entry: entry.get('score', 0.0), reverse=True)

	        logger.info(f"\n{'═'*80}")
	        logger.info(f"✅ BATCH UPDATE COMPLETE")
	        logger.info(f" Added: {len(added_to_front)} candidates")
	        logger.info(f" Removed: {len(candidates_to_remove)} dominated candidates")
	        logger.info(f" Pareto front size: |P| = {len(self.pareto_front)}")
	        logger.info(f" {self._front_summary()}")
	        self._display_pareto_front()
	        logger.info("═" * 80 + "\n")

	        return added_to_front

	# Global instance: the module-level ParetoLogger returned by
	# get_pareto_logger() and replaced with a fresh one by reset_pareto_logger().
	_pareto_logger = ParetoLogger()

	def get_pareto_logger() -> ParetoLogger:
	    """Return the ParetoLogger currently bound to the module-level `_pareto_logger`."""
	    current = _pareto_logger
	    return current

	def reset_pareto_logger() -> ParetoLogger:
	    """Replace the module-level ParetoLogger with a new, empty one and return it.

	    Intended for starting a new run with no evaluations, front or baseline
	    carried over.
	    """
	    global _pareto_logger
	    fresh = ParetoLogger()
	    _pareto_logger = fresh
	    return fresh