File size: 3,905 Bytes
cacd4d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Fitness-Guided Crossover Operator.

Adapts LLEGO's fitness-guided crossover for text prompts.
Based on: Decision Tree Induction Through LLMs via Semantically-Aware Evolution (ICLR 2025)
"""

from typing import List, Callable, TYPE_CHECKING
import logging

from .base_operator import BaseCrossoverOperator

if TYPE_CHECKING:
    from .models import PromptCandidate

logger = logging.getLogger(__name__)


class FitnessGuidedCrossover(BaseCrossoverOperator):
    """
    Fitness-guided crossover for text prompts.
    
    Combines high-performing parent prompts to generate offspring
    that target specific fitness levels using LLM semantic understanding.
    
    From LLEGO paper:
    "Fitness-guided crossover exploits high-performing regions of the search space
    by combining parent trees targeting a desired fitness level f* = f_max + α(f_max - f_min)"
    
    Reference: https://github.com/nicolashuynh/LLEGO
    """
    
    def __init__(self, alpha: float = 0.1):
        """
        Initialize crossover operator.
        
        Args:
            alpha: Fitness extrapolation parameter.
                   Higher α = target higher fitness than parents.
                   Default 0.1 from LLEGO paper (target 10% above best parent).
        """
        self.alpha = alpha
        logger.debug(f"FitnessGuidedCrossover initialized with α={alpha}")
    
    def __call__(
        self,
        parents: List["PromptCandidate"],
        target_fitness: float,
        llm: Callable[[str], str]
    ) -> str:
        """
        Combine parent prompts targeting specific fitness.
        
        Args:
            parents: List of PromptCandidate objects (2+ parents)
            target_fitness: Desired fitness for offspring
            llm: Language model callable
            
        Returns:
            str: Offspring prompt
            
        Raises:
            ValueError: If fewer than 2 parents provided
        """
        if len(parents) < 2:
            raise ValueError("Crossover requires at least 2 parents")
        
        # Sort parents by fitness (best first)
        sorted_parents = sorted(parents, key=lambda p: p.fitness, reverse=True)
        
        logger.debug(f"Crossover: {len(parents)} parents, target fitness={target_fitness:.3f}")
        
        # Build crossover prompt and call LLM
        crossover_prompt = self._build_prompt(sorted_parents, target_fitness)
        new_prompt = llm(crossover_prompt)
        
        return new_prompt
    
    def _build_prompt(
        self, 
        parents: List["PromptCandidate"],
        target_fitness: float
    ) -> str:
        """
        Build LLM prompt for crossover operation.
        
        Args:
            parents: Sorted list of parent candidates (best first)
            target_fitness: Target fitness for offspring
            
        Returns:
            str: Prompt for LLM
        """
        # Truncate parents to prevent safety filter issues
        MAX_PARENT_LENGTH = 350
        
        # Build parent descriptions (limit to top 2)
        parent_descriptions = []
        for i, parent in enumerate(parents[:2]):
            truncated = parent.prompt[:MAX_PARENT_LENGTH]
            if len(parent.prompt) > MAX_PARENT_LENGTH:
                truncated += "..."
            parent_descriptions.append(
                f"P{i+1} (f={parent.fitness:.2f}): {truncated}\n"
            )
        
        prompt = f"""Combine these prompts into ONE improved version (target fitness: {target_fitness:.2f}).

{' '.join(parent_descriptions)}
Instructions:
1. Merge the best rules/principles from both parents
2. Organize logic clearly (e.g., "For X tasks: do Y", "If Z: then A")
3. Add structure to handle different cases systematically
4. Keep output format (Element: X, Description:, Reason:)
5. Max 600 chars

Output ONLY the combined prompt:"""
        
        return prompt