Spaces:
Sleeping
Sleeping
| """ | |
| Diversity-Guided Mutation Operator. | |
| Adapts LLEGO's diversity-guided mutation for text prompts. | |
| Based on: Decision Tree Induction Through LLMs via Semantically-Aware Evolution (ICLR 2025) | |
| """ | |
| from typing import List, Callable, TYPE_CHECKING | |
| import numpy as np | |
| import logging | |
| from .base_operator import BaseMutationOperator | |
| if TYPE_CHECKING: | |
| from .models import PromptCandidate | |
| logger = logging.getLogger(__name__) | |
class DiversityGuidedMutation(BaseMutationOperator):
    """
    Diversity-guided mutation for text prompts.

    Explores the search space by generating diverse prompt variations
    using temperature-controlled LLM sampling.

    From LLEGO paper:
    "Diversity-guided mutation enables efficient global exploration by sampling
    diverse parents with temperature parameter τ"

    Reference: https://github.com/nicolashuynh/LLEGO
    """

    def __init__(self, tau: float = 10.0, nu: int = 4):
        """
        Initialize mutation operator.

        Args:
            tau: Diversity temperature (higher = more exploration).
                Default 10.0 from LLEGO paper. Must be positive: it divides
                the diversity scores in the softmax sampling step.
            nu: Parent arity (number of parents to sample for diversity).
                Default 4 from LLEGO paper. Must be at least 1.

        Raises:
            ValueError: If ``tau`` is not strictly positive or ``nu`` < 1.
        """
        # Validate eagerly: tau <= 0 would otherwise surface later as a
        # ZeroDivisionError (or inverted sampling) inside _sample_diverse_parents.
        if tau <= 0:
            raise ValueError(f"tau must be positive, got {tau}")
        if nu < 1:
            raise ValueError(f"nu must be at least 1, got {nu}")
        self.tau = tau
        self.nu = nu
        # Lazy %-style args: no string formatting cost unless DEBUG is enabled.
        logger.debug("DiversityGuidedMutation initialized with τ=%s, ν=%s", tau, nu)

    def __call__(
        self,
        parent: "PromptCandidate",
        population: List["PromptCandidate"],
        llm: Callable[[str], str]
    ) -> str:
        """
        Mutate a parent prompt to explore new regions.

        Args:
            parent: Parent PromptCandidate to mutate.
            population: Current population for diversity guidance.
            llm: Language model callable mapping a prompt string to a response.

        Returns:
            str: Mutated prompt produced by the LLM.
        """
        logger.debug("Mutation: parent fitness=%.3f", parent.fitness)
        # Sample diverse parents from the population to give the LLM context
        # about unexplored regions of the prompt space.
        diverse_parents = self._sample_diverse_parents(parent, population)
        # Build the mutation instruction and delegate generation to the LLM.
        mutation_prompt = self._build_prompt(parent, diverse_parents)
        mutated_prompt = llm(mutation_prompt)
        return mutated_prompt

    def _sample_diverse_parents(
        self,
        parent: "PromptCandidate",
        population: List["PromptCandidate"]
    ) -> List["PromptCandidate"]:
        """
        Sample diverse parents using temperature-based (softmax) selection.

        Candidates identical to ``parent`` (same prompt text) are excluded.
        Sampling is without replacement, weighted by exp(diversity / tau),
        so higher tau flattens the distribution toward uniform exploration.

        Args:
            parent: Current parent.
            population: Population to sample from.

        Returns:
            List of up to ``self.nu`` diverse parent candidates; falls back
            to ``[parent]`` when no distinct candidate exists.
        """
        # Score every distinct candidate by its diversity w.r.t. the parent.
        diversity_scores = []
        for candidate in population:
            if candidate.prompt != parent.prompt:
                diversity = self._calculate_diversity(parent.prompt, candidate.prompt)
                diversity_scores.append((candidate, diversity))
        if not diversity_scores:
            # Degenerate population (all prompts equal to the parent).
            return [parent]
        # Temperature-based softmax over diversity scores. Subtracting the max
        # before exponentiating is the standard numerically-stable softmax; it
        # leaves the resulting probabilities mathematically unchanged.
        scores = np.array([score for _, score in diversity_scores])
        probs = np.exp((scores - scores.max()) / self.tau)
        probs /= probs.sum()
        # Sample up to nu distinct parents without replacement.
        n_samples = min(self.nu, len(diversity_scores))
        indices = np.random.choice(
            len(diversity_scores),
            size=n_samples,
            replace=False,
            p=probs
        )
        return [diversity_scores[i][0] for i in indices]

    def _calculate_diversity(self, prompt1: str, prompt2: str) -> float:
        """
        Calculate semantic diversity between two prompts.

        Uses Jaccard distance on lowercased whitespace-split words as a
        simple diversity metric.

        Args:
            prompt1: First prompt.
            prompt2: Second prompt.

        Returns:
            float: Diversity score in [0, 1]; higher means more diverse.
        """
        words1 = set(prompt1.lower().split())
        words2 = set(prompt2.lower().split())
        union = len(words1 | words2)
        if union == 0:
            # Both prompts contain no words: they are identical, so diversity
            # is 0. (Previously this case incorrectly reported maximum
            # diversity because similarity defaulted to 0.)
            return 0.0
        jaccard_similarity = len(words1 & words2) / union
        return 1.0 - jaccard_similarity  # Jaccard distance: higher = more diverse

    def _build_prompt(
        self,
        parent: "PromptCandidate",
        diverse_parents: List["PromptCandidate"]
    ) -> str:
        """
        Build LLM prompt for the mutation operation.

        Args:
            parent: Parent candidate to mutate.
            diverse_parents: Diverse parents for context (at most two are
                included to keep the prompt short).

        Returns:
            str: Prompt for the LLM.
        """
        # Truncation caps keep the instruction within a small token budget.
        MAX_PARENT_LENGTH = 350
        MAX_DIVERSE_LENGTH = 200
        parent_truncated = parent.prompt[:MAX_PARENT_LENGTH]
        if len(parent.prompt) > MAX_PARENT_LENGTH:
            parent_truncated += "..."
        # Build diversity context from at most the first two diverse parents.
        diversity_context = []
        for i, diverse_parent in enumerate(diverse_parents[:2]):
            truncated = diverse_parent.prompt[:MAX_DIVERSE_LENGTH]
            if len(diverse_parent.prompt) > MAX_DIVERSE_LENGTH:
                truncated += "..."
            diversity_context.append(f"V{i+1}: {truncated}")
        prompt = f"""Create a variation of this prompt with different decision logic (fitness: {parent.fitness:.2f}).
Parent: {parent_truncated}
{chr(10).join(diversity_context) if diversity_context else ""}
Instructions:
1. Explore NEW ways to categorize tasks (e.g., by element type, by action, by hierarchy)
2. Add handling for edge cases the parent might miss
3. Keep the structured, logical approach
4. Keep format (Element: X, Description:, Reason:)
5. Max 600 chars
Output ONLY the new prompt:"""
        return prompt