| """ |
| AdaEvolve context builder for SkyDiscover. |
| |
| Extends DefaultContextBuilder with AdaEvolve-specific prompt sections: |
| - Evaluator feedback from parent artifacts |
| - Paradigm breakthrough guidance |
| - Sibling context (previous mutations of the same parent) |
| - Error retry context |
| |
| These are assembled into a ``search_guidance`` string and injected into |
| AdaEvolve-specific templates via the ``{search_guidance}`` placeholder. |
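
For illustration only, a template might place the placeholder like this
(hypothetical template text; real templates live in this package's
``templates/`` directory)::

    {task_objective}

    {search_guidance}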
| """ |
|
|
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
|
|
| from skydiscover.config import Config |
| from skydiscover.context_builder.default import DefaultContextBuilder |
| from skydiscover.context_builder.utils import TemplateManager, prog_attr |
| from skydiscover.search.base_database import Program |
from skydiscover.utils.metrics import compute_proxy_score
|
|
logger = logging.getLogger(__name__)


class AdaEvolveContextBuilder(DefaultContextBuilder):
| """ |
| Context builder for AdaEvolve's adaptive evolutionary search. |
| |
| Adds a ``{search_guidance}`` section to the prompt containing: |
| - Evaluator diagnostic feedback (from parent's artifacts) |
| - Paradigm breakthrough guidance (when search is globally stagnating) |
| - Sibling context (previous mutations of the same parent) |
| - Error retry context (when retrying after a failed generation) |
| |
| The controller passes raw data via the ``context`` dict: |
| - ``context["paradigm"]``: paradigm dict or None |
| - ``context["siblings"]``: list of Program objects |
| - ``context["error_context"]``: error string or None |
| |
| Evaluator feedback is extracted from the parent program's artifacts. |
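
    Example ``context`` (illustrative values only)::

        context = {
            "paradigm": {"idea": "...", "description": "...", "cautions": "..."},
            "siblings": [prev_child_a, prev_child_b],  # Program objects
            "error_context": "SyntaxError: invalid syntax",
        }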
| """ |
|
|
| def __init__(self, config: Config): |
| super().__init__(config) |
| default_templates = str(Path(__file__).parent.parent / "default" / "templates") |
| adaevolve_templates = str(Path(__file__).parent / "templates") |
| self.template_manager = TemplateManager( |
| default_templates, adaevolve_templates, self.context_config.template_dir |
| ) |
|
|
| def _db_config(self) -> Any: |
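        """Return the ``search.database`` config section, or ``None`` if absent."""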
| return getattr(self.config.search, "database", None) |
|
|
| def _is_multiobjective_enabled(self) -> bool: |
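        """True when ``pareto_objectives`` is configured (multi-objective mode)."""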
| return bool(getattr(self._db_config(), "pareto_objectives", None) or []) |
|
|
| def _objective_descriptions(self) -> List[str]: |
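        """Describe each objective with its direction, e.g. ``"latency (minimize)"``."""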
| db_config = self._db_config() |
| higher_is_better = getattr(db_config, "higher_is_better", None) or {} |
| descriptions = [] |
| for objective in getattr(db_config, "pareto_objectives", None) or []: |
| direction = "maximize" if higher_is_better.get(objective, True) else "minimize" |
| descriptions.append(f"{objective} ({direction})") |
| return descriptions |
|
|
| def _metric_to_maximization_value(self, metric_name: str, value: Any) -> Optional[float]: |
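        """Map a raw metric value onto a maximize-is-better scale."""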
| from skydiscover.utils.metrics import normalize_metric_value |
|
|
| higher_is_better = getattr(self._db_config(), "higher_is_better", None) or {} |
| return normalize_metric_value(metric_name, value, higher_is_better) |
|
|
| _PROGRESS_SCORE_MISSING = float("-inf") |
|
|
| def _get_progress_score(self, metrics: Dict[str, Any]) -> float: |
| """Scalar proxy used only for prompt-side progress descriptions. |
| |
| Returns ``_PROGRESS_SCORE_MISSING`` (``-inf``) for empty/missing metrics |
| so that callers can distinguish "no data" from "score is zero". |
| """ |
| db_config = self._db_config() |
| pareto_objectives = getattr(db_config, "pareto_objectives", None) or None |
| return compute_proxy_score( |
| metrics, |
| fitness_key=getattr(db_config, "fitness_key", None), |
| pareto_objectives=pareto_objectives, |
| higher_is_better=getattr(db_config, "higher_is_better", None) or {}, |
| ) |
|
|
| def _task_objective_text(self) -> str: |
| subject = ( |
| "prompt" if (self.config.language or "").lower() in ("text", "prompt") else "program" |
| ) |
| if not self._is_multiobjective_enabled(): |
| return f"Suggest improvements to the {subject} that will improve its COMBINED_SCORE." |
| return ( |
| f"Suggest improvements to the {subject} that improve its Pareto trade-offs across: " |
| + ", ".join(self._objective_descriptions()) |
| + "." |
| ) |
|
|
| def _diversity_dimensions_text(self) -> str: |
| if not self._is_multiobjective_enabled(): |
| return "The system maintains diversity across these dimensions: score, complexity." |
| return "The system maintains diversity across Pareto trade-offs, novelty, and solution structure." |
|
|
| def _diversity_note_text(self) -> str: |
| if not self._is_multiobjective_enabled(): |
| return "Different solutions with similar combined_score but different features are valuable." |
| return "Different solutions with similar overall trade-offs but different objective balances are valuable." |
|
|
| def build_prompt( |
| self, |
| current_program: Union[Program, Dict[str, Program]], |
        context: Optional[Dict[str, Any]] = None,
| **kwargs: Any, |
| ) -> Dict[str, str]: |
| """ |
| Build prompt with AdaEvolve-specific search guidance. |
| |
| Computes the ``search_guidance`` string from AdaEvolve context keys, |
| then delegates to the parent's ``build_prompt`` which fills the |
| ``{search_guidance}`` placeholder in AdaEvolve templates. |
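
        Minimal usage sketch (assumes a configured ``Config`` and a parent
        ``Program``; the variable names are illustrative)::

            builder = AdaEvolveContextBuilder(config)
            prompt = builder.build_prompt(
                parent_program,
                context={"siblings": [], "error_context": None},
            )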
| """ |
        context = context or {}

        # Assemble AdaEvolve-specific guidance before delegating to the parent.
        search_guidance = self._build_search_guidance(current_program, context)

        # Drop any caller-supplied value so ours is the single source of truth.
        kwargs.pop("search_guidance", None)

        return super().build_prompt(
            current_program,
            context,
            search_guidance=search_guidance,
            task_objective=self._task_objective_text(),
            diversity_dimensions=self._diversity_dimensions_text(),
            diversity_note=self._diversity_note_text(),
            **kwargs,
        )


    def _format_current_program(
| self, |
| current_program: Union[Program, Dict[str, Program]], |
| language: str, |
| ) -> str: |
| """Override to suppress artifacts["feedback"] from {current_program}. |
| |
| AdaEvolve renders evaluator feedback explicitly via _build_search_guidance |
| into {search_guidance}, so we strip it here to avoid duplication. |
| """ |
| |
| if isinstance(current_program, dict): |
| program = list(current_program.values())[0] |
| else: |
| program = current_program |
|
|
        artifacts = getattr(program, "artifacts", None)
        # Sentinel distinguishes "no feedback key" from an explicit None value,
        # so an existing ``feedback=None`` entry is restored too.
        _missing = object()
        saved_feedback = _missing
        if isinstance(artifacts, dict) and "feedback" in artifacts:
            saved_feedback = artifacts.pop("feedback")

        try:
            return super()._format_current_program(current_program, language)
        finally:
            if saved_feedback is not _missing and isinstance(artifacts, dict):
                artifacts["feedback"] = saved_feedback


    def _build_search_guidance(
| self, |
| current_program: Union[Program, Dict[str, Program]], |
| context: Dict[str, Any], |
| ) -> str: |
| """ |
| Assemble all AdaEvolve-specific guidance sections into one string. |
| |
| Sections are included in priority order: |
| 1. Evaluator feedback (highest value — shows why parent fails) |
| 2. Paradigm breakthrough guidance (when globally stagnating) |
| 3. Sibling context (previous mutations of this parent) |
| 4. Error retry context (when retrying after failure) |
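
        Abridged example of the assembled string (section headings are the
        real ones; body text is elided)::

            ## EVALUATOR FEEDBACK ON CURRENT PROGRAM
            ...

            ## PREVIOUS ATTEMPTS ON THIS PARENT
            ...

            ## RETRY CONTEXT
            ...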
| """ |
| |
        if isinstance(current_program, dict):
            parent_program = list(current_program.values())[0]
        else:
            parent_program = current_program

        language = self.config.language or "python"
        paradigm = context.get("paradigm")
        siblings = context.get("siblings", [])
        error_context = context.get("error_context")

        sections: List[str] = []

        # 1. Evaluator feedback from the parent's artifacts.
        feedback_section = self._format_evaluator_feedback(parent_program)
        if feedback_section:
            sections.append(feedback_section)

        # 2. Paradigm breakthrough guidance.
        if paradigm:
            sections.append(self._format_paradigm_guidance(paradigm, language))

        # 3. Sibling context: previous mutations of this parent.
        if siblings:
            sibling_section = self._format_sibling_context(siblings, parent_program)
            if sibling_section:
                sections.append(sibling_section)

        # 4. Error retry context.
        if error_context:
            sections.append(self._format_error_context(error_context))

        if not sections:
            return ""

        return "\n\n".join(sections)
|
|
| def _identify_improvement_areas( |
| self, |
| current_program: str, |
| metrics: Dict[str, float], |
| previous_programs: List[Program], |
| ) -> str: |
| """Generate improvement bullets for scalar or Pareto mode.""" |
| if not self._is_multiobjective_enabled(): |
| return super()._identify_improvement_areas(current_program, metrics, previous_programs) |
|
|
| improvement_areas = [ |
| "Focus on Pareto trade-offs across: " + ", ".join(self._objective_descriptions()) |
| ] |
|
|
| current_score = self._get_progress_score(metrics) |
| if previous_programs: |
| prev_metrics = prog_attr(previous_programs[-1], "metrics", {}) or {} |
| prev_score = self._get_progress_score(prev_metrics) |
| |
| missing = self._PROGRESS_SCORE_MISSING |
| if current_score != missing and prev_score != missing: |
| if current_score > prev_score + 1e-6: |
| improvement_areas.append( |
| f"Pareto proxy improved: {prev_score:.4f} -> {current_score:.4f}" |
| ) |
| elif current_score < prev_score - 1e-6: |
| improvement_areas.append( |
| f"Pareto proxy declined: {prev_score:.4f} -> {current_score:.4f}. Revisit recent trade-offs." |
| ) |
| else: |
| improvement_areas.append(f"Pareto proxy unchanged at {current_score:.4f}") |
| elif current_score != missing: |
| improvement_areas.append(f"Pareto proxy at {current_score:.4f} (first measurement)") |
|
|
| threshold = self.context_config.suggest_simplification_after_chars |
| if threshold and len(current_program) > threshold: |
| improvement_areas.append( |
| f"Consider simplifying - solution length exceeds {threshold} characters" |
| ) |
|
|
| return "\n".join(f"- {area}" for area in improvement_areas) |
|
|
| |
| |
| |
|
|
| @staticmethod |
| def _format_evaluator_feedback(parent_program: Program) -> Optional[str]: |
| """ |
| Format evaluator feedback from parent's artifacts. |
| |
| The evaluator may return diagnostic feedback (e.g. analysis of failed |
| examples) in artifacts["feedback"]. This is injected into the prompt |
| so the LLM can make targeted improvements instead of guessing. |
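
        The rendered section starts with the heading
        ``## EVALUATOR FEEDBACK ON CURRENT PROGRAM`` and is truncated to
        about 2000 characters to keep the prompt bounded.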
| """ |
| artifacts = getattr(parent_program, "artifacts", None) |
| if not artifacts: |
| return None |
|
|
| feedback = artifacts.get("feedback") |
| if not feedback or not isinstance(feedback, str): |
| return None |
|
|
| |
| max_len = 2000 |
| if len(feedback) > max_len: |
| feedback = feedback[:max_len] + "\n... (truncated)" |
|
|
| return ( |
| "## EVALUATOR FEEDBACK ON CURRENT PROGRAM\n" |
| "The evaluator analyzed cases where the current program failed " |
| "and produced the following diagnostic feedback. " |
| "Use this to make targeted improvements:\n\n" |
| f"{feedback}" |
| ) |
|
|
| @staticmethod |
| def _format_paradigm_guidance(paradigm: Dict[str, Any], language: str) -> str: |
| """ |
| Format paradigm breakthrough guidance for the LLM. |
| |
| Uses different framing for prompt optimization vs code optimization. |
| """ |
| is_prompt_opt = (language or "").lower() in ("text", "prompt") |
|
|
| idea = paradigm.get("idea", "N/A") |
| description = paradigm.get("description", "N/A") |
| target = paradigm.get("what_to_optimize", "score") |
| cautions = paradigm.get("cautions", "N/A") |
| approach_type = paradigm.get("approach_type", "N/A") |
|
|
| if is_prompt_opt: |
| header = "## BREAKTHROUGH STRATEGY - APPLY THIS" |
| intro = "The search has stagnated globally. You MUST apply this breakthrough prompt strategy:" |
| fields = ( |
| f"**STRATEGY:** {idea}\n\n" |
| f"**HOW TO APPLY:**\n{description}\n\n" |
| f"**TARGET:** {target}\n\n" |
| f"**CAUTIONS:** {cautions}\n\n" |
| f"**APPROACH TYPE:** {approach_type}" |
| ) |
| critical_bullets = ( |
| "- You MUST rewrite the prompt using this strategy\n" |
| "- The strategy must be reflected in the actual prompt structure and content\n" |
| "- Keep the prompt clear and well-structured\n" |
| "- Do not add unnecessary verbosity — every sentence should serve a purpose\n" |
| "- Ensure the prompt still addresses the core task" |
| ) |
| else: |
| header = "## BREAKTHROUGH IDEA - IMPLEMENT THIS" |
| intro = "The search has stagnated globally. You MUST implement this breakthrough idea:" |
| fields = ( |
| f"**IDEA:** {idea}\n\n" |
| f"**HOW TO IMPLEMENT:**\n{description}\n\n" |
| f"**TARGET METRIC:** {target}\n\n" |
| f"**CAUTIONS:** {cautions}\n\n" |
| f"**APPROACH TYPE:** {approach_type}" |
| ) |
| critical_bullets = ( |
| "- You MUST implement the breakthrough idea\n" |
| "- Ensure the paradigm is actually used in your implementation (not just mentioned in comments)\n" |
| "- Correctness is essential - your implementation must be correct and functional\n" |
| "- Verify output format matches evaluator requirements\n" |
| "- Make purposeful changes that implement the idea\n" |
| "- Test your implementation logic carefully" |
| ) |
|
|
| return f"{header}\n\n{intro}\n\n{fields}\n\n**CRITICAL:**\n{critical_bullets}" |
|
|
| def _format_sibling_context( |
| self, siblings: List[Program], parent_program: Program |
| ) -> Optional[str]: |
| """ |
| Format sibling context showing previous mutations of the parent. |
| |
        Shows which mutations have already been tried and whether they improved
        or regressed, so the LLM can avoid repeating failed approaches.
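
        Example output (scores are illustrative)::

            ## PREVIOUS ATTEMPTS ON THIS PARENT
            Summary: 1 improved, 1 unchanged, 0 regressed
              1. 0.5000 -> 0.5400 (+0.0400) [IMPROVED]
              2. 0.5000 -> 0.5002 (+0.0002) [NO CHANGE]
            Avoid repeating approaches that didn't work.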
| """ |
| if not siblings: |
| return None |
|
|
| parent_fitness = self._get_progress_score(getattr(parent_program, "metrics", {})) |
| missing = self._PROGRESS_SCORE_MISSING |
|
|
| improved, regressed, unchanged = 0, 0, 0 |
| entries: List[str] = [] |
|
|
| for i, child in enumerate(siblings, 1): |
| child_fitness = self._get_progress_score(getattr(child, "metrics", {})) |
|
|
| if parent_fitness == missing or child_fitness == missing: |
| entries.append(f" {i}. (metrics unavailable) [UNKNOWN]") |
| unchanged += 1 |
| continue |
|
|
| delta = child_fitness - parent_fitness |
|
|
| if delta > 0.001: |
| status = "IMPROVED" |
| improved += 1 |
| elif delta < -0.001: |
| status = "REGRESSED" |
| regressed += 1 |
| else: |
| status = "NO CHANGE" |
| unchanged += 1 |
|
|
| entries.append( |
| f" {i}. {parent_fitness:.4f} -> {child_fitness:.4f} " f"({delta:+.4f}) [{status}]" |
| ) |
|
|
| lines = [ |
| "## PREVIOUS ATTEMPTS ON THIS PARENT", |
| f"Summary: {improved} improved, {unchanged} unchanged, {regressed} regressed", |
| *entries, |
| "Avoid repeating approaches that didn't work.", |
| ] |
| return "\n".join(lines) |
|
|
| def _format_previous_attempts( |
| self, previous_programs: List[Program], num_previous_attempts: int = 3 |
| ) -> str: |
| """Format recent attempts using AdaEvolve's scalar proxy in Pareto mode.""" |
| if not self._is_multiobjective_enabled(): |
| return super()._format_previous_attempts(previous_programs, num_previous_attempts) |
|
|
| if not previous_programs: |
| return "No previous attempts yet." |
|
|
| try: |
| previous_attempt_template = self.template_manager.get_template("previous_attempt") |
| except (ValueError, KeyError): |
| previous_attempt_template = "### Attempt {attempt_number}\n- Changes: {changes}\n- Metrics: {performance}\n- Outcome: {outcome}" |
|
|
| previous_programs = sorted( |
| previous_programs, |
| key=lambda program: self._get_progress_score(prog_attr(program, "metrics", {}) or {}), |
| reverse=True, |
| ) |
| selected = previous_programs[: min(num_previous_attempts, len(previous_programs))] |
|
|
| lines = [] |
| for i, program in enumerate(reversed(selected)): |
| attempt_number = len(selected) - i |
| metadata = prog_attr(program, "metadata", {}) or {} |
| metrics = prog_attr(program, "metrics", {}) or {} |
|
|
| changes = metadata.get("changes", "Unknown changes") |
| performance_parts = [] |
| for name, value in metrics.items(): |
| if not isinstance(value, bool) and isinstance(value, (int, float)): |
| try: |
| performance_parts.append(f"{name}: {value:.4f}") |
| except (ValueError, TypeError): |
| performance_parts.append(f"{name}: {value}") |
| else: |
| performance_parts.append(f"{name}: {value}") |
| performance_str = ", ".join(performance_parts) if performance_parts else "No metrics" |
|
|
| parent_metrics = metadata.get("parent_metrics", {}) |
| outcome = self._determine_outcome(metrics, parent_metrics) |
|
|
| lines.append( |
| previous_attempt_template.format( |
| attempt_number=attempt_number, |
| changes=changes, |
| performance=performance_str, |
| outcome=outcome, |
| ) |
| + "\n\n" |
| ) |
|
|
| return "".join(lines) |
|
|
| def _determine_outcome( |
| self, program_metrics: Dict[str, Any], parent_metrics: Dict[str, Any] |
| ) -> str: |
| """Describe attempt outcome using the configured scalar proxy in Pareto mode.""" |
| if not self._is_multiobjective_enabled(): |
| return super()._determine_outcome(program_metrics, parent_metrics) |
|
|
| prog_value = self._get_progress_score(program_metrics) |
| parent_value = self._get_progress_score(parent_metrics) |
| missing = self._PROGRESS_SCORE_MISSING |
| if prog_value == missing or parent_value == missing: |
| return "Insufficient metrics for comparison" |
| if prog_value > parent_value + 1e-6: |
| return "Improvement in Pareto proxy" |
| if prog_value < parent_value - 1e-6: |
| return "Regression in Pareto proxy" |
| return "No change in Pareto proxy" |
|
|
| @staticmethod |
| def _format_error_context(error_context: str) -> str: |
| """Format retry error context.""" |
| return ( |
| "## RETRY CONTEXT\n" |
| f"Previous attempt failed with error:\n```\n{error_context}\n```\n" |
| "Please fix this issue in your response." |
| ) |
|
|