"""
Public library API for SkyDiscover.
This module exposes the two main entry points for programmatic use:
* `run_discovery`: accepts file paths or inline strings for the initial program and evaluator,
wires up configuration, and returns a `DiscoveryResult`.
* `discover_solution`: a convenience wrapper for when the initial solution is a plain string and
the evaluator is a Python callable.
Quick-start::
from skydiscover import run_discovery
result = run_discovery(
evaluator="examples/my_problem/eval.py",
initial_program="examples/my_problem/init.py", # optional
model="gpt-5",
iterations=50,
)
print(result.best_score, result.best_solution)
"""
import asyncio
import logging
import os
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from skydiscover.benchmarks.resolution import resolve_benchmark_problem
from skydiscover.config import Config, apply_overrides, load_config
from skydiscover.runner import Runner
from skydiscover.search.base_database import Program
from skydiscover.utils.metrics import get_score
from skydiscover.utils.prepare import cleanup_temp, prepare_evaluator, prepare_program
logger = logging.getLogger(__name__)
@dataclass
class DiscoveryResult:
"""Result of a single discovery run."""
best_program: Optional[Program]
best_score: float
best_solution: str
metrics: Dict[str, Any]
output_dir: Optional[str]  # None when cleanup=True (the run directory is not reported)
initial_score: Optional[float] = None  # score of the starting program, when available
def __repr__(self) -> str:
init = f"{self.initial_score:.4f}" if self.initial_score is not None else "N/A"
return f"DiscoveryResult(best_score={self.best_score:.4f}, initial_score={init})"
def run_discovery(
evaluator: Union[str, Path, Callable],
initial_program: Optional[Union[str, Path, List[str]]] = None,
model: Optional[str] = None,
iterations: Optional[int] = None,
search: Optional[str] = None,
config: Union[str, Path, Config, None] = None,
agentic: bool = False,
output_dir: Optional[str] = None,
system_prompt: Optional[str] = None,
api_base: Optional[str] = None,
cleanup: bool = True,
) -> DiscoveryResult:
"""Run a discovery process and return the best result.
Args:
evaluator: File path or callable (program_path) -> metrics_dict.
initial_program: File path or inline source code (string / list of lines).
Optional — when omitted the LLM generates a solution from scratch.
model: Model name(s), comma-separated, e.g. "gpt-5" or "gpt-5,gemini/gemini-3-pro".
iterations: Max iterations (overrides config).
search: Algorithm name ("topk", "adaevolve", "evox", "openevolve_native", etc.).
config: YAML path, Config object, or None for defaults.
agentic: Enable agentic mode (codebase root derived from initial_program).
output_dir: Where to write results (temp dir if None).
system_prompt: Domain-specific context for the LLM.
api_base: Base URL for an OpenAI-compatible API.
cleanup: Remove temp files after the run.
Returns:
DiscoveryResult with best program, score, solution, metrics, and output directory.
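Example (a minimal sketch; the callable evaluator and its "score" metric key
are illustrative, not a required schema)::
def my_evaluator(program_path):
    # Run or inspect the candidate program and return numeric metrics.
    return {"score": 0.5}
result = run_discovery(evaluator=my_evaluator, model="gpt-5", iterations=10)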
"""
return asyncio.run(
_run_discovery_async(
initial_program,
evaluator,
config,
iterations=iterations,
output_dir=output_dir,
cleanup=cleanup,
agentic=agentic,
model=model,
search=search,
system_prompt=system_prompt,
api_base=api_base,
)
)
async def _run_discovery_async(
initial_program: Optional[Union[str, Path, List[str]]],
evaluator: Union[str, Path, Callable],
config: Union[str, Path, Config, None],
*,
model: Optional[str] = None,
iterations: Optional[int] = None,
search: Optional[str] = None,
agentic: bool = False,
output_dir: Optional[str] = None,
system_prompt: Optional[str] = None,
api_base: Optional[str] = None,
cleanup: bool = True,
) -> DiscoveryResult:
"""Async implementation of run_discovery."""
temp_dir: Optional[str] = None
temp_files: List[str] = []
evaluator_env_vars: Dict[str, str] = {}
try:
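# Resolve the configuration: reuse a Config object, load from a YAML path, or fall back to defaults, then apply per-call overrides.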
if isinstance(config, Config):
config_obj = config
else:
config_obj = load_config(str(config) if config else None)
apply_overrides(
config_obj,
model=model,
api_base=api_base,
agentic=agentic,
search=search,
system_prompt=system_prompt,
)
# Resolve benchmark problem if configured and no initial_program provided
if initial_program is None and config_obj.benchmark and config_obj.benchmark.enabled:
try:
resolution = resolve_benchmark_problem(config_obj.benchmark)
initial_program = resolution.initial_program_path
evaluator = resolution.evaluator_path
evaluator_env_vars = resolution.evaluator_env_vars
logger.info(
f"[Benchmark Loader] Benchmark: {config_obj.benchmark.name}, Initial program: {initial_program}, Evaluator: {evaluator}"
)
except Exception as exc:
raise ValueError(f"Failed to load benchmark problem: {exc}") from exc
# Prepare the program (optional — None means "from scratch")
program_path = (
prepare_program(initial_program, temp_dir, temp_files)
if initial_program is not None
else None
)
if program_path and config_obj.agentic.enabled and not config_obj.agentic.codebase_root:
config_obj.agentic.codebase_root = os.path.dirname(os.path.abspath(program_path))
# Prepare the evaluator
evaluator_path = prepare_evaluator(evaluator, temp_dir, temp_files)
# Prepare the output directory: a throwaway temp dir when no output_dir is given and cleanup is on, otherwise the requested or generated run directory
search_type = (
getattr(config_obj.search, "type", None) if hasattr(config_obj, "search") else None
)
if output_dir is None and cleanup:
temp_dir = tempfile.mkdtemp(prefix="skydiscover_")
actual_output_dir = temp_dir
else:
from skydiscover.config import build_output_dir
actual_output_dir = output_dir or build_output_dir(
search_type or "default", program_path or "scratch"
)
os.makedirs(actual_output_dir, exist_ok=True)
# External backends (openevolve, shinkaevolve, gepa)
if search_type:
from skydiscover.extras.external import KNOWN_EXTERNAL, get_runner, is_external
if is_external(search_type):
if evaluator_env_vars:
env_var_names = ", ".join(sorted(evaluator_env_vars))
raise ValueError(
"Passing evaluator environment variables to external backends is not yet supported. "
f"External backend '{search_type}' cannot be used with evaluator env vars: "
f"{env_var_names}"
)
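# Start the monitor so the external run can report progress and consume feedback; it is stopped in the finally block below.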
from skydiscover.extras.monitor import start_monitor, stop_monitor
monitor_server, monitor_callback, feedback_reader = start_monitor(
config_obj, actual_output_dir
)
try:
result = await get_runner(search_type)(
program_path=program_path,
evaluator_path=evaluator_path,
config_obj=config_obj,
iterations=iterations or config_obj.max_iterations,
output_dir=actual_output_dir,
monitor_callback=monitor_callback,
feedback_reader=feedback_reader,
)
except ModuleNotFoundError as exc:
from skydiscover.extras.external import get_package_name
pkg = get_package_name(search_type)
raise ImportError(
f"{exc}\n\nThe '{search_type}' backend requires its package. "
f"Install with: pip install {pkg}"
) from exc
finally:
stop_monitor(monitor_server)
result.output_dir = actual_output_dir if not cleanup else None
return result
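# A recognized external backend that is not usable here: fail with an install hint.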
if search_type in KNOWN_EXTERNAL:
from skydiscover.extras.external import get_package_name
pkg = get_package_name(search_type)
raise ImportError(
f"Search type '{search_type}' requires the '{pkg}' package. "
f"Install with: pip install {pkg}"
)
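# Built-in search path from here on: the native Runner needs at least one configured LLM model.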
if not config_obj.llm.models:
raise ValueError(
"No LLM models configured. Provide a config with models or "
"pass model= directly:\n\n"
" result = run_discovery(evaluator, model='gpt-5')"
)
# Initialize the runner
controller = Runner(
initial_program_path=program_path,
evaluation_file=evaluator_path,
config=config_obj,
output_dir=actual_output_dir,
evaluator_env_vars=evaluator_env_vars,
)
best_program = await controller.run(iterations=iterations)
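# Fall back to an empty solution and a zero score if the run produced no best program.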
best_score = 0.0
best_solution = ""
metrics: Dict[str, Any] = {}
if best_program:
best_solution = best_program.solution
metrics = best_program.metrics or {}
best_score = get_score(metrics)
initial_score = controller.initial_score
# Return the result
return DiscoveryResult(
best_program=best_program,
best_score=best_score,
best_solution=best_solution,
metrics=metrics,
output_dir=actual_output_dir if not cleanup else None,
initial_score=initial_score,
)
finally:
if cleanup:
cleanup_temp(temp_files, temp_dir)
def discover_solution(
evaluator: Callable[[str], Dict[str, Any]],
initial_solution: Optional[str] = None,
iterations: int = 100,
search: Optional[str] = None,
model: Optional[str] = None,
**kwargs: Any,
) -> DiscoveryResult:
"""Convenience wrapper: evolve a string solution with a callable evaluator.
Same as run_discovery, but the initial solution is a plain string and the evaluator is a Python callable.
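Example (a minimal sketch; the evaluator body and its "score" metric key are
illustrative)::
def my_evaluator(candidate):
    # Called per run_discovery's evaluator contract; return a metrics dict.
    return {"score": 1.0}
result = discover_solution(my_evaluator, initial_solution="x = 1", iterations=20, model="gpt-5")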
"""
return run_discovery(
evaluator=evaluator,
initial_program=initial_solution,
iterations=iterations,
search=search,
model=model,
**kwargs,
)