# sky2 / benchmarks / kernelbench / resolver.py
# JustinTX's picture
# Add files using upload-large-folder tool
# af83196 verified
"""KernelBench problem resolver for SkyDiscover.
This resolver fetches GPU kernel optimization problems from the KernelBench
dataset and generates the necessary files for SkyDiscover to run optimization.
"""
import logging
from pathlib import Path
from typing import Any, Dict, Tuple
from skydiscover.benchmarks.base import BenchmarkResolution, BenchmarkResolver
from skydiscover.utils.prepare import prepare_program
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class KernelBenchResolver(BenchmarkResolver):
    """Resolve a KernelBench problem into the files SkyDiscover needs.

    The resolver:
    1. Fetches the reference implementation from the KernelBench dataset
    2. Generates the initial program through ``prepare_program``
       (expected to add the EVOLVE-BLOCK markers)
    3. Builds the environment variables for the evaluator
    4. Returns paths to the generated initial program and the existing evaluator

    Required config parameters:
    - level: Problem difficulty level (1, 2, or 3)
    - problem_id: Specific problem ID within the level

    Optional config parameters:
    - dataset_src: 'huggingface' (default) or 'local'
    - dataset_name: HuggingFace dataset name (default: 'ScalingIntelligence/KernelBench')
    - eval_mode: 'local' (default) or 'modal'
    - gpu: GPU type for evaluation (default: 'H100')
    - num_correct_trials: Number of correctness validation runs (default: 5)
    - num_perf_trials: Number of performance measurement runs (default: 100)
    - use_docker: True (default) points at the ``evaluator`` directory for the
      containerized evaluator; False points at ``evaluator/evaluator.py``
    - timeout: Value exported as KERNELBENCH_TIMEOUT (default: 300)
    """

    def resolve(self, config: Dict[str, Any], output_dir: Path) -> BenchmarkResolution:
        """Fetch a KernelBench problem, write its initial program, configure the evaluator.

        Args:
            config: Configuration dictionary with 'level', 'problem_id', and the
                optional parameters listed in the class docstring.
            output_dir: Directory where generated files will be placed; it is
                created (including parents) if it does not exist.

        Returns:
            BenchmarkResolution with the initial program path, the evaluator
            path, and the evaluator environment variables.

        Raises:
            ValueError: If 'level' or 'problem_id' is missing (or None) in config.
            ImportError: If the ``kernelbench`` package cannot be imported.
            RuntimeError: If building the dataset or looking up the problem fails.
        """
        # Validate required parameters
        level = config.get("level")
        problem_id = config.get("problem_id")
        if level is None or problem_id is None:
            raise ValueError(
                "KernelBench resolver requires 'level' and 'problem_id' in config. "
                f"Got: level={level}, problem_id={problem_id}"
            )

        # Extract optional parameters with defaults
        dataset_src = config.get("dataset_src", "huggingface")
        dataset_name = config.get("dataset_name", "ScalingIntelligence/KernelBench")
        eval_mode = config.get("eval_mode", "local")
        gpu = config.get("gpu", "H100")
        num_correct_trials = config.get("num_correct_trials", 5)
        num_perf_trials = config.get("num_perf_trials", 100)
        timeout = config.get("timeout", 300)
        use_docker = config.get("use_docker", True)

        logger.info(
            "Resolving KernelBench problem: level=%s, problem_id=%s", level, problem_id
        )
        logger.info("Eval mode: %s, GPU: %s", eval_mode, gpu)

        problem = self._fetch_problem(level, problem_id, dataset_src, dataset_name)
        logger.info("Fetched problem: %s (ID: %s)", problem.name, problem.problem_id)

        # Generate the initial program from the reference code using prepare_program
        output_dir.mkdir(parents=True, exist_ok=True)
        initial_program_path = prepare_program(
            initial_program=problem.code, temp_dir=str(output_dir), temp_files=[]
        )
        logger.info("Generated initial program: %s", initial_program_path)

        # Use the evaluator directory for container mode, evaluator.py for native mode
        evaluator_dir = Path(__file__).parent / "evaluator"
        if use_docker:
            evaluator_path = evaluator_dir
            logger.info("Using containerized evaluator (Docker required)")
        else:
            evaluator_path = evaluator_dir / "evaluator.py"
            logger.info("Using native Python evaluator (no Docker required)")

        # Environment variable values must be strings, so every entry is
        # coerced uniformly; config loaders may hand back ints (e.g. gpu: 4090).
        evaluator_env_vars = {
            "KERNELBENCH_LEVEL": str(level),
            "KERNELBENCH_PROBLEM_ID": str(problem_id),
            "KERNELBENCH_EVAL_MODE": str(eval_mode),
            "KERNELBENCH_GPU": str(gpu),
            "KERNELBENCH_NUM_CORRECT_TRIALS": str(num_correct_trials),
            "KERNELBENCH_NUM_PERF_TRIALS": str(num_perf_trials),
            "KERNELBENCH_TIMEOUT": str(timeout),
        }

        mode_desc = "container" if use_docker else "native evaluator"
        logger.info("Prepared evaluator environment for %s:", mode_desc)
        logger.info(" KERNELBENCH_LEVEL=%s", level)
        logger.info(" KERNELBENCH_PROBLEM_ID=%s", problem_id)
        logger.info(" KERNELBENCH_EVAL_MODE=%s", eval_mode)
        logger.info(" KERNELBENCH_GPU=%s", gpu)

        return BenchmarkResolution(
            initial_program_path=str(initial_program_path),
            evaluator_path=str(evaluator_path),
            evaluator_env_vars=evaluator_env_vars,
        )

    @staticmethod
    def _fetch_problem(level: Any, problem_id: Any, dataset_src: str, dataset_name: str) -> Any:
        """Import KernelBench lazily and return the problem for (level, problem_id)."""
        # Imported here so that merely importing this module does not require
        # the optional kernelbench package.
        try:
            from kernelbench.dataset import construct_kernelbench_dataset
        except ImportError as e:
            raise ImportError(
                "KernelBench package not found. Install with: "
                "uv pip install 'kernelbench @ git+https://github.com/ScalingIntelligence/KernelBench.git'"
            ) from e

        # Any failure while building the dataset or looking up the problem is
        # re-raised as RuntimeError with the original exception chained.
        try:
            dataset = construct_kernelbench_dataset(
                level=level,
                source=dataset_src,
                dataset_name=dataset_name,
            )
            return dataset.get_problem_by_id(problem_id)
        except Exception as e:
            raise RuntimeError(
                f"Failed to fetch KernelBench problem (level={level}, id={problem_id}): {e}"
            ) from e
# Module-level resolver instance, created once when this module is imported.
# NOTE(review): presumably looked up by the name `resolver` by the benchmark
# loader — confirm against the caller before renaming.
resolver = KernelBenchResolver()