# Benchmark evaluator image: installs the Python requirements, copies the
# benchmark directory into /benchmark, fetches KernelBench's run_and_check.py
# at a pinned commit, and runs evaluate.sh as the container entrypoint.
FROM python:3.10-slim

WORKDIR /benchmark

# Install system dependencies.
# --no-install-recommends keeps the slim image small; ca-certificates is
# listed explicitly so HTTPS downloads (curl, git) do not rely on the base
# image happening to ship it. The apt lists are removed in the same layer so
# they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest on its own so the pip layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# wrapper.py provides backwards compatibility for old Python-based evaluators
# that define evaluate(program_path) -> dict. Bridges them to the container
# JSON protocol. Source of truth: skydiscover/evaluation/wrapper.py
COPY . .

# Download run_and_check.py from the KernelBench repository (pinned to a
# specific commit; override with --build-arg KERNELBENCH_COMMIT=<sha>).
# -f makes curl exit non-zero on an HTTP error (e.g. 404 for a bad commit)
# instead of saving the error page as run_and_check.py; -sS hides the progress
# meter but still prints errors; -L follows redirects.
ARG KERNELBENCH_COMMIT=423217d
RUN curl -fsSL -o run_and_check.py \
    "https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/${KERNELBENCH_COMMIT}/scripts/run_and_check.py"

# Ensure the entrypoint script is executable regardless of the mode it had in
# the build context.
RUN chmod +x evaluate.sh

ENTRYPOINT ["./evaluate.sh"]