eval-runner / Dockerfile
ouhenio's picture
Webhook receiver mode: Space receives POSTs, spawns GPU Jobs
9a21135
# Base image tag is a build argument so a build can be pinned to a known-good
# vLLM release (e.g. `--build-arg VLLM_IMAGE_TAG=<tag>`) without editing this
# file. The default keeps the previous behaviour of tracking `latest`.
# NOTE(review): `latest` is a moving target; the later pip installs and
# patches.sh run against whatever it resolves to at build time.
ARG VLLM_IMAGE_TAG=latest
FROM vllm/vllm-openai:${VLLM_IMAGE_TAG}
# ─── System packages ──────────────────────────────────────────────────────────
# Installs git; the apt package index is deleted in the same layer.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*
# ─── Provide a `python` command (base image ships only `python3`) ────────────
# Creates /usr/bin/python as a symlink to /usr/bin/python3, replacing any
# existing entry at that path.
RUN ln --symbolic --force /usr/bin/python3 /usr/bin/python
# ─── Drop deep_ep (MoE expert parallelism; needs NVSHMEM, which is unavailable) ─
# Best-effort: `|| true` keeps the build going even if the uninstall fails.
RUN pip uninstall --yes deep_ep || true
# ─── Install olmes from GitHub (not on PyPI) ─────────────────────────────────
# OLMES_REF optionally pins the checkout to a branch or tag name
# (`--build-arg OLMES_REF=<name>`). Left empty (the default), no --branch flag
# is passed and the repository's default branch is cloned, exactly as before.
# A raw commit SHA is not accepted here: `git clone --branch` only takes
# branch or tag names.
ARG OLMES_REF=""
# The clone lives in /tmp and is removed in the same layer once pip has
# installed the package with its `gpu` extra.
RUN git clone --depth 1 ${OLMES_REF:+--branch "$OLMES_REF"} \
        https://github.com/allenai/olmes.git /tmp/olmes && \
    pip install --no-cache-dir "/tmp/olmes[gpu]" && \
    rm -rf /tmp/olmes
# ─── Python packages needed by the entrypoint ────────────────────────────────
# One package per line, sorted, so additions and removals diff cleanly.
RUN pip install --no-cache-dir \
        huggingface-hub \
        pyyaml \
        requests
# ─── Compatibility patches (must run AFTER all pip installs) ─────────────────
# Copies patches.sh into /app, marks it executable and runs it in one layer.
COPY patches.sh /app/
RUN chmod +x /app/patches.sh \
    && /app/patches.sh
# ─── Entrypoint script + eval config ─────────────────────────────────────────
# /app becomes the working directory; both files are copied into it.
WORKDIR /app
COPY entrypoint.py eval.yaml ./
# Reset base image's ENTRYPOINT (vllm server requires GPU, crashes on CPU Spaces).
# Space mode: entrypoint detects no WEBHOOK_PAYLOAD → runs webhook receiver on 7860.
# Job mode: WEBHOOK_PAYLOAD is set → runs eval pipeline and exits.
# An empty exec-form ENTRYPOINT clears the one inherited from the base image,
# so the CMD below is the process the container actually starts.
ENTRYPOINT []
CMD ["python3", "/app/entrypoint.py"]
# Documents the port the webhook receiver listens on (EXPOSE does not publish it).
EXPOSE 7860