# Image that layers the olmes evaluation toolkit and a small Python entrypoint
# on top of the vLLM OpenAI-server base image.
#
# Build args (both optional; the defaults reproduce the unpinned behavior):
#   VLLM_TAG   tag of vllm/vllm-openai to build from      (default: latest)
#   OLMES_REF  branch or tag of allenai/olmes to install  (default: empty,
#              i.e. the repository's default branch)

# Declared before FROM so it can be used in the FROM line. Pin it for
# reproducible builds, e.g. `docker build --build-arg VLLM_TAG=<version> .`
ARG VLLM_TAG=latest
FROM vllm/vllm-openai:${VLLM_TAG}

# ─── System deps ──────────────────────────────────────────────────────────────
# git is needed for the olmes clone below. The apt lists are removed in the same
# layer so they never end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# ─── Ensure `python` exists (base image only has `python3`) ──────────────────
RUN ln -sf /usr/bin/python3 /usr/bin/python

# ─── Remove deep_ep (MoE expert parallelism, requires NVSHMEM not available) ─
# Best-effort on purpose: `|| true` keeps the build going when deep_ep is not
# installed in the chosen base image.
RUN pip uninstall -y deep_ep || true

# ─── Install olmes from GitHub (not on PyPI) ─────────────────────────────────
# OLMES_REF is passed to `git clone --branch`, which accepts a branch or tag
# name (not a bare commit SHA). When it is empty the `${VAR:+...}` expansion
# yields nothing and the clone uses the default branch. The checkout is deleted
# in the same layer once pip has installed from it.
ARG OLMES_REF=""
RUN git clone --depth 1 ${OLMES_REF:+--branch "${OLMES_REF}"} \
        https://github.com/allenai/olmes.git /tmp/olmes && \
    pip install --no-cache-dir "/tmp/olmes[gpu]" && \
    rm -rf /tmp/olmes

# ─── Python deps for entrypoint ──────────────────────────────────────────────
RUN pip install --no-cache-dir pyyaml huggingface-hub requests

# ─── Apply compatibility patches (must run AFTER all pip installs) ───────────
COPY patches.sh /app/patches.sh
RUN chmod +x /app/patches.sh && /app/patches.sh

# ─── Copy entrypoint + eval config ───────────────────────────────────────────
# Copied last so that editing these files does not invalidate the dependency
# layers above.
COPY entrypoint.py /app/entrypoint.py
COPY eval.yaml /app/eval.yaml

WORKDIR /app

# Reset base image's ENTRYPOINT (vllm server requires GPU, crashes on CPU Spaces).
# Space mode: entrypoint detects no WEBHOOK_PAYLOAD → runs webhook receiver on 7860.
# Job mode:   WEBHOOK_PAYLOAD is set → runs eval pipeline and exits.
EXPOSE 7860
ENTRYPOINT []
CMD ["python3", "/app/entrypoint.py"]