# Base image tag is overridable at build time, e.g.
#   docker build --build-arg VLLM_TAG=<release tag> .
# The default stays `latest` so existing builds behave exactly as before, but
# `latest` is a moving target: pass a fixed release tag for reproducible builds.
ARG VLLM_TAG=latest
FROM vllm/vllm-openai:${VLLM_TAG}

# ─── System deps ──────────────────────────────────────────────────────────────
# git is the only OS package added; the apt package lists are dropped in the
# same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
    && rm -rf /var/lib/apt/lists/*

# ─── Ensure `python` exists (base image only has `python3`) ──────────────────
# Force-create the symlink so that any pre-existing /usr/bin/python is replaced.
RUN ln -s -f /usr/bin/python3 /usr/bin/python

# ─── Remove deep_ep (MoE expert parallelism, requires NVSHMEM not available) ─
# Best-effort: `|| true` keeps the build going even if the uninstall fails.
RUN pip uninstall --yes deep_ep || true

# ─── Install olmes from GitHub (not on PyPI) ─────────────────────────────────
# OLMES_REF optionally selects a branch or tag to build from, e.g.
#   docker build --build-arg OLMES_REF=<branch-or-tag> .
# When it is empty (the default) the repository's default branch is cloned,
# exactly as before. `git clone --branch` accepts branch and tag names only,
# not raw commit SHAs.
ARG OLMES_REF=""
# The checkout is removed in the same layer so only the installed package stays.
RUN git clone --depth 1 ${OLMES_REF:+--branch "$OLMES_REF"} \
        https://github.com/allenai/olmes.git /tmp/olmes \
    && pip install --no-cache-dir "/tmp/olmes[gpu]" \
    && rm -rf /tmp/olmes

# ─── Python deps for entrypoint ──────────────────────────────────────────────
# One package per line, sorted alphabetically, to keep future diffs small.
RUN pip install --no-cache-dir \
        huggingface-hub \
        pyyaml \
        requests

# ─── Apply compatibility patches (must run AFTER all pip installs) ───────────
# The script is copied in, marked executable, and run once at build time.
COPY patches.sh /app/patches.sh
RUN chmod +x /app/patches.sh \
    && /app/patches.sh

# ─── Copy entrypoint + eval config ───────────────────────────────────────────
# Both files land in /app, which is also the working directory at runtime.
WORKDIR /app
COPY entrypoint.py eval.yaml ./

# Clear the ENTRYPOINT inherited from the base image (vllm server requires GPU,
# crashes on CPU Spaces) and start our own script instead:
#   - Space mode: entrypoint detects no WEBHOOK_PAYLOAD → runs webhook receiver on 7860.
#   - Job mode:   WEBHOOK_PAYLOAD is set → runs eval pipeline and exits.
ENTRYPOINT []
CMD ["python3", "/app/entrypoint.py"]

# Documentation only: the port used by the webhook receiver in Space mode.
EXPOSE 7860