Spaces:

latam-gpt
/

eval-runner

Running

App Files Files Community

eval-runner / Dockerfile

ouhenio

Webhook receiver mode: Space receives POSTs, spawns GPU Jobs

9a21135 2 months ago

raw

history blame contribute delete

1.94 kB

	# Base image tag is a build argument so a build can be pinned to a known-good
	# release by passing a VLLM_TAG build argument. The default still tracks the
	# moving `latest` tag, which is not reproducible across rebuilds.
	ARG VLLM_TAG=latest
	FROM vllm/vllm-openai:${VLLM_TAG}

	# ─── OS packages ──────────────────────────────────────────────────────────────
	# git is installed here; the apt package lists are removed in the same layer
	# so they do not add to the image size.
	RUN apt-get update \
	 && apt-get install -y --no-install-recommends \
	        git \
	 && rm -rf /var/lib/apt/lists/*

	# ─── Provide a `python` command (the base image ships only `python3`) ────────
	# --force replaces any existing /usr/bin/python instead of failing.
	RUN ln --symbolic --force /usr/bin/python3 /usr/bin/python

	# ─── Remove deep_ep (MoE expert parallelism, requires NVSHMEM not available) ─
	# Best-effort step: the trailing `|| true` keeps the build going even if the
	# uninstall command exits non-zero.
	RUN pip uninstall -y deep_ep || true

	# ─── olmes: installed from a shallow GitHub checkout (not on PyPI) ───────────
	# Clone, install with the `gpu` extra, and delete the checkout in one layer so
	# the source tree does not remain in the image.
	RUN olmes_src=/tmp/olmes \
	 && git clone --depth 1 https://github.com/allenai/olmes.git "${olmes_src}" \
	 && pip install --no-cache-dir "${olmes_src}[gpu]" \
	 && rm -rf "${olmes_src}"

	# ─── Python packages needed by the entrypoint ────────────────────────────────
	# One package per line, sorted, to keep future diffs small.
	RUN pip install --no-cache-dir \
	        huggingface-hub \
	        pyyaml \
	        requests

	# ─── Compatibility patches ───────────────────────────────────────────────────
	# This step must stay AFTER every pip install in this file, so the patches are
	# applied to the final set of installed packages.
	COPY patches.sh /app/patches.sh
	RUN chmod +x /app/patches.sh \
	 && /app/patches.sh

	# ─── Entrypoint script and eval config ───────────────────────────────────────
	# /app becomes the working directory; both files are copied into it and end up
	# at /app/entrypoint.py and /app/eval.yaml.
	WORKDIR /app
	COPY entrypoint.py ./entrypoint.py
	COPY eval.yaml ./eval.yaml

	# The ENTRYPOINT inherited from the base image is cleared: it starts the vllm
	# server, which requires a GPU and crashes on CPU Spaces.
	# The entrypoint script chooses its mode from WEBHOOK_PAYLOAD:
	#   not set → Space mode: runs the webhook receiver on port 7860
	#   set     → Job mode: runs the eval pipeline and exits
	ENTRYPOINT []
	CMD ["python3", "/app/entrypoint.py"]
	EXPOSE 7860/tcp