# syntax=docker/dockerfile:1

# Runtime image for the GemmaCut SpectralQuant vLLM fork.
#
# Layers a pinned checkout of the vllm-spectral sources, an optional
# "sidecar" artifact and a wrapper entrypoint on top of a stock vLLM image.
# Model weights are not baked into the image (see the description label).
#
# Requires BuildKit: the Dockerfile uses `RUN --mount` and `COPY --chmod`.

# Base image is overridable so a different vLLM/CUDA build can be swapped in.
ARG BASE_IMAGE=vllm/vllm-openai:gemma4-cu130
FROM ${BASE_IMAGE}

# Build-time inputs. VLLM_COMMIT pins the exact source revision; VLLM_BRANCH
# only selects which branch is cloned before that revision is checked out.
ARG VLLM_REPO=https://github.com/bluecopa/vllm-spectral.git
ARG VLLM_BRANCH=spectral-codebook-docker
ARG VLLM_COMMIT=008dd7f87fb9de185e536ad30b4d524024ed9b9f

# Inputs handed to docker/download_sidecar.py. Set INCLUDE_SIDECAR to anything
# other than 1 to skip the download and supply the sidecar at runtime instead.
ARG HF_REPO_ID=satya007/gemmacut-spectral
ARG SIDECAR_SHA256=e47a36c13467cbedf720e7f782b976df3dcda2d989c727113a8315008661a3e4
ARG INCLUDE_SIDECAR=1

LABEL org.opencontainers.image.title="gemmacut-spectral" \
      org.opencontainers.image.description="GemmaCut SpectralQuant Phase 2 + Eagle3 vLLM runtime; model weights are not baked into the image." \
      org.opencontainers.image.source="https://github.com/bluecopa/vllm-spectral" \
      org.opencontainers.image.revision="${VLLM_COMMIT}"

# Runtime defaults. VLLM_SOURCE and GEMMACUT_HOME are also used by the RUN
# steps below; the SPECTRAL_* toggles are presumably read by the entrypoint or
# the forked vLLM code -- confirm against docker/entrypoint.sh.
ENV VLLM_SOURCE=/opt/vllm-spectral \
    GEMMACUT_HOME=/opt/gemmacut \
    SPECTRAL_SIDECAR=/opt/gemmacut/artifacts/spectral_sidecar_chat_v2.pt \
    HF_HUB_DISABLE_XET=1 \
    SPECTRAL_TRITON_COMPRESS=1 \
    SPECTRAL_TRITON_DEQUANT=1 \
    SPECTRAL_CUDA_GRAPH=1 \
    SPECTRAL_VERIFY=0 \
    DISABLE_HYBRID_KV_CACHE_MANAGER=0

# bash is required for the `[[ ... ]]` test below; pipefail makes any piped
# command fail the build instead of being masked by the last stage.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# git is needed for the clone below. NOTE(review): cmake and ninja-build are
# not invoked anywhere in this file -- presumably used by the entrypoint or a
# later build step; confirm they are still required.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        git \
        ninja-build && \
    rm -rf /var/lib/apt/lists/*

# Clone the fork and move to the pinned commit; the final `git log` prints the
# resolved revision into the build output for traceability.
RUN git clone --branch "${VLLM_BRANCH}" "${VLLM_REPO}" "${VLLM_SOURCE}" && \
    git -C "${VLLM_SOURCE}" checkout "${VLLM_COMMIT}" && \
    git -C "${VLLM_SOURCE}" log --oneline -1

# Optionally fetch the sidecar at build time. The helper script is bind-mounted
# for this step only, so it never lands in an image layer (a COPY followed by a
# later `rm` would still leave it in the COPY layer).
RUN --mount=type=bind,source=docker/download_sidecar.py,target=/tmp/download_sidecar.py \
    mkdir -p "${GEMMACUT_HOME}/artifacts" && \
    if [[ "${INCLUDE_SIDECAR}" == "1" ]]; then \
        HF_REPO_ID="${HF_REPO_ID}" \
        SIDECAR_SHA256="${SIDECAR_SHA256}" \
        python3 /tmp/download_sidecar.py; \
    else \
        echo "INCLUDE_SIDECAR=0; mount or set SPECTRAL_SIDECAR at runtime"; \
    fi

# Install the wrapper entrypoint with its executable bit set in the same layer,
# avoiding a separate `RUN chmod` layer that would duplicate the file.
COPY --chmod=0755 docker/entrypoint.sh /usr/local/bin/gemmacut-spectral

# Documentation only: the port the service is expected to listen on.
EXPOSE 8000

# The wrapper is the fixed entrypoint; "serve" is its default argument and can
# be replaced on the `docker run` command line.
ENTRYPOINT ["gemmacut-spectral"]
CMD ["serve"]