File size: 2,020 Bytes
6b080ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Base image for the single build stage; override with the BASE_IMAGE build argument.
# An ARG declared before FROM is only visible to FROM lines, which is why the
# remaining build arguments are declared after FROM.
# NOTE(review): the default is a mutable tag; pin by digest if reproducible builds matter.
ARG BASE_IMAGE=vllm/vllm-openai:gemma4-cu130
FROM ${BASE_IMAGE}

# Source pin for the vLLM fork: the repository is cloned at VLLM_BRANCH and then
# checked out at the exact VLLM_COMMIT (also recorded in the image revision label).
ARG VLLM_REPO=https://github.com/bluecopa/vllm-spectral.git
ARG VLLM_BRANCH=spectral-codebook-docker
ARG VLLM_COMMIT=008dd7f87fb9de185e536ad30b4d524024ed9b9f
# Inputs handed to docker/download_sidecar.py as environment variables.
# NOTE(review): presumably the Hugging Face repo to fetch from and the expected
# checksum of the sidecar file; confirm against the script.
ARG HF_REPO_ID=satya007/gemmacut-spectral
ARG SIDECAR_SHA256=e47a36c13467cbedf720e7f782b976df3dcda2d989c727113a8315008661a3e4
# The sidecar download step runs only when this is exactly "1"; any other value
# skips it and leaves the sidecar to be provided at runtime.
ARG INCLUDE_SIDECAR=1

# OCI image metadata. The revision label records the pinned VLLM_COMMIT build argument.
LABEL org.opencontainers.image.title="gemmacut-spectral" \
      org.opencontainers.image.description="GemmaCut SpectralQuant Phase 2 + Eagle3 vLLM runtime; model weights are not baked into the image." \
      org.opencontainers.image.source="https://github.com/bluecopa/vllm-spectral" \
      org.opencontainers.image.revision="${VLLM_COMMIT}"

# Filesystem layout. These values are used by the build steps below and, being
# ENV rather than ARG, also persist into the running container.
ENV VLLM_SOURCE=/opt/vllm-spectral \
    GEMMACUT_HOME=/opt/gemmacut \
    SPECTRAL_SIDECAR=/opt/gemmacut/artifacts/spectral_sidecar_chat_v2.pt

# NOTE(review): presumably read by the Hugging Face Hub client; confirm the intent.
ENV HF_HUB_DISABLE_XET=1

# Runtime feature toggles. Their meaning is defined by whatever reads them
# (not visible in this file); only the default values are set here.
ENV SPECTRAL_TRITON_COMPRESS=1 \
    SPECTRAL_TRITON_DEQUANT=1 \
    SPECTRAL_CUDA_GRAPH=1 \
    SPECTRAL_VERIFY=0 \
    DISABLE_HYBRID_KV_CACHE_MANAGER=0

# Run every following RUN step under bash with pipefail, so a failure in any
# stage of a pipeline fails the step. bash is also required by the `[[ ... ]]`
# test used in the sidecar download step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# OS packages. git is needed by the clone step below.
# NOTE(review): cmake and ninja-build are not invoked anywhere in this file;
# presumably they are needed later (e.g. by the entrypoint) - confirm.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        ca-certificates \
        cmake \
        git \
        ninja-build \
 && rm -rf /var/lib/apt/lists/*

# Clone the fork at the requested branch into VLLM_SOURCE, then move the work
# tree to the exact pinned commit. The final `git log` prints the checked-out
# commit into the build output.
RUN git clone --branch "${VLLM_BRANCH}" "${VLLM_REPO}" "${VLLM_SOURCE}" \
 && git -C "${VLLM_SOURCE}" checkout "${VLLM_COMMIT}" \
 && git -C "${VLLM_SOURCE}" log -1 --oneline

# Optionally fetch the spectral sidecar at build time.
#
# The helper script is bind-mounted from the build context for this step only.
# A COPY followed by `rm -f` in a later RUN would not remove it from the image:
# the file would still be stored in the COPY layer. The bind mount keeps it out
# of every layer. Requires BuildKit (the default builder in current Docker).
#
# The download runs only when INCLUDE_SIDECAR is exactly "1". HF_REPO_ID and
# SIDECAR_SHA256 are passed to the script as environment variables. Any other
# value skips the download, and the sidecar must then be mounted or
# SPECTRAL_SIDECAR pointed elsewhere at runtime.
RUN --mount=type=bind,source=docker/download_sidecar.py,target=/tmp/download_sidecar.py \
    mkdir -p "${GEMMACUT_HOME}/artifacts" && \
    if [[ "${INCLUDE_SIDECAR}" == "1" ]]; then \
      HF_REPO_ID="${HF_REPO_ID}" \
      SIDECAR_SHA256="${SIDECAR_SHA256}" \
      python3 /tmp/download_sidecar.py; \
    else \
      echo "INCLUDE_SIDECAR=0; mount or set SPECTRAL_SIDECAR at runtime"; \
    fi

# Install the entrypoint wrapper and make it executable in a single layer.
# A separate `RUN chmod +x` would store a second copy of the file in its own
# layer. COPY --chmod requires BuildKit (the default builder in current Docker).
COPY --chmod=0755 docker/entrypoint.sh /usr/local/bin/gemmacut-spectral

# EXPOSE only documents the port; it still has to be published at run time.
# NOTE(review): 8000 is presumably the port the process started by the
# entrypoint listens on - confirm in docker/entrypoint.sh.
EXPOSE 8000
# Exec-form entrypoint: the wrapper installed at /usr/local/bin/gemmacut-spectral,
# looked up by name (assumes /usr/local/bin is on PATH in the base image).
ENTRYPOINT ["gemmacut-spectral"]
# Default argument passed to the entrypoint; replaced by any arguments supplied
# to `docker run`.
CMD ["serve"]