case0 / Dockerfile
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
raw
history blame
1.7 kB
# Case Zero - Hugging Face Space (Docker SDK, CPU only).
# Two stages: (1) Node builds the Preact pixel-art SPA into web/dist; (2) Python compiles
# llama.cpp from source on a stable glibc base (bookworm / gcc 12 - the Gradio-SDK builder's
# trixie / gcc 14 could not build it) and bakes the app code, the SPA bundle and (best effort)
# the model weights into the image. A single gradio.Server serves the whole app: the built SPA
# as static files plus the /api routes. No GPU, no remote endpoint.
# ---- stage 1: build the frontend bundle ----
# Named stage "web": the runtime stage copies /web/dist out of it; nothing else from this
# stage is referenced later in this file.
FROM node:22-slim AS web
WORKDIR /web
# Copy only the npm manifests first so the dependency layer below is reused from cache
# until package.json or package-lock.json changes.
COPY web/package.json web/package-lock.json ./
# Clean install of the versions recorded in package-lock.json.
RUN npm ci
# Now add the frontend sources and run the package's "build" script.
COPY web/ ./
RUN npm run build
# ---- stage 2: python runtime ----
# Pin the Debian release explicitly. llama.cpp is compiled from source in this stage and is
# known to build with bookworm / gcc 12 but not with trixie / gcc 14. The bare "3.12-slim"
# tag does not name a Debian release and follows whichever one is current upstream, so it
# can silently swap the compiler underneath this build.
FROM python:3.12-slim-bookworm

# Root-only step: C/C++ toolchain, cmake and git for building native Python packages from
# source, plus libsndfile1 (presumably the audio I/O library for the TTS stack - confirm).
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Everything below runs as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
WORKDIR /home/user/app

# "pip install --user" places console scripts in ~/.local/bin, so put it on PATH.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH
ENV GRADIO_ANALYTICS_ENABLED=False
# Hub access stays enabled so the runtime weight fetch fallback (see below) can work.
ENV HF_HUB_OFFLINE=0
# Same port as EXPOSE below; presumably read by app.py - confirm.
ENV CASE0_PORT=7860
# Portable build (no -march=native) so the compiled wheel runs on any HF CPU.
ENV CMAKE_ARGS=-DGGML_NATIVE=OFF
ENV CMAKE_BUILD_PARALLEL_LEVEL=4

# Install Python dependencies before copying the sources so this expensive layer is only
# rebuilt when requirements.txt changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt

COPY --chown=user . .
# Bring in the production SPA bundle built in stage 1 (web/dist is .dockerignored).
COPY --from=web --chown=user /web/dist ./web/dist

# Bake the open weights (LLM GGUF + Supertonic voices) so cold starts are fast and the
# running container needs no network. Deliberately best effort: a failed prefetch must not
# fail the image build, because the app falls back to fetching the weights at runtime.
RUN python scripts/fetch_models.py || echo "weight prefetch skipped (will fetch at runtime)"

EXPOSE 7860
CMD ["python", "app.py"]