# Case Zero - Hugging Face Space (Docker SDK, CPU).
#
# Two stages:
#   (1) Node builds the Preact pixel-art SPA into web/dist;
#   (2) Python compiles llama.cpp from source on a stable glibc base
#       (bookworm / gcc 12 - the Gradio-SDK builder's trixie/gcc 14 could not
#       build it) and bakes everything.
#
# The app is served 100% by one gradio.Server: the built SPA as static files +
# the /api routes. No GPU, no remote endpoint.

# ---- stage 1: build the frontend bundle ----
FROM node:22-slim AS web
WORKDIR /web

# Copy only the manifests first so the `npm ci` layer stays cached until the
# dependency set actually changes.
COPY web/package.json web/package-lock.json ./
RUN npm ci

COPY web/ ./
RUN npm run build

# ---- stage 2: python runtime ----
# Pinned to the bookworm variant on purpose: the plain `-slim` tag floats with
# the image's default Debian release, which would silently move this stage onto
# the trixie / gcc 14 toolchain that the header says cannot build llama.cpp.
FROM python:3.12-slim-bookworm

# Toolchain for compiling llama.cpp from source, plus libsndfile1.
# Runs as root (before USER below); apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Non-root user with uid 1000; everything below runs as this user.
RUN useradd -m -u 1000 user
USER user
WORKDIR /home/user/app

# `pip install --user` places console scripts in ~/.local/bin, so put it on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

ENV GRADIO_ANALYTICS_ENABLED=False \
    HF_HUB_OFFLINE=0 \
    CASE0_PORT=7860

# Portable build (no -march=native) so the compiled wheel runs on any HF CPU.
ENV CMAKE_ARGS=-DGGML_NATIVE=OFF \
    CMAKE_BUILD_PARALLEL_LEVEL=4

# Install Python dependencies before copying the source so that source-only
# changes do not invalidate the (slow, compiled-from-source) dependency layer.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt

COPY --chown=user . .

# Bring in the production SPA bundle built in stage 1 (web/dist is .dockerignored).
COPY --from=web --chown=user /web/dist ./web/dist

# Bake the open weights (LLM GGUF + Supertonic voices) so cold starts are fast and the
# running container needs no network. Falls back to a runtime fetch if this is skipped.
# The `|| echo` is deliberate: a failed prefetch must not fail the image build.
RUN python scripts/fetch_models.py || echo "weight prefetch skipped (will fetch at runtime)"

EXPOSE 7860
CMD ["python", "app.py"]