# Case Zero - Hugging Face Space (Docker SDK, CPU).
# Two stages: (1) Node builds the Preact pixel-art SPA into web/dist; (2) Python compiles
# llama.cpp from source on a stable glibc base (bookworm / gcc 12 - the Gradio-SDK builder's
# trixie/gcc 14 could not build it) and bakes everything. The app is served 100% by one
# gradio.Server: the built SPA as static files + the /api routes. No GPU, no remote endpoint.

# ---- stage 1: build the frontend bundle ----
# Throwaway Node stage: the final image only copies /web/dist out of it.
FROM node:22-slim AS web
WORKDIR /web
# Manifests go in first so the dependency layer is reused while only app source changes.
COPY web/package.json web/package-lock.json /web/
RUN npm ci
# Then the rest of the frontend sources, followed by the production build.
COPY web/ /web/
RUN npm run build

# ---- stage 2: python runtime ----
# Pinned to the bookworm variant on purpose. llama.cpp is compiled from source by the
# pip install below and needs bookworm's gcc 12; the unsuffixed `python:3.12-slim` tag
# follows the image's default Debian release, so it can silently move to trixie / gcc 14,
# which is the toolchain this build has to avoid.
FROM python:3.12-slim-bookworm AS runtime

# Toolchain for the from-source llama.cpp build (build-essential, cmake, git) plus the
# libsndfile1 shared library (presumably for the audio/voice dependencies - confirm against
# requirements.txt). The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Everything below runs as an unprivileged user with UID 1000 (the UID Hugging Face
# Docker Spaces run containers as), so files created here are writable at runtime.
RUN useradd -m -u 1000 user
USER user
WORKDIR /home/user/app

# Runtime environment. ~/.local/bin is put on PATH because pip installs with --user.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    GRADIO_ANALYTICS_ENABLED=False \
    HF_HUB_OFFLINE=0 \
    CASE0_PORT=7860

# Build settings picked up by the CMake-based wheel build during pip install.
# Portable build (no -march=native) so the compiled wheel runs on any HF CPU.
ENV CMAKE_ARGS=-DGGML_NATIVE=OFF \
    CMAKE_BUILD_PARALLEL_LEVEL=4

# Install dependencies before copying the app so this (slow, compiling) layer stays cached
# until requirements.txt itself changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt

COPY --chown=user . .
# Bring in the production SPA bundle built in stage 1 (web/dist is .dockerignored).
COPY --from=web --chown=user /web/dist ./web/dist

# Bake the open weights (LLM GGUF + Supertonic voices) so cold starts are fast and the
# running container needs no network. Deliberately best-effort: a failed prefetch must not
# fail the image build, the app falls back to fetching at runtime instead.
RUN python scripts/fetch_models.py || echo "weight prefetch skipped (will fetch at runtime)"

# Documentation only; matches CASE0_PORT above.
EXPOSE 7860
CMD ["python", "app.py"]