Spaces:
Build error
Build error
| # --- STAGE 1: Build Environment --- | |
| FROM python:3.11-slim-bookworm AS builder | |
| # Build-time settings: non-interactive apt, unbuffered Python output, and a | |
| # forced CMake source build of llama.cpp using the flags in CMAKE_ARGS | |
| # (native tuning off, AVX2 on, so the binary is portable across AVX2 hosts). | |
| ENV DEBIAN_FRONTEND=noninteractive | |
| ENV PYTHONUNBUFFERED=1 | |
| ENV CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_AVX2=ON -DGGML_FLASH_ATTN=ON" | |
| ENV FORCE_CMAKE=1 | |
| # Compiler toolchain and fetch tools needed to build the wheel from source; | |
| # the apt caches are removed in the same layer to keep it small. | |
| RUN apt-get update \ | |
| && apt-get install -y --no-install-recommends build-essential cmake curl git \ | |
| && apt-get clean \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Pull the prebuilt 'uv' and 'uvx' binaries from the upstream uv image | |
| COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ | |
| WORKDIR /app | |
| # Compile and install llama-cpp-python (with the server extra) into the system | |
| # site-packages so the runtime stage can copy it out of /usr/local. | |
| RUN uv pip install --system llama-cpp-python[server] | |
| # --- STAGE 2: Runtime Environment --- | |
| FROM python:3.11-slim-bookworm | |
| # Hugging Face Spaces requires UID 1000 | |
| RUN useradd -m -u 1000 user | |
| ENV HOME=/home/user \ | |
| PATH=/home/user/.local/bin:$PATH \ | |
| PYTHONUNBUFFERED=1 | |
| # NOTE: WORKDIR creates the directory as root; ownership is handed to 'user' | |
| # in the RUN step below, before the switch to the unprivileged account. | |
| WORKDIR $HOME/app | |
| # Copy the compiled libraries from the builder stage | |
| COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages | |
| COPY --from=builder /usr/local/bin /usr/local/bin | |
| # Everything that needs root happens here, BEFORE 'USER user': | |
| # - apt-get cannot run as UID 1000 (no write access to /var/lib/apt), which | |
| # broke the build when USER was set earlier in this stage. | |
| # - libgomp1 is the OpenMP runtime that the llama.cpp shared library built in | |
| # stage 1 is expected to link against; the slim base image does not ship it. | |
| # - wget is only needed for the download and is purged in the same layer. | |
| # Download the specific Q6_K_XL model provided | |
| # Q6_K_XL is ~700MB; fits easily in the 16GB RAM alongside the 32k KV cache. | |
| RUN apt-get update && apt-get install -y wget libgomp1 && \ | |
| wget -O model.gguf "https://huggingface.co/unsloth/LFM2-700M-GGUF/resolve/main/LFM2-700M-UD-Q6_K_XL.gguf?download=true" && \ | |
| chown -R user:user /home/user/app && \ | |
| apt-get purge -y wget && apt-get autoremove -y && rm -rf /var/lib/apt/lists/* | |
| # Drop privileges for the running container (Spaces runs as UID 1000) | |
| USER user | |
| # EXPOSE port 7860 (Hugging Face standard) | |
| EXPOSE 7860 | |
| # --- INFERENCE CONFIGURATION --- | |
| # n_ctx: 32768 (Requested context window) | |
| # n_threads: 2 (Matches Hugging Face Free Tier 2 vCPU) | |
| # host: 0.0.0.0 (Binds to all interfaces for HF proxy) | |
| # model_alias: lfm2-700m (OpenAI compatible endpoint name) | |
| ENTRYPOINT ["python3", "-m", "llama_cpp.server"] | |
| CMD [ \ | |
| "--model", "model.gguf", \ | |
| "--n_ctx", "32768", \ | |
| "--n_threads", "2", \ | |
| "--host", "0.0.0.0", \ | |
| "--port", "7860", \ | |
| "--model_alias", "lfm2-700m" \ | |
| ] |