lfm / Dockerfile
spitfire4794's picture
Create Dockerfile
31491b9 verified
# --- STAGE 1: Build Environment ---
# Compiles llama-cpp-python (with the OpenAI-compatible server extra) from
# source for CPU. Only site-packages and /usr/local/bin are carried into the
# runtime stage; the compilers installed here are discarded with this stage.
FROM python:3.11-slim-bookworm AS builder

# Build-time only: keep apt non-interactive without persisting the setting
# as an image environment variable.
ARG DEBIAN_FRONTEND=noninteractive

# Build configuration consumed by the llama-cpp-python source build:
#   GGML_NATIVE=OFF  - do not tune for the build host's CPU
#   GGML_AVX2=ON     - enable AVX2 code paths
#   FORCE_CMAKE=1    - force a CMake source build
# NOTE(review): GGML_FLASH_ATTN may not be a recognized CPU build option
# (CMake would then ignore it with a warning) - confirm against llama.cpp.
ENV PYTHONUNBUFFERED=1 \
    CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_AVX2=ON -DGGML_FLASH_ATTN=ON" \
    FORCE_CMAKE=1

# Install the build toolchain (one package per line, sorted for easy diffs).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install 'uv' for fast dependency resolution.
# NOTE(review): ':latest' is unpinned; pin a specific tag or digest for
# reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

WORKDIR /app

# Install llama-cpp-python with server support (compiled for CPU).
# The requirement is quoted so the shell cannot treat "[server]" as a glob.
# NOTE(review): the package version is unpinned; pin it for reproducibility.
RUN uv pip install --system "llama-cpp-python[server]"
# --- STAGE 2: Runtime Environment ---
# Minimal image that serves the model through llama_cpp.server on port 7860.
FROM python:3.11-slim-bookworm

# Hugging Face Spaces requires UID 1000
RUN useradd -m -u 1000 user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1

WORKDIR /home/user/app

# Copy the compiled libraries and console scripts from the builder stage
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Everything in this step needs root, so it must run BEFORE switching to the
# unprivileged user (apt-get cannot run as UID 1000).
#   - libgomp1: OpenMP runtime needed by the compiled llama.cpp library;
#     it is not part of the slim base image.
#   - wget: only needed to fetch the model, purged again in the same layer.
# Download the specific Q6_K_XL model provided.
# Q6_K_XL is ~700MB; fits easily in the 16GB RAM alongside the 32k KV cache.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgomp1 \
        wget \
    && wget --progress=dot:giga -O model.gguf "https://huggingface.co/unsloth/LFM2-700M-GGUF/resolve/main/LFM2-700M-UD-Q6_K_XL.gguf?download=true" \
    && chown -R user:user /home/user/app \
    && apt-get purge -y wget \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# Drop privileges for runtime; all root-only work is done above.
USER user

# EXPOSE port 7860 (Hugging Face standard)
EXPOSE 7860

# --- INFERENCE CONFIGURATION ---
# model: model.gguf (relative to WORKDIR, downloaded above)
# n_ctx: 32768 (Requested context window)
# n_threads: 2 (Matches Hugging Face Free Tier 2 vCPU)
# host: 0.0.0.0 (Binds to all interfaces for HF proxy)
# port: 7860 (Matches the EXPOSE above)
# model_alias: lfm2-700m (OpenAI compatible endpoint name)
ENTRYPOINT ["python3", "-m", "llama_cpp.server"]
CMD [ \
    "--model", "model.gguf", \
    "--n_ctx", "32768", \
    "--n_threads", "2", \
    "--host", "0.0.0.0", \
    "--port", "7860", \
    "--model_alias", "lfm2-700m" \
]