Spaces:

spitfire4794
/

lfm

Build error

App Files Files Community

lfm / Dockerfile

spitfire4794

Create Dockerfile

31491b9 verified about 2 months ago

raw

history blame contribute delete

2.12 kB

	# --- STAGE 1: Build Environment ---
	# Compiles llama-cpp-python (with the server extra) from source for CPU.
	# Nothing from this stage reaches the final image except what the runtime
	# stage explicitly copies out of it.
	FROM python:3.11-slim-bookworm AS builder

	# Build-time only: keeps apt non-interactive during RUN steps without
	# persisting the variable into the image environment.
	ARG DEBIAN_FRONTEND=noninteractive

	# CMAKE_ARGS / FORCE_CMAKE are consumed by the llama-cpp-python build:
	#   GGML_NATIVE=OFF  - intended to avoid tuning the binary to the build host
	#   GGML_AVX2=ON     - enable AVX2 code paths explicitly
	# NOTE(review): GGML_FLASH_ATTN is passed through unchanged; confirm it is an
	# option the bundled llama.cpp CMake project actually recognises.
	ENV PYTHONUNBUFFERED=1 \
	    CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_AVX2=ON -DGGML_FLASH_ATTN=ON" \
	    FORCE_CMAKE=1

	# Toolchain needed to compile the native extension. The apt lists are
	# removed in the same layer so they never persist.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	        build-essential \
	        cmake \
	        curl \
	        git \
	    && apt-get clean && rm -rf /var/lib/apt/lists/*

	# Bring in the 'uv' installer binaries from the upstream image.
	# NOTE(review): ':latest' is a floating tag; pin a specific uv version once
	# one has been validated, so rebuilds are reproducible.
	COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
	WORKDIR /app

	# Install llama-cpp-python with server support into the system interpreter.
	# The requirement is quoted so the shell cannot treat "[server]" as a glob.
	# NOTE(review): the package version is unpinned; pin it once a known-good
	# release for this model has been confirmed.
	RUN uv pip install --system "llama-cpp-python[server]"

	# --- STAGE 2: Runtime Environment ---
	# Slim image holding the Python packages from the builder stage plus the
	# GGUF model file. All steps that need root (user creation, apt, writing
	# under /usr/local) run BEFORE the switch to the unprivileged user; running
	# apt-get after `USER user` fails with "permission denied".
	FROM python:3.11-slim-bookworm

	# Hugging Face Spaces requires UID 1000
	RUN useradd -m -u 1000 user

	# Install runtime libraries and download the model in a single layer:
	#   - libgomp1: OpenMP runtime; presumably required by the compiled
	#     llama.cpp library (TODO confirm against the built wheel).
	#   - wget + ca-certificates: used only for the HTTPS download; wget is
	#     purged again in the same layer so it does not persist.
	# The model is written to an absolute path and handed to the runtime user.
	# Q6_K_XL is ~700MB; fits easily in the 16GB RAM alongside the 32k KV cache.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	        ca-certificates \
	        libgomp1 \
	        wget \
	    && install -d -o user -g user /home/user/app \
	    && wget --progress=dot:giga -O /home/user/app/model.gguf \
	        "https://huggingface.co/unsloth/LFM2-700M-GGUF/resolve/main/LFM2-700M-UD-Q6_K_XL.gguf?download=true" \
	    && chown user:user /home/user/app/model.gguf \
	    && apt-get purge -y --auto-remove wget \
	    && rm -rf /var/lib/apt/lists/*

	# Copy the compiled libraries from the builder stage
	COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
	COPY --from=builder /usr/local/bin /usr/local/bin

	# Drop privileges for everything that follows, including the running container.
	USER user
	ENV HOME=/home/user \
	    PATH=/home/user/.local/bin:$PATH \
	    PYTHONUNBUFFERED=1

	# The server is started from here, so the relative "model.gguf" path resolves.
	WORKDIR $HOME/app

	# EXPOSE port 7860 (Hugging Face standard)
	EXPOSE 7860

	# --- INFERENCE CONFIGURATION ---
	# ENTRYPOINT is the server module itself; CMD carries the default flags, so
	# they can be replaced at `docker run` time without touching the entrypoint.
	#   --model        model.gguf   (relative to the working directory)
	#   --n_ctx        32768        (requested context window)
	#   --n_threads    2            (chosen to match a 2 vCPU host)
	#   --host         0.0.0.0      (listen on all interfaces)
	#   --port         7860         (same port as the EXPOSE instruction)
	#   --model_alias  lfm2-700m    (model name reported by the API)
	ENTRYPOINT ["python3", "-m", "llama_cpp.server"]
	CMD ["--model", "model.gguf", \
	     "--n_ctx", "32768", \
	     "--n_threads", "2", \
	     "--host", "0.0.0.0", \
	     "--port", "7860", \
	     "--model_alias", "lfm2-700m"]