# Server / Dockerfile
# Last updated by omaryasserhassan — "Update Dockerfile" (commit 91c1453, verified)
FROM python:3.11-slim

# Runtime defaults: no pip cache layers, unbuffered logs, HF Spaces default port.
ENV PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Minimal runtime libs (no compilers): libgomp1 for OpenMP, libopenblas0 for BLAS.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# ---- Python deps (manifest copied alone so this layer caches until requirements change) ----
COPY requirements.txt .
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- llama-cpp-python: compiled from source (no prebuilt CPU wheel for this combo) ----
# Toolchain is installed, used, and purged in the SAME layer: removing it in a
# later RUN would not shrink the image.
# NOTE(review): llama.cpp renamed its CMake switches LLAMA_* -> GGML_* before
# the tree vendored by 0.3.0; both spellings are passed so whichever the pinned
# version reads takes effect (CPU-only build either way).
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential cmake \
    && CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF" \
       pip install --no-cache-dir "llama-cpp-python==0.3.0" \
    && apt-get purge -y --auto-remove build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

# ---- App code ----
COPY . .

# ---- Model path is configurable via env ----
ENV MODEL_PATH=/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf

# ---- Pre-download model into the image so the container starts without network ----
# (local_dir_use_symlinks is deprecated in huggingface_hub and was a no-op with
# local_dir=None, so it is dropped.)
RUN python - <<'PY'
from huggingface_hub import hf_hub_download
import os, shutil

dest = os.environ.get("MODEL_PATH", "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf")
os.makedirs(os.path.dirname(dest), exist_ok=True)
p = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
)
shutil.copy2(p, dest)
print("Model copied to:", dest)
PY

# ---- Drop root (HF Spaces convention: uid 1000). App files stay root-owned but
# world-readable; a recursive chown is avoided on purpose — it would duplicate
# the multi-GB model file into a new layer. NOTE(review): confirm the app never
# writes under /app at runtime.
RUN useradd -m -u 1000 app
USER app

EXPOSE 7860
# `exec` replaces the shell so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["bash", "-lc", "exec uvicorn app:app --host 0.0.0.0 --port ${PORT}"]