# Server / Dockerfile
# Last updated by omaryasserhassan — "Update Dockerfile" (commit 91c1453, verified)
FROM python:3.11-slim

# Runtime defaults: no pip cache layers, unbuffered logs, HF Spaces default port.
ENV PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Minimal runtime libs (no compilers): libgomp1 for OpenMP, libopenblas0 for BLAS.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# ---- Python deps (manifest copied alone so this layer caches until requirements change) ----
COPY requirements.txt .
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- llama-cpp-python: compiled from source (no prebuilt CPU wheel for this combo) ----
# Toolchain is installed, used, and purged in the SAME layer: removing it in a
# later RUN would not shrink the image.
# NOTE(review): llama.cpp renamed its CMake switches LLAMA_* -> GGML_* before
# the tree vendored by 0.3.0; both spellings are passed so whichever the pinned
# version reads takes effect (CPU-only build either way).
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential cmake \
    && CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF" \
       pip install --no-cache-dir "llama-cpp-python==0.3.0" \
    && apt-get purge -y --auto-remove build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

# ---- App code ----
COPY . .

# ---- Model path is configurable via env ----
ENV MODEL_PATH=/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf

# ---- Pre-download model into the image so the container starts without network ----
# (local_dir_use_symlinks is deprecated in huggingface_hub and was a no-op with
# local_dir=None, so it is dropped.)
RUN python - <<'PY'
from huggingface_hub import hf_hub_download
import os, shutil

dest = os.environ.get("MODEL_PATH", "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf")
os.makedirs(os.path.dirname(dest), exist_ok=True)
p = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
)
shutil.copy2(p, dest)
print("Model copied to:", dest)
PY

# ---- Drop root (HF Spaces convention: uid 1000). App files stay root-owned but
# world-readable; a recursive chown is avoided on purpose — it would duplicate
# the multi-GB model file into a new layer. NOTE(review): confirm the app never
# writes under /app at runtime.
RUN useradd -m -u 1000 app
USER app

EXPOSE 7860
# `exec` replaces the shell so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["bash", "-lc", "exec uvicorn app:app --host 0.0.0.0 --port ${PORT}"]