# syntax=docker/dockerfile:1
# CPU-only Hugging Face Space image: serves a FastAPI app via uvicorn on port 7860.
FROM python:3.11-slim

ENV PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Minimal runtime libs only (no compilers kept in the final image).
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# ---- Python deps (non-llama first so this layer caches independently) ----
COPY requirements.txt .
RUN python -m pip install --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- llama-cpp-python: compile from source, then purge the toolchain in the
# SAME layer so build-essential/cmake never land in the image. llama-cpp-python
# 0.3.x bundles a llama.cpp where the build switches were renamed from
# LLAMA_* to GGML_* (the old -DLLAMA_CUBLAS name aborts CMake configure),
# so the GGML_* spellings are required here. BLAS/CUDA are off: HF CPU Space.
RUN apt-get update && apt-get install -y --no-install-recommends build-essential cmake \
    && CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_CUDA=OFF" \
       pip install --no-cache-dir "llama-cpp-python==0.3.0" \
    && apt-get purge -y --auto-remove build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

# ---- Model path is configurable via env ----
ENV MODEL_PATH=/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf

# ---- Pre-download & copy model to MODEL_PATH ----
# Done BEFORE `COPY . .` so editing app code does not invalidate this
# multi-GB cached layer. Only needs huggingface_hub, which requirements.txt
# already installed above. Heredoc RUN requires BuildKit (Dockerfile 1.4+).
RUN python - <<'PY'
from huggingface_hub import hf_hub_download
import os, shutil
dest = os.environ.get("MODEL_PATH", "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf")
os.makedirs(os.path.dirname(dest), exist_ok=True)
# hf_hub_download returns the cached file path; copy it to the stable MODEL_PATH.
# (deprecated no-op local_dir_use_symlinks argument dropped)
p = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
)
shutil.copy2(p, dest)
print("Model copied to:", dest)
PY

# ---- App code (last: code edits only rebuild this thin layer) ----
COPY . .

EXPOSE 7860
CMD ["bash", "-lc", "uvicorn app:app --host 0.0.0.0 --port ${PORT}"]