# Dockerfile — Hugging Face Space (Docker SDK)
# CPU-only FastAPI + llama-cpp-python app serving a GGUF Llama 3.2 3B model.
# Slim Debian-based Python image: small, and apt is available for native libs.
FROM python:3.11-slim

# PIP_NO_CACHE_DIR  - don't keep pip's wheel cache in image layers
# PYTHONUNBUFFERED  - stream stdout/stderr straight to the container logs
# PORT              - HF Spaces port convention; consumed by the CMD below
ENV PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Minimal runtime shared libraries only (no compilers):
#   libgomp1     - OpenMP runtime (used by ggml/llama.cpp builds)
#   libopenblas0 - BLAS runtime, in case anything links against it
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgomp1 \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
# ---- Python deps (everything except llama-cpp-python first, for layer cache) ----
# requirements.txt changes far less often than the app code, so this layer
# is usually served from cache.
COPY requirements.txt .
RUN python -m pip install --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- llama-cpp-python: compiled from source (no prebuilt CPU wheel assumed) ----
# Build tools are installed, used, and purged inside a SINGLE layer so the
# final image stays compiler-free and small (a separate `apt-get install`
# layer would ship build-essential/cmake forever).
# NOTE: llama.cpp renamed its CMake options LLAMA_CUBLAS/LLAMA_BLAS to
# GGML_CUDA/GGML_BLAS; with llama-cpp-python 0.3.0 the old LLAMA_* names are
# ignored, so pass both spellings to keep BLAS/CUDA off regardless of the
# bundled llama.cpp version.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential cmake \
    && CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_CUDA=OFF -DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_BLAS_VENDOR=NONE" \
       pip install --no-cache-dir "llama-cpp-python==0.3.0" \
    && apt-get purge -y --auto-remove build-essential cmake \
    && rm -rf /var/lib/apt/lists/*
# ---- App code ----
COPY . .

# ---- Model path is configurable via env (override with -e MODEL_PATH=...) ----
ENV MODEL_PATH=/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf

# ---- Pre-download the model at build time so cold starts skip the download ----
# The blob is MOVED out of the HF cache and the cache is deleted afterwards,
# so the multi-GB model is stored only ONCE in this layer (the original
# download-then-copy kept two copies). `local_dir_use_symlinks` is deprecated
# and ignored by current huggingface_hub, so it is not passed.
RUN python - <<'PY'
import os
import shutil

from huggingface_hub import hf_hub_download

dest = os.environ.get("MODEL_PATH", "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf")
os.makedirs(os.path.dirname(dest), exist_ok=True)

# Download into the HF cache, then relocate the real blob (the cache entry
# may be a symlink, hence realpath) to its final, env-configured location.
path = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
)
shutil.move(os.path.realpath(path), dest)
shutil.rmtree(os.path.expanduser("~/.cache/huggingface"), ignore_errors=True)
print("Model staged at:", dest)
PY

EXPOSE 7860

# bash -lc so ${PORT} is expanded at container start (HF Spaces may override it).
CMD ["bash", "-lc", "uvicorn app:app --host 0.0.0.0 --port ${PORT}"]