# syntax=docker/dockerfile:1
# CPU-only FastAPI + llama-cpp-python image with the GGUF model baked in
# (Hugging Face Spaces style: no volume, model fetched at build time).
FROM python:3.11-slim

ENV PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Minimal runtime libs (no compilers): OpenMP + OpenBLAS shared objects
# needed by the compiled llama.cpp extension at run time.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# ---- Python deps (non-llama first for cache) ----
COPY requirements.txt .
RUN python -m pip install --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- llama-cpp-python: compile from source, then purge the toolchain in the
# SAME layer so build-essential/cmake never persist in the final image. ----
# NOTE: llama.cpp renamed its CMake options (LLAMA_* -> GGML_*); the llama.cpp
# vendored in 0.3.0 uses the GGML_ prefix, so the old LLAMA_* names alone are
# silently ignored. Pass both spellings to keep BLAS/CUDA off either way.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential cmake \
    && CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_CUDA=OFF -DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF" \
       pip install --no-cache-dir "llama-cpp-python==0.3.0" \
    && apt-get purge -y --auto-remove build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

# ---- Model path is configurable via env ----
ENV MODEL_PATH=/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf

# ---- Pre-download & copy model to MODEL_PATH ----
# Done BEFORE `COPY . .` so editing application source does not invalidate
# this layer and re-download a multi-GB file on every rebuild.
# (`local_dir_use_symlinks` is deprecated/no-op with local_dir=None — dropped.)
RUN python - <<'PY'
from huggingface_hub import hf_hub_download
import os, shutil
dest = os.environ.get("MODEL_PATH", "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf")
os.makedirs(os.path.dirname(dest), exist_ok=True)
p = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
)
shutil.copy2(p, dest)
print("Model copied to:", dest)
PY

# ---- App code (most frequently changing: keep last for cache reuse) ----
COPY . .

# ---- Drop root: 7860 is unprivileged, so non-root bind works everywhere ----
RUN groupadd --system app \
    && useradd --system --gid app --home /app app \
    && chown -R app:app /app
USER app

EXPOSE 7860
# `exec` makes uvicorn PID 1 so it receives SIGTERM from `docker stop`;
# the shell wrapper exists only to expand ${PORT} at run time.
CMD ["bash", "-c", "exec uvicorn app:app --host 0.0.0.0 --port ${PORT}"]