# syntax=docker/dockerfile:1

# ---- Build stage: compile native deps (llama-cpp-python) -------------------
FROM python:3.10-slim AS builder

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=7860

WORKDIR /app

# Build deps: required to compile llama-cpp-python from source on HF.
# NOTE: chained with && (not ;) so a failed `apt-get update` aborts the layer
# instead of installing from a stale/missing package index.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        g++ \
        gcc \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Install deps first (better layer caching: this layer is reused until
# requirements.txt itself changes)
COPY requirements.txt ./

# Optional: keep build simple and CPU-friendly (no BLAS backend)
ENV CMAKE_ARGS="-DLLAMA_BLAS=OFF"

RUN pip install --upgrade pip \
    && pip install -r requirements.txt

# ---- Runtime stage: minimal image with only what the app needs -------------
FROM python:3.10-slim AS runtime

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

WORKDIR /app

# Runtime deps (OpenMP for llama-cpp wheels built above)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy installed python packages (and console scripts in /usr/local/bin,
# e.g. uvicorn) from the builder stage
COPY --from=builder /usr/local /usr/local

# Don't run as root; uid 1000 is the conventional user on HF Spaces
RUN useradd --create-home --uid 1000 app \
    && chown app:app /app

# Copy app and model files, owned by the runtime user
COPY --chown=app:app app.py ./
COPY --chown=app:app *.gguf ./

USER app

EXPOSE 7860

# HuggingFace expects the app to listen on 0.0.0.0:7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--proxy-headers", "--forwarded-allow-ips", "*"]