# Image for a Gradio app (app.py) backed by llama-cpp-python, which is compiled
# from source during the build. Listens on port 7860.
FROM python:3.11-slim

WORKDIR /app

# Toolchain needed to compile llama-cpp-python's native extension.
# update + install + list cleanup stay in one layer so the apt cache never
# lands in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
    && rm -rf /var/lib/apt/lists/*

# Build-time only knobs (ARG, not ENV, so they do not leak into the runtime
# environment). Single-threaded build + BLAS disabled keeps peak RAM at ~3GB
# so the build fits the cpu-basic tier; GGML_NATIVE=OFF avoids tuning the
# binary to the build host's CPU. Both remain overridable via --build-arg.
ARG CMAKE_BUILD_PARALLEL_LEVEL=1
ARG CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF"

# Kept as its own layer: this is the slow source build, and it stays cached
# when only the lighter dependencies below change.
RUN pip install --no-cache-dir llama-cpp-python==0.3.20

RUN pip install --no-cache-dir "gradio>=5.0.0" "huggingface_hub>=0.23.0"

# Runtime configuration: unbuffered logs, and Gradio bound to all interfaces
# on the exposed port.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Run as an unprivileged user with a writable home (used for download caches)
# and a writable working directory. UID 1000 is chosen to match hosts that
# force that UID at runtime — NOTE(review): confirm against the deployment
# target.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app
ENV HOME=/home/user

COPY --chown=user:user app.py .

USER user

EXPOSE 7860

CMD ["python", "app.py"]