# Container image for a Gradio app (app.py) that uses llama-cpp-python.
#
# llama-cpp-python is compiled from source during the image build, so a C/C++
# toolchain and CMake are installed first. The app listens on port 7860.

FROM python:3.11-slim

WORKDIR /app

# Toolchain needed to compile llama-cpp-python. The apt package lists are
# removed in the same layer so they do not add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential cmake && \
    rm -rf /var/lib/apt/lists/*

# Build-time tuning, read by the llama-cpp-python build below.
# Single-threaded build + BLAS disabled keeps peak RAM at roughly 3 GB, which
# fits the cpu-basic hardware tier. GGML_NATIVE=OFF turns off ggml's
# "optimize for the build machine's CPU" option, so the compiled library is
# not tied to the instruction set of whatever host built the image.
ENV CMAKE_BUILD_PARALLEL_LEVEL=1 \
    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF"

# Runtime settings: unbuffered Python output so logs appear immediately, and
# Gradio bound to all interfaces on the exposed port.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# The slow source build is kept in its own layer so that changing the lighter
# dependencies below does not invalidate its cache.
RUN pip install --no-cache-dir llama-cpp-python==0.3.20
RUN pip install --no-cache-dir "gradio>=5.0.0" "huggingface_hub>=0.23.0"

# Run the app as an unprivileged user instead of root. Hugging Face's Docker
# Spaces documentation states that the container runs with user ID 1000 and
# recommends creating that user in the image; without it the process has no
# writable home directory (e.g. for download caches) and cannot write to /app.
RUN useradd --create-home --uid 1000 user && \
    chown user:user /app
USER user
ENV HOME=/home/user

COPY --chown=user:user app.py .

EXPOSE 7860

CMD ["python", "app.py"]