# Base image: CPU-only Python 3.12 (slim variant keeps the image small)
FROM python:3.12-slim

# Create a non-root user (UID 1000 is required by Hugging Face)
RUN useradd -m -u 1000 user
USER user

# HOME/PATH make user-level pip installs (~/.local/bin) resolvable at runtime;
# HF_HOME points the Hugging Face cache at a directory the user can write to.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/huggingface

# Set the working directory to an "app" folder inside the user's home
WORKDIR $HOME/app

# Copy and install dependencies first so this layer stays cached until
# requirements.txt changes. --no-cache-dir is passed to both pip invocations
# so no download cache ends up in the image layer.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY --chown=user app.py .

# Hugging Face Spaces MUST use port 7860
EXPOSE 7860

# Use Gunicorn with 1 worker and multiple threads for CPU LLM inference
# Note: 1 worker prevents loading the 1GB+ model into memory multiple times
# --timeout 120 raises Gunicorn's worker timeout (in seconds) above its default
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "120", "app:app"]