# Image for a Gunicorn-served Python app (app.py) targeting Hugging Face Spaces.
# Layout: base image -> non-root user -> environment -> dependencies -> app code
# -> runtime command, ordered so the dependency layer is reused when only
# app.py changes.

# Base image: CPU-only Python 3.12
FROM python:3.12-slim

# Create a non-root user (UID 1000 is required by Hugging Face) and switch to
# it so every later build step and the running container are unprivileged.
RUN useradd -m -u 1000 user
USER user

# HOME    - home directory of the non-root user.
# PATH    - puts ~/.local/bin first so scripts installed under the user's home
#           (pip runs as `user` below) are found; gunicorn is presumably one of
#           them, coming from requirements.txt - verify.
# HF_HOME - keeps the Hugging Face cache in a directory under the user's home.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/huggingface

# Work from an app directory inside the user's home
WORKDIR $HOME/app

# Copy and install dependencies first (for better caching): this layer is only
# rebuilt when requirements.txt changes.
COPY --chown=user requirements.txt .
# --no-cache-dir on both pip invocations keeps pip's download cache out of the
# image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY --chown=user app.py .

# Hugging Face Docker Spaces route traffic to port 7860 by default.
# EXPOSE is documentation only; the actual bind happens in CMD below.
EXPOSE 7860

# Use Gunicorn with 1 worker and multiple threads for CPU LLM inference.
# Note: 1 worker prevents loading the 1GB+ model into memory multiple times.
# Exec (JSON-array) form makes gunicorn PID 1 so it receives stop signals directly.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "120", "app:app"]