# Start from the slim Python 3.12 image (CPU-only).
FROM python:3.12-slim

# Hugging Face requires the container to run as a non-root user with UID 1000,
# so create that account (with a home directory) and switch to it right away.
RUN useradd --create-home --uid 1000 user
USER user

# Environment for the unprivileged user:
#   HOME    - the account's home directory
#   PATH    - puts ~/.local/bin first so executables installed by a user-level
#             `pip install` are found
#   HF_HOME - directory under the writable home used by Hugging Face libraries
#             for their local data
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH
ENV HF_HOME=/home/user/huggingface

# Work out of the "app" subdirectory of the user's home.
WORKDIR ${HOME}/app

# Install Python dependencies before copying the application code, so this
# layer stays cached until requirements.txt itself changes.
COPY --chown=user requirements.txt .
# --no-cache-dir is passed to BOTH pip invocations: without it, the pip upgrade
# step would leave pip's download cache behind in the image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Add the application entry point last; editing app.py will not invalidate the
# cached dependency layer built earlier.
COPY --chown=user app.py ./

# Hugging Face Spaces MUST use port 7860 (TCP).
EXPOSE 7860/tcp

# Start the server with Gunicorn (exec form, so no shell wraps the process).
# - --bind 0.0.0.0:7860 : listen on all interfaces on the port Hugging Face Spaces expects.
# - --workers 1         : a single worker prevents loading the 1GB+ model into memory
#                         multiple times.
# - --threads 4         : multiple threads in that one worker handle concurrent requests
#                         during CPU LLM inference.
# - --timeout 120       : NOTE(review): presumably sized to tolerate slow inference
#                         requests; confirm against observed request latency.
# - app:app             : Gunicorn MODULE:VARIABLE target, i.e. the `app` object in app.py.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "120", "app:app"]