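# qwencoder / Dockerfile
# Commit 7e27ad7 ("Update Dockerfile", verified), author avatar: Chvigo; file size: 866 bytes.
# (Web file-viewer navigation text - raw / history / blame / contribute / delete - is not part of the Dockerfile.)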
# Base image: CPU-only Python 3.12 (the slim variant keeps the image small)
FROM python:3.12-slim

# Create a non-root user (UID 1000 is required by Hugging Face)
RUN useradd -m -u 1000 user
USER user

# HOME             - home directory of the non-root user created above.
# PATH             - puts ~/.local/bin first so console scripts installed by
#                    pip for this user are found (the `gunicorn` command used
#                    by CMD is presumably installed via requirements.txt).
# HF_HOME          - base directory for Hugging Face library data, kept under
#                    the user's home so it is writable without root.
# PYTHONUNBUFFERED - send stdout/stderr straight to the container logs
#                    instead of holding output in a buffer.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/huggingface \
    PYTHONUNBUFFERED=1

# Set working directory to an app folder inside the user's home
WORKDIR $HOME/app

# Copy and install dependencies first (for better caching): this layer is
# only rebuilt when requirements.txt changes, not on every code edit.
COPY --chown=user requirements.txt .

# --no-cache-dir is passed to BOTH pip invocations so pip's download cache is
# never written into the image layer. `python -m pip` guarantees the freshly
# upgraded pip is the one that installs the requirements.
RUN python -m pip install --no-cache-dir --upgrade pip && \
    python -m pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY --chown=user app.py .

# Hugging Face Spaces MUST use port 7860
EXPOSE 7860

# Use Gunicorn with 1 worker and multiple threads for CPU LLM inference
# Note: 1 worker prevents loading the 1GB+ model into memory multiple times
# Exec (JSON-array) form makes gunicorn PID 1 so it receives stop signals directly.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "120", "app:app"]