# syntax=docker/dockerfile:1

# Image for a uvicorn-served ASGI app (app.main:app) on port 7860, using
# CPU-only PyTorch and a Hugging Face cache rooted at /models/huggingface.
#
# Build-time token (optional), in order of preference:
#   1. BuildKit secret (never stored in a layer or in image history):
#        docker build --secret id=HF_TOKEN,env=HF_TOKEN .
#      Hugging Face Spaces exposes Space *secrets* to the build this way.
#   2. Legacy build args (still accepted, but visible in `docker history`):
#        docker build --build-arg HF_TOKEN=... .
# The token is NOT persisted into the runtime environment; supply it at run
# time (`docker run -e HF_TOKEN=...`, or a Space secret) if the app needs it.

FROM python:3.10-slim

# Build-only: keeps apt non-interactive without leaking into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860

WORKDIR /code

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libomp-dev \
        libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Install CPU-only PyTorch first so pip does not pull CUDA stacks (~3GB+) into
# the image (OOM on HF builders). The second, forced torch install runs after
# requirements.txt -- presumably to restore the CPU wheel if a requirement
# replaced it; confirm against requirements.txt.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
    && pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu --force-reinstall --no-deps \
    && pip install --no-cache-dir huggingface-hub sentencepiece

# All Hugging Face cache variables point at the same directory.
# TRANSFORMERS_CACHE and HUGGINGFACE_HUB_CACHE are older names, kept so that
# older library versions resolve the same location.
ENV HF_HOME=/models/huggingface \
    TRANSFORMERS_CACHE=/models/huggingface \
    HUGGINGFACE_HUB_CACHE=/models/huggingface \
    HF_HUB_CACHE=/models/huggingface

# Dedicated non-root user. UID 1000 matches the UID Hugging Face Spaces uses
# to run Docker containers. Ownership replaces the former world-writable
# `chmod -R 777` on the cache directory.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /models/huggingface \
    && chown -R 1000:1000 /code /models

# Low thread counts reduce peak RSS on small HF build VMs. These are ENV
# values, so they also apply to the running container, not only to the build.
ENV OMP_NUM_THREADS=2 \
    MKL_NUM_THREADS=2 \
    NUMEXPR_NUM_THREADS=2

# Legacy build args: visible to the RUN steps below as environment variables,
# but deliberately not copied into ENV so the token is not baked into the
# image's runtime environment.
ARG HF_TOKEN=
ARG HUGGING_FACE_HUB_TOKEN=

COPY --chown=1000:1000 . .

USER appuser

# Run the asset build step. If a BuildKit secret is mounted it takes
# precedence over the corresponding build arg; mode=0444 lets the non-root
# user read the mounted secret file. Both mounts are optional.
RUN --mount=type=secret,id=HF_TOKEN,mode=0444 \
    --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN,mode=0444 \
    if [ -s /run/secrets/HF_TOKEN ]; then \
        HF_TOKEN="$(cat /run/secrets/HF_TOKEN)"; \
        export HF_TOKEN; \
    fi; \
    if [ -s /run/secrets/HUGGING_FACE_HUB_TOKEN ]; then \
        HUGGING_FACE_HUB_TOKEN="$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN)"; \
        export HUGGING_FACE_HUB_TOKEN; \
    fi; \
    python scripts/docker_build_assets.py

EXPOSE 7860

# Exec form so uvicorn is PID 1 and receives SIGTERM directly.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "30"]