# ecom-qa-bert / Dockerfile
# rnyx's picture
# Initial deploy: BERT QA app
# 3338b6d
# ─────────────────────────────────────────────────────────────────────
# Dockerfile for HuggingFace Spaces (Docker SDK) — also works on
# Render, Railway, Fly.io, or any container platform.
#
# Key choices:
#   • python:3.11-slim base (small, modern)
#   • CPU-only torch wheel installed from PyTorch's CPU index
#     → saves ~1.5 GB vs. the default GPU wheel
#   • Model is pre-downloaded at build time so the first request is fast
#   • Non-root user (HF Spaces requires UID 1000)
#   • Gunicorn with a single worker — BERT eats memory, extra workers
#     would duplicate the ~500 MB model in RAM
# ─────────────────────────────────────────────────────────────────────
# Base image: the slim variant of the official Python 3.11 image.
FROM python:3.11-slim

# Interpreter and pip behaviour: skip .pyc files, keep stdout/stderr
# unbuffered, disable pip's download cache and its version self-check.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Hugging Face cache directory, located under /home/user.
# TRANSFORMERS_CACHE is set to the same path as HF_HOME.
# NOTE(review): recent transformers releases reportedly warn that
# TRANSFORMERS_CACHE is deprecated in favour of HF_HOME — confirm before
# dropping it, since removing it would move the cache to a subdirectory.
ENV HF_HOME=/home/user/.cache/huggingface
ENV TRANSFORMERS_CACHE=${HF_HOME}

# Default service port; the HEALTHCHECK probe reads this variable.
ENV PORT=7860
# System packages — kept to the bare minimum. The apt package lists are
# removed in the same layer so they never end up in the image.
#   build-essential  compiler toolchain (presumably for pip packages that
#                    ship no prebuilt wheel — TODO confirm it is needed)
#   ca-certificates  CA bundle for outbound HTTPS
#   curl             used by the HEALTHCHECK probe
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
 && rm -rf /var/lib/apt/lists/*
# Create an unprivileged account (HF Spaces requires UID 1000) with a home
# directory, and run every following instruction as that account.
RUN useradd --create-home --uid 1000 user
USER user

# All relative paths below resolve against the app directory.
WORKDIR /home/user/app

# `pip install --user` installs console scripts into ~/.local/bin,
# so that directory must be on PATH.
ENV PATH=/home/user/.local/bin:${PATH}
# Step 1 — CPU-only PyTorch from the PyTorch CPU wheel index. It gets its
# own layer (a large one) so it stays cached when requirements.txt changes.
RUN pip install --no-cache-dir --user \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.4.1

# Step 2 — the remaining Python dependencies listed in requirements.txt.
COPY --chown=user:user requirements.txt ./
RUN pip install --no-cache-dir --user --requirement requirements.txt
# Fetch the tokenizer and QA model at build time so they are already in the
# image's Hugging Face cache and cold starts are fast.
# The model id defaults to the ARG value, can be overridden with
# `--build-arg HF_MODEL_NAME=...`, and is exported to the runtime environment.
ARG HF_MODEL_NAME=deepset/bert-base-cased-squad2
ENV HF_MODEL_NAME=${HF_MODEL_NAME}
RUN python -c "from os import environ; \
    from transformers import AutoModelForQuestionAnswering, AutoTokenizer; \
    model_id = environ['HF_MODEL_NAME']; \
    AutoTokenizer.from_pretrained(model_id); \
    AutoModelForQuestionAnswering.from_pretrained(model_id); \
    print('Model pre-downloaded:', model_id)"
# Application files, owned by the runtime user. Each directory is copied
# into the sub-directory of the same name under WORKDIR.
COPY --chown=user:user static/ ./static/
COPY --chown=user:user templates/ ./templates/
COPY --chown=user:user src/ ./src/
# Storage note: on HF Spaces the persistent disk (when enabled in the Space
# settings) is mounted at /data. If /data isn't writable, config.py falls
# back to ./history.db automatically.

# Documented service port.
EXPOSE 7860/tcp

# Liveness probe: GET /healthz on the configured port. Allows 60 s for
# start-up, then probes every 30 s with a 10 s timeout; three consecutive
# failures mark the container unhealthy.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail --silent --show-error "http://localhost:${PORT}/healthz" || exit 1
# Start Gunicorn serving src.app:app.
#   • Single worker, four threads — extra workers would each load their own
#     copy of the BERT model into RAM.
#   • Long timeout (180 s) — BERT inference can take a few seconds on CPU.
#   • Access and error logs go to the container's stdout/stderr ("-").
#
# The bind port comes from $PORT (falling back to 7860) so it always matches
# the port the HEALTHCHECK probes and works on platforms that inject their
# own PORT value. Exec-form CMD performs no variable expansion, so the
# command runs through `sh -c`; `exec` replaces the shell so Gunicorn is
# PID 1 and receives SIGTERM directly on container stop.
CMD ["sh", "-c", "exec gunicorn src.app:app --bind 0.0.0.0:${PORT:-7860} --workers 1 --threads 4 --timeout 180 --access-logfile - --error-logfile -"]