Spaces:

rnyx
/

ecom-qa-bert

Running

App Files Files Community

ecom-qa-bert / Dockerfile

rnyx

Initial deploy: BERT QA app

3338b6d about 2 months ago

raw

history blame contribute delete

3.07 kB

	# ─────────────────────────────────────────────────────────────────────
	# Dockerfile for HuggingFace Spaces (Docker SDK) — also works on
	# Render, Railway, Fly.io, or any container platform.
	#
	# Key choices:
	# • python:3.11-slim base (small, modern)
	# • CPU-only torch wheel installed from PyTorch's CPU index
	#     → saves ~1.5 GB vs. the default GPU wheel
	# • Model is pre-downloaded at build time so the first request is fast
	# • Non-root user (HF Spaces requires UID 1000)
	# • Gunicorn with a single worker — BERT eats memory, and extra workers
	#     would each duplicate the ~500 MB model in RAM
	# ─────────────────────────────────────────────────────────────────────
	# Base image: slim variant of the official Python 3.11 image.
	FROM python:3.11-slim

	# Python runtime: do not write .pyc files; keep stdout/stderr unbuffered so
	# log lines appear immediately.
	ENV PYTHONDONTWRITEBYTECODE=1 \
	    PYTHONUNBUFFERED=1

	# pip: no download cache inside image layers, no "new version" check.
	ENV PIP_NO_CACHE_DIR=1 \
	    PIP_DISABLE_PIP_VERSION_CHECK=1

	# Hugging Face caches live under the non-root user's home directory.
	# NOTE(review): recent transformers releases warn that TRANSFORMERS_CACHE is
	# deprecated in favour of HF_HOME — confirm against the pinned version before
	# dropping it.
	ENV HF_HOME=/home/user/.cache/huggingface \
	    TRANSFORMERS_CACHE=/home/user/.cache/huggingface

	# Port read by the HEALTHCHECK probe.
	ENV PORT=7860

	# OS packages: compiler toolchain, CA bundle, and curl (used by HEALTHCHECK).
	# The apt package lists are deleted in the same layer so they never reach the image.
	RUN apt-get update \
	    && apt-get install --yes --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        curl \
	    && rm -rf /var/lib/apt/lists/*

	# Create the unprivileged account (UID 1000, as HF Spaces requires) together
	# with its home directory, then run every following instruction as that account.
	RUN useradd --create-home --uid 1000 user
	USER user
	WORKDIR /home/user/app
	# `pip install --user` puts console scripts in ~/.local/bin; make them resolvable.
	ENV PATH="/home/user/.local/bin:${PATH}"

	# CPU-only PyTorch gets its own layer ahead of requirements.txt: it is the
	# largest dependency, so this layer is reused when the other requirements change.
	RUN pip install --user --no-cache-dir \
	    --index-url https://download.pytorch.org/whl/cpu \
	    torch==2.4.1

	# Remaining Python dependencies, installed into the user's site-packages.
	# Only the manifest is copied here so source edits do not invalidate this layer.
	COPY --chown=user:user requirements.txt ./
	RUN pip install --user --no-cache-dir --requirement requirements.txt

	# Bake the tokenizer and model files into the image so the first request does
	# not have to download them. The model id is a build argument (override with
	# --build-arg HF_MODEL_NAME=...) and is also exported to the runtime environment.
	ARG HF_MODEL_NAME=deepset/bert-base-cased-squad2
	ENV HF_MODEL_NAME=${HF_MODEL_NAME}
	RUN python -c "import os; \
	    from transformers import AutoModelForQuestionAnswering, AutoTokenizer; \
	    model_id = os.environ['HF_MODEL_NAME']; \
	    AutoTokenizer.from_pretrained(model_id); \
	    AutoModelForQuestionAnswering.from_pretrained(model_id); \
	    print('Model pre-downloaded:', model_id)"

	# Application sources go last: they change most often, so the dependency and
	# model layers stay cached across code edits. Destinations are relative to WORKDIR.
	COPY --chown=user:user src/ src/
	COPY --chown=user:user templates/ templates/
	COPY --chown=user:user static/ static/

	# Persistent disk on HF Spaces mounts at /data (when enabled in Space settings)
	# If /data isn't writable, config.py falls back to ./history.db automatically.

	# Documents the port the app listens on (EXPOSE itself does not publish it).
	EXPOSE 7860

	# Probe /healthz every 30 s, allowing 60 s of start-up time before failures count.
	# ${PORT} is expanded by the shell at probe time; it falls back to 7860 if unset.
	# `|| exit 1` turns any curl failure into exit status 1 (unhealthy). The operator
	# must be a real shell `||`: backslash-escaped pipes are handed to curl as extra
	# URL arguments, which makes every probe fail.
	HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
	    CMD curl -fsS "http://localhost:${PORT:-7860}/healthz" || exit 1

	# Single worker, long timeout — BERT inference can take a few seconds on CPU.
	# The command runs through `sh -c` so ${PORT} is expanded when the container
	# starts (exec-form CMD performs no variable substitution). Platforms that
	# inject their own PORT then get a server on the port they route to, which is
	# also the port the HEALTHCHECK probes; 7860 remains the default.
	# `exec` replaces the shell, so gunicorn runs as PID 1 and receives stop
	# signals directly.
	CMD ["sh", "-c", "exec gunicorn src.app:app --bind 0.0.0.0:${PORT:-7860} --workers 1 --threads 4 --timeout 180 --access-logfile - --error-logfile -"]