# syntax=docker/dockerfile:1.6

# Base image; override with `--build-arg PY_BASE=...`.
# NOTE(review): TESSDATA_PREFIX below is hard-coded to a tesseract 4.00 path,
# so changing the base distro may require updating it as well -- confirm.
ARG PY_BASE=python:3.9-slim-bullseye
FROM ${PY_BASE}

# Python / pip / ML-library settings that persist into the running container.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1 \
    TRANSFORMERS_CACHE=/cache/hf

# Force a rebuild when you tweak deps: bump the value (or pass
# `--build-arg DEPS_REFRESH=...`) to invalidate every layer from here on.
ARG DEPS_REFRESH=2025-09-07-06
ENV DEPS_REFRESH=$DEPS_REFRESH
RUN echo "CACHEBUSTER=$DEPS_REFRESH"

# System deps (tesseract + libs for opencv wheels).
# update + install + list cleanup stay in one layer so the apt index is never
# stale and never shipped in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgl1 \
        libglib2.0-0 \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-osd \
    && rm -rf /var/lib/apt/lists/*

ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata

WORKDIR /app

# Copy only the manifest first so the dependency layers stay cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Install python deps
RUN python -m pip install --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# Install spaCy model matching spaCy 3.2.x WITHOUT pulling new deps
RUN pip install --no-deps \
    "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"

# Robust version dump (no __version__ attribute assumptions).
# Uses importlib.util.find_spec instead of pkgutil.find_loader, which is
# deprecated since Python 3.12 and removed in 3.14 (PY_BASE is overridable).
RUN python - <<'PY'
import importlib.util
import sys

try:
    import importlib.metadata as md
except ImportError:
    # Only reached on Python < 3.8, where the stdlib module does not exist.
    import importlib_metadata as md


def v(name):
    """Return the installed version of a distribution, or "not-installed"."""
    try:
        return md.version(name)
    except md.PackageNotFoundError:
        return "not-installed"


print("python:", sys.version.split()[0])
for name in ("pydantic", "typing-extensions", "spacy", "thinc", "en-core-web-sm"):
    print(f"{name}:", v(name))
print("has en_core_web_sm:", importlib.util.find_spec("en_core_web_sm") is not None)
PY

# App code (copied last so source edits do not invalidate the dependency layers)
COPY . .
# Writable cache and data directories.
# /cache and /data are left world-writable, as before.
# NOTE(review): presumably this is so the app can write under whatever UID the
# host (HF) assigns -- confirm, and prefer a dedicated USER + chown if possible.
# /tmp gets 1777 rather than a recursive 777 so its sticky bit is preserved.
RUN mkdir -p /cache/hf /data /tmp \
    && chmod -R 777 /cache /data \
    && chmod 1777 /tmp

# If you have a starter DB in the repo, uncomment the next line to seed it:
#COPY app.db /data/app.db

ENV DB_DIR=/data
ENV DB_PATH=/data/app.db

# (optional) expose as a volume so you can mount from host if you want persistence.
# Declared after /data is created and chmod-ed: build-time writes made to the
# path after VOLUME would be discarded.
VOLUME ["/data"]

#ENV PORT=8000
EXPOSE 7860

# Shell-form CMD so ${PORT:-7860} is expanded when the probe runs.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
  CMD curl -fsS "http://127.0.0.1:${PORT:-7860}/api/health/" || exit 1

# bind to $PORT provided by HF; include proxy headers.
# `sh -c` is needed for the ${PORT:-7860} expansion; `exec` replaces the shell
# so uvicorn runs as PID 1 and receives SIGTERM from `docker stop` directly.
CMD ["sh","-c","exec uvicorn backend:app --host 0.0.0.0 --port ${PORT:-7860} --proxy-headers --forwarded-allow-ips='*'"]