handbook-ocr-engine / Dockerfile
internationalscholarsprogram's picture
Fix Dockerfile: update package names for Debian Trixie
dc9cb16 verified
# Base image: slim CPython 3.12 on Debian.
# The apt package names installed below were chosen for Debian Trixie, so the
# release is pinned in the tag; a floating "-slim" tag could move to a newer
# Debian release and break the package install.
FROM python:3.12-slim-trixie
# Native libraries required at runtime, installed in a single layer and listed
# alphabetically so additions/removals diff cleanly:
#   - OCR:        tesseract-ocr, tesseract-ocr-eng, libtesseract-dev, leptonica-progs
#   - OpenCV:     libgl1, libglib2.0-0
#   - WeasyPrint: libpango-1.0-0, libpangocairo-1.0-0, libcairo2,
#                 libgdk-pixbuf-2.0-0, libffi-dev, libharfbuzz0b
#   - Fonts:      fonts-liberation, fonts-dejavu, fontconfig
#   - Utilities:  wget
# The apt package index is deleted in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        fontconfig \
        fonts-dejavu \
        fonts-liberation \
        leptonica-progs \
        libcairo2 \
        libffi-dev \
        libgdk-pixbuf-2.0-0 \
        libgl1 \
        libglib2.0-0 \
        libharfbuzz0b \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libtesseract-dev \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
    && rm -rf /var/lib/apt/lists/*
# All following relative paths resolve against /app.
WORKDIR /app

# Install Python requirements before copying the application source, so this
# layer is rebuilt only when requirements.txt itself changes.
COPY requirements.txt ./requirements.txt
RUN pip install --no-cache-dir --requirement requirements.txt
# Copy the application package into the working directory (/app/app), which is
# where the "app.main" module started by CMD is imported from.
COPY ./app/ /app/app/
# Runtime user and scratch directories.
# Create an unprivileged account and hand it the upload/export directories so
# the service does not need root to write there.
# NOTE(review): UID 1000 assumes the target host runs containers as UID 1000
# (port 7860 suggests Hugging Face Spaces) — confirm. Also confirm the app
# writes nowhere under /app, which stays root-owned.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /tmp/handbook_uploads /tmp/handbook_exports \
    && chown appuser:appuser /tmp/handbook_uploads /tmp/handbook_exports
# Everything from here on, including the container's main process, runs as the
# unprivileged user.
USER appuser
# Ship the sample environment file alongside the application as a reference.
COPY .env.example ./.env.example

# Default listening port; CMD reads PORT at container start, and EXPOSE
# documents the same port for operators and tooling.
ENV PORT="7860"
EXPOSE 7860/tcp
# Start the ASGI server.
# A shell is needed so ${PORT:-7860} is expanded when the container starts;
# `exec` makes uvicorn replace that shell, so it runs as PID 1 and receives
# stop signals (e.g. SIGTERM from `docker stop`) directly.
# Single worker — OCR is CPU-intensive; keep memory bounded
CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port \"${PORT:-7860}\" --workers 1 --timeout-keep-alive 300"]