# syntax=docker/dockerfile:1.6

# Container image for a FastAPI service (served by uvicorn as `app:app`) that
# relies on Tesseract OCR and Poppler for PDF rasterization.
#
# The Debian release is pinned (bookworm) instead of using the floating
# `python:3.11-slim` tag so that the Tesseract major version, and therefore the
# tessdata directory configured below, cannot change underneath this file.
FROM python:3.11-slim-bookworm

# Python/pip runtime behaviour:
# - PYTHONDONTWRITEBYTECODE: do not write .pyc files into the image
# - PYTHONUNBUFFERED: flush stdout/stderr immediately so logs show up live
# - PIP_NO_CACHE_DIR / PIP_DISABLE_PIP_VERSION_CHECK: smaller, quieter installs
# - PORT: default listening port; the platform may override it at run time
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PORT=7860

# System packages required for OCR and PDF rasterization (sorted for diffs):
# - libgl1: kept from the original image; presumably required by an imaging
#   dependency in headless containers — TODO verify against requirements.txt
# - poppler-utils: provides `pdftoppm`, used by pdf2image
# - tesseract-ocr + tesseract-ocr-eng: OCR engine and English language data
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libgl1 \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Debian bookworm ships Tesseract 5, whose language data lives under
# /usr/share/tesseract-ocr/5/tessdata. The former `4.00` path only exists on
# older Debian releases and makes Tesseract fail to locate `eng.traineddata`.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

# Create an unprivileged account so the server does not run as root, and give
# it ownership of the application directory so the app can still write there.
# UID 1000 is used on the assumption that it matches the Hugging Face Spaces
# runtime user — confirm against the target platform.
RUN useradd --create-home --uid 1000 appuser && \
    install -d -o appuser -g appuser /app

# App directory
WORKDIR /app

# Install Python dependencies before copying the source so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Copy application code, owned by the runtime user.
COPY --chown=appuser:appuser . /app

# Drop privileges for everything that runs from here on.
USER appuser

# Document the default port (informational only; it does not publish the port).
EXPOSE 7860

# Start the FastAPI server.
# Exec-form CMD performs no variable expansion, so a minimal shell is used to
# honour a platform-provided PORT (falling back to 7860). `exec` replaces the
# shell so that uvicorn runs as PID 1 and receives SIGTERM directly.
CMD ["sh", "-c", "exec uvicorn app:app --host 0.0.0.0 --port \"${PORT:-7860}\""]