# Container image for the FastAPI OCR service (Tesseract + pdf2image + OpenCV).
# UPDATE: Changed from 3.9 to 3.11 to support newer NumPy/Pandas versions
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies:
#   tesseract-ocr + language packs  - OCR engine and trained data
#   poppler-utils                   - required for pdf2image
#   libgl1, libglib2.0-0            - required for OpenCV headless
#   curl                            - required by the keep-alive ping loop in CMD
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-deu \
        tesseract-ocr-fra \
        tesseract-ocr-spa \
        tesseract-ocr-por \
        tesseract-ocr-ita \
        tesseract-ocr-rus \
        tesseract-ocr-chi-sim \
        tesseract-ocr-jpn \
        tesseract-ocr-kor \
        poppler-utils \
        libgl1 \
        libglib2.0-0 \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements.txt first so the dependency layer is cached until it changes.
# Upgrade pip, setuptools, and wheel before installing deps.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --default-timeout=100 --retries=10 --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container
COPY . .

# Create a non-root user for security (Production Best Practice)
RUN useradd -m appuser && chown -R appuser /app
USER appuser

# Set environment variables
ENV HF_HOME=/tmp/cache
ENV PORT=7860

# Create cache directory (if still needed); runs as appuser, so the cache is owned by it.
RUN mkdir -p "${HF_HOME}" && chmod 777 "${HF_HOME}"

# Expose port
EXPOSE $PORT

# Run FastAPI with Uvicorn.
# A background loop pings the public endpoint every 300 seconds as a keep-alive;
# --max-time stops a stalled request from blocking the loop.
# Exec form plus `exec uvicorn` lets the server replace the launching shell, so it
# receives stop signals (SIGTERM) from the container runtime directly.
# ${PORT} is expanded by bash at container start, not by Docker at build time.
CMD ["bash", "-c", "while true; do curl -s --max-time 30 https://xce009-ocr-api.hf.space/api/v1/ping >/dev/null; sleep 300; done & exec uvicorn main:app --host 0.0.0.0 --port \"${PORT}\" --workers 4"]