# Dockerfile
#
# Builds an image that serves the FastAPI application `app:app` with Uvicorn
# on port 7860. System packages provide Poppler (for pdf2image) and Tesseract
# (for pytesseract); Python dependencies come from requirements.txt. The
# application runs as the unprivileged account `user` (UID 1000).

# Python 3.10 on Debian bullseye. The previous buster base is end-of-life and
# its apt suites are no longer served from the default mirrors, which breaks
# `apt-get update`. Bullseye still ships Tesseract 4.x, so the TESSDATA_PREFIX
# below stays valid.
# NOTE(review): moving to a newer Debian release (Tesseract 5) would require
# updating TESSDATA_PREFIX to the matching versioned directory.
FROM python:3.10-slim-bullseye

# Working directory inside the container (created as root).
WORKDIR /app

# System dependencies; this step must run as root.
#   - poppler-utils: provides the pdftoppm/pdftocairo binaries that pdf2image
#     invokes at runtime.
#   - libpoppler-dev: Poppler headers, kept from the original package list in
#     case a Python dependency compiles against them (TODO confirm it is needed).
#   - tesseract-ocr plus the Arabic, English, French and Turkish language data.
# Add further tesseract-ocr-<lang> packages to the list if more languages are
# required. The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libpoppler-dev \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-ara \
        tesseract-ocr-eng \
        tesseract-ocr-fra \
        tesseract-ocr-tur \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root account and hand it ownership of /app so that later
# steps running as `user` can write there.
RUN useradd -m -u 1000 user \
    && chown -R user:user /app

# Everything below (COPY, pip install, CMD) runs as the non-root user.
USER user

# pip installs console scripts (such as uvicorn) into ~/.local/bin when it is
# not running as root, so that directory has to be on PATH.
ENV PATH="/home/user/.local/bin:$PATH"

# Keep the Hugging Face cache in a location the non-root user can write to.
ENV HF_HOME=/app/huggingface_cache
RUN mkdir -p "${HF_HOME}"

# Install Python dependencies first so this layer stays cached until
# requirements.txt itself changes.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code.
COPY --chown=user:user . /app

# Remove stale Hugging Face Hub temp data and lock files. This runs after the
# COPY because the cache directory is empty before it; anything to clean up
# can only have arrived with the build context.
RUN rm -rf "${HF_HOME}/hub/tmp" \
    && find "${HF_HOME}" -type f -name "*.lock" -delete

# Runtime configuration for Tesseract.
# NOTE(review): TESSERACT_CMD and PYTHON_IS_PYTESSERACT_ONLY are presumably
# read by the application code; confirm against the app before changing them.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata
ENV TESSERACT_CMD=/usr/bin/tesseract
ENV PYTHON_IS_PYTESSERACT_ONLY=true

# Documents the port Uvicorn listens on (does not publish it).
EXPOSE 7860

# Run the FastAPI application with Uvicorn.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]