# ocrser / Dockerfile
# sanali209's picture
# Update Dockerfile
# 2074c35 verified
# Base image: slim variant of the Python 3.12 image.
# The tag is a floating minor-version tag (no patch version or digest pinned).
FROM python:3.12-slim
# Relative paths in the COPY/RUN/CMD instructions below resolve against /app
WORKDIR /app
# Install system dependencies
# update + install + list cleanup happen in a single layer so the apt package
# index is never persisted in the image.
# libgl1 and libglib2.0-0 are often required for image processing libraries (like cv2)
# libgl1-mesa-glx is deprecated/unavailable in newer Debian versions
# --no-install-recommends stops apt from pulling in optional "Recommends"
# packages, keeping the image smaller; hard dependencies are still installed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libgl1 \
        libglib2.0-0 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Copy only the dependency manifest first: the install layer below is then
# reused from cache until requirements.txt itself changes, even though the
# application source is copied later in the build.
COPY requirements.txt ./
# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --requirement requirements.txt --no-cache-dir
# Add an unprivileged account named 'user' with UID 1000 and a home directory
# (the standard setup for Hugging Face Spaces)
RUN useradd --create-home --uid 1000 user
# Point HOME and the cache locations at the unprivileged user's home directory
# so that model downloads land somewhere writable.
# HF_HOME is read by the Hugging Face Hub client; XDG_CACHE_HOME is the generic
# per-user cache root.
# NOTE(review): RAPIDOCR_CACHE_DIR is assumed to be honoured by RapidOCR/Docling;
# confirm against the library version in requirements.txt.
# PATH is extended so console scripts installed under ~/.local/bin are found.
ENV HOME="/home/user" \
    XDG_CACHE_HOME="/home/user/.cache" \
    HF_HOME="/home/user/.cache/huggingface" \
    RAPIDOCR_CACHE_DIR="/home/user/.cache/rapidocr" \
    PATH="/home/user/.local/bin:${PATH}"
# Create the cache directories while still running as root, then hand the
# entire home directory tree over to 'user' so later steps can write to it
RUN mkdir --parents \
        /home/user/.cache/huggingface \
        /home/user/.cache/rapidocr \
    && chown --recursive user:user /home/user
# Switch to user to run the pre-download script
# This ensures files are owned by 'user' and not 'root'
USER user
# Pre-download models at build time so the first request does not have to.
# Constructing DocumentConverter() alone only registers format options; the
# pipeline (and therefore its models) is built lazily on first use. Calling
# initialize_pipeline() forces the default PDF pipeline to be built now, which
# fetches its models into the cache directories owned by 'user'.
# NOTE(review): this warms the *default* pipeline options. If main.py builds
# its converter with custom options (e.g. a specific OCR engine), mirror them
# here so the same models are cached — confirm against main.py.
RUN python -c "from docling.datamodel.base_models import InputFormat; from docling.document_converter import DocumentConverter; DocumentConverter().initialize_pipeline(InputFormat.PDF)"
# Copy the build context into the working directory, with files owned by 'user'
COPY --chown=user:user . /app
# Documents the listening port (matches --port in CMD); does not publish it
EXPOSE 7860/tcp
# Exec-form command: run uvicorn serving `app` from module `main`,
# bound to all interfaces on port 7860
CMD [ "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860" ]