# Base image: the slim variant of the official Python 3.12 image.
FROM python:3.12-slim

# Relative paths in the COPY/RUN instructions that follow resolve against /app.
WORKDIR /app
# Install system dependencies.
# libgl1 and libglib2.0-0 are often required for image processing libraries (like cv2).
# libgl1-mesa-glx is deprecated/unavailable in newer Debian versions, so libgl1 is used.
# --no-install-recommends keeps optional "Recommends" packages out of the image, and
# the apt package lists are removed in the same layer so they are never stored in it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libgl1 \
        libglib2.0-0 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Only the dependency manifest is copied at this point, so the install layer
# below is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Add an unprivileged account named "user" (UID 1000) together with its home
# directory (Standard for Hugging Face Spaces).
RUN useradd --create-home --uid 1000 user
# Point HOME and every cache location at paths under /home/user so that model
# downloads land in a directory the non-root user can write to.
# These variables are intended for Docling/RapidOCR and the HuggingFace Hub.
ENV HOME=/home/user
# Put the user's local bin directory ahead of the existing PATH entries.
ENV PATH=/home/user/.local/bin:$PATH
ENV XDG_CACHE_HOME=/home/user/.cache \
    HF_HOME=/home/user/.cache/huggingface \
    RAPIDOCR_CACHE_DIR=/home/user/.cache/rapidocr
# Create the cache directories and hand ownership to the non-root user.
# /app (the WORKDIR) is included in the chown: it was created by root, while the
# application later runs as "user" and its source is copied in owned by "user".
# Without this the process could not create files in its own working directory.
RUN mkdir -p /home/user/.cache/huggingface \
             /home/user/.cache/rapidocr \
    && chown -R user:user /home/user /app
# Drop root before warming the caches, so that everything written by the
# pre-download step is owned by 'user' and not 'root'.
USER user

# Instantiate the Docling converter once at build time. This is expected to
# trigger the model downloads into the writable cache directories set via ENV.
RUN python -c \
    "from docling.document_converter import DocumentConverter; DocumentConverter()"
# Copy the application source last, owned by the non-root user, so that source
# changes do not invalidate the dependency and model-download layers before it.
COPY --chown=user:user . .

# Port the server listens on. EXPOSE is documentation only and publishes nothing.
EXPOSE 7860

# Exec (JSON-array) form: uvicorn is started directly, without a /bin/sh wrapper.
# The line must contain nothing but the JSON array, otherwise Docker falls back
# to shell form and runs the bracketed text as a shell command.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]