Spaces:

sanali209
/

ocrser

Sleeping

ocrser / Dockerfile

Update Dockerfile

2074c35 verified 5 months ago

1.49 kB

	# Base image: the slim variant of the official Python 3.12 image.
	FROM python:3.12-slim

	# All following relative paths (COPY destinations, RUN, CMD) resolve against /app.
	WORKDIR /app

	# Install system dependencies.
	# libgl1 and libglib2.0-0 are often required by image processing libraries (like cv2).
	# libgl1-mesa-glx is deprecated/unavailable in newer Debian versions, so libgl1 is used.
	# --no-install-recommends keeps optional "recommended" packages out of the image.
	# update, install and apt-list cleanup share one layer so the package index is
	# never stale and never persisted in the image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	    build-essential \
	    libgl1 \
	    libglib2.0-0 \
	    tesseract-ocr \
	    && rm -rf /var/lib/apt/lists/*

	# Copy only the dependency manifest first so the install layer below stays
	# cached until requirements.txt itself changes.
	COPY requirements.txt ./requirements.txt

	# Install Python dependencies without keeping pip's download cache in the layer.
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Add an unprivileged account named 'user' with UID 1000 and a home directory
	# (the non-root convention used by Hugging Face Spaces).
	RUN useradd --create-home --uid 1000 user

	# Point HOME at the unprivileged user's home directory. It is declared in its
	# own instruction so the paths below can be derived from it.
	ENV HOME=/home/user

	# Put user-level scripts on PATH and direct caches to locations under HOME so
	# that model downloads land in a directory the non-root user can write to.
	# HF_HOME is the Hugging Face Hub cache root; RAPIDOCR_CACHE_DIR is presumably
	# read by Docling/RapidOCR (verify against the installed versions).
	ENV PATH=${HOME}/.local/bin:${PATH} \
	    XDG_CACHE_HOME=${HOME}/.cache \
	    HF_HOME=${HOME}/.cache/huggingface \
	    RAPIDOCR_CACHE_DIR=${HOME}/.cache/rapidocr

	# Pre-create the model cache directories (still running as root here), then
	# hand the whole home directory tree over to 'user' so it is writable later.
	RUN mkdir --parents \
	        /home/user/.cache/huggingface \
	        /home/user/.cache/rapidocr \
	    && chown --recursive user:user /home/user

	# Switch to the unprivileged user for all following instructions; since no later
	# USER instruction appears, this is also the container's default user.
	# Files created by the RUN step below are therefore owned by 'user' and not 'root'.
	USER user

	# Instantiate docling's DocumentConverter once at build time, with the intent of
	# pre-downloading models into the writable cache directories defined above.
	# NOTE(review): confirm that merely constructing DocumentConverter triggers the
	# downloads for the installed docling version — TODO verify.
	RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()"

	# Copy the build context into the working directory, owned by 'user'.
	# Done last so source changes do not invalidate the dependency and model layers.
	COPY --chown=user:user . .

	# Document the port the server listens on (EXPOSE does not publish it by itself).
	EXPOSE 7860

	# Exec-form CMD: run uvicorn directly (no shell wrapper), serving the `app`
	# object from module `main` on all interfaces at port 7860.
	CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]