invoice-processor-ml / Dockerfile
GSoumyajit2005's picture
Refactor: Replace Tesseract with DocTR and integrate LayoutLMv3-DocTR model
ec0b507
raw
history blame contribute delete
834 Bytes
# Base image: official slim Python 3.10 runtime.
FROM python:3.10-slim

# 1. Install system dependencies (DocTR + OpenCV + Poppler).
# DocTR requires OpenGL and GStreamer libraries for image processing;
# poppler-utils provides the PDF command-line tools.
# --no-install-recommends keeps optional packages out of the image, and the
# apt package lists are removed in the same layer so they never reach it.
# NOTE(review): poppler-data is listed explicitly because it is presumably only
# a "recommended" package of the Poppler library and would otherwise be dropped
# by --no-install-recommends — confirm it is needed for your PDFs.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libgl1-mesa-dev \
        libglib2.0-0 \
        libgstreamer-plugins-base1.0-0 \
        libgstreamer1.0-0 \
        libsm6 \
        libxext6 \
        poppler-data \
        poppler-utils \
    && rm -rf /var/lib/apt/lists/*
# Create an unprivileged account so the app does not run as root.
# UID 1000 matches the user ID that Hugging Face Docker Spaces run containers
# with; /app is handed to that account so the app can write inside it.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app \
    && chown appuser:appuser /app

# Set work directory (all relative paths below resolve against /app).
WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes. Runs as root so the packages
# land in the system site-packages and are available to every user.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code, owned by the unprivileged account.
COPY --chown=appuser:appuser . .

# Drop root privileges for everything that runs from here on.
USER appuser
# 2. Serve on port 7860 (Hugging Face default).
EXPOSE 7860

# 3. Run Streamlit: start app.py bound to all interfaces on the exposed port.
# NOTE(review): CORS and XSRF protection are both switched off below —
# presumably required when the app is served behind the Spaces proxy; confirm
# before reusing this image in another environment.
CMD ["streamlit", "run", "app.py", \
     "--server.port", "7860", \
     "--server.address", "0.0.0.0", \
     "--server.enableCORS", "false", \
     "--server.enableXsrfProtection", "false"]