# document-extraction / Dockerfile
# vkumartr's picture
# Update Dockerfile
# 73995f4 verified
# Base image: the full Debian-based Python 3.10 image.
FROM python:3.10

# Hide all CUDA devices from GPU-aware libraries; the torch wheel installed
# later in this file comes from the CPU-only index, so the image runs on CPU.
ENV CUDA_VISIBLE_DEVICES=-1

# Build-time only: exposed to every RUN step as an environment variable so
# pip does not keep a download cache inside the image layers. Not present in
# the runtime environment of the final container.
ARG PIP_NO_CACHE_DIR=1

# System dependency: Poppler command-line tools (used by pdf2image, which is
# installed below). The apt package index is removed in the same layer so it
# does not add to the image size.
RUN apt-get update \
    && apt-get install -y poppler-utils \
    && rm -rf /var/lib/apt/lists/*
# ---- Python dependencies ---------------------------------------------------
# pip's download cache is already disabled for every step by the
# PIP_NO_CACHE_DIR build argument, so no per-command --no-cache-dir is needed.

# Pin numpy first, for compatibility with the packages installed afterwards.
RUN pip install numpy==1.26.4

# Small utility libraries, installed in one layer:
#   jsonschema - JSON validation
#   pdf2image  - PDF page rendering (relies on the Poppler tools installed above)
#   pymongo    - MongoDB operations
RUN pip install \
        jsonschema \
        pdf2image \
        pymongo

# paddlepaddle (latest stable version) from the main PyPI index.
RUN pip install paddlepaddle -i https://pypi.org/simple

# CPU-only build of torch, taken from the PyTorch CPU wheel index.
RUN pip install torch==2.1.1 -i https://download.pytorch.org/whl/cpu

# Application libraries.
# NOTE: the PyPI project named "fitz" is intentionally NOT installed. It is an
# unrelated package that writes to the same `fitz` import package that
# PyMuPDF ("pymupdf") provides, so having both installed can break
# `import fitz`. PyMuPDF alone supplies that module.
RUN pip install \
        boto3 \
        fastapi \
        openai==0.28 \
        pillow \
        pymupdf \
        pypdf2 \
        python-dotenv \
        setuptools \
        transformers \
        uvicorn
# libGL is needed by the (non-headless) OpenCV wheel installed below.
# The apt package index is removed in the same layer that created it;
# deleting it in a later layer would not shrink the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Install paddleocr, then replace the OpenCV distributions it brought in with
# a single fresh opencv-python install.
# The numpy pin used earlier in this file is restated in the last command so
# that resolving the unpinned opencv-python cannot upgrade numpy; pip will
# instead choose an opencv-python release compatible with numpy 1.26.4.
RUN pip install paddleocr==2.7.0.3 \
    && pip uninstall -y opencv-python opencv-contrib-python \
    && pip install opencv-python numpy==1.26.4
# Run the service as an unprivileged account with a fixed UID (1000) and a
# home directory, instead of root. All steps that need root are above.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user

# Application directory; WORKDIR creates it if it does not exist, and the
# relative paths below resolve against it.
WORKDIR $HOME/app

# Directory created as `user` inside the app directory.
# NOTE(review): presumably where app.py writes its results - confirm.
RUN mkdir output

# The trailing slash marks the destination explicitly as a directory, so the
# file always lands at $HOME/app/app.py, owned by `user`.
COPY --chown=user app.py $HOME/app/

# Documents the port uvicorn listens on (metadata only; does not publish it).
EXPOSE 7860

# Exec form, so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]