# MinerU (magic-pdf) image: Ubuntu 22.04 with Python 3.10, LibreOffice,
# CJK / Microsoft core fonts, and magic-pdf[full] installed in a virtualenv
# at /opt/mineru_venv. Model weights are downloaded at build time.
#
# NOTE(review): the image runs as root because the configuration file lives in
# /root/magic-pdf.json; moving to a non-root USER would require relocating it.

# Base image.
FROM ubuntu:22.04

# Run every RUN step under bash with pipefail so that a failing command on the
# left-hand side of a pipe aborts the build, and so `source` is available
# without wrapping commands in `/bin/bash -c "..."`.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Suppress interactive apt/debconf prompts during the build only. Declared as
# ARG rather than ENV so the setting reaches the RUN steps below without being
# baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.10 plus the system libraries, fonts and tools listed below.
# - `add-apt-repository -y` skips the confirmation prompt.
# - The debconf preseed accepts the Microsoft core fonts EULA up front; the
#   ttf-mscorefonts-installer package cannot present it under the
#   noninteractive frontend.
# - Recommended packages are intentionally still installed (no
#   --no-install-recommends) to keep the resulting package set unchanged.
# - The apt lists are removed in the same layer so they do not add to its size.
RUN apt-get update && \
    apt-get install -y \
        software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" \
        | debconf-set-selections && \
    apt-get update && \
    apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-wqy-microhei \
        fonts-wqy-zenhei \
        git \
        libgl1 \
        libglib2.0-0 \
        libreoffice \
        libsm6 \
        libxext6 \
        libxrender1 \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-distutils \
        python3.10-venv \
        ttf-mscorefonts-installer \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Register Python 3.10 as the `python3` alternative.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Create the virtual environment that holds magic-pdf.
RUN python3 -m venv /opt/mineru_venv

# Fetch the configuration template straight to /root/magic-pdf.json, then
# install magic-pdf[full] inside the virtual environment.
# - The requirement is quoted so the shell never treats `[full]` as a glob.
# - --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): the template URL tracks the `master` branch, so rebuilds are
# not reproducible; consider pinning to a tag or commit.
RUN wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json \
        -O /root/magic-pdf.json && \
    source /opt/mineru_venv/bin/activate && \
    pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -U 'magic-pdf[full]'

# Download models and update the configuration file.
# - This step does not activate the virtual environment, so `pip3` and
#   `python3` here are the system ones (unchanged from the original).
# - The one-shot download script is deleted in the same layer once it has run.
# - The sed expression rewrites every occurrence of "cpu" in the configuration
#   file to "cuda".
# NOTE(review): the script URL also tracks `master`; consider pinning it.
RUN pip3 install --no-cache-dir huggingface_hub && \
    wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py \
        -O download_models.py && \
    python3 download_models.py && \
    rm download_models.py && \
    sed -i 's|cpu|cuda|g' /root/magic-pdf.json

# Activate the virtual environment, then exec the command given to the
# container. The trailing "--" fills $0 so the container arguments arrive in
# "$@"; `exec` replaces the shell so the command becomes PID 1.
ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\"", "--"]