|
|
|
|
|
# Base image for the MinerU build.
FROM ubuntu:22.04

# Suppress interactive prompts from apt/debconf while the image is built.
# Declared as ARG rather than ENV so the setting applies to the RUN steps of
# this build only and is not persisted into the environment of containers
# started from the image.
ARG DEBIAN_FRONTEND=noninteractive
|
|
|
|
|
|
|
|
# Install all OS-level packages in a single layer:
#   1. refresh the package index and install software-properties-common,
#      which is needed before add-apt-repository is invoked;
#   2. add the deadsnakes PPA and refresh the index again;
#   3. install Python 3.10, pip, download/VCS tools, LibreOffice, fonts and
#      the shared libraries listed below (kept in alphabetical order);
#   4. delete the apt package lists in the same layer so they are not stored
#      in the image.
# NOTE(review): --no-install-recommends is not used here; adding it would
# shrink the image but may drop packages later steps rely on - verify before
# changing.
RUN apt-get update \
 && apt-get install -y software-properties-common \
 && add-apt-repository ppa:deadsnakes/ppa \
 && apt-get update \
 && apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-wqy-microhei \
        fonts-wqy-zenhei \
        git \
        libgl1 \
        libglib2.0-0 \
        libreoffice \
        libsm6 \
        libxext6 \
        libxrender1 \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-distutils \
        python3.10-venv \
        ttf-mscorefonts-installer \
        wget \
 && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
# Register /usr/bin/python3.10 as an alternative for the `python3` link group
# (link: /usr/bin/python3, priority: 1).
RUN update-alternatives \
        --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Create the virtual environment that the later install steps and the
# ENTRYPOINT activate.
RUN python3 \
        -m venv /opt/mineru_venv
|
|
|
|
|
|
|
|
# Fetch the MinerU configuration template, copy it to /root/magic-pdf.json,
# then install magic-pdf (with its "full" extra) inside the virtual
# environment at /opt/mineru_venv.
#   * bash is invoked explicitly because `source` is not available in the
#     default /bin/sh used by RUN.
#   * --no-cache-dir keeps pip's download cache out of the image layer.
#   * 'magic-pdf[full]' is single-quoted so bash never treats the brackets as
#     a glob pattern.
# NOTE(review): the template is fetched from the `master` branch, so the
# result depends on the repository state at build time.
RUN /bin/bash -c "wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json && \
    cp magic-pdf.template.json /root/magic-pdf.json && \
    source /opt/mineru_venv/bin/activate && \
    pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -U 'magic-pdf[full]'"
|
|
|
|
|
# Download models and update the configuration file.
# The virtual environment at /opt/mineru_venv is activated first so that
# huggingface_hub is installed into, and the download script is run by, the
# same interpreter the ENTRYPOINT activates, instead of the system Python.
# --no-cache-dir keeps pip's download cache out of the image layer.
# After the download script finishes, sed rewrites every occurrence of `cpu`
# in /root/magic-pdf.json to `cuda`.
# NOTE(review): the script is fetched from the `master` branch, so the result
# depends on the repository state at build time.
RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
    pip3 install --no-cache-dir huggingface_hub && \
    wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models.py && \
    python3 download_models.py && \
    sed -i 's|cpu|cuda|g' /root/magic-pdf.json"
|
|
|
|
|
|
|
|
# Container entry point (exec form): start bash, activate the virtual
# environment, then `exec` the command supplied to the container so that it
# replaces the shell process.
# The trailing "--" fills bash's $0 slot, so the arguments appended by
# CMD / `docker run` arrive as "$@".
ENTRYPOINT [ \
    "/bin/bash", \
    "-c", \
    "source /opt/mineru_venv/bin/activate && exec \"$@\"", \
    "--" \
]