File size: 2,459 Bytes
aef1493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# ---- Build stage -----------------------------------------------------------
# Clones the Stirling-PDF sources into /app and builds them with the
# repository's own Gradle wrapper. Only artifacts that a later stage copies
# out with COPY --from=builder end up in the final image.
FROM openjdk:17-jdk-slim AS builder

WORKDIR /app

# Only git (plus CA certificates for the HTTPS clone) is needed here: the
# build step below runs ./gradlew, which downloads its own Gradle
# distribution, so the distro gradle/maven packages and curl were unused.
# The apt lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): this clones whatever the default branch contains at build
# time, so two builds of this file can produce different jars. Consider
# pinning a tag or commit once a known-good revision is chosen.
RUN git clone https://github.com/Stirling-Tools/Stirling-PDF.git .

# --no-daemon: a background Gradle daemon brings no benefit in a one-shot
# container build and would just linger until the RUN step ends.
RUN ./gradlew --no-daemon build

# ---- Runtime stage ---------------------------------------------------------
FROM openjdk:17-jdk-slim

WORKDIR /app

# The wildcard has to match exactly one jar: the destination is a file name,
# and COPY rejects multiple sources unless the destination is a directory
# ending in "/".
COPY --from=builder /app/build/libs/*.jar /app/stirling-pdf.jar

# Create the unprivileged runtime user together with the directories it must
# be able to write to. The directories are handed to appuser instead of being
# opened up with "chmod -R 777": the owner gets read/write, everyone else
# gets read (and traverse on directories) only.
# NOTE(review): if the container is ever started with a different --user,
# these directories will be read-only for that UID — confirm that is not a
# supported way of running this image.
RUN useradd -m -d /home/appuser appuser && \
    mkdir -p /home/appuser/.cache/dconf \
             /app/logs /app/configs /app/customFiles /usr/share/tessdata && \
    chown -R appuser:appuser /home/appuser \
             /app/logs /app/configs /app/customFiles /usr/share/tessdata && \
    chmod -R u=rwX,go=rX /app/logs /app/configs /app/customFiles /usr/share/tessdata

# Tesseract language models to download and the git ref of the tessdata
# repository to take them from. The defaults reproduce the previously
# hard-coded list and branch; override either with --build-arg.
ARG TESSDATA_REF=main
ARG TESSDATA_LANGS="chi_sim chi_tra eng jpn kor rus fra deu spa ita por vie tha"

# Installs the OS packages, the Python helper packages, and the Tesseract
# language models in a single layer:
#   * apt lists are removed in the same layer that created them;
#   * pip runs with --no-cache-dir so no wheel cache is left behind;
#   * the models are fetched in a loop straight into /usr/share/tessdata
#     (no "cd"), and "|| exit 1" aborts the build on the first failed
#     download. ${TESSDATA_LANGS} is intentionally unquoted so the shell
#     splits it into one word per language.
# NOTE(review): --no-install-recommends is not used here on purpose — it
# would shrink the image but may drop fonts or libraries that LibreOffice
# and WeasyPrint rely on; verify before enabling it.
# NOTE(review): the Debian tesseract-ocr packages keep their own data under a
# versioned directory in /usr/share/tesseract-ocr — confirm the application
# (or TESSDATA_PREFIX) is actually pointed at /usr/share/tessdata.
# NOTE(review): the pip packages are unpinned; pin versions once a known-good
# set has been identified.
RUN apt-get update && apt-get install -y \
        libreoffice \
        poppler-utils \
        python3 \
        python3-pip \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow \
    && for lang in ${TESSDATA_LANGS}; do \
           wget --no-verbose --directory-prefix=/usr/share/tessdata \
               "https://github.com/tesseract-ocr/tessdata/raw/${TESSDATA_REF}/${lang}.traineddata" \
           || exit 1; \
       done

# Runtime environment for the unprivileged user: its home directory plus two
# scratch locations under /tmp that are exported to the application.
ENV HOME=/home/appuser \
    XDG_RUNTIME_DIR=/tmp/runtime-appuser \
    LIBREOFFICE_CONFIG_DIR=/tmp/libreoffice

# Create both directories owned by appuser and private to it (mode 0700).
# The XDG Base Directory specification requires XDG_RUNTIME_DIR to be owned
# by the user with access mode 0700; the previous world-writable 777 violated
# that. "install -d" creates the directory and sets owner, group and mode in
# one step, replacing the separate mkdir/chown/chmod calls.
# NOTE(review): LIBREOFFICE_CONFIG_DIR is given the same private mode on the
# assumption that only appuser's processes use it — confirm.
RUN install -d -o appuser -g appuser -m 0700 \
        "${XDG_RUNTIME_DIR}" "${LIBREOFFICE_CONFIG_DIR}"

# Everything from here on, including the application process, runs as the
# unprivileged user.
USER appuser

# Documentation only (EXPOSE does not publish the port); the value matches
# the -Dserver.port setting passed to the JVM below.
EXPOSE 7860

# Exec form, so java is started directly without a wrapping shell. The JVM
# system properties set the listen port and address and the default file
# encoding before the jar is launched.
CMD ["java", \
     "-Dserver.port=7860", \
     "-Dserver.address=0.0.0.0", \
     "-Dfile.encoding=UTF-8", \
     "-jar", \
     "/app/stirling-pdf.jar"]