File size: 3,331 Bytes
2e1a095
 
 
 
 
 
 
 
6d5a99d
 
 
2e1a095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Base image: slim Python 3.10 (apt-based, see the apt-get step further down).
FROM python:3.10-slim

# Interpreter / pip behaviour: unbuffered stdout+stderr, no pip download cache.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Working directory and SQLite database location used by the application.
ENV WORK_DIR=/data/arabic-translator \
    DATABASE_PATH=/data/arabic-translator/data/arabic_reader.sqlite3

# OCR defaults. Exact semantics are defined by the application code, which is
# not visible from this file.
ENV OCR_ENGINE=tesseract \
    OCR_RENDER_ZOOM=2 \
    TESSERACT_PSM=4 \
    TESSDATA_DIR=/usr/share/tesseract-ocr/5/tessdata

# Speech / audio output defaults.
ENV ESPEAK_NG_EXE=/usr/bin/espeak-ng \
    DEFAULT_VOICE_ID=silma-local \
    AUDIO_FORMAT=mp3 \
    MP3_BITRATE=96k

# Upload size and generated-output housekeeping defaults.
ENV MAX_UPLOAD_MB=512 \
    OUTPUT_RETENTION_DAYS=7 \
    OUTPUT_MAX_FILES=25

# SILMA toggles; every one is set to 0 by default.
ENV SILMA_FULL_NORMALIZER_DEPS=0 \
    SILMA_ENABLE_NORMALIZER=0 \
    SILMA_FORCE_TASHKEEL=0 \
    SILMA_NORMALIZE_NUMBERS=0

WORKDIR /app

# OS-level packages. The package index is refreshed, used and removed in the
# same layer so no apt lists are left behind in the image.
# espeak-ng and tesseract-ocr(+ara) correspond to the ESPEAK_NG_EXE and
# TESSDATA_DIR/OCR_ENGINE settings above; ffmpeg and libsndfile1 are presumably
# for audio encoding/decoding -- confirm against the application code.
RUN apt-get update && apt-get install -y --no-install-recommends \
    bash \
    espeak-ng \
    ffmpeg \
    libsndfile1 \
    tesseract-ocr \
    tesseract-ocr-ara \
    && rm -rf /var/lib/apt/lists/*

# Only the dependency manifests and setup scripts are copied here, so edits to
# application source do not invalidate the dependency layers.
COPY requirements.txt requirements-silma.txt requirements-supertonic.txt requirements-paddleocr.txt requirements-paddleocr-vl.txt requirements-qari-ocr.txt requirements-tawkeed-ocr.txt requirements-katib-ocr.txt requirements-arabic-qwen-ocr.txt requirements-arabic-glm-ocr.txt requirements-baseer-ocr.txt ./
COPY scripts/setup_silma.sh scripts/setup_supertonic.sh scripts/setup_paddleocr.sh scripts/setup_paddleocr_vl.sh scripts/setup_qari_ocr.sh scripts/setup_tawkeed_ocr.sh scripts/setup_katib_ocr.sh scripts/setup_arabic_qwen_ocr.sh scripts/setup_arabic_glm_ocr.sh scripts/setup_baseer_ocr.sh ./scripts/

# Build-time switches for optional engines; pass `--build-arg NAME=1` to run
# the matching setup script below. All default to 0 (skipped).
# They are declared here, immediately before the only RUN that reads them:
# every RUN that follows an ARG receives it as an environment variable, so
# declaring them any earlier would make a changed flag also invalidate the
# cached apt-get layer above.
ARG INSTALL_QARI_OCR=0
ARG INSTALL_TAWKEED_OCR=0
ARG INSTALL_KATIB_OCR=0
ARG INSTALL_ARABIC_QWEN_OCR=0
ARG INSTALL_ARABIC_GLM_OCR=0
ARG INSTALL_BASEER_OCR=0
ARG INSTALL_PADDLEOCR_VL=0
ARG INSTALL_SUPERTONIC=0

# Create the virtualenv at /app/.venv, install the core requirements into it,
# then run the setup scripts: silma and paddleocr always, the others only when
# their INSTALL_* flag equals "1".
# NOTE(review): the scripts are invoked with PYTHON_BIN=python, i.e. the image
# interpreter found on PATH, not .venv/bin/python (the venv is never activated
# here). Confirm the scripts install into the environment that CMD runs from.
RUN python -m venv .venv \
    && .venv/bin/python -m pip install --upgrade pip \
    && .venv/bin/python -m pip install -r requirements.txt \
    && chmod +x scripts/setup_silma.sh scripts/setup_supertonic.sh scripts/setup_paddleocr.sh scripts/setup_paddleocr_vl.sh scripts/setup_qari_ocr.sh scripts/setup_tawkeed_ocr.sh scripts/setup_katib_ocr.sh scripts/setup_arabic_qwen_ocr.sh scripts/setup_arabic_glm_ocr.sh scripts/setup_baseer_ocr.sh \
    && PYTHON_BIN=python scripts/setup_silma.sh \
    && PYTHON_BIN=python scripts/setup_paddleocr.sh \
    && if [ "$INSTALL_SUPERTONIC" = "1" ]; then PYTHON_BIN=python scripts/setup_supertonic.sh; fi \
    && if [ "$INSTALL_QARI_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_qari_ocr.sh; fi \
    && if [ "$INSTALL_TAWKEED_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_tawkeed_ocr.sh; fi \
    && if [ "$INSTALL_KATIB_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_katib_ocr.sh; fi \
    && if [ "$INSTALL_ARABIC_QWEN_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_arabic_qwen_ocr.sh; fi \
    && if [ "$INSTALL_ARABIC_GLM_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_arabic_glm_ocr.sh; fi \
    && if [ "$INSTALL_BASEER_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_baseer_ocr.sh; fi \
    && if [ "$INSTALL_PADDLEOCR_VL" = "1" ]; then PYTHON_BIN=python scripts/setup_paddleocr_vl.sh; fi

# Application sources are copied after the dependency-installation steps so
# that code-only changes can reuse the cached layers above.
COPY app ./app
COPY api ./api
COPY static ./static
# Copies the entire scripts directory into /app/scripts, overwriting the
# setup_*.sh files that were copied (and chmod +x'ed) earlier in the build.
# NOTE(review): after this step the scripts' mode bits presumably follow the
# build context rather than the earlier chmod -- confirm if any are run at runtime.
COPY scripts ./scripts

# Pre-create the upload, output and database directories under WORK_DIR
# (DATABASE_PATH points into the "data" subdirectory).
RUN mkdir -p \
    "${WORK_DIR}/uploads" \
    "${WORK_DIR}/outputs" \
    "${WORK_DIR}/data"

# Documents the port uvicorn binds to below; it still has to be published at run time.
EXPOSE 7860

# Start uvicorn with the virtualenv interpreter (path is relative to WORKDIR /app).
# Exec form: no shell wrapper, so the server process receives signals directly.
CMD [".venv/bin/python", "-m", "uvicorn", "app.main:app", \
     "--host", "0.0.0.0", "--port", "7860"]