# Syncre's picture
# Deploy Arabic Audio Reader worker
# 6d5a99d verified
# Base image: slim CPython 3.10 (apt-based, see the package install step).
FROM python:3.10-slim

# Interpreter / pip behaviour.
# PYTHONUNBUFFERED=1 keeps stdout/stderr unbuffered so logs appear immediately;
# PIP_NO_CACHE_DIR=1 is the environment form of pip's --no-cache-dir.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Storage locations. The values are plain literals (DATABASE_PATH does not
# reference WORK_DIR), so both must be edited together if the root moves.
ENV WORK_DIR=/data/arabic-translator \
    DATABASE_PATH=/data/arabic-translator/data/arabic_reader.sqlite3

# OCR settings. TESSDATA_DIR points at a Tesseract 5 data directory; the
# remaining values are presumably read by the application code (not visible
# in this file) - confirm their meaning there.
ENV TESSDATA_DIR=/usr/share/tesseract-ocr/5/tessdata \
    OCR_ENGINE=tesseract \
    OCR_RENDER_ZOOM=2 \
    TESSERACT_PSM=4

# Speech settings: path of the espeak-ng binary and the default voice id.
ENV ESPEAK_NG_EXE=/usr/bin/espeak-ng \
    DEFAULT_VOICE_ID=silma-local

# Upload and output housekeeping limits (units as suggested by the names;
# enforcement lives in the application code - TODO confirm).
ENV MAX_UPLOAD_MB=512 \
    OUTPUT_RETENTION_DAYS=7 \
    OUTPUT_MAX_FILES=25

# Audio output encoding.
ENV AUDIO_FORMAT=mp3 \
    MP3_BITRATE=96k

# SILMA toggles, all set to 0 by default.
ENV SILMA_FULL_NORMALIZER_DEPS=0 \
    SILMA_ENABLE_NORMALIZER=0 \
    SILMA_FORCE_TASHKEEL=0 \
    SILMA_NORMALIZE_NUMBERS=0
# Build-time feature switches; override with `--build-arg NAME=1`.
# Every switch defaults to 0. The dependency-install RUN step further down
# runs the matching scripts/setup_*.sh only when the value is exactly "1".
# Being ARGs, these exist during the build only and are not part of the
# container's runtime environment.

# Optional OCR backends.
ARG INSTALL_ARABIC_GLM_OCR=0
ARG INSTALL_ARABIC_QWEN_OCR=0
ARG INSTALL_BASEER_OCR=0
ARG INSTALL_KATIB_OCR=0
ARG INSTALL_PADDLEOCR_VL=0
ARG INSTALL_QARI_OCR=0
ARG INSTALL_TAWKEED_OCR=0

# Optional Supertonic setup.
ARG INSTALL_SUPERTONIC=0
# All following relative paths (COPY targets, .venv, scripts/) resolve under /app.
WORKDIR /app

# OS-level dependencies, installed and cleaned up in a single layer so the apt
# package lists never persist in the image.
#   bash               - presumably required by the scripts/setup_*.sh helpers (confirm)
#   espeak-ng          - provides /usr/bin/espeak-ng referenced by ESPEAK_NG_EXE
#   ffmpeg             - presumably used for audio encoding (AUDIO_FORMAT=mp3) - confirm
#   libsndfile1        - shared library for audio file I/O; consumer not visible here
#   tesseract-ocr      - OCR engine selected by OCR_ENGINE=tesseract
#   tesseract-ocr-ara  - Arabic language data for Tesseract
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash \
        espeak-ng \
        ffmpeg \
        libsndfile1 \
        tesseract-ocr \
        tesseract-ocr-ara \
    && rm -rf /var/lib/apt/lists/*
# Copy only the dependency manifests and the setup scripts before installing,
# so the install layer below is invalidated only when one of these files changes.
COPY requirements.txt \
     requirements-silma.txt \
     requirements-supertonic.txt \
     requirements-paddleocr.txt \
     requirements-paddleocr-vl.txt \
     requirements-qari-ocr.txt \
     requirements-tawkeed-ocr.txt \
     requirements-katib-ocr.txt \
     requirements-arabic-qwen-ocr.txt \
     requirements-arabic-glm-ocr.txt \
     requirements-baseer-ocr.txt \
     ./
COPY scripts/setup_silma.sh \
     scripts/setup_supertonic.sh \
     scripts/setup_paddleocr.sh \
     scripts/setup_paddleocr_vl.sh \
     scripts/setup_qari_ocr.sh \
     scripts/setup_tawkeed_ocr.sh \
     scripts/setup_katib_ocr.sh \
     scripts/setup_arabic_qwen_ocr.sh \
     scripts/setup_arabic_glm_ocr.sh \
     scripts/setup_baseer_ocr.sh \
     ./scripts/

# Create /app/.venv, install the base requirements into it, then run the setup
# scripts. SILMA and PaddleOCR setup always run; the remaining scripts run only
# when their INSTALL_* build argument equals "1".
# `set -e` aborts the step on the first failing command.
# At this point scripts/ holds exactly the ten setup_*.sh files copied above,
# so the glob marks the same files executable as listing them one by one.
# NOTE(review): PYTHON_BIN=python names the base image's interpreter on PATH,
# not .venv/bin/python (the venv is never activated here) - confirm the setup
# scripts install into .venv themselves.
RUN set -e; \
    python -m venv .venv; \
    .venv/bin/python -m pip install --upgrade pip; \
    .venv/bin/python -m pip install -r requirements.txt; \
    chmod +x scripts/setup_*.sh; \
    PYTHON_BIN=python scripts/setup_silma.sh; \
    PYTHON_BIN=python scripts/setup_paddleocr.sh; \
    if [ "$INSTALL_SUPERTONIC" = "1" ]; then PYTHON_BIN=python scripts/setup_supertonic.sh; fi; \
    if [ "$INSTALL_QARI_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_qari_ocr.sh; fi; \
    if [ "$INSTALL_TAWKEED_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_tawkeed_ocr.sh; fi; \
    if [ "$INSTALL_KATIB_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_katib_ocr.sh; fi; \
    if [ "$INSTALL_ARABIC_QWEN_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_arabic_qwen_ocr.sh; fi; \
    if [ "$INSTALL_ARABIC_GLM_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_arabic_glm_ocr.sh; fi; \
    if [ "$INSTALL_BASEER_OCR" = "1" ]; then PYTHON_BIN=python scripts/setup_baseer_ocr.sh; fi; \
    if [ "$INSTALL_PADDLEOCR_VL" = "1" ]; then PYTHON_BIN=python scripts/setup_paddleocr_vl.sh; fi
# Application sources are copied after dependency installation so code edits
# do not invalidate the dependency layers.
COPY app ./app
COPY api ./api
COPY static ./static
# Copies the whole scripts directory, overwriting the same-named setup scripts
# copied earlier. NOTE(review): file modes now come from the build context, so
# the earlier `chmod +x` only persists if the context files are executable.
COPY scripts ./scripts

# Create the directories under the data root at build time. The paths are
# derived from WORK_DIR and DATABASE_PATH (set by ENV above) instead of
# repeating the literals, so they cannot drift from the configured values.
# With the current values this yields the uploads, outputs and data
# directories under /data/arabic-translator.
RUN mkdir -p \
        "${WORK_DIR}/uploads" \
        "${WORK_DIR}/outputs" \
        "$(dirname "${DATABASE_PATH}")"

# Documents the port uvicorn binds to below; it does not publish the port.
EXPOSE 7860

# Exec form: the interpreter is started directly, without a wrapping shell.
# The venv interpreter is given by absolute path so locating it does not
# depend on the working directory.
# NOTE(review): no USER instruction is visible, so the process runs as the base
# image's default user - confirm whether the deployment platform enforces one.
CMD ["/app/.venv/bin/python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]