# Dockerfile for DOCX to PDF Converter with Enhanced Arabic Support
FROM ubuntu:22.04

# Build-time only: keeps apt/debconf non-interactive while the image is built
# without leaving DEBIAN_FRONTEND set in the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Arabic locale (the locale itself is generated further down with locale-gen)
# and unbuffered Python output.
ENV LANG=ar_SA.UTF-8 \
    LC_ALL=ar_SA.UTF-8 \
    PYTHONUNBUFFERED=1

# LibreOffice: point home, temp and config directories at /tmp
ENV HOME=/tmp \
    TMPDIR=/tmp \
    XDG_CONFIG_HOME=/tmp/.config

# CRITICAL: Completely disable Java to prevent javaldx errors
ENV SAL_DISABLE_JAVA=1 \
    SAL_DISABLE_JAVA_SECURITY=1 \
    LIBO_DISABLE_JAVA=1 \
    UNO_PATH=/usr/lib/libreoffice/program

# Install system dependencies, including Arabic and general-purpose fonts.
# - libreoffice-java-common and openjdk-11-jre-headless are deliberately NOT
#   installed, to prevent javaldx errors.
# - fonts-hosny-amiri, fonts-noto-kufi-arabic and fonts-noto-naskh-arabic were
#   added as the Arabic fonts.
#   NOTE(review): confirm the two fonts-noto-*-arabic package names resolve on
#   Ubuntu 22.04; an unknown package name makes the whole install step fail.
# - fonts-freefont-ttf is GNU FreeFont, installed as an Arial-like fallback;
#   it is not the Microsoft core fonts.
# - unoconv is installed as a fallback conversion path.
# The list is kept sorted alphabetically so additions and removals diff cleanly.
RUN apt-get update && apt-get install -y \
        curl \
        fontconfig \
        fonts-croscore \
        fonts-dejavu \
        fonts-dejavu-core \
        fonts-dejavu-extra \
        fonts-freefont-ttf \
        fonts-hosny-amiri \
        fonts-liberation \
        fonts-liberation2 \
        fonts-noto-color-emoji \
        fonts-noto-core \
        fonts-noto-kufi-arabic \
        fonts-noto-mono \
        fonts-noto-naskh-arabic \
        fonts-noto-ui-core \
        libreoffice \
        libreoffice-l10n-ar \
        libreoffice-writer \
        locales \
        python3 \
        python3-pip \
        unoconv \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Manually install Scheherazade New font from Google Fonts, then rebuild the
# font cache once so every font installed so far is indexed.
# NOTE(review): the files are fetched from the repository's main branch with no
# checksum verification; pin a commit and verify a sha256 for reproducible builds.
RUN mkdir -p /usr/share/fonts/truetype/scheherazade \
    && wget -q https://github.com/google/fonts/raw/main/ofl/scheherazadenew/ScheherazadeNew-Regular.ttf \
        -O /usr/share/fonts/truetype/scheherazade/ScheherazadeNew-Regular.ttf \
    && wget -q https://github.com/google/fonts/raw/main/ofl/scheherazadenew/ScheherazadeNew-Bold.ttf \
        -O /usr/share/fonts/truetype/scheherazade/ScheherazadeNew-Bold.ttf \
    && fc-cache -fv

# Generate Arabic locale (matches LANG / LC_ALL above)
RUN locale-gen ar_SA.UTF-8

# Fix LibreOffice Java integration issues - Comprehensive fix
# Create all necessary LibreOffice directories with proper permissions
# Best effort: the trailing `|| true` lets the build continue even when any
# command in this chain (mkdir included) fails.
RUN mkdir -p \
        /tmp/.config/libreoffice/4/user \
        /usr/lib/libreoffice/share/fonts/truetype \
        /usr/lib/libreoffice/share/fonts/type1 \
    && chmod -R 777 /tmp/.config \
    && chmod -R 777 /usr/lib/libreoffice/share/fonts || true

# Create empty registrymodifications.xcu to prevent initialization errors
# NOTE(review): the payload written below contains no XML markup, only
# backslashes and the bare words true/false/false. It looks as if the original
# XML elements were stripped from this file; restore the intended markup from
# version control. The string is kept byte-for-byte until that is confirmed.
RUN echo '\ \ \ \ \ true\ \ \ \ \ \ false\ \ \ \ \ \ false\ \ \ ' > /tmp/.config/libreoffice/4/user/registrymodifications.xcu \
    && chmod 666 /tmp/.config/libreoffice/4/user/registrymodifications.xcu

# Pre-initialize LibreOffice to avoid first-run errors - More comprehensive approach
# Run LibreOffice with specific flags to complete initial setup
# REMOVED --disable-java flag as it's not supported in LibreOffice 7.3.7.2
# The run is capped at 30 seconds and its exit status is ignored (`|| true`),
# so a slow or failing start-up never breaks the build.
RUN HOME=/tmp timeout 30 libreoffice \
        --headless \
        --invisible \
        --nologo \
        --norestore \
        --nofirststartwizard \
        --safe-mode \
        --version || true

# Set working directory
WORKDIR /app

# Copy requirements first to leverage Docker cache: the dependency layer is
# rebuilt only when requirements.txt changes, not on every source change.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Create the scratch directories used for conversion, config and fonts, plus the
# static directory under /app, in a single layer, then open /tmp to every user.
# NOTE(review): a numeric `chmod -R 777 /tmp` also appears to drop the sticky
# bit that /tmp normally carries - confirm that is acceptable for this image.
RUN mkdir -p \
        /tmp/libreoffice_conversion \
        /tmp/.config \
        /tmp/fonts/truetype \
        static \
    && chmod -R 777 /tmp

# Copy all remaining files
COPY . .

# Setup additional Arabic fonts using Python script with proper permissions
# Use /tmp for font installation to avoid permission issues: setup_fonts.py is
# rewritten in place so every /usr/share/fonts/truetype path becomes
# /tmp/fonts/truetype before the script runs.
# Best effort: if any step in the chain fails, the message below is printed and
# the build continues.
RUN sed -i 's|/usr/share/fonts/truetype|/tmp/fonts/truetype|g' setup_fonts.py \
    && mkdir -p /tmp/fonts/truetype/arabic-enhanced \
    && chmod -R 777 /tmp/fonts/truetype \
    && python3 setup_fonts.py || echo "Font setup failed, continuing with default fonts..."
# Update font cache after installing additional fonts
# NOTE(review): the font setup step earlier in this file redirects
# setup_fonts.py to /tmp/fonts/truetype. Without directory arguments fc-cache
# only scans the directories listed in fontconfig's configuration - confirm
# /tmp/fonts/truetype is registered there (for example by setup_fonts.py),
# otherwise the additional fonts will not be picked up.
RUN fc-cache -fv

# REMOVED: Java paths and symbolic links that were causing javaldx errors

# Expose port (Hugging Face Spaces requires port 7860)
EXPOSE 7860

# Health check (Hugging Face Spaces requires port 7860)
# curl flags: -f turns HTTP error statuses into a non-zero exit, -s hides the
# progress meter, -S still prints the error message when the probe fails.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run the application
# NOTE(review): no USER instruction is visible in this file, so the process runs
# as root unless the hosting platform overrides the user - confirm this is intended.
CMD ["python3", "main.py"]