# Path: pdf / Dockerfile
# Uploaded by: fokan
# Commit: 2760df1 (verified) - "Upload 55 files"
# Dockerfile for a DOCX-to-PDF converter with enhanced Arabic support.
FROM ubuntu:22.04

# Build-time only: stop apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so every RUN step in this stage still sees it,
# but it is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Arabic UTF-8 locale for the process environment, and unbuffered Python
# output so log lines appear immediately.
ENV LANG=ar_SA.UTF-8 \
    LC_ALL=ar_SA.UTF-8 \
    PYTHONUNBUFFERED=1

# Point HOME, the temp directory and the XDG config root at /tmp, so the
# LibreOffice user profile ends up under /tmp/.config.
ENV HOME=/tmp \
    TMPDIR=/tmp \
    XDG_CONFIG_HOME=/tmp/.config

# Switches intended to keep LibreOffice from using Java (the javaldx errors).
# NOTE(review): confirm which of these variables LibreOffice actually honors.
ENV SAL_DISABLE_JAVA=1 \
    SAL_DISABLE_JAVA_SECURITY=1 \
    LIBO_DISABLE_JAVA=1

# Location of the LibreOffice program directory for UNO clients.
ENV UNO_PATH=/usr/lib/libreoffice/program
# System packages, installed in a single layer (list kept alphabetical):
#   - python3 / python3-pip: runtime for the application
#   - libreoffice, libreoffice-writer, libreoffice-l10n-ar, unoconv:
#     the conversion engine, its Arabic localisation, and a fallback converter
#   - fonts-*: Liberation, DejaVu, Croscore, Noto (incl. Naskh/Kufi Arabic),
#     Amiri and GNU FreeFont. fonts-freefont-ttf is a free stand-in for
#     Arial-like faces, not the Microsoft fonts themselves.
#   - fontconfig, locales: font cache tooling and locale generation
#   - curl, unzip, wget: download helpers (curl is also used by HEALTHCHECK)
# libreoffice-java-common and openjdk-11-jre-headless are deliberately NOT
# installed, to prevent javaldx errors.
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install --yes \
        curl \
        fontconfig \
        fonts-croscore \
        fonts-dejavu \
        fonts-dejavu-core \
        fonts-dejavu-extra \
        fonts-freefont-ttf \
        fonts-hosny-amiri \
        fonts-liberation \
        fonts-liberation2 \
        fonts-noto-color-emoji \
        fonts-noto-core \
        fonts-noto-kufi-arabic \
        fonts-noto-mono \
        fonts-noto-naskh-arabic \
        fonts-noto-ui-core \
        libreoffice \
        libreoffice-l10n-ar \
        libreoffice-writer \
        locales \
        python3 \
        python3-pip \
        unoconv \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Fetch the Scheherazade New font (Regular and Bold) from the google/fonts
# GitHub repository into the system font tree, then rebuild the font cache.
# Any failed download aborts the build.
# NOTE(review): the URL tracks the `main` branch and the files are not
# checksum-verified; consider pinning a commit and verifying a digest.
RUN font_dir=/usr/share/fonts/truetype/scheherazade \
    && font_url=https://github.com/google/fonts/raw/main/ofl/scheherazadenew \
    && mkdir -p "$font_dir" \
    && for weight in Regular Bold; do \
         wget -q "$font_url/ScheherazadeNew-$weight.ttf" -O "$font_dir/ScheherazadeNew-$weight.ttf" || exit 1; \
       done \
    && fc-cache -fv
# Generate the ar_SA.UTF-8 locale (the one named by LANG/LC_ALL), then
# refresh the fontconfig cache.
RUN locale-gen ar_SA.UTF-8 \
    && fc-cache -fv
# Create the LibreOffice user-profile directory (under /tmp/.config) and
# LibreOffice's own font directories, then make them world-writable so the
# process can write to them whichever user it runs as.
#
# Only the permission change on the LibreOffice share directory is
# best-effort. Failing to create the directories or to open up the profile
# directory now fails the build instead of being silently ignored (the
# previous trailing `|| true` applied to the whole && chain).
RUN mkdir -p \
        /tmp/.config/libreoffice/4/user \
        /usr/lib/libreoffice/share/fonts/truetype \
        /usr/lib/libreoffice/share/fonts/type1 \
    && chmod -R 777 /tmp/.config \
    && { chmod -R 777 /usr/lib/libreoffice/share/fonts \
         || echo "WARNING: could not relax permissions on /usr/lib/libreoffice/share/fonts"; }
# Seed the LibreOffice user profile with a registrymodifications.xcu file.
# The file is not empty: it carries three overrides (first-start wizard
# marked completed, JavaSupport=false, Java Enabled=false) intended to avoid
# first-run initialization and javaldx errors.
# The trailing backslashes are Dockerfile line continuations, so the shell
# receives the whole XML document as a single line; '\'' embeds a literal
# single quote inside the single-quoted string.
# The file is then made readable and writable by every user (mode 666).
# NOTE(review): confirm that these registry paths/properties are ones
# LibreOffice actually reads.
RUN echo '<?xml version="1.0" encoding="UTF-8"?>\
<oor:items xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\
<!-- Disable first start wizard and user installation -->\
<item oor:path="/org.openoffice.Setup/Office/Factories/org.openoffice.Setup:Factory['\''com.sun.star.comp.framework.ProtocolHandler'\'']">\
<prop oor:name="FirstStartWizardCompleted" oor:op="fuse">\
<value>true</value>\
</prop>\
</item>\
<!-- CRITICAL: Disable Java completely to prevent javaldx errors -->\
<item oor:path="/org.openoffice.Setup/Office">\
<prop oor:name="JavaSupport" oor:op="fuse">\
<value>false</value>\
</prop>\
</item>\
<!-- Disable Java security to prevent javaldx errors -->\
<item oor:path="/org.openoffice.Office.Java">\
<prop oor:name="Enabled" oor:op="fuse">\
<value>false</value>\
</prop>\
</item>\
</oor:items>' > /tmp/.config/libreoffice/4/user/registrymodifications.xcu \
&& chmod 666 /tmp/.config/libreoffice/4/user/registrymodifications.xcu
# Invoke LibreOffice once at build time, headless, with HOME=/tmp, so that
# first-run problems surface here rather than on the first conversion.
# Best-effort: the call is capped at 30 seconds and its exit status ignored.
# --disable-java is intentionally not passed; it is not supported by
# LibreOffice 7.3.7.2.
# NOTE(review): with --version LibreOffice may just print its version and
# exit; confirm this step really initializes the user profile.
RUN HOME=/tmp timeout 30 libreoffice \
        --headless \
        --invisible \
        --nologo \
        --norestore \
        --nofirststartwizard \
        --safe-mode \
        --version \
    || true
# Application root; relative paths in the instructions below resolve here.
WORKDIR /app

# Install Python dependencies before copying the rest of the source, so this
# layer is rebuilt only when requirements.txt changes. pip's download cache
# is disabled to keep the layer small.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir --requirement requirements.txt
# Create the scratch directories used for conversion, LibreOffice config and
# extra fonts, and open up everything under /tmp so the application can write
# there whichever user it runs as.
# A bare `chmod -R 777 /tmp` also strips the sticky bit from /tmp itself,
# which would let any user delete or replace other users' temp files; the
# final chmod restores the conventional 1777 mode on /tmp.
RUN mkdir -p /tmp/libreoffice_conversion /tmp/.config /tmp/fonts/truetype \
    && chmod -R 777 /tmp \
    && chmod 1777 /tmp
# Make sure a static/ directory exists in the working directory even if the
# build context does not ship one.
RUN mkdir -p ./static

# Bring in the rest of the build context (application code and scripts).
COPY . ./
# Optional extra Arabic fonts via setup_fonts.py.
# The script's /usr/share/fonts/truetype paths are rewritten to
# /tmp/fonts/truetype so the fonts land in a world-writable location and
# avoid permission issues. The whole sequence is best-effort: if any step
# fails, a message is printed and the build continues with the fonts
# installed earlier.
# NOTE(review): /tmp/fonts/truetype is not an obvious fontconfig search path;
# confirm setup_fonts.py or the app registers it, otherwise these fonts may
# not be picked up by fc-cache.
RUN { sed -i 's|/usr/share/fonts/truetype|/tmp/fonts/truetype|g' setup_fonts.py \
      && mkdir -p /tmp/fonts/truetype/arabic-enhanced \
      && chmod -R 777 /tmp/fonts/truetype \
      && python3 setup_fonts.py; \
    } || echo "Font setup failed, continuing with default fonts..."

# Rebuild the font cache after the additional fonts have been installed.
RUN fc-cache -fv
# Java paths and symbolic links used to be configured here; they were removed
# because they caused javaldx errors.

# Hugging Face Spaces requires the service to be reachable on port 7860.
EXPOSE 7860

# Liveness probe: GET /health on port 7860 must succeed. Probed every 30s
# with a 10s timeout; 3 consecutive failures mark the container unhealthy,
# after a 5s start-up grace period.
HEALTHCHECK --start-period=5s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the application (exec form, no intermediate shell).
CMD ["python3", "main.py"]