# python-doc-convert / Dockerfile
# omthakur1's picture
# optimize: Reduce Docker image size - remove GUI deps, add aggressive cleanup
# 8b08dbb
# Image for a document-conversion service deployed as a Hugging Face Space.
# Built on the slim Python 3.10 base to keep size and startup time down.
FROM python:3.10-slim AS runtime
# System packages needed at runtime, installed and pruned in ONE layer so that
# nothing deleted below is ever stored in the image:
#   - libreoffice-{calc,impress,writer}-nogui : LibreOffice without GUI components
#   - default-jre-headless                    : headless Java runtime for LibreOffice
#   - tesseract-ocr, tesseract-ocr-eng        : OCR engine with English data only
#   - libgl1, libglib2.0-0, libsm6, libxext6, libxrender1 : OpenCV system dependencies
# The trailing rm drops the apt lists, temp files and LibreOffice/OS extras
# (galleries, Liberation fonts, docs, man pages, locales) to shrink the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        default-jre-headless \
        libgl1 \
        libglib2.0-0 \
        libreoffice-calc-nogui \
        libreoffice-impress-nogui \
        libreoffice-writer-nogui \
        libsm6 \
        libxext6 \
        libxrender1 \
        tesseract-ocr \
        tesseract-ocr-eng \
    && apt-get clean \
    && rm -rf \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/* \
        /usr/lib/libreoffice/share/gallery \
        /usr/share/fonts/truetype/liberation \
        /usr/share/doc \
        /usr/share/man \
        /usr/share/locale
# Relative paths in the instructions that follow resolve against /app
WORKDIR /app
# Bring in only the dependency manifest first, so the pip layer can be reused
# from cache when just the application code changes
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Copy application code
COPY app.py .
# Create the scratch directory for conversions together with an unprivileged
# account, and hand that account the only locations the service should need to
# write to (/app and /tmp/conversions). UID 1000 is the user ID the Hugging Face
# Spaces Docker documentation tells images to use; running as a regular user
# also keeps LibreOffice/Tesseract, which process uploaded documents, off root.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /tmp/conversions \
    && chown -R appuser:appuser /app /tmp/conversions
# LibreOffice writes its user profile under $HOME, so point it at a directory
# the unprivileged account owns
ENV HOME=/home/appuser
# Everything from here on (health check and the server process) runs unprivileged
USER appuser
# Runtime environment:
#   PYTHONUNBUFFERED=1 : Python writes stdout/stderr unbuffered, so logs show up immediately
#   PORT=7860          : port value exported to the container environment
ENV PYTHONUNBUFFERED=1 \
    PORT=7860
# Declare the listening port (7860 is the Hugging Face Spaces default)
EXPOSE 7860
# Health check: probe the app's /health endpoint using only the Python standard
# library (no dependency on third-party packages being installed).
# urllib.request.urlopen raises on connection errors, on timeouts and on HTTP
# 4xx/5xx responses, so the probe exits non-zero (unhealthy) in all of those
# cases. The 5s socket timeout keeps the probe inside the 10s --timeout budget.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=5)"]
# Start the service: gunicorn serves the `app` object from app.py on all
# interfaces, port 7860, with 2 workers and a 60-second worker timeout
CMD ["gunicorn", \
     "--bind", "0.0.0.0:7860", \
     "--workers", "2", \
     "--timeout", "60", \
     "app:app"]