# Hugging Face Spaces Dockerfile for Document Conversion
# Optimized for minimal size and fast startup.
#
# Layout: OS packages -> unprivileged user -> Python deps -> app code -> runtime
# config, ordered from least to most frequently changing so the build cache is
# reused when only app.py changes.

# Pin the Debian release as well as the Python version so the apt package names
# below keep resolving when the upstream "slim" tag moves to a newer release.
FROM python:3.10-slim-bookworm

# System packages (essential only: no GUI, no recommended extras):
#   - libreoffice-{calc,impress,writer}-nogui : LibreOffice headless components
#   - tesseract-ocr, tesseract-ocr-eng        : Tesseract OCR (English only)
#   - default-jre-headless                    : minimal Java runtime for LibreOffice
#   - libgl1, libglib2.0-0, libsm6, libxext6,
#     libxrender1                             : OpenCV system dependencies
# Cleanup happens in the same RUN so the removed files never land in a layer:
# apt caches, temp dirs, LibreOffice galleries, Liberation fonts, docs, man
# pages, and locale data.
# NOTE(review): removing the Liberation fonts may affect rendering of documents
# that rely on them - confirm this is acceptable for conversion quality.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        default-jre-headless \
        libgl1 \
        libglib2.0-0 \
        libreoffice-calc-nogui \
        libreoffice-impress-nogui \
        libreoffice-writer-nogui \
        libsm6 \
        libxext6 \
        libxrender1 \
        tesseract-ocr \
        tesseract-ocr-eng \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/* /var/tmp/* \
    && rm -rf /usr/lib/libreoffice/share/gallery \
    && rm -rf /usr/share/fonts/truetype/liberation \
    && rm -rf /usr/share/doc \
    && rm -rf /usr/share/man \
    && rm -rf /usr/share/locale

# Unprivileged runtime account. UID 1000 is used because Hugging Face Spaces
# runs containers with that user ID; the conversions temp directory and the app
# directory are handed to it so the service can write there without root.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app /tmp/conversions \
    && chown user:user /app /tmp/conversions

# Set working directory
WORKDIR /app

# Copy requirements first and install Python dependencies as root (system-wide),
# so this layer is rebuilt only when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code, owned by the runtime user
COPY --chown=user:user app.py .

# Runtime environment:
#   PYTHONUNBUFFERED - flush Python stdout/stderr immediately
#   PORT             - exported for the application; the gunicorn bind address
#                      below is hard-coded because exec-form CMD does not expand
#                      environment variables
#   HOME             - writable home directory for the unprivileged user
ENV PYTHONUNBUFFERED=1 \
    PORT=7860 \
    HOME=/home/user

# Drop root for everything that runs in the container
USER user

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Health check: urlopen raises on connection failure, timeout, and HTTP error
# statuses, so the probe exits non-zero whenever /health is not answering
# successfully. It uses only the standard library, so it does not depend on
# what requirements.txt installs.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=5)"]

# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "60", "app:app"]