# Captured from a Hugging Face Spaces page (Space status: Paused)
# Dockerfile for the MinerU Document Parser API on Hugging Face Spaces.
# Follows the official MinerU Docker deployment.
# Target hardware: L40S GPU (Ada Lovelace architecture, 48GB VRAM).
# Build: v1.4.0 - mineru[core] is used for full backend support.
#
# Base image: the official vLLM image, which already ships CUDA, PyTorch and a
# properly configured vLLM.
# v0.14.1 carries security patches (CVE-2025-66448/CVE-2025-30165) and memory
# leak fixes, and supports Ampere, Ada Lovelace and Hopper architectures.
FROM vllm/vllm-openai:v0.14.1

# The system-level setup steps that follow (apt, useradd) run as root.
USER root

# Print a UTC timestamp marking the start of the build in the build log.
RUN echo "========== BUILD STARTED at $(date -u '+%Y-%m-%d %H:%M:%S UTC') =========="
# System packages: fonts (required by MinerU) and curl (used by the health
# check), plus fontconfig, libgl1 and poppler-utils.
# The font cache is rebuilt and the apt package lists are removed within this
# same layer, so the lists never persist in the image.
RUN echo "========== STEP 1: Installing system dependencies ==========" \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1 \
        poppler-utils \
    && fc-cache -fv \
    && rm -rf /var/lib/apt/lists/* \
    && echo "========== System dependencies installed =========="
# Hugging Face Spaces requires a non-root user: create "user" with UID 1000
# and a home directory.
RUN useradd --create-home --uid 1000 user
# Runtime environment. MINERU_MODEL_SOURCE is intentionally not set here; it
# is set later, after the models have been downloaded.

# Python runtime behaviour: unbuffered output, no .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Application defaults.
ENV MINERU_BACKEND=pipeline \
    MINERU_LANG=en \
    MAX_FILE_SIZE_MB=1024 \
    VLLM_GPU_MEMORY_UTILIZATION=0.4

# Home directory and cache locations, all under /home/user.
ENV HOME=/home/user \
    XDG_CACHE_HOME=/home/user/.cache \
    HF_HOME=/home/user/.cache/huggingface \
    TORCH_HOME=/home/user/.cache/torch \
    MODELSCOPE_CACHE=/home/user/.cache/modelscope

# Search paths. The user-site bin directory goes first on PATH, and
# LD_LIBRARY_PATH gains the pip-installed nvidia cuDNN directory so that
# libcudnn.so.9 can be found.
ENV PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH \
    LD_LIBRARY_PATH=/home/user/.local/lib/python3.12/site-packages/nvidia/cudnn/lib:$LD_LIBRARY_PATH
# Pre-create the app directory and the cache directories, then give the whole
# home directory to the non-root user.
RUN mkdir -p \
        /home/user/app \
        /home/user/.cache/huggingface \
        /home/user/.cache/modelscope \
        /home/user/.cache/torch \
    && chown -R user:user /home/user

# From here on, build steps and the container run as the non-root user,
# with the app directory as the working directory.
USER user
WORKDIR /home/user/app
# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt changes.
COPY --chown=user:user requirements.txt .

# Install Python dependencies into the user site (~/.local).
# - nvidia-cudnn-cu12 provides libcudnn.so.9, which torch requires.
# - modelscope is force-reinstalled in user space for torch compatibility.
# - --no-cache-dir keeps pip's download/wheel cache (which would otherwise be
#   written under XDG_CACHE_HOME=/home/user/.cache) out of the image layer.
# - The final `pip list | grep` prints the key packages; grep exits non-zero
#   (failing the build) if none of them are installed in the user site.
RUN echo "========== STEP 2: Installing Python dependencies ==========" && \
    pip install --user --no-cache-dir --upgrade pip && \
    pip install --user --no-cache-dir nvidia-cudnn-cu12 && \
    pip install --user --no-cache-dir -r requirements.txt && \
    echo "Reinstalling modelscope in user space for torch compatibility..." && \
    pip install --user --no-cache-dir --force-reinstall modelscope && \
    echo "Installed packages:" && \
    pip list --user | grep -E "(mineru|fastapi|uvicorn|httpx|pydantic|modelscope|torch|cudnn|doclayout)" && \
    echo "========== Python dependencies installed =========="
# Write the MinerU config file. It has to exist BEFORE the models are
# downloaded: mineru-models-download reads ~/mineru.json to learn where the
# models should be stored. Both the pipeline and vlm entries point at the same
# directory, which is created here as well.
RUN echo "========== STEP 3a: Creating MinerU config ==========" \
    && mkdir -p /home/user/.cache/mineru/models \
    && printf '%s\n' '{"models-dir": {"pipeline": "/home/user/.cache/mineru/models", "vlm": "/home/user/.cache/mineru/models"}, "config_version": "1.3.1"}' \
        > /home/user/mineru.json \
    && cat /home/user/mineru.json \
    && echo "========== MinerU config created =========="
# Download the MinerU models with the official tool, then print a summary of
# the cache directories to the build log.
#
# The summary commands are best-effort diagnostics, so each one carries its
# own `|| echo` fallback. Every fallback is wrapped in `{ ...; }` on purpose:
# in sh, `&&` and `||` have equal precedence and associate left-to-right, so
# an ungrouped `... && download && ... && du ... || echo ...` would let the
# echo "rescue" a failed download and the layer would build successfully
# without any models. With the grouping, the outer list is a pure `&&` chain
# and a failing mineru-models-download fails the build.
RUN echo "========== STEP 3b: Downloading MinerU models ==========" && \
    echo "This downloads all required models (~4-5GB)..." && \
    echo "Cache directories before download:" && \
    ls -la /home/user/.cache/ && \
    echo "Downloading all models from huggingface..." && \
    mineru-models-download --source huggingface --model_type all && \
    echo "" && \
    echo "========== Model cache summary ==========" && \
    echo "MinerU models cache:" && \
    { du -sh /home/user/.cache/mineru 2>/dev/null || echo " (empty)"; } && \
    { ls -la /home/user/.cache/mineru/models 2>/dev/null || echo " (no files)"; } && \
    { find /home/user/.cache/mineru -type f 2>/dev/null | head -20 || echo " (no files found)"; } && \
    echo "HuggingFace cache:" && \
    { du -sh /home/user/.cache/huggingface 2>/dev/null || echo " (empty)"; } && \
    echo "Total cache size:" && \
    { du -sh /home/user/.cache 2>/dev/null || echo " (empty)"; } && \
    echo "========== Models downloaded =========="
# The models are now in the image: switch MinerU to the local model source so
# they are not downloaded again at runtime.
ENV MINERU_MODEL_SOURCE=local

# Add the application code, owned by the non-root user. This comes after the
# dependency and model layers, so code changes do not invalidate them.
COPY --chown=user:user . .

# List the app directory and print a UTC timestamp marking the end of the build.
RUN echo "Files in app directory:" \
    && ls -la /home/user/app/ \
    && echo "========== BUILD COMPLETED at $(date -u '+%Y-%m-%d %H:%M:%S UTC') =========="
# Port the server listens on.
EXPOSE 7860

# Health check: request the root endpoint every 30s (30s timeout, 5 retries),
# after a 300s start-up grace period.
HEALTHCHECK --start-period=300s --interval=30s --timeout=30s --retries=5 \
    CMD curl -f http://localhost:7860/ || exit 1

# Clear the entrypoint inherited from the vLLM base image so that CMD starts
# our FastAPI server (uvicorn, single worker) directly.
ENTRYPOINT []
CMD ["/usr/bin/python3", "-m", "uvicorn", "app:app", \
     "--host", "0.0.0.0", "--port", "7860", \
     "--workers", "1", "--timeout-keep-alive", "300"]