sidoutcome committed on
Commit
3e4135a
·
1 Parent(s): 0ef1544

fix: use Python 3.12 (Ubuntu 24.04 default), simplify Dockerfile, combine install steps

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -56
Dockerfile CHANGED
@@ -1,48 +1,34 @@
1
  # Hugging Face Spaces Dockerfile for PaddleOCR-VL Document Parser API
2
  # GPU-accelerated document parsing with PaddleOCR-VL-1.5 + PaddlePaddle
3
- # Build: v5.0.0 - PaddleOCR-VL for high-quality OCR on Nvidia T4
4
  #
5
  # NOTE: Run with --shm-size 16g for PaddlePaddle shared memory:
6
  # docker build -t hf-docling .
7
  # docker run --gpus all --shm-size 16g -p 7860:7860 -e API_TOKEN=test hf-docling
8
 
9
- # CUDA 12.6 runtime with cuDNN (required by PaddlePaddle GPU)
10
  FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
11
 
12
  USER root
13
 
14
- RUN echo "========== BUILD STARTED at $(date -u '+%Y-%m-%d %H:%M:%S UTC') =========="
15
-
16
- # Install system dependencies
17
- RUN echo "========== STEP 1: Installing system dependencies ==========" && \
18
- apt-get update && apt-get install -y --no-install-recommends \
19
- # Python 3.11
20
- python3.11 \
21
- python3.11-venv \
22
- python3.11-dev \
23
  python3-pip \
24
- # Fonts for document rendering
 
25
  fonts-noto-core \
26
  fonts-noto-cjk \
27
  fontconfig \
28
- # Image processing (required by OpenCV)
29
  libgl1 \
30
  libglib2.0-0 \
31
- # PDF utilities (required by pdf2image)
32
  poppler-utils \
33
- # Health checks
34
  curl \
35
- && fc-cache -fv && \
36
- # Set python3.11 as default python3/python
37
- update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
38
- update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
39
- rm -rf /var/lib/apt/lists/* && \
40
- echo "========== System dependencies installed =========="
41
 
42
- # Create non-root user for HF Spaces (required by HuggingFace)
43
  RUN useradd -m -u 1000 user
44
 
45
- # Set environment variables
46
  ENV PYTHONUNBUFFERED=1 \
47
  PYTHONDONTWRITEBYTECODE=1 \
48
  IMAGES_SCALE=2.0 \
@@ -52,52 +38,26 @@ ENV PYTHONUNBUFFERED=1 \
52
  HOME=/home/user \
53
  PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
54
 
55
- # Create cache directories with correct ownership
56
- RUN echo "========== STEP 2: Creating cache directories ==========" && \
57
- mkdir -p /home/user/.cache/huggingface \
58
- /home/user/.cache/paddleocr \
59
- /home/user/app && \
60
- chown -R user:user /home/user && \
61
- echo "========== Cache directories created =========="
62
 
63
- # Switch to non-root user
64
  USER user
65
  WORKDIR /home/user/app
66
 
67
- # Copy requirements first for better caching
68
  COPY --chown=user:user requirements.txt .
69
 
70
- # Install PaddlePaddle GPU (must be installed before paddleocr)
71
- RUN echo "========== STEP 3: Installing PaddlePaddle GPU ==========" && \
72
- python -m pip install --user --upgrade pip && \
73
- python -m pip install --user paddlepaddle-gpu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ && \
74
- echo "========== PaddlePaddle GPU installed (verified at runtime) =========="
75
-
76
- # Install Python dependencies from requirements.txt
77
- RUN echo "========== STEP 4: Installing Python dependencies ==========" && \
78
- python -m pip install --user -r requirements.txt && \
79
- echo "Installed packages:" && \
80
- pip list --user && \
81
- echo "========== Python dependencies installed =========="
82
-
83
- # NOTE: Model pre-download skipped — PaddlePaddle GPU requires CUDA at import time,
84
- # which is unavailable during Docker build. Model downloads on first startup (~60s).
85
- RUN echo "========== STEP 5: Skipping model pre-download (no GPU during build) =========="
86
 
87
- # Copy application code
88
  COPY --chown=user:user . .
89
 
90
- RUN echo "========== STEP 6: Finalizing build ==========" && \
91
- chmod +x start.sh && \
92
- echo "Files in app directory:" && ls -la /home/user/app/ && \
93
- echo "========== BUILD COMPLETED at $(date -u '+%Y-%m-%d %H:%M:%S UTC') =========="
94
 
95
- # Expose the port (HF Spaces standard)
96
  EXPOSE 7860
97
 
98
- # Health check
99
  HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=5 \
100
  CMD curl -f http://localhost:7860/ || exit 1
101
 
102
- # Single-process FastAPI app (no vLLM sidecar needed)
103
  CMD ["/bin/bash", "/home/user/app/start.sh"]
 
1
  # Hugging Face Spaces Dockerfile for PaddleOCR-VL Document Parser API
2
  # GPU-accelerated document parsing with PaddleOCR-VL-1.5 + PaddlePaddle
3
+ # Build: v5.0.0 - PaddleOCR-VL for high-quality OCR on Nvidia L4/T4
4
  #
5
  # NOTE: Run with --shm-size 16g for PaddlePaddle shared memory:
6
  # docker build -t hf-docling .
7
  # docker run --gpus all --shm-size 16g -p 7860:7860 -e API_TOKEN=test hf-docling
8
 
 
9
  FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
10
 
11
  USER root
12
 
13
+ # Install system dependencies (Python 3.12 is default in Ubuntu 24.04)
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ python3 \
 
 
 
 
 
 
16
  python3-pip \
17
+ python3-venv \
18
+ python3-dev \
19
  fonts-noto-core \
20
  fonts-noto-cjk \
21
  fontconfig \
 
22
  libgl1 \
23
  libglib2.0-0 \
 
24
  poppler-utils \
 
25
  curl \
26
+ && fc-cache -fv \
27
+ && rm -rf /var/lib/apt/lists/*
 
 
 
 
28
 
29
+ # Create non-root user for HF Spaces
30
  RUN useradd -m -u 1000 user
31
 
 
32
  ENV PYTHONUNBUFFERED=1 \
33
  PYTHONDONTWRITEBYTECODE=1 \
34
  IMAGES_SCALE=2.0 \
 
38
  HOME=/home/user \
39
  PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
40
 
41
+ RUN mkdir -p /home/user/.cache/huggingface /home/user/.cache/paddleocr /home/user/app \
42
+ && chown -R user:user /home/user
 
 
 
 
 
43
 
 
44
  USER user
45
  WORKDIR /home/user/app
46
 
 
47
  COPY --chown=user:user requirements.txt .
48
 
49
+ # Install PaddlePaddle GPU then Python deps
50
+ RUN python3 -m pip install --user --upgrade pip --break-system-packages && \
51
+ python3 -m pip install --user --break-system-packages paddlepaddle-gpu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ && \
52
+ python3 -m pip install --user --break-system-packages -r requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
54
  COPY --chown=user:user . .
55
 
56
+ RUN chmod +x start.sh
 
 
 
57
 
 
58
  EXPOSE 7860
59
 
 
60
  HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=5 \
61
  CMD curl -f http://localhost:7860/ || exit 1
62
 
 
63
  CMD ["/bin/bash", "/home/user/app/start.sh"]