Spaces:
Running on T4
Running on T4
Commit ·
3e4135a
1
Parent(s): 0ef1544
fix: use Python 3.12 (Ubuntu 24.04 default), simplify Dockerfile, combine install steps
Browse files- Dockerfile +16 -56
Dockerfile
CHANGED
|
@@ -1,48 +1,34 @@
|
|
| 1 |
# Hugging Face Spaces Dockerfile for PaddleOCR-VL Document Parser API
|
| 2 |
# GPU-accelerated document parsing with PaddleOCR-VL-1.5 + PaddlePaddle
|
| 3 |
-
# Build: v5.0.0 - PaddleOCR-VL for high-quality OCR on Nvidia T4
|
| 4 |
#
|
| 5 |
# NOTE: Run with --shm-size 16g for PaddlePaddle shared memory:
|
| 6 |
# docker build -t hf-docling .
|
| 7 |
# docker run --gpus all --shm-size 16g -p 7860:7860 -e API_TOKEN=test hf-docling
|
| 8 |
|
| 9 |
-
# CUDA 12.6 runtime with cuDNN (required by PaddlePaddle GPU)
|
| 10 |
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
|
| 11 |
|
| 12 |
USER root
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
RUN echo "========== STEP 1: Installing system dependencies ==========" && \
|
| 18 |
-
apt-get update && apt-get install -y --no-install-recommends \
|
| 19 |
-
# Python 3.11
|
| 20 |
-
python3.11 \
|
| 21 |
-
python3.11-venv \
|
| 22 |
-
python3.11-dev \
|
| 23 |
python3-pip \
|
| 24 |
-
|
|
|
|
| 25 |
fonts-noto-core \
|
| 26 |
fonts-noto-cjk \
|
| 27 |
fontconfig \
|
| 28 |
-
# Image processing (required by OpenCV)
|
| 29 |
libgl1 \
|
| 30 |
libglib2.0-0 \
|
| 31 |
-
# PDF utilities (required by pdf2image)
|
| 32 |
poppler-utils \
|
| 33 |
-
# Health checks
|
| 34 |
curl \
|
| 35 |
-
&& fc-cache -fv
|
| 36 |
-
|
| 37 |
-
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
|
| 38 |
-
update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
|
| 39 |
-
rm -rf /var/lib/apt/lists/* && \
|
| 40 |
-
echo "========== System dependencies installed =========="
|
| 41 |
|
| 42 |
-
# Create non-root user for HF Spaces
|
| 43 |
RUN useradd -m -u 1000 user
|
| 44 |
|
| 45 |
-
# Set environment variables
|
| 46 |
ENV PYTHONUNBUFFERED=1 \
|
| 47 |
PYTHONDONTWRITEBYTECODE=1 \
|
| 48 |
IMAGES_SCALE=2.0 \
|
|
@@ -52,52 +38,26 @@ ENV PYTHONUNBUFFERED=1 \
|
|
| 52 |
HOME=/home/user \
|
| 53 |
PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
mkdir -p /home/user/.cache/huggingface \
|
| 58 |
-
/home/user/.cache/paddleocr \
|
| 59 |
-
/home/user/app && \
|
| 60 |
-
chown -R user:user /home/user && \
|
| 61 |
-
echo "========== Cache directories created =========="
|
| 62 |
|
| 63 |
-
# Switch to non-root user
|
| 64 |
USER user
|
| 65 |
WORKDIR /home/user/app
|
| 66 |
|
| 67 |
-
# Copy requirements first for better caching
|
| 68 |
COPY --chown=user:user requirements.txt .
|
| 69 |
|
| 70 |
-
# Install PaddlePaddle GPU
|
| 71 |
-
RUN
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
echo "========== PaddlePaddle GPU installed (verified at runtime) =========="
|
| 75 |
-
|
| 76 |
-
# Install Python dependencies from requirements.txt
|
| 77 |
-
RUN echo "========== STEP 4: Installing Python dependencies ==========" && \
|
| 78 |
-
python -m pip install --user -r requirements.txt && \
|
| 79 |
-
echo "Installed packages:" && \
|
| 80 |
-
pip list --user && \
|
| 81 |
-
echo "========== Python dependencies installed =========="
|
| 82 |
-
|
| 83 |
-
# NOTE: Model pre-download skipped — PaddlePaddle GPU requires CUDA at import time,
|
| 84 |
-
# which is unavailable during Docker build. Model downloads on first startup (~60s).
|
| 85 |
-
RUN echo "========== STEP 5: Skipping model pre-download (no GPU during build) =========="
|
| 86 |
|
| 87 |
-
# Copy application code
|
| 88 |
COPY --chown=user:user . .
|
| 89 |
|
| 90 |
-
RUN
|
| 91 |
-
chmod +x start.sh && \
|
| 92 |
-
echo "Files in app directory:" && ls -la /home/user/app/ && \
|
| 93 |
-
echo "========== BUILD COMPLETED at $(date -u '+%Y-%m-%d %H:%M:%S UTC') =========="
|
| 94 |
|
| 95 |
-
# Expose the port (HF Spaces standard)
|
| 96 |
EXPOSE 7860
|
| 97 |
|
| 98 |
-
# Health check
|
| 99 |
HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=5 \
|
| 100 |
CMD curl -f http://localhost:7860/ || exit 1
|
| 101 |
|
| 102 |
-
# Single-process FastAPI app (no vLLM sidecar needed)
|
| 103 |
CMD ["/bin/bash", "/home/user/app/start.sh"]
|
|
|
|
| 1 |
# Hugging Face Spaces Dockerfile for PaddleOCR-VL Document Parser API
|
| 2 |
# GPU-accelerated document parsing with PaddleOCR-VL-1.5 + PaddlePaddle
|
| 3 |
+
# Build: v5.0.0 - PaddleOCR-VL for high-quality OCR on Nvidia L4/T4
|
| 4 |
#
|
| 5 |
# NOTE: Run with --shm-size 16g for PaddlePaddle shared memory:
|
| 6 |
# docker build -t hf-docling .
|
| 7 |
# docker run --gpus all --shm-size 16g -p 7860:7860 -e API_TOKEN=test hf-docling
|
| 8 |
|
|
|
|
| 9 |
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
|
| 10 |
|
| 11 |
USER root
|
| 12 |
|
| 13 |
+
# Install system dependencies (Python 3.12 is default in Ubuntu 24.04)
|
| 14 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 15 |
+
python3 \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
python3-pip \
|
| 17 |
+
python3-venv \
|
| 18 |
+
python3-dev \
|
| 19 |
fonts-noto-core \
|
| 20 |
fonts-noto-cjk \
|
| 21 |
fontconfig \
|
|
|
|
| 22 |
libgl1 \
|
| 23 |
libglib2.0-0 \
|
|
|
|
| 24 |
poppler-utils \
|
|
|
|
| 25 |
curl \
|
| 26 |
+
&& fc-cache -fv \
|
| 27 |
+
&& rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
# Create non-root user (UID 1000) for HF Spaces. Ubuntu 24.04 images ship a default `ubuntu` account on UID 1000; drop it if present so useradd does not fail with "UID 1000 is not unique".
|
| 30 |
RUN if id -u ubuntu >/dev/null 2>&1; then userdel -r ubuntu; fi && useradd -m -u 1000 user
|
| 31 |
|
|
|
|
| 32 |
ENV PYTHONUNBUFFERED=1 \
|
| 33 |
PYTHONDONTWRITEBYTECODE=1 \
|
| 34 |
IMAGES_SCALE=2.0 \
|
|
|
|
| 38 |
HOME=/home/user \
|
| 39 |
PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 40 |
|
| 41 |
+
# Still running as root: pre-create the Hugging Face and PaddleOCR cache directories plus the app directory, then hand the whole /home/user tree to the non-root user.
RUN mkdir -p /home/user/.cache/huggingface /home/user/.cache/paddleocr /home/user/app \
|
| 42 |
+
&& chown -R user:user /home/user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
|
|
|
| 44 |
USER user
|
| 45 |
WORKDIR /home/user/app
|
| 46 |
|
|
|
|
| 47 |
COPY --chown=user:user requirements.txt .
|
| 48 |
|
| 49 |
+
# Install PaddlePaddle GPU (from the Paddle cu126 package index) then the Python deps; --no-cache-dir keeps pip's download cache out of the image layer
|
| 50 |
+
RUN python3 -m pip install --no-cache-dir --user --upgrade pip --break-system-packages && \
|
| 51 |
+
python3 -m pip install --no-cache-dir --user --break-system-packages paddlepaddle-gpu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ && \
|
| 52 |
+
python3 -m pip install --no-cache-dir --user --break-system-packages -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
|
|
|
| 54 |
# Copy the rest of the build context into the working directory, owned by the non-root user.
COPY --chown=user:user . .
|
| 55 |
|
| 56 |
+
# start.sh is the script launched by the final CMD; make sure it carries the executable bit.
RUN chmod +x start.sh
|
|
|
|
|
|
|
|
|
|
| 57 |
|
|
|
|
| 58 |
EXPOSE 7860
|
| 59 |
|
|
|
|
| 60 |
# Probe the app's root endpoint on the exposed port. -f fails on HTTP errors; -sS hides curl's progress meter but still prints error messages to the health log.
HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=5 \
|
| 61 |
CMD curl -fsS http://localhost:7860/ || exit 1
|
| 62 |
|
|
|
|
| 63 |
CMD ["/bin/bash", "/home/user/app/start.sh"]
|