Spaces:
Running
refactor: replace source compilation with official pre-built GHCR images
Browse files
Use ghcr.io/bytedance/deer-flow-backend:latest and
ghcr.io/bytedance/deer-flow-frontend:latest as source stages instead
of cloning + building from source. Eliminates:
- pnpm install + Next.js Turbopack build (OOM risk, 15-20 min)
- uv sync + grpcio/sympy native compilation (stall risk, 10-15 min)
Build now pulls pre-built images (~5 min) instead of compiling (~30 min).
Retain minimal alpine/git clone only for skills/ and config.example.yaml
which are not bundled in the official images.
Image structure verified from upstream Dockerfiles:
backend: /app/backend/ (source + .venv) matches start.sh cd /app/backend
frontend: /app/frontend/ (built .next + node_modules) matches start.sh cd /app/frontend
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
- Dockerfile +38 -92
|
@@ -3,92 +3,39 @@
|
|
| 3 |
# HuggingFlow — DeerFlow Research Agent for Hugging Face Spaces
|
| 4 |
# ────────────────────────────────────────────────────────────────
|
| 5 |
#
|
| 6 |
-
#
|
|
|
|
|
|
|
| 7 |
# Public port 7860 → health-server.js → nginx:7861 → backend:8001 / frontend:3000
|
| 8 |
#
|
| 9 |
# Build args:
|
| 10 |
-
#
|
| 11 |
-
#
|
| 12 |
-
#
|
|
|
|
| 13 |
|
| 14 |
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.7.20
|
| 15 |
-
ARG
|
|
|
|
| 16 |
|
| 17 |
# ── uv source ────────────────────────────────────────────────────
|
| 18 |
FROM ${UV_IMAGE} AS uv-source
|
| 19 |
|
| 20 |
-
# ββ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
FROM alpine/git:latest AS source
|
| 22 |
-
ARG DEER_FLOW_REF
|
| 23 |
RUN git clone --depth=1 \
|
| 24 |
https://github.com/bytedance/deer-flow.git /src && \
|
| 25 |
cd /src && \
|
| 26 |
git log --oneline -1
|
| 27 |
|
| 28 |
-
# ββ
|
| 29 |
-
FROM node:22-alpine AS frontend-builder
|
| 30 |
-
|
| 31 |
-
RUN corepack enable && corepack install -g pnpm@10.26.2
|
| 32 |
-
|
| 33 |
-
WORKDIR /app
|
| 34 |
-
COPY --from=source /src/frontend ./frontend
|
| 35 |
-
|
| 36 |
-
# pnpm virtual store uses hard links — COPY in later stages works correctly
|
| 37 |
-
# BuildKit cache mount makes pnpm install survive flaky HF Spaces network
|
| 38 |
-
RUN --mount=type=cache,target=/root/.local/share/pnpm/store \
|
| 39 |
-
cd frontend && \
|
| 40 |
-
( pnpm install --frozen-lockfile \
|
| 41 |
-
|| (echo "pnpm install retry 2" && pnpm install --frozen-lockfile) \
|
| 42 |
-
|| (echo "pnpm install retry 3" && pnpm install --frozen-lockfile) )
|
| 43 |
-
|
| 44 |
-
# SKIP_ENV_VALIDATION=1 bypasses t3-oss env checks (no secrets at build time)
|
| 45 |
-
# NODE_OPTIONS caps heap to 3 GB — prevents OOMKilled on HF Spaces build servers
|
| 46 |
-
RUN cd frontend && SKIP_ENV_VALIDATION=1 NODE_OPTIONS="--max-old-space-size=3072" pnpm build
|
| 47 |
-
|
| 48 |
-
# ββ Stage 3: Install Python backend dependencies ββββββββββββββββββ
|
| 49 |
-
# NOTE: COPY --from=frontend-builder serializes this stage after the frontend build.
|
| 50 |
-
# BuildKit would otherwise run both stages in parallel, exhausting HF Spaces build memory.
|
| 51 |
-
FROM python:3.12-slim-bookworm AS backend-builder
|
| 52 |
-
|
| 53 |
-
# Serialize: wait for frontend stage to finish before starting backend compilation.
|
| 54 |
-
# This prevents OOMKilled caused by Next.js + grpcio compilation running simultaneously.
|
| 55 |
-
COPY --from=frontend-builder /app/frontend/.next/package.json /tmp/.frontend-build-done
|
| 56 |
-
|
| 57 |
-
COPY --from=uv-source /uv /uvx /usr/local/bin/
|
| 58 |
-
|
| 59 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 60 |
-
build-essential ca-certificates curl git \
|
| 61 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 62 |
-
|
| 63 |
-
ENV UV_HTTP_TIMEOUT=120 \
|
| 64 |
-
UV_CONCURRENT_DOWNLOADS=2 \
|
| 65 |
-
UV_INDEX_URL=https://pypi.org/simple \
|
| 66 |
-
UV_LINK_MODE=copy
|
| 67 |
-
|
| 68 |
-
WORKDIR /app
|
| 69 |
-
COPY --from=source /src/backend ./backend
|
| 70 |
-
|
| 71 |
-
# uv sync with BuildKit cache mount (matches DeerFlow's official Dockerfile pattern):
|
| 72 |
-
# - --mount=type=cache persists uv's wheel cache across retries within the same RUN,
|
| 73 |
-
# so each retry only re-fetches the wheel(s) that failed previously
|
| 74 |
-
# - --no-install-package skips heavy markitdown[all] extras not needed for web research:
|
| 75 |
-
# speechrecognition (audio), magika+onnxruntime (file detection), pdfminer-six (PDF)
|
| 76 |
-
# → saves ~65MB of downloads on a flaky HF Spaces build network
|
| 77 |
-
RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked \
|
| 78 |
-
cd backend && \
|
| 79 |
-
( uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six \
|
| 80 |
-
|| (echo "retry 2" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 81 |
-
|| (echo "retry 3" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 82 |
-
|| (echo "retry 4" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 83 |
-
|| (echo "retry 5" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 84 |
-
|| (echo "retry 6" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 85 |
-
|| (echo "retry 7" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 86 |
-
|| (echo "retry 8" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 87 |
-
|| (echo "retry 9" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 88 |
-
|| (echo "retry 10" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
|
| 89 |
-
|| (echo "ERROR: uv sync failed after 10 attempts" && exit 1) )
|
| 90 |
-
|
| 91 |
-
# ββ Stage 4: Runtime βββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
FROM python:3.12-slim-bookworm
|
| 93 |
|
| 94 |
ENV LANG=C.UTF-8 \
|
|
@@ -98,12 +45,12 @@ ENV LANG=C.UTF-8 \
|
|
| 98 |
|
| 99 |
ARG NODE_MAJOR=22
|
| 100 |
|
| 101 |
-
#
|
| 102 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 103 |
curl ca-certificates gnupg nginx jq \
|
| 104 |
&& rm -rf /var/lib/apt/lists/*
|
| 105 |
|
| 106 |
-
#
|
| 107 |
RUN mkdir -p /etc/apt/keyrings \
|
| 108 |
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
|
| 109 |
| gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
|
@@ -113,7 +60,7 @@ RUN mkdir -p /etc/apt/keyrings \
|
|
| 113 |
&& apt-get update && apt-get install -y --no-install-recommends nodejs \
|
| 114 |
&& rm -rf /var/lib/apt/lists/*
|
| 115 |
|
| 116 |
-
#
|
| 117 |
RUN pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 \
|
| 118 |
huggingface_hub pyyaml \
|
| 119 |
|| (echo "pip retry 2" && pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 huggingface_hub pyyaml) \
|
|
@@ -123,7 +70,7 @@ RUN pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries
|
|
| 123 |
# pnpm for `pnpm start` in Next.js runtime
|
| 124 |
RUN corepack enable && corepack install -g pnpm@10.26.2
|
| 125 |
|
| 126 |
-
# uv for backend startup
|
| 127 |
COPY --from=uv-source /uv /uvx /usr/local/bin/
|
| 128 |
|
| 129 |
# ββ Create non-root user UID=1000 (required by HF Spaces) ββββββββ
|
|
@@ -135,26 +82,25 @@ RUN useradd -m -u 1000 -s /bin/bash user && \
|
|
| 135 |
/app/data \
|
| 136 |
/tmp/nginx-tmp && \
|
| 137 |
chown -R 1000:1000 /app /tmp/nginx-tmp && \
|
| 138 |
-
# nginx non-root: redirect all temp/pid/log paths to writable dirs
|
| 139 |
chown -R 1000:1000 /var/log/nginx /var/lib/nginx 2>/dev/null || true
|
| 140 |
|
| 141 |
-
# ββ Copy built artifacts ββββββββββββββββββββββββββββ
|
| 142 |
-
# Backend: Python source + pre-built .venv
|
| 143 |
-
COPY --from=backend-
|
| 144 |
-
#
|
| 145 |
-
COPY --from=
|
| 146 |
-
#
|
|
|
|
|
|
|
| 147 |
COPY --from=source --chown=1000:1000 /src/config.example.yaml /app/config.example.yaml
|
| 148 |
-
# Frontend: built .next + node_modules (pnpm hard links — self-contained after COPY)
|
| 149 |
-
COPY --from=frontend-builder --chown=1000:1000 /app/frontend /app/frontend
|
| 150 |
|
| 151 |
# ββ Copy HuggingFlow runtime scripts βββββββββββββββββββββββββββββ
|
| 152 |
-
COPY --chown=1000:1000 nginx.conf
|
| 153 |
-
COPY --chown=1000:1000 start.sh
|
| 154 |
-
COPY --chown=1000:1000 deerflow-sync.py
|
| 155 |
-
COPY --chown=1000:1000 health-server.js
|
| 156 |
-
COPY --chown=1000:1000 cloudflare-proxy.js
|
| 157 |
-
COPY --chown=1000:1000 cloudflare-proxy-setup.py
|
| 158 |
COPY --chown=1000:1000 cloudflare-keepalive-setup.py /app/cloudflare-keepalive-setup.py
|
| 159 |
|
| 160 |
RUN chmod +x \
|
|
@@ -168,8 +114,8 @@ WORKDIR /app
|
|
| 168 |
|
| 169 |
EXPOSE 7860
|
| 170 |
|
| 171 |
-
#
|
| 172 |
-
HEALTHCHECK --interval=30s --timeout=10s --start-period=
|
| 173 |
CMD curl -fsS http://localhost:7860/health || exit 1
|
| 174 |
|
| 175 |
CMD ["/app/start.sh"]
|
|
|
|
| 3 |
# HuggingFlow — DeerFlow Research Agent for Hugging Face Spaces
|
| 4 |
# ────────────────────────────────────────────────────────────────
|
| 5 |
#
|
| 6 |
+
# Uses official pre-built DeerFlow images from GHCR — no compile step.
|
| 7 |
+
# Build time: ~5 min (was 30+ min building from source).
|
| 8 |
+
#
|
| 9 |
# Public port 7860 → health-server.js → nginx:7861 → backend:8001 / frontend:3000
|
| 10 |
#
|
| 11 |
# Build args:
|
| 12 |
+
# DEERFLOW_BACKEND — backend image (default: ghcr.io/bytedance/deer-flow-backend:latest)
|
| 13 |
+
# DEERFLOW_FRONTEND — frontend image (default: ghcr.io/bytedance/deer-flow-frontend:latest)
|
| 14 |
+
# UV_IMAGE — uv tool image (default: ghcr.io/astral-sh/uv:0.7.20)
|
| 15 |
+
# NODE_MAJOR — Node.js major version (default: 22)
|
| 16 |
|
| 17 |
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.7.20
|
| 18 |
+
ARG DEERFLOW_BACKEND=ghcr.io/bytedance/deer-flow-backend:latest
|
| 19 |
+
ARG DEERFLOW_FRONTEND=ghcr.io/bytedance/deer-flow-frontend:latest
|
| 20 |
|
| 21 |
# ── uv source ────────────────────────────────────────────────────
|
| 22 |
FROM ${UV_IMAGE} AS uv-source
|
| 23 |
|
| 24 |
+
# ── Pre-built DeerFlow images (no source compilation needed) ──────
|
| 25 |
+
# Backend image layout: /app/backend/ (Python source + .venv)
|
| 26 |
+
# Frontend image layout: /app/frontend/ (built .next + node_modules)
|
| 27 |
+
FROM ${DEERFLOW_BACKEND} AS backend-src
|
| 28 |
+
FROM ${DEERFLOW_FRONTEND} AS frontend-src
|
| 29 |
+
|
| 30 |
+
# ── Minimal source clone (skills + config only) ───────────────────
|
| 31 |
+
# skills/ and config.example.yaml are not bundled in the official images
|
| 32 |
FROM alpine/git:latest AS source
|
|
|
|
| 33 |
RUN git clone --depth=1 \
|
| 34 |
https://github.com/bytedance/deer-flow.git /src && \
|
| 35 |
cd /src && \
|
| 36 |
git log --oneline -1
|
| 37 |
|
| 38 |
+
# ── Runtime ───────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
FROM python:3.12-slim-bookworm
|
| 40 |
|
| 41 |
ENV LANG=C.UTF-8 \
|
|
|
|
| 45 |
|
| 46 |
ARG NODE_MAJOR=22
|
| 47 |
|
| 48 |
+
# Layer 1: nginx + base tools (rarely changes — stays cached)
|
| 49 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 50 |
curl ca-certificates gnupg nginx jq \
|
| 51 |
&& rm -rf /var/lib/apt/lists/*
|
| 52 |
|
| 53 |
+
# Layer 2: Node.js (separate layer — apt network stall doesn't force pip re-run)
|
| 54 |
RUN mkdir -p /etc/apt/keyrings \
|
| 55 |
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
|
| 56 |
| gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
|
|
|
| 60 |
&& apt-get update && apt-get install -y --no-install-recommends nodejs \
|
| 61 |
&& rm -rf /var/lib/apt/lists/*
|
| 62 |
|
| 63 |
+
# Layer 3: Python helpers with retries (flaky HF Spaces network)
|
| 64 |
RUN pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 \
|
| 65 |
huggingface_hub pyyaml \
|
| 66 |
|| (echo "pip retry 2" && pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 huggingface_hub pyyaml) \
|
|
|
|
| 70 |
# pnpm for `pnpm start` in Next.js runtime
|
| 71 |
RUN corepack enable && corepack install -g pnpm@10.26.2
|
| 72 |
|
| 73 |
+
# uv for backend startup (`uv run --no-sync uvicorn ...`)
|
| 74 |
COPY --from=uv-source /uv /uvx /usr/local/bin/
|
| 75 |
|
| 76 |
# ── Create non-root user UID=1000 (required by HF Spaces) ────────
|
|
|
|
| 82 |
/app/data \
|
| 83 |
/tmp/nginx-tmp && \
|
| 84 |
chown -R 1000:1000 /app /tmp/nginx-tmp && \
|
|
|
|
| 85 |
chown -R 1000:1000 /var/log/nginx /var/lib/nginx 2>/dev/null || true
|
| 86 |
|
| 87 |
+
# ── Copy pre-built DeerFlow artifacts ────────────────────────────
|
| 88 |
+
# Backend: Python source + pre-built .venv (no uv sync / grpcio compile)
|
| 89 |
+
COPY --from=backend-src --chown=1000:1000 /app/backend /app/backend
|
| 90 |
+
# Frontend: built .next + node_modules (no pnpm install / Next.js build)
|
| 91 |
+
COPY --from=frontend-src --chown=1000:1000 /app/frontend /app/frontend
|
| 92 |
+
# Skills (not bundled in official images)
|
| 93 |
+
COPY --from=source --chown=1000:1000 /src/skills /app/skills
|
| 94 |
+
# Config template
|
| 95 |
COPY --from=source --chown=1000:1000 /src/config.example.yaml /app/config.example.yaml
|
|
|
|
|
|
|
| 96 |
|
| 97 |
# ── Copy HuggingFlow runtime scripts ─────────────────────────────
|
| 98 |
+
COPY --chown=1000:1000 nginx.conf /etc/nginx/nginx.conf
|
| 99 |
+
COPY --chown=1000:1000 start.sh /app/start.sh
|
| 100 |
+
COPY --chown=1000:1000 deerflow-sync.py /app/deerflow-sync.py
|
| 101 |
+
COPY --chown=1000:1000 health-server.js /app/health-server.js
|
| 102 |
+
COPY --chown=1000:1000 cloudflare-proxy.js /app/cloudflare-proxy.js
|
| 103 |
+
COPY --chown=1000:1000 cloudflare-proxy-setup.py /app/cloudflare-proxy-setup.py
|
| 104 |
COPY --chown=1000:1000 cloudflare-keepalive-setup.py /app/cloudflare-keepalive-setup.py
|
| 105 |
|
| 106 |
RUN chmod +x \
|
|
|
|
| 114 |
|
| 115 |
EXPOSE 7860
|
| 116 |
|
| 117 |
+
# 60s start period — no compilation, just config generation + service startup
|
| 118 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \
|
| 119 |
CMD curl -fsS http://localhost:7860/health || exit 1
|
| 120 |
|
| 121 |
CMD ["/app/start.sh"]
|