somratpro Claude Opus 4.7 committed on
Commit
45fe8f0
·
1 Parent(s): 6cd3e11

refactor: replace source compilation with official pre-built GHCR images

Browse files

Use ghcr.io/bytedance/deer-flow-backend:latest and
ghcr.io/bytedance/deer-flow-frontend:latest as source stages instead
of cloning + building from source. Eliminates:
- pnpm install + Next.js Turbopack build (OOM risk, 15-20 min)
- uv sync + grpcio/sympy native compilation (stall risk, 10-15 min)

Build now pulls pre-built images (~5 min) instead of compiling (~30 min).
Retain minimal alpine/git clone only for skills/ and config.example.yaml
which are not bundled in the official images.

Image structure verified from upstream Dockerfiles:
backend: /app/backend/ (source + .venv) matches start.sh cd /app/backend
frontend: /app/frontend/ (built .next + node_modules) matches start.sh cd /app/frontend

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (1) hide show
  1. Dockerfile +38 -92
Dockerfile CHANGED
@@ -3,92 +3,39 @@
3
  # HuggingFlow — DeerFlow Research Agent for Hugging Face Spaces
4
  # ════════════════════════════════════════════════════════════════
5
  #
6
- # Single-container deployment of DeerFlow (frontend + backend + nginx)
 
 
7
  # Public port 7860 → health-server.js → nginx:7861 → backend:8001 / frontend:3000
8
  #
9
  # Build args:
10
- # DEER_FLOW_REF — git ref to clone (branch/tag/sha, default: main)
11
- # UV_IMAGE — uv tool image (default: ghcr.io/astral-sh/uv:0.7.20)
12
- # NODE_MAJOR — Node.js major version (default: 22)
 
13
 
14
  ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.7.20
15
- ARG DEER_FLOW_REF=main
 
16
 
17
  # ── uv source ────────────────────────────────────────────────────
18
  FROM ${UV_IMAGE} AS uv-source
19
 
20
- # ── Stage 1: Clone DeerFlow source ───────────────────────────────
 
 
 
 
 
 
 
21
  FROM alpine/git:latest AS source
22
- ARG DEER_FLOW_REF
23
  RUN git clone --depth=1 \
24
  https://github.com/bytedance/deer-flow.git /src && \
25
  cd /src && \
26
  git log --oneline -1
27
 
28
- # ── Stage 2: Build Next.js frontend ──────────────────────────────
29
- FROM node:22-alpine AS frontend-builder
30
-
31
- RUN corepack enable && corepack install -g pnpm@10.26.2
32
-
33
- WORKDIR /app
34
- COPY --from=source /src/frontend ./frontend
35
-
36
- # pnpm virtual store uses hard links — COPY in later stages works correctly
37
- # BuildKit cache mount makes pnpm install survive flaky HF Spaces network
38
- RUN --mount=type=cache,target=/root/.local/share/pnpm/store \
39
- cd frontend && \
40
- ( pnpm install --frozen-lockfile \
41
- || (echo "pnpm install retry 2" && pnpm install --frozen-lockfile) \
42
- || (echo "pnpm install retry 3" && pnpm install --frozen-lockfile) )
43
-
44
- # SKIP_ENV_VALIDATION=1 bypasses t3-oss env checks (no secrets at build time)
45
- # NODE_OPTIONS caps heap to 3 GB — prevents OOMKilled on HF Spaces build servers
46
- RUN cd frontend && SKIP_ENV_VALIDATION=1 NODE_OPTIONS="--max-old-space-size=3072" pnpm build
47
-
48
- # ── Stage 3: Install Python backend dependencies ──────────────────
49
- # NOTE: COPY --from=frontend-builder serializes this stage after the frontend build.
50
- # BuildKit would otherwise run both stages in parallel, exhausting HF Spaces build memory.
51
- FROM python:3.12-slim-bookworm AS backend-builder
52
-
53
- # Serialize: wait for frontend stage to finish before starting backend compilation.
54
- # This prevents OOMKilled caused by Next.js + grpcio compilation running simultaneously.
55
- COPY --from=frontend-builder /app/frontend/.next/package.json /tmp/.frontend-build-done
56
-
57
- COPY --from=uv-source /uv /uvx /usr/local/bin/
58
-
59
- RUN apt-get update && apt-get install -y --no-install-recommends \
60
- build-essential ca-certificates curl git \
61
- && rm -rf /var/lib/apt/lists/*
62
-
63
- ENV UV_HTTP_TIMEOUT=120 \
64
- UV_CONCURRENT_DOWNLOADS=2 \
65
- UV_INDEX_URL=https://pypi.org/simple \
66
- UV_LINK_MODE=copy
67
-
68
- WORKDIR /app
69
- COPY --from=source /src/backend ./backend
70
-
71
- # uv sync with BuildKit cache mount (matches DeerFlow's official Dockerfile pattern):
72
- # - --mount=type=cache persists uv's wheel cache across retries within the same RUN,
73
- # so each retry only re-fetches the wheel(s) that failed previously
74
- # - --no-install-package skips heavy markitdown[all] extras not needed for web research:
75
- # speechrecognition (audio), magika+onnxruntime (file detection), pdfminer-six (PDF)
76
- # — saves ~65MB of downloads on a flaky HF Spaces build network
77
- RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked \
78
- cd backend && \
79
- ( uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six \
80
- || (echo "retry 2" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
81
- || (echo "retry 3" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
82
- || (echo "retry 4" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
83
- || (echo "retry 5" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
84
- || (echo "retry 6" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
85
- || (echo "retry 7" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
86
- || (echo "retry 8" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
87
- || (echo "retry 9" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
88
- || (echo "retry 10" && uv sync --no-install-package onnxruntime --no-install-package magika --no-install-package speechrecognition --no-install-package pdfminer-six) \
89
- || (echo "ERROR: uv sync failed after 10 attempts" && exit 1) )
90
-
91
- # ── Stage 4: Runtime ─────────────────────────────────────────────
92
  FROM python:3.12-slim-bookworm
93
 
94
  ENV LANG=C.UTF-8 \
@@ -98,12 +45,12 @@ ENV LANG=C.UTF-8 \
98
 
99
  ARG NODE_MAJOR=22
100
 
101
- # Install: nginx, curl, jq, gnupg (separate layer — cached independently)
102
  RUN apt-get update && apt-get install -y --no-install-recommends \
103
  curl ca-certificates gnupg nginx jq \
104
  && rm -rf /var/lib/apt/lists/*
105
 
106
- # Install Node.js (separate layer so apt cache miss doesn't re-download pip packages)
107
  RUN mkdir -p /etc/apt/keyrings \
108
  && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
109
  | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
@@ -113,7 +60,7 @@ RUN mkdir -p /etc/apt/keyrings \
113
  && apt-get update && apt-get install -y --no-install-recommends nodejs \
114
  && rm -rf /var/lib/apt/lists/*
115
 
116
- # Install Python helpers (separate layer + retries for flaky HF Spaces network)
117
  RUN pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 \
118
  huggingface_hub pyyaml \
119
  || (echo "pip retry 2" && pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 huggingface_hub pyyaml) \
@@ -123,7 +70,7 @@ RUN pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries
123
  # pnpm for `pnpm start` in Next.js runtime
124
  RUN corepack enable && corepack install -g pnpm@10.26.2
125
 
126
- # uv for backend startup
127
  COPY --from=uv-source /uv /uvx /usr/local/bin/
128
 
129
  # ── Create non-root user UID=1000 (required by HF Spaces) ────────
@@ -135,26 +82,25 @@ RUN useradd -m -u 1000 -s /bin/bash user && \
135
  /app/data \
136
  /tmp/nginx-tmp && \
137
  chown -R 1000:1000 /app /tmp/nginx-tmp && \
138
- # nginx non-root: redirect all temp/pid/log paths to writable dirs
139
  chown -R 1000:1000 /var/log/nginx /var/lib/nginx 2>/dev/null || true
140
 
141
- # ── Copy built artifacts ──────────────────────────────────────────
142
- # Backend: Python source + pre-built .venv from uv sync
143
- COPY --from=backend-builder --chown=1000:1000 /app/backend /app/backend
144
- # Skills directory (read-only agent skills)
145
- COPY --from=source --chown=1000:1000 /src/skills /app/skills
146
- # Config template (used to generate config.yaml at startup)
 
 
147
  COPY --from=source --chown=1000:1000 /src/config.example.yaml /app/config.example.yaml
148
- # Frontend: built .next + node_modules (pnpm hard links — self-contained after COPY)
149
- COPY --from=frontend-builder --chown=1000:1000 /app/frontend /app/frontend
150
 
151
  # ── Copy HuggingFlow runtime scripts ─────────────────────────────
152
- COPY --chown=1000:1000 nginx.conf /etc/nginx/nginx.conf
153
- COPY --chown=1000:1000 start.sh /app/start.sh
154
- COPY --chown=1000:1000 deerflow-sync.py /app/deerflow-sync.py
155
- COPY --chown=1000:1000 health-server.js /app/health-server.js
156
- COPY --chown=1000:1000 cloudflare-proxy.js /app/cloudflare-proxy.js
157
- COPY --chown=1000:1000 cloudflare-proxy-setup.py /app/cloudflare-proxy-setup.py
158
  COPY --chown=1000:1000 cloudflare-keepalive-setup.py /app/cloudflare-keepalive-setup.py
159
 
160
  RUN chmod +x \
@@ -168,8 +114,8 @@ WORKDIR /app
168
 
169
  EXPOSE 7860
170
 
171
- # 120s start period: frontend build + backend uv sync + DB init takes ~60-90s on cold start
172
- HEALTHCHECK --interval=30s --timeout=10s --start-period=120s \
173
  CMD curl -fsS http://localhost:7860/health || exit 1
174
 
175
  CMD ["/app/start.sh"]
 
3
  # HuggingFlow — DeerFlow Research Agent for Hugging Face Spaces
4
  # ════════════════════════════════════════════════════════════════
5
  #
6
+ # Uses official pre-built DeerFlow images from GHCR — no compile step.
7
+ # Build time: ~5 min (was 30+ min building from source).
8
+ #
9
  # Public port 7860 → health-server.js → nginx:7861 → backend:8001 / frontend:3000
10
  #
11
  # Build args:
12
+ # DEERFLOW_BACKEND — backend image (default: ghcr.io/bytedance/deer-flow-backend:latest)
13
+ # DEERFLOW_FRONTEND — frontend image (default: ghcr.io/bytedance/deer-flow-frontend:latest)
14
+ # UV_IMAGE — uv tool image (default: ghcr.io/astral-sh/uv:0.7.20)
15
+ # NODE_MAJOR — Node.js major version (default: 22)
16
 
17
  ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.7.20
18
+ ARG DEERFLOW_BACKEND=ghcr.io/bytedance/deer-flow-backend:latest
19
+ ARG DEERFLOW_FRONTEND=ghcr.io/bytedance/deer-flow-frontend:latest
20
 
21
  # ── uv source ────────────────────────────────────────────────────
22
  FROM ${UV_IMAGE} AS uv-source
23
 
24
+ # ── Pre-built DeerFlow images (no source compilation needed) ──────
25
+ # Backend image layout: /app/backend/ (Python source + .venv)
26
+ # Frontend image layout: /app/frontend/ (built .next + node_modules)
27
+ FROM ${DEERFLOW_BACKEND} AS backend-src
28
+ FROM ${DEERFLOW_FRONTEND} AS frontend-src
29
+
30
+ # ── Minimal source clone (skills + config only) ───────────────────
31
+ # skills/ and config.example.yaml are not bundled in the official images
32
  FROM alpine/git:latest AS source
 
33
  RUN git clone --depth=1 \
34
  https://github.com/bytedance/deer-flow.git /src && \
35
  cd /src && \
36
  git log --oneline -1
37
 
38
+ # ── Runtime ───────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  FROM python:3.12-slim-bookworm
40
 
41
  ENV LANG=C.UTF-8 \
 
45
 
46
  ARG NODE_MAJOR=22
47
 
48
+ # Layer 1: nginx + base tools (rarely changes — stays cached)
49
  RUN apt-get update && apt-get install -y --no-install-recommends \
50
  curl ca-certificates gnupg nginx jq \
51
  && rm -rf /var/lib/apt/lists/*
52
 
53
+ # Layer 2: Node.js (separate layer — apt network stall doesn't force pip re-run)
54
  RUN mkdir -p /etc/apt/keyrings \
55
  && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
56
  | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
 
60
  && apt-get update && apt-get install -y --no-install-recommends nodejs \
61
  && rm -rf /var/lib/apt/lists/*
62
 
63
+ # Layer 3: Python helpers with retries (flaky HF Spaces network)
64
  RUN pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 \
65
  huggingface_hub pyyaml \
66
  || (echo "pip retry 2" && pip3 install --no-cache-dir --break-system-packages --timeout 120 --retries 5 huggingface_hub pyyaml) \
 
70
  # pnpm for `pnpm start` in Next.js runtime
71
  RUN corepack enable && corepack install -g pnpm@10.26.2
72
 
73
+ # uv for backend startup (`uv run --no-sync uvicorn ...`)
74
  COPY --from=uv-source /uv /uvx /usr/local/bin/
75
 
76
  # ── Create non-root user UID=1000 (required by HF Spaces) ────────
 
82
  /app/data \
83
  /tmp/nginx-tmp && \
84
  chown -R 1000:1000 /app /tmp/nginx-tmp && \
 
85
  chown -R 1000:1000 /var/log/nginx /var/lib/nginx 2>/dev/null || true
86
 
87
+ # ── Copy pre-built DeerFlow artifacts ────────────────────────────
88
+ # Backend: Python source + pre-built .venv (no uv sync / grpcio compile)
89
+ COPY --from=backend-src --chown=1000:1000 /app/backend /app/backend
90
+ # Frontend: built .next + node_modules (no pnpm install / Next.js build)
91
+ COPY --from=frontend-src --chown=1000:1000 /app/frontend /app/frontend
92
+ # Skills (not bundled in official images)
93
+ COPY --from=source --chown=1000:1000 /src/skills /app/skills
94
+ # Config template
95
  COPY --from=source --chown=1000:1000 /src/config.example.yaml /app/config.example.yaml
 
 
96
 
97
  # ── Copy HuggingFlow runtime scripts ─────────────────────────────
98
+ COPY --chown=1000:1000 nginx.conf /etc/nginx/nginx.conf
99
+ COPY --chown=1000:1000 start.sh /app/start.sh
100
+ COPY --chown=1000:1000 deerflow-sync.py /app/deerflow-sync.py
101
+ COPY --chown=1000:1000 health-server.js /app/health-server.js
102
+ COPY --chown=1000:1000 cloudflare-proxy.js /app/cloudflare-proxy.js
103
+ COPY --chown=1000:1000 cloudflare-proxy-setup.py /app/cloudflare-proxy-setup.py
104
  COPY --chown=1000:1000 cloudflare-keepalive-setup.py /app/cloudflare-keepalive-setup.py
105
 
106
  RUN chmod +x \
 
114
 
115
  EXPOSE 7860
116
 
117
+ # 60s start period — no compilation, just config generation + service startup
118
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \
119
  CMD curl -fsS http://localhost:7860/health || exit 1
120
 
121
  CMD ["/app/start.sh"]