somratpro Claude Sonnet 4.6 commited on
Commit
5d8d23e
·
0 Parent(s):

init: HuggingDeer — DeerFlow on Hugging Face Spaces

Browse files

Single-container Docker deployment of DeerFlow (frontend + backend + nginx).
Clones deer-flow source at build time, builds Next.js and Python backend,
runs all three services inside one container on port 7860.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (5) hide show
  1. Dockerfile +123 -0
  2. README.md +75 -0
  3. deer-sync.py +183 -0
  4. nginx.conf +140 -0
  5. start.sh +417 -0
Dockerfile ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ════════════════════════════════════════════════════════════════
2
+ # HuggingDeer — DeerFlow Research Agent for Hugging Face Spaces
3
+ # ════════════════════════════════════════════════════════════════
4
+ #
5
+ # Single-container deployment of DeerFlow (frontend + backend + nginx)
6
+ # on port 7860 as required by HF Spaces Docker runtime.
7
+ #
8
+ # Build args:
9
+ # DEER_FLOW_REF — git ref to clone (branch/tag/sha, default: main)
10
+ # UV_IMAGE — uv tool image (default: ghcr.io/astral-sh/uv:0.7.20)
11
+ # NODE_MAJOR — Node.js major version installed in the runtime stage (default: 22); the frontend build stage is pinned to node:22-alpine
12
+
13
+ ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.7.20
14
+ ARG DEER_FLOW_REF=main
15
+
16
+ # ── uv source ────────────────────────────────────────────────────
17
+ FROM ${UV_IMAGE} AS uv-source
18
+
19
+ # ── Stage 1: Clone DeerFlow source ───────────────────────────────
20
FROM alpine/git:latest AS source
# Re-declare the global build arg so it is visible inside this stage
# (it inherits the default given before the first FROM).
ARG DEER_FLOW_REF
# Fetch exactly the requested ref instead of whatever the default branch
# currently points at. `git fetch origin <ref>` accepts a branch name, a tag
# name or a full commit SHA, which `git clone --branch` does not.
RUN git init -q /src && \
    cd /src && \
    git remote add origin https://github.com/bytedance/deer-flow.git && \
    git fetch --depth=1 origin "${DEER_FLOW_REF}" && \
    git checkout -q FETCH_HEAD && \
    git log --oneline -1
26
+
27
+ # ── Stage 2: Build Next.js frontend ──────────────────────────────
28
FROM node:22-alpine AS frontend-builder

# Activate corepack and pin the pnpm release used for the build.
RUN corepack enable \
 && corepack install -g pnpm@10.26.2

# Work directly inside the frontend tree; the result still lives at
# /app/frontend, which is the path later stages copy from.
WORKDIR /app/frontend
COPY --from=source /src/frontend .

# Install dependencies exactly as recorded in the lockfile.
RUN pnpm install --frozen-lockfile

# SKIP_ENV_VALIDATION=1 turns off the build-time env validation
# (no secrets are available while the image is being built).
RUN SKIP_ENV_VALIDATION=1 pnpm build
40
+
41
+ # ── Stage 3: Install Python backend dependencies ──────────────────
42
FROM python:3.12-slim-bookworm AS backend-builder

# Bring in the uv / uvx binaries from the pinned uv image.
COPY --from=uv-source /uv /uvx /usr/local/bin/

# Toolchain needed while resolving/building Python dependencies.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      curl \
      git \
 && rm -rf /var/lib/apt/lists/*

# Work directly inside the backend tree; it still ends up at /app/backend.
WORKDIR /app/backend
COPY --from=source /src/backend .

# uv sync creates the project virtualenv at /app/backend/.venv,
# separate from the system interpreter.
RUN uv sync
55
+
56
+ # ── Stage 4: Runtime ─────────────────────────────────────────────
57
FROM python:3.12-slim-bookworm

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1

ARG NODE_MAJOR=22

# Install: Node.js (for Next.js runtime), nginx (reverse proxy), runtime tools,
# plus the Python packages used by deer-sync.py and the config generator.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl ca-certificates gnupg nginx jq \
    && mkdir -p /etc/apt/keyrings \
    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
        | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" \
        > /etc/apt/sources.list.d/nodesource.list \
    && apt-get update && apt-get install -y --no-install-recommends nodejs \
    && pip3 install --no-cache-dir --break-system-packages huggingface_hub pyyaml \
    && rm -rf /var/lib/apt/lists/*

# pnpm for `pnpm start` in Next.js runtime
RUN corepack enable && corepack install -g pnpm@10.26.2

# uv for backend startup
COPY --from=uv-source /uv /uvx /usr/local/bin/

# ── Create non-root user UID=1000 (required by HF Spaces) ────────
# Only the final chown of nginx's own directories is best-effort. It is grouped
# in braces so that `|| true` cannot mask a failure of useradd, mkdir or the
# /app chown earlier in the chain.
RUN useradd -m -u 1000 -s /bin/bash user \
    && mkdir -p \
        /app/backend \
        /app/frontend \
        /app/skills \
        /app/data \
        /tmp/nginx-tmp \
    && chown -R 1000:1000 /app /tmp/nginx-tmp \
    && { chown -R 1000:1000 /var/log/nginx /var/lib/nginx 2>/dev/null || true; }

# ── Copy built artifacts ──────────────────────────────────────────
# Backend: Python source + pre-built .venv from uv sync
COPY --from=backend-builder --chown=1000:1000 /app/backend /app/backend
# Skills directory (read-only agent skills)
COPY --from=source --chown=1000:1000 /src/skills /app/skills
# Config template (used to generate config.yaml at startup)
COPY --from=source --chown=1000:1000 /src/config.example.yaml /app/config.example.yaml
# Frontend: built .next + node_modules
COPY --from=frontend-builder --chown=1000:1000 /app/frontend /app/frontend

# ── Copy HuggingDeer runtime scripts ─────────────────────────────
COPY --chown=1000:1000 nginx.conf /etc/nginx/nginx.conf
COPY --chown=1000:1000 start.sh /app/start.sh
COPY --chown=1000:1000 deer-sync.py /app/deer-sync.py

RUN chmod +x /app/start.sh /app/deer-sync.py

USER user
WORKDIR /app

EXPOSE 7860

# 120s start period: leaves time for the startup script to restore state,
# generate config.yaml and bring up the backend and frontend before failed
# probes start counting. (Frontend build and `uv sync` happen at image build
# time, not at container start.)
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s \
    CMD curl -fsS http://localhost:7860/health || exit 1

CMD ["/app/start.sh"]
README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🦌 HuggingDeer
2
+
3
+ **DeerFlow** research agent running as a self-hosted [Hugging Face Space](https://huggingface.co/spaces) (Docker).
4
+
5
+ Single-container deployment — frontend (Next.js) + backend (FastAPI) + nginx all in one image. No Docker-in-Docker, no Kubernetes.
6
+
7
+ ## Required Secrets
8
+
9
+ Set these in **Settings → Variables and Secrets** on your HF Space:
10
+
11
+ | Secret | Required | Description |
12
+ |--------|----------|-------------|
13
+ | `LLM_MODEL` | ✅ | Model in `provider/name` format (see below) |
14
+ | `LLM_API_KEY` | ✅ | API key for the chosen provider |
15
+ | `SERPER_API_KEY` | recommended | Google Search via Serper (better than DuckDuckGo) |
16
+ | `TAVILY_API_KEY` | optional | Alternative web search |
17
+ | `JINA_API_KEY` | optional | Better web page fetching |
18
+ | `AUTH_JWT_SECRET` | optional | JWT signing secret — auto-generated if not set (sessions reset on restart) |
19
+ | `HF_TOKEN` | optional | Your HF token — enables dataset backup/restore of threads |
20
+ | `BACKUP_DATASET_NAME` | optional | HF dataset repo for backup (default: `huggingdeer-backup`) |
21
+
22
+ ## LLM_MODEL format
23
+
24
+ ```
25
+ provider/model-name
26
+ ```
27
+
28
+ Examples:
29
+
30
+ ```
31
+ openai/gpt-4o
32
+ openai/gpt-4o-mini
33
+ anthropic/claude-sonnet-4-5
34
+ anthropic/claude-opus-4-5
35
+ google/gemini-2.5-flash
36
+ deepseek/deepseek-chat
37
+ deepseek/deepseek-reasoner
38
+ openrouter/anthropic/claude-3-5-sonnet
39
+ mistral/mistral-large-latest
40
+ groq/llama-3.3-70b-versatile
41
+ ```
42
+
43
+ ## Deploy to HF Spaces
44
+
45
+ 1. Duplicate this repo to your HF account as a **Docker Space**
46
+ 2. Add required secrets
47
+ 3. Space builds and starts (~5-10 min on first build)
48
+
49
+ ## Optional env vars
50
+
51
+ | Variable | Default | Description |
52
+ |----------|---------|-------------|
53
+ | `CUSTOM_BASE_URL` | — | OpenAI-compatible API base URL (for custom providers) |
54
+ | `SYNC_INTERVAL` | `600` | Seconds between HF Dataset backups |
55
+ | `BACKEND_READY_TIMEOUT` | `120` | Seconds to wait for backend startup |
56
+ | `FRONTEND_READY_TIMEOUT` | `120` | Seconds to wait for frontend startup |
57
+ | `SPACE_HOST` | auto | Set by HF Spaces automatically |
58
+
59
+ ## What runs inside
60
+
61
+ | Process | Port | Role |
62
+ |---------|------|------|
63
+ | nginx | 7860 | Public reverse proxy (routes `/api/*` → backend, `/*` → frontend) |
64
+ | uvicorn (FastAPI) | 8001 | DeerFlow gateway — agents, threads, auth |
65
+ | Next.js | 3000 | DeerFlow UI |
66
+
67
+ ## Caveats
68
+
69
+ - **No Docker sandbox**: DeerFlow's `bash` / code execution tool is disabled by default (`allow_host_bash: false`). File read/write and web search work fine.
70
+ - **Ephemeral storage**: container resets on restart. Enable `HF_TOKEN` + `BACKUP_DATASET_NAME` to persist threads.
71
+ - **Limited concurrency**: backend runs only 2 uvicorn workers. For heavy use, consider a dedicated server.
72
+
73
+ ## Source
74
+
75
+ DeerFlow: https://github.com/bytedance/deer-flow
deer-sync.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ HuggingDeer state sync — backup/restore DeerFlow runtime data to/from HF Dataset.
4
+
5
+ Syncs:
6
+ - deerflow.db (SQLite thread/session database)
7
+ - config.yaml (generated config, may contain user edits)
8
+ - workspace/ (agent-created files in the sandbox workspace)
9
+
10
+ Usage:
11
+ deer-sync.py restore — restore from HF Dataset on startup
12
+ deer-sync.py sync-once — push current state to HF Dataset
13
+ deer-sync.py loop — sync-once on an interval (reads SYNC_INTERVAL env)
14
+ """
15
+
16
+ import os
17
+ import sys
18
+ import time
19
+ import shutil
20
+ import tarfile
21
+ import tempfile
22
+ import logging
23
+ from pathlib import Path
24
+
25
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
26
+ log = logging.getLogger(__name__)
27
+
28
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
29
+ BACKUP_REPO = os.environ.get("BACKUP_DATASET_NAME", "huggingdeer-backup")
30
+ HF_USERNAME = os.environ.get("HF_USERNAME", "")
31
+ DATA_DIR = Path(os.environ.get("DEER_FLOW_HOME", "/app/data"))
32
+ CONFIG_PATH = Path(os.environ.get("DEER_FLOW_CONFIG_PATH", DATA_DIR / "config.yaml"))
33
+ SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "600"))
34
+
35
+ ARCHIVE_NAME = "deerflow-state.tar.gz"
36
+
37
+ # Files/dirs to include in the backup archive
38
+ BACKUP_TARGETS = [
39
+ DATA_DIR / "deerflow.db",
40
+ DATA_DIR / "workspace",
41
+ CONFIG_PATH,
42
+ ]
43
+
44
+
45
def _get_api():
    """Build an ``HfApi`` client authenticated with ``HF_TOKEN``.

    Returns:
        A ``huggingface_hub.HfApi`` instance constructed with the token.

    Raises:
        RuntimeError: if ``HF_TOKEN`` is empty.
    """
    if HF_TOKEN:
        # huggingface_hub is imported at call time, like elsewhere in this file.
        from huggingface_hub import HfApi

        return HfApi(token=HF_TOKEN)
    raise RuntimeError("HF_TOKEN not set")
51
+
52
+
53
def _resolve_repo_id(api) -> str:
    """Turn ``BACKUP_REPO`` into a fully qualified ``owner/name`` repo id.

    A value that already contains a slash is returned untouched. Otherwise the
    owner is ``HF_USERNAME`` when set, else the ``name`` field reported by
    ``api.whoami()`` for the current token.
    """
    if "/" not in BACKUP_REPO:
        owner = HF_USERNAME or api.whoami()["name"]
        return f"{owner}/{BACKUP_REPO}"
    return BACKUP_REPO
62
+
63
+
64
def _ensure_repo(api, repo_id: str):
    """Best-effort creation of the private backup dataset repo.

    ``exist_ok=True`` is passed so an existing repo is not treated as an
    error. Any exception raised by ``create_repo`` is logged as a warning and
    swallowed. ``api`` is accepted but not used here.
    """
    from huggingface_hub import create_repo

    settings = {
        "repo_id": repo_id,
        "repo_type": "dataset",
        "private": True,
        "token": HF_TOKEN,
        "exist_ok": True,
    }
    try:
        create_repo(**settings)
    except Exception as err:
        log.warning("Could not ensure dataset repo: %s", err)
77
+
78
+
79
def _make_archive(dest: Path):
    """Pack every existing entry of ``BACKUP_TARGETS`` into a .tar.gz at ``dest``.

    Members are stored relative to ``DATA_DIR.parent`` so that
    ``_extract_archive`` can restore them to their original location.

    A target that does not live under ``DATA_DIR.parent`` (for example a
    ``DEER_FLOW_CONFIG_PATH`` pointing elsewhere) cannot be expressed as such a
    relative path. It is skipped with a warning instead of raising
    ``ValueError`` and aborting the whole backup.

    NOTE(review): the SQLite file is copied as-is while the backend may be
    writing to it; confirm whether a consistent snapshot is required.
    """
    archive_root = DATA_DIR.parent
    with tarfile.open(dest, "w:gz") as tar:
        for target in BACKUP_TARGETS:
            if not target.exists():
                continue
            try:
                arcname = target.relative_to(archive_root)
            except ValueError:
                log.warning(
                    "Skipping %s: not located under %s, cannot be archived",
                    target,
                    archive_root,
                )
                continue
            tar.add(target, arcname=str(arcname))
            log.debug(" + %s", arcname)
87
+
88
+
89
def _extract_archive(src: Path):
    """Unpack the archive at ``src`` into ``DATA_DIR.parent``.

    The archive comes from a remote dataset, so members are not trusted:

    * On Python versions that provide tarfile extraction filters, each member
      is extracted with ``filter="data"``, which rejects absolute paths,
      ``..`` traversal, links pointing outside the destination and device
      nodes.
    * On older versions an equivalent manual check is applied.

    Rejected members are logged and skipped; the remaining members are still
    restored.
    """
    extract_root = DATA_DIR.parent
    has_filters = hasattr(tarfile, "data_filter")
    resolved_root = extract_root.resolve()

    with tarfile.open(src, "r:gz") as tar:
        for member in tar.getmembers():
            if has_filters:
                try:
                    tar.extract(member, path=extract_root, filter="data")
                except tarfile.FilterError as exc:
                    log.warning("Skipping unsafe archive member %s: %s", member.name, exc)
                continue

            # Fallback for interpreters without extraction filters.
            destination = (resolved_root / member.name).resolve()
            inside_root = destination == resolved_root or resolved_root in destination.parents
            if not inside_root or member.issym() or member.islnk() or member.isdev():
                log.warning("Skipping unsafe archive member %s", member.name)
                continue
            tar.extract(member, path=extract_root)

    log.info("Extracted state to %s", extract_root)
96
+
97
+
98
def restore():
    """Fetch the state archive from the HF Dataset and unpack it locally.

    Does nothing (beyond an info log) when ``HF_TOKEN`` is empty. A download
    error whose message looks like "file/repo missing" is treated as "no
    backup yet" and only logged. Any other failure is logged as a warning and
    re-raised to the caller.
    """
    if not HF_TOKEN:
        log.info("No HF_TOKEN — skipping restore.")
        return

    def _looks_like_missing_backup(error) -> bool:
        # Same message-based heuristic as before: HTTP 404 or "not found" /
        # "does not exist" wording anywhere in the exception text.
        text = str(error)
        lowered = text.lower()
        return "404" in text or "not found" in lowered or "does not exist" in lowered

    try:
        api = _get_api()
        repo_id = _resolve_repo_id(api)
        _ensure_repo(api, repo_id)

        from huggingface_hub import hf_hub_download

        with tempfile.TemporaryDirectory() as scratch:
            try:
                downloaded = hf_hub_download(
                    repo_id=repo_id,
                    filename=ARCHIVE_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                    local_dir=scratch,
                )
                _extract_archive(Path(downloaded))
                log.info("State restored from %s", repo_id)
            except Exception as err:
                if not _looks_like_missing_backup(err):
                    raise
                log.info("No existing backup found in %s — starting fresh.", repo_id)
    except Exception as err:
        log.warning("Restore failed: %s", err)
        raise
129
+
130
+
131
def sync_once():
    """Pack the current state and upload it to the HF Dataset.

    Does nothing when ``HF_TOKEN`` is empty. All failures are logged as a
    warning and swallowed, so a periodic caller keeps running.

    The "nothing to back up" guard checks whether any backup target actually
    exists. Checking the archive size instead would never trigger, because a
    gzip-compressed tar with no members is still a non-empty file; an empty
    archive would then be uploaded over a previously good backup.
    """
    if not HF_TOKEN:
        return

    try:
        if not any(target.exists() for target in BACKUP_TARGETS):
            log.info("Nothing to backup — skipping upload.")
            return

        api = _get_api()
        repo_id = _resolve_repo_id(api)
        _ensure_repo(api, repo_id)

        with tempfile.TemporaryDirectory() as tmp:
            archive = Path(tmp) / ARCHIVE_NAME
            _make_archive(archive)

            api.upload_file(
                path_or_fileobj=str(archive),
                path_in_repo=ARCHIVE_NAME,
                repo_id=repo_id,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            size_kb = archive.stat().st_size // 1024
            log.info("State synced to %s (%d KB)", repo_id, size_kb)
    except Exception as exc:
        log.warning("Sync failed: %s", exc)
160
+
161
+
162
def loop():
    """Back up forever: sleep ``SYNC_INTERVAL`` seconds, then call ``sync_once``.

    The first upload happens only after one full interval has elapsed. An
    exception escaping ``sync_once`` is logged and the loop continues.
    """
    log.info("Starting periodic sync (interval: %ds)", SYNC_INTERVAL)

    def _run_cycle():
        # One sleep-then-sync round; errors never stop the loop.
        time.sleep(SYNC_INTERVAL)
        try:
            sync_once()
        except Exception as err:
            log.warning("Periodic sync error: %s", err)

    while True:
        _run_cycle()
171
+
172
+
173
if __name__ == "__main__":
    # Map each supported sub-command to its handler. A missing or unknown
    # sub-command prints the module usage text and exits with status 1.
    handlers = {
        "restore": restore,
        "sync-once": sync_once,
        "loop": loop,
    }
    requested = sys.argv[1] if len(sys.argv) > 1 else "help"
    action = handlers.get(requested)
    if action is None:
        print(__doc__)
        sys.exit(1)
    action()
nginx.conf ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
events {
    worker_connections 1024;
}

# Non-root nginx: pid file and main error log live under /tmp
pid /tmp/nginx.pid;
error_log /tmp/nginx-error.log warn;

http {
    # Non-root temp dirs (created by the startup script)
    client_body_temp_path /tmp/nginx-tmp/client;
    proxy_temp_path       /tmp/nginx-tmp/proxy;
    fastcgi_temp_path     /tmp/nginx-tmp/fastcgi;
    uwsgi_temp_path       /tmp/nginx-tmp/uwsgi;
    scgi_temp_path        /tmp/nginx-tmp/scgi;

    access_log /dev/stdout;
    error_log  /dev/stderr warn;

    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;

    # ── DeerFlow on HF Spaces ─────────────────────────────────────
    server {
        listen 7860 default_server;
        server_name _;

        # Allow 100 MB uploads (thread file attachments)
        client_max_body_size 100M;

        # HF Spaces embeds the app in an iframe — must allow framing.
        # Framing is permitted through CSP frame-ancestors only: "ALLOWALL" is
        # not a valid X-Frame-Options value (only DENY and SAMEORIGIN are), so
        # that header is intentionally not sent.
        add_header Content-Security-Policy "frame-ancestors *" always;

        # CORS: strip upstream headers to avoid duplicates, then re-add.
        # NOTE: these server-level add_header directives are inherited by the
        # locations below only because none of them declares its own add_header.
        proxy_hide_header Access-Control-Allow-Origin;
        proxy_hide_header Access-Control-Allow-Methods;
        proxy_hide_header Access-Control-Allow-Headers;
        proxy_hide_header Access-Control-Allow-Credentials;

        add_header Access-Control-Allow-Origin "*" always;
        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, PATCH, OPTIONS" always;
        add_header Access-Control-Allow-Headers "*" always;

        # CORS preflight: answer every OPTIONS request directly
        if ($request_method = OPTIONS) {
            return 204;
        }

        # ── LangGraph-compatible API (rewrites /api/langgraph/* → /api/*) ──
        # The /api/langgraph/ prefix is a public-facing alias; it is stripped
        # before the request is handed to the backend on port 8001.
        location /api/langgraph/ {
            rewrite ^/api/langgraph/(.*) /api/$1 break;
            proxy_pass http://127.0.0.1:8001;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header Connection "";

            # SSE / streaming (agent responses are streamed as server-sent events)
            proxy_buffering off;
            proxy_cache off;
            # NOTE(review): this sends X-Accel-Buffering as a *request* header
            # to the backend; response buffering is already disabled by
            # `proxy_buffering off` above.
            proxy_set_header X-Accel-Buffering no;
            chunked_transfer_encoding on;

            proxy_connect_timeout 600s;
            proxy_send_timeout 600s;
            proxy_read_timeout 600s;
        }

        # ── Health check ──────────────────────────────────────────
        location = /health {
            proxy_pass http://127.0.0.1:8001/health;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
        }

        # ── API docs (Swagger / ReDoc / OpenAPI) ──────────────────
        location ~ ^/(docs|redoc|openapi\.json)$ {
            proxy_pass http://127.0.0.1:8001;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
        }

        # ── Thread file uploads (large body, no request buffering) ─
        location ~ ^/api/threads/[^/]+/uploads {
            proxy_pass http://127.0.0.1:8001;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_request_buffering off;
            client_max_body_size 100M;
        }

        # ── All remaining /api/* routes → backend ─────────────────
        location /api/ {
            proxy_pass http://127.0.0.1:8001;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header Connection "";

            # SSE support for all streaming API routes
            proxy_buffering off;
            proxy_cache off;
            # NOTE(review): request header only; see the /api/langgraph/ block.
            proxy_set_header X-Accel-Buffering no;
            chunked_transfer_encoding on;

            proxy_connect_timeout 600s;
            proxy_send_timeout 600s;
            proxy_read_timeout 600s;
        }

        # ── All other requests → Next.js frontend ─────────────────
        location / {
            proxy_pass http://127.0.0.1:3000;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            # Pass protocol-upgrade (e.g. WebSocket) requests through
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            proxy_cache_bypass $http_upgrade;

            proxy_connect_timeout 600s;
            proxy_send_timeout 600s;
            proxy_read_timeout 600s;
        }
    }
}
start.sh ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
set -euo pipefail
umask 0077

# ════════════════════════════════════════════════════════════════
# HuggingDeer — DeerFlow on Hugging Face Spaces
# ════════════════════════════════════════════════════════════════

# Paths: fixed install dir, data dir and config file (both overridable
# through the DEER_FLOW_* variables).
APP_DIR="/app"
DATA_DIR="${DEER_FLOW_HOME:-/app/data}"
CONFIG_PATH="${DEER_FLOW_CONFIG_PATH:-$DATA_DIR/config.yaml}"

# Ports and timings: keep a value supplied by the environment, otherwise
# fall back to the default.
PUBLIC_PORT="${PORT:-7860}"
: "${BACKEND_PORT:=8001}"
: "${FRONTEND_PORT:=3000}"
: "${SYNC_INTERVAL:=600}"
: "${BACKEND_READY_TIMEOUT:=120}"
: "${FRONTEND_READY_TIMEOUT:=120}"

# Make the settings visible to child processes (inline Python scripts read
# them through os.environ).
export DATA_DIR CONFIG_PATH BACKUP_DATASET_NAME SYNC_INTERVAL
export DEER_FLOW_HOME="$DATA_DIR" \
  DEER_FLOW_CONFIG_PATH="$CONFIG_PATH" \
  DEER_FLOW_SKILLS_PATH="/app/skills"

# Startup banner, framed by one blank line above and below.
printf '%s\n' \
  "" \
  " ╔══════════════════════════════════════════╗" \
  " ║ 🦌 HuggingDeer — DeerFlow ║" \
  " ╚══════════════════════════════════════════╝" \
  ""
30
+
31
+ # ── Required env validation ───────────────────────────────────────
32
# Collect one message per missing secret, then report them all at once so
# the user can fix everything in a single pass.
ERRORS=""
[ -n "${LLM_MODEL:-}" ] \
  || ERRORS="${ERRORS} - LLM_MODEL is not set (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-5)\n"
[ -n "${LLM_API_KEY:-}" ] \
  || ERRORS="${ERRORS} - LLM_API_KEY is not set\n"

# Nothing collected → continue; otherwise print the list and abort.
[ -z "$ERRORS" ] || {
  echo "Missing required secrets:"
  printf "%b" "$ERRORS"
  echo ""
  echo "Add them in HF Spaces → Settings → Secrets"
  exit 1
}
46
+
47
+ # ── Setup runtime directories ─────────────────────────────────────
48
# Data sub-directories plus the nginx temp dirs referenced by nginx.conf.
mkdir -p \
  "$DATA_DIR" \
  "$DATA_DIR"/{threads,uploads,workspace,logs} \
  /tmp/nginx-tmp/{client,proxy,fastcgi,uwsgi,scgi}
59
+
60
+ # ── Provider → env var + langchain class mapping ──────────────────
61
+ # Parse LLM_MODEL in format "provider/model-name" (e.g. "openai/gpt-4o")
62
#######################################
# Derive provider-specific settings from LLM_MODEL ("provider/model-name").
# Globals (read):    LLM_MODEL, LLM_API_KEY, CUSTOM_BASE_URL and any
#                    provider-specific *_API_KEY already set
# Globals (written): LLM_PROVIDER, LLM_MODEL_NAME, LANGCHAIN_CLASS,
#                    API_KEY_FIELD, MODEL_BASE_URL, SUPPORTS_THINKING and the
#                    provider-specific *_API_KEY variable(s), all exported
# Arguments: none
# Returns:   0
#######################################
resolve_llm_provider() {
  # Split on the FIRST slash only: "openrouter/anthropic/claude-3-5-sonnet"
  # → provider "openrouter", model "anthropic/claude-3-5-sonnet".
  # Without a slash both parts equal the whole value.
  LLM_PROVIDER="${LLM_MODEL%%/*}"
  LLM_MODEL_NAME="${LLM_MODEL#*/}"

  # Defaults: OpenAI-compatible client, no custom endpoint.
  LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
  API_KEY_FIELD="api_key"
  MODEL_BASE_URL=""
  SUPPORTS_THINKING="false"

  case "$LLM_PROVIDER" in
    anthropic)
      export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-$LLM_API_KEY}"
      LANGCHAIN_CLASS="langchain_anthropic:ChatAnthropic"
      SUPPORTS_THINKING="true"
      ;;
    google|gemini)
      export GEMINI_API_KEY="${GEMINI_API_KEY:-$LLM_API_KEY}"
      export GOOGLE_API_KEY="${GOOGLE_API_KEY:-$LLM_API_KEY}"
      LANGCHAIN_CLASS="langchain_google_genai:ChatGoogleGenerativeAI"
      API_KEY_FIELD="gemini_api_key"
      LLM_MODEL_NAME="${LLM_MODEL_NAME:-$LLM_PROVIDER}"
      SUPPORTS_THINKING="true"
      ;;
    deepseek)
      export DEEPSEEK_API_KEY="${DEEPSEEK_API_KEY:-$LLM_API_KEY}"
      LANGCHAIN_CLASS="deerflow.models.patched_deepseek:PatchedChatDeepSeek"
      MODEL_BASE_URL="https://api.deepseek.com/v1"
      SUPPORTS_THINKING="true"
      ;;
    openrouter)
      export OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-$LLM_API_KEY}"
      export OPENAI_API_KEY="${OPENAI_API_KEY:-$LLM_API_KEY}"
      MODEL_BASE_URL="https://openrouter.ai/api/v1"
      # OpenRouter model ids keep the upstream vendor prefix
      # (e.g. anthropic/claude-3-5-sonnet). LLM_MODEL_NAME already has exactly
      # that form after the split above, so it must NOT be reset to the full
      # LLM_MODEL, which would send "openrouter/anthropic/..." as the model id.
      ;;
    qwen|dashscope|alibaba)
      export DASHSCOPE_API_KEY="${DASHSCOPE_API_KEY:-$LLM_API_KEY}"
      MODEL_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
      ;;
    moonshot|kimi)
      export MOONSHOT_API_KEY="${MOONSHOT_API_KEY:-$LLM_API_KEY}"
      MODEL_BASE_URL="https://api.moonshot.cn/v1"
      ;;
    mistral)
      export MISTRAL_API_KEY="${MISTRAL_API_KEY:-$LLM_API_KEY}"
      MODEL_BASE_URL="https://api.mistral.ai/v1"
      ;;
    xai|grok)
      export XAI_API_KEY="${XAI_API_KEY:-$LLM_API_KEY}"
      MODEL_BASE_URL="https://api.x.ai/v1"
      ;;
    groq)
      export GROQ_API_KEY="${GROQ_API_KEY:-$LLM_API_KEY}"
      MODEL_BASE_URL="https://api.groq.com/openai/v1"
      ;;
    openai|*)
      # Unknown providers are treated as OpenAI-compatible.
      export OPENAI_API_KEY="${OPENAI_API_KEY:-$LLM_API_KEY}"
      ;;
  esac

  # Custom OpenAI-compatible endpoint overrides whatever the provider chose.
  if [ -n "${CUSTOM_BASE_URL:-}" ]; then
    export OPENAI_API_KEY="${OPENAI_API_KEY:-$LLM_API_KEY}"
    LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
    API_KEY_FIELD="api_key"
    MODEL_BASE_URL="$CUSTOM_BASE_URL"
  fi

  export LLM_PROVIDER LLM_MODEL_NAME LANGCHAIN_CLASS API_KEY_FIELD MODEL_BASE_URL SUPPORTS_THINKING
}

resolve_llm_provider

# Optional tool keys: always exported (possibly empty) so child processes can
# read them unconditionally.
export SERPER_API_KEY="${SERPER_API_KEY:-}"
export TAVILY_API_KEY="${TAVILY_API_KEY:-}"
export JINA_API_KEY="${JINA_API_KEY:-}"
151
+
152
+ # ── Restore from HF Dataset (if configured) ───────────────────────
153
# Persistence is opt-in: without a token there is nothing to restore from.
# A failed restore is reported but never aborts startup.
if [ -z "${HF_TOKEN:-}" ]; then
  echo "HF_TOKEN not set — running without dataset persistence."
else
  echo "Restoring state from HF Dataset..."
  if ! python3 "$APP_DIR/deer-sync.py" restore; then
    echo "Warning: restore failed, starting fresh."
  fi
fi
159
+
160
+ # ── Generate config.yaml ──────────────────────────────────────────
161
# Build config.yaml from the existing user config (if any) or the example
# template, then force the model, sandbox, search-tool and skills settings
# derived from the environment. The heredoc delimiter is quoted, so the shell
# expands nothing inside it; all inputs are read through os.environ.
echo "Generating config.yaml..."
python3 - <<'PYEOF'
import os, yaml
from pathlib import Path

data_dir = Path(os.environ["DATA_DIR"])
config_path = Path(os.environ["CONFIG_PATH"])

# Start from the user's config when present, otherwise from the example.
if not config_path.exists():
    example = Path("/app/config.example.yaml")
    if example.exists():
        base = yaml.safe_load(example.read_text()) or {}
    else:
        base = {}
else:
    base = yaml.safe_load(config_path.read_text()) or {}


def section(name):
    """Return base[name] as a dict, replacing a missing or null entry."""
    if not isinstance(base.get(name), dict):
        base[name] = {}
    return base[name]


model_name = os.environ["LLM_MODEL_NAME"]
lc_class = os.environ["LANGCHAIN_CLASS"]
api_key_field = os.environ["API_KEY_FIELD"]
base_url = os.environ.get("MODEL_BASE_URL", "")
llm_api_key = os.environ.get("LLM_API_KEY", "")
thinking = os.environ.get("SUPPORTS_THINKING", "false").lower() == "true"

# Build model entry (the key holding the API key depends on the provider)
model_entry = {
    "name": model_name,
    "display_name": model_name,
    "use": lc_class,
    "model": model_name,
    api_key_field: llm_api_key,
    "request_timeout": 600.0,
    "max_retries": 2,
    "max_tokens": 8192,
}
if base_url:
    model_entry["base_url"] = base_url
if thinking:
    model_entry["supports_thinking"] = True

# Override models section with our single configured model
base["models"] = [model_entry]

# Sandbox: local (no Docker on HF Spaces)
sandbox = section("sandbox")
sandbox["use"] = "deerflow.sandbox.local:LocalSandboxProvider"
sandbox["allow_host_bash"] = False

# Search tools: prefer Serper > Tavily > DuckDuckGo (default)
serper_key = os.environ.get("SERPER_API_KEY", "")
tavily_key = os.environ.get("TAVILY_API_KEY", "")

if serper_key:
    web_search_tool = {
        "name": "web_search", "group": "web",
        "use": "deerflow.community.serper.tools:web_search_tool",
        "max_results": 5, "api_key": serper_key,
    }
elif tavily_key:
    web_search_tool = {
        "name": "web_search", "group": "web",
        "use": "deerflow.community.tavily.tools:web_search_tool",
        "max_results": 5, "api_key": tavily_key,
    }
else:
    web_search_tool = {
        "name": "web_search", "group": "web",
        "use": "deerflow.community.ddg_search.tools:web_search_tool",
        "max_results": 5,
    }


def is_tool(entry, name):
    """True when entry is a tool mapping with the given name."""
    return isinstance(entry, dict) and entry.get("name") == name


# Preserve existing tool list, replacing web_search entry.
# `or []` also covers a `tools:` key that is present but null.
existing_tools = base.get("tools") or []
other_tools = [t for t in existing_tools if not is_tool(t, "web_search")]
base["tools"] = [web_search_tool] + other_tools

# Jina AI web_fetch: added only when no web_fetch tool is configured yet;
# the API key is attached only when one was provided.
jina_key = os.environ.get("JINA_API_KEY", "")
has_web_fetch = any(is_tool(t, "web_fetch") for t in base["tools"])
if not has_web_fetch:
    web_fetch_entry = {
        "name": "web_fetch", "group": "web",
        "use": "deerflow.community.jina_ai.tools:web_fetch_tool",
        "timeout": 15,
    }
    if jina_key:
        web_fetch_entry["api_key"] = jina_key
    base["tools"].append(web_fetch_entry)

# Persistence: SQLite in data dir (existing backend/url settings are kept)
database = section("database")
database.setdefault("backend", "sqlite")
db_path = str(data_dir / "deerflow.db")
database.setdefault("url", f"sqlite+aiosqlite:///{db_path}")

# Skills path
section("skills")["path"] = "/app/skills"

# CORS origins are not handled here: assigning os.environ in this child
# process cannot change the environment of the calling shell, which is where
# the backend gets its environment from.

config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(yaml.safe_dump(base, sort_keys=False, allow_unicode=True))
# The file contains API keys: keep it readable by the owner only.
config_path.chmod(0o600)
print(f"Config written to {config_path}")
PYEOF
276
+
277
+ # ── CORS origins env for backend ─────────────────────────────────
278
# Default allow-list: the two local origins, plus the public Space URL when
# SPACE_HOST is known. A CORS_ORIGINS value already present in the
# environment takes precedence over this default.
SPACE_HOST="${SPACE_HOST:-}"
default_cors_origins="http://localhost:3000,http://localhost:7860"
if [ -n "$SPACE_HOST" ]; then
  default_cors_origins="${default_cors_origins},https://$SPACE_HOST"
fi
export CORS_ORIGINS="${CORS_ORIGINS:-$default_cors_origins}"
284
+
285
+ # ── Startup summary ───────────────────────────────────────────────
286
# Work out the two derived labels first, then print the whole summary.
if [ -n "${SERPER_API_KEY:-}" ]; then
  search_summary="Serper (Google)"
elif [ -n "${TAVILY_API_KEY:-}" ]; then
  search_summary="Tavily"
else
  search_summary="DuckDuckGo (no API key)"
fi

if [ -n "${HF_TOKEN:-}" ]; then
  backup_summary="${BACKUP_DATASET_NAME:-huggingdeer-backup} (every ${SYNC_INTERVAL}s)"
else
  backup_summary="disabled"
fi

echo ""
echo "Model : $LLM_MODEL"
echo "Provider : $LLM_PROVIDER"
echo "Data dir : $DATA_DIR"
echo "Search : $search_summary"
echo "Backup : $backup_summary"
# The public URL is only known when running on a Space.
[ -z "$SPACE_HOST" ] || echo "URL : https://$SPACE_HOST"
echo ""
306
+
307
# ── Graceful shutdown ─────────────────────────────────────────────
#######################################
# SIGTERM/SIGINT handler: push a final backup (only when HF_TOKEN is set),
# ask nginx to drain, signal this shell's background jobs, then exit.
# Globals:   HF_TOKEN (read), APP_DIR (read)
# Outputs:   progress and warning messages on stdout
# Returns:   does not return; exits the script with status 0
#######################################
graceful_shutdown() {
  # Ignore further SIGTERM/SIGINT from here on so a repeated signal cannot
  # re-enter this handler and start a second final sync.
  trap '' SIGTERM SIGINT

  echo "Shutting down HuggingDeer..."
  if [ -n "${HF_TOKEN:-}" ]; then
    echo "Saving state to HF Dataset..."
    # Best-effort: a failed sync must not prevent the rest of the shutdown.
    python3 "$APP_DIR/deer-sync.py" sync-once || echo "Warning: shutdown sync failed."
  fi

  # nginx daemonizes itself, so it is not among the shell's jobs;
  # `nginx -s quit` requests a graceful drain. Failure is tolerated
  # (e.g. nginx never started).
  nginx -s quit 2>/dev/null || true

  # Stop background shell jobs (backend, frontend, sync loop).
  # `jobs -p` prints one PID per line; skip `kill` entirely when there are
  # none instead of invoking it with no operands.
  local job_list
  job_list=$(jobs -p)
  if [ -n "$job_list" ]; then
    # shellcheck disable=SC2086 — splitting the PID list into words is intended
    kill $job_list 2>/dev/null || true
  fi

  # Short grace period for the signalled jobs before the script exits.
  sleep 2
  exit 0
}
trap graceful_shutdown SIGTERM SIGINT
322
+
323
# ── Start nginx ───────────────────────────────────────────────────
echo "Starting nginx on port $PUBLIC_PORT..."

# Validate the configuration first. The quiet run decides pass/fail; on
# failure the check is repeated without redirection so the error is visible.
if ! nginx -t 2>/dev/null; then
  echo "nginx config error:"
  nginx -t || true
  exit 1
fi

# Launch separately from validation: with `nginx -t && nginx || {...}` a
# launch failure (valid config, but nginx could not start) would have been
# reported as a config error.
if ! nginx; then
  echo "nginx failed to start."
  exit 1
fi
331
+
332
# ── Service readiness helper ──────────────────────────────────────
#######################################
# Poll a loopback TCP port once per second until it accepts a connection,
# the launching job exits, or the timeout elapses. On failure, print a
# diagnostic followed by the last 30 lines of the service log.
# Arguments:
#   $1 - label used in messages (e.g. "Backend")
#   $2 - TCP port on 127.0.0.1 to probe
#   $3 - PID of the background job that launched the service
#   $4 - maximum number of one-second attempts
#   $5 - path of the service log file
# Outputs:   diagnostics on stdout when the service does not come up
# Returns:   0 once the port accepts a connection, 1 otherwise
#######################################
wait_for_service() {
  local label=$1 port=$2 pid=$3 timeout=$4 log_file=$5
  local attempt

  for ((attempt = 0; attempt < timeout; attempt++)); do
    # Bash's /dev/tcp pseudo-device: the redirection succeeds only if a
    # TCP connection to the port can be opened.
    if (echo > "/dev/tcp/127.0.0.1/$port") 2>/dev/null; then
      return 0
    fi
    # Stop waiting as soon as the launching job is gone.
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "$label process died. Last 30 log lines:"
      echo "────────────────────────────────────────"
      tail -n 30 "$log_file" || true
      return 1
    fi
    sleep 1
  done

  echo "$label failed to start within ${timeout}s. Last 30 log lines:"
  tail -n 30 "$log_file" || true
  return 1
}

# ── Start backend (uvicorn) ───────────────────────────────────────
echo "Starting DeerFlow backend on port $BACKEND_PORT..."
(
  # Output is shown on the console and appended to the log via tee.
  cd "$APP_DIR/backend" && \
    PYTHONPATH=. \
    uv run --no-sync \
      uvicorn app.gateway.app:app \
        --host 127.0.0.1 \
        --port "$BACKEND_PORT" \
        --workers 2 \
      2>&1 | tee -a "$DATA_DIR/logs/backend.log"
) &
# PID of the background subshell wrapping the pipeline above.
BACKEND_PID=$!

# Wait for backend to be ready
echo "Waiting for backend..."
wait_for_service "Backend" "$BACKEND_PORT" "$BACKEND_PID" \
  "$BACKEND_READY_TIMEOUT" "$DATA_DIR/logs/backend.log" || exit 1
echo "Backend ready."

# ── Start frontend (Next.js) ──────────────────────────────────────
echo "Starting Next.js frontend on port $FRONTEND_PORT..."
(
  # The frontend is pointed at the backend over loopback.
  cd "$APP_DIR/frontend" && \
    DEER_FLOW_INTERNAL_GATEWAY_BASE_URL="http://127.0.0.1:$BACKEND_PORT" \
    PORT="$FRONTEND_PORT" \
    node node_modules/.bin/next start -p "$FRONTEND_PORT" \
      2>&1 | tee -a "$DATA_DIR/logs/frontend.log"
) &
FRONTEND_PID=$!

# Wait for frontend
echo "Waiting for frontend..."
wait_for_service "Frontend" "$FRONTEND_PORT" "$FRONTEND_PID" \
  "$FRONTEND_READY_TIMEOUT" "$DATA_DIR/logs/frontend.log" || exit 1
echo "Frontend ready."
echo ""
echo "HuggingDeer is up ✓ → http://localhost:$PUBLIC_PORT"
echo ""
406
# ── Periodic HF Dataset sync ──────────────────────────────────────
# When HF_TOKEN is set, run a background loop that pushes state every
# SYNC_INTERVAL seconds. The loop is best-effort: a failed sync never stops
# it, but each failure is logged so a persistently broken backup is visible.
if [ -n "${HF_TOKEN:-}" ]; then
  (
    while true; do
      sleep "$SYNC_INTERVAL"
      # stderr of the sync tool stays suppressed; only the failure and its
      # exit status are reported.
      python3 "$APP_DIR/deer-sync.py" sync-once 2>/dev/null \
        || echo "Warning: periodic sync failed (exit $?)."
    done
  ) &
fi

# ── Wait for backend (primary process) ───────────────────────────
# Block until the backend job exits; as the final command, its status is
# the script's exit status. `wait` is also the point at which a pending
# SIGTERM/SIGINT trap gets to run.
wait "$BACKEND_PID"