Spaces:
Running
Running
Commit ·
5d8d23e
0
Parent(s):
init: HuggingDeer — DeerFlow on Hugging Face Spaces
Browse files
Single-container Docker deployment of DeerFlow (frontend + backend + nginx).
Clones deer-flow source at build time, builds Next.js and Python backend,
runs all three services inside one container on port 7860.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- Dockerfile +123 -0
- README.md +75 -0
- deer-sync.py +183 -0
- nginx.conf +140 -0
- start.sh +417 -0
Dockerfile
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ════════════════════════════════════════════════════════════════
|
| 2 |
+
# HuggingDeer — DeerFlow Research Agent for Hugging Face Spaces
|
| 3 |
+
# ════════════════════════════════════════════════════════════════
|
| 4 |
+
#
|
| 5 |
+
# Single-container deployment of DeerFlow (frontend + backend + nginx)
|
| 6 |
+
# on port 7860 as required by HF Spaces Docker runtime.
|
| 7 |
+
#
|
| 8 |
+
# Build args:
|
| 9 |
+
# DEER_FLOW_REF — git ref to clone (branch/tag/sha, default: main)
|
| 10 |
+
# UV_IMAGE — uv tool image (default: ghcr.io/astral-sh/uv:0.7.20)
|
| 11 |
+
# NODE_MAJOR — Node.js major version (default: 22)
|
| 12 |
+
|
| 13 |
+
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.7.20
|
| 14 |
+
ARG DEER_FLOW_REF=main
|
| 15 |
+
|
| 16 |
+
# ── uv source ────────────────────────────────────────────────────
|
| 17 |
+
FROM ${UV_IMAGE} AS uv-source
|
| 18 |
+
|
| 19 |
+
# ── Stage 1: Clone DeerFlow source ───────────────────────────────
|
| 20 |
+
# NOTE(review): alpine/git is still on the floating :latest tag; pin a version
# (or digest) for reproducible builds.
FROM alpine/git:latest AS source
ARG DEER_FLOW_REF
WORKDIR /src
# Fetch exactly DEER_FLOW_REF rather than whatever the default branch currently
# points at. `git fetch origin <ref>` accepts a branch, a tag, or a full commit
# SHA, whereas `git clone --branch` cannot take a SHA. Abbreviated SHAs are not
# expected to resolve on the remote.
RUN git init -q . && \
    git remote add origin https://github.com/bytedance/deer-flow.git && \
    git fetch --depth=1 origin "${DEER_FLOW_REF}" && \
    git checkout -q FETCH_HEAD && \
    git log --oneline -1
|
| 26 |
+
|
| 27 |
+
# ── Stage 2: Build Next.js frontend ──────────────────────────────
|
| 28 |
+
FROM node:22-alpine AS frontend-builder
|
| 29 |
+
|
| 30 |
+
RUN corepack enable && corepack install -g pnpm@10.26.2
|
| 31 |
+
|
| 32 |
+
# Work directly inside the frontend tree so each RUN does not need `cd`.
# The build output still lands in /app/frontend, which is what later stages copy.
WORKDIR /app/frontend
COPY --from=source /src/frontend ./

# pnpm virtual store uses hard links — COPY in later stages works correctly
RUN pnpm install --frozen-lockfile

# SKIP_ENV_VALIDATION=1 bypasses t3-oss env checks (no secrets at build time)
RUN SKIP_ENV_VALIDATION=1 pnpm build
|
| 40 |
+
|
| 41 |
+
# ── Stage 3: Install Python backend dependencies ──────────────────
|
| 42 |
+
FROM python:3.12-slim-bookworm AS backend-builder
|
| 43 |
+
|
| 44 |
+
COPY --from=uv-source /uv /uvx /usr/local/bin/
|
| 45 |
+
|
| 46 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 47 |
+
build-essential ca-certificates curl git \
|
| 48 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 49 |
+
|
| 50 |
+
WORKDIR /app
|
| 51 |
+
COPY --from=source /src/backend ./backend
|
| 52 |
+
|
| 53 |
+
# uv sync installs into backend/.venv (isolated from system python)
|
| 54 |
+
RUN cd backend && uv sync
|
| 55 |
+
|
| 56 |
+
# ── Stage 4: Runtime ─────────────────────────────────────────────
|
| 57 |
+
FROM python:3.12-slim-bookworm
|
| 58 |
+
|
| 59 |
+
ENV LANG=C.UTF-8 \
|
| 60 |
+
LC_ALL=C.UTF-8 \
|
| 61 |
+
PYTHONIOENCODING=utf-8 \
|
| 62 |
+
PYTHONUNBUFFERED=1
|
| 63 |
+
|
| 64 |
+
ARG NODE_MAJOR=22
|
| 65 |
+
|
| 66 |
+
# Install: Node.js (for Next.js runtime), nginx (reverse proxy), runtime tools
|
| 67 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 68 |
+
curl ca-certificates gnupg nginx jq \
|
| 69 |
+
&& mkdir -p /etc/apt/keyrings \
|
| 70 |
+
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
|
| 71 |
+
| gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
| 72 |
+
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] \
|
| 73 |
+
https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" \
|
| 74 |
+
> /etc/apt/sources.list.d/nodesource.list \
|
| 75 |
+
&& apt-get update && apt-get install -y --no-install-recommends nodejs \
|
| 76 |
+
&& pip3 install --no-cache-dir --break-system-packages huggingface_hub pyyaml \
|
| 77 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 78 |
+
|
| 79 |
+
# pnpm for `pnpm start` in Next.js runtime
|
| 80 |
+
RUN corepack enable && corepack install -g pnpm@10.26.2
|
| 81 |
+
|
| 82 |
+
# uv for backend startup
|
| 83 |
+
COPY --from=uv-source /uv /uvx /usr/local/bin/
|
| 84 |
+
|
| 85 |
+
# ── Create non-root user UID=1000 (required by HF Spaces) ────────
|
| 86 |
+
RUN useradd -m -u 1000 -s /bin/bash user && \
    mkdir -p \
        /app/backend \
        /app/frontend \
        /app/skills \
        /app/data \
        /tmp/nginx-tmp && \
    chown -R 1000:1000 /app /tmp/nginx-tmp && \
    # nginx non-root: hand the packaged log/lib dirs to UID 1000. This step is
    # best-effort, so only it is allowed to fail; the braces keep `|| true`
    # from also masking a failed useradd/mkdir/chown earlier in the chain.
    { chown -R 1000:1000 /var/log/nginx /var/lib/nginx 2>/dev/null || true; }
|
| 96 |
+
|
| 97 |
+
# ── Copy built artifacts ──────────────────────────────────────────
|
| 98 |
+
# Backend: Python source + pre-built .venv from uv sync
|
| 99 |
+
COPY --from=backend-builder --chown=1000:1000 /app/backend /app/backend
|
| 100 |
+
# Skills directory (read-only agent skills)
|
| 101 |
+
COPY --from=source --chown=1000:1000 /src/skills /app/skills
|
| 102 |
+
# Config template (used to generate config.yaml at startup)
|
| 103 |
+
COPY --from=source --chown=1000:1000 /src/config.example.yaml /app/config.example.yaml
|
| 104 |
+
# Frontend: built .next + node_modules (pnpm hard links — self-contained after COPY)
|
| 105 |
+
COPY --from=frontend-builder --chown=1000:1000 /app/frontend /app/frontend
|
| 106 |
+
|
| 107 |
+
# ── Copy HuggingDeer runtime scripts ─────────────────────────────
|
| 108 |
+
COPY --chown=1000:1000 nginx.conf /etc/nginx/nginx.conf
|
| 109 |
+
COPY --chown=1000:1000 start.sh /app/start.sh
|
| 110 |
+
COPY --chown=1000:1000 deer-sync.py /app/deer-sync.py
|
| 111 |
+
|
| 112 |
+
RUN chmod +x /app/start.sh /app/deer-sync.py
|
| 113 |
+
|
| 114 |
+
USER user
|
| 115 |
+
WORKDIR /app
|
| 116 |
+
|
| 117 |
+
EXPOSE 7860
|
| 118 |
+
|
| 119 |
+
# 120s start period: frontend build + backend uv sync + DB init takes ~60-90s on cold start
|
| 120 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s \
|
| 121 |
+
CMD curl -fsS http://localhost:7860/health || exit 1
|
| 122 |
+
|
| 123 |
+
CMD ["/app/start.sh"]
|
README.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🦌 HuggingDeer
|
| 2 |
+
|
| 3 |
+
**DeerFlow** research agent running as a self-hosted [Hugging Face Space](https://huggingface.co/spaces) (Docker).
|
| 4 |
+
|
| 5 |
+
Single-container deployment — frontend (Next.js) + backend (FastAPI) + nginx all in one image. No Docker-in-Docker, no Kubernetes.
|
| 6 |
+
|
| 7 |
+
## Required Secrets
|
| 8 |
+
|
| 9 |
+
Set these in **Settings → Variables and Secrets** on your HF Space:
|
| 10 |
+
|
| 11 |
+
| Secret | Required | Description |
|
| 12 |
+
|--------|----------|-------------|
|
| 13 |
+
| `LLM_MODEL` | ✅ | Model in `provider/name` format (see below) |
|
| 14 |
+
| `LLM_API_KEY` | ✅ | API key for the chosen provider |
|
| 15 |
+
| `SERPER_API_KEY` | recommended | Google Search via Serper (better than DuckDuckGo) |
|
| 16 |
+
| `TAVILY_API_KEY` | optional | Alternative web search |
|
| 17 |
+
| `JINA_API_KEY` | optional | Better web page fetching |
|
| 18 |
+
| `AUTH_JWT_SECRET` | optional | JWT signing secret — auto-generated if not set (sessions reset on restart) |
|
| 19 |
+
| `HF_TOKEN` | optional | Your HF token — enables dataset backup/restore of threads |
|
| 20 |
+
| `BACKUP_DATASET_NAME` | optional | HF dataset repo for backup (default: `huggingdeer-backup`) |
|
| 21 |
+
|
| 22 |
+
## LLM_MODEL format
|
| 23 |
+
|
| 24 |
+
```
|
| 25 |
+
provider/model-name
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
Examples:
|
| 29 |
+
|
| 30 |
+
```
|
| 31 |
+
openai/gpt-4o
|
| 32 |
+
openai/gpt-4o-mini
|
| 33 |
+
anthropic/claude-sonnet-4-5
|
| 34 |
+
anthropic/claude-opus-4-5
|
| 35 |
+
google/gemini-2.5-flash
|
| 36 |
+
deepseek/deepseek-chat
|
| 37 |
+
deepseek/deepseek-reasoner
|
| 38 |
+
openrouter/anthropic/claude-3-5-sonnet
|
| 39 |
+
mistral/mistral-large-latest
|
| 40 |
+
groq/llama-3.3-70b-versatile
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
## Deploy to HF Spaces
|
| 44 |
+
|
| 45 |
+
1. Duplicate this repo to your HF account as a **Docker Space**
|
| 46 |
+
2. Add required secrets
|
| 47 |
+
3. Space builds and starts (~5-10 min on first build)
|
| 48 |
+
|
| 49 |
+
## Optional env vars
|
| 50 |
+
|
| 51 |
+
| Variable | Default | Description |
|
| 52 |
+
|----------|---------|-------------|
|
| 53 |
+
| `CUSTOM_BASE_URL` | — | OpenAI-compatible API base URL (for custom providers) |
|
| 54 |
+
| `SYNC_INTERVAL` | `600` | Seconds between HF Dataset backups |
|
| 55 |
+
| `BACKEND_READY_TIMEOUT` | `120` | Seconds to wait for backend startup |
|
| 56 |
+
| `FRONTEND_READY_TIMEOUT` | `120` | Seconds to wait for frontend startup |
|
| 57 |
+
| `SPACE_HOST` | auto | Set by HF Spaces automatically |
|
| 58 |
+
|
| 59 |
+
## What runs inside
|
| 60 |
+
|
| 61 |
+
| Process | Port | Role |
|
| 62 |
+
|---------|------|------|
|
| 63 |
+
| nginx | 7860 | Public reverse proxy (routes `/api/*` → backend, `/*` → frontend) |
|
| 64 |
+
| uvicorn (FastAPI) | 8001 | DeerFlow gateway — agents, threads, auth |
|
| 65 |
+
| Next.js | 3000 | DeerFlow UI |
|
| 66 |
+
|
| 67 |
+
## Caveats
|
| 68 |
+
|
| 69 |
+
- **No Docker sandbox**: DeerFlow's `bash` / code execution tool is disabled by default (`allow_host_bash: false`). File read/write and web search work fine.
|
| 70 |
+
- **Ephemeral storage**: container resets on restart. Enable `HF_TOKEN` + `BACKUP_DATASET_NAME` to persist threads.
|
| 71 |
+
- **Limited concurrency**: the backend runs only 2 uvicorn workers. For heavy use, consider a dedicated server.
|
| 72 |
+
|
| 73 |
+
## Source
|
| 74 |
+
|
| 75 |
+
DeerFlow: https://github.com/bytedance/deer-flow
|
deer-sync.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
HuggingDeer state sync — backup/restore DeerFlow runtime data to/from HF Dataset.
|
| 4 |
+
|
| 5 |
+
Syncs:
|
| 6 |
+
- deerflow.db (SQLite thread/session database)
|
| 7 |
+
- config.yaml (generated config, may contain user edits)
|
| 8 |
+
- workspace/ (agent-created files in the sandbox workspace)
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
deer-sync.py restore — restore from HF Dataset on startup
|
| 12 |
+
deer-sync.py sync-once — push current state to HF Dataset
|
| 13 |
+
deer-sync.py loop — sync-once on an interval (reads SYNC_INTERVAL env)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
import sys
|
| 18 |
+
import time
|
| 19 |
+
import shutil
|
| 20 |
+
import tarfile
|
| 21 |
+
import tempfile
|
| 22 |
+
import logging
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
|
| 25 |
+
logging.basicConfig(level=logging.INFO, format="%(message)s")
|
| 26 |
+
log = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 29 |
+
BACKUP_REPO = os.environ.get("BACKUP_DATASET_NAME", "huggingdeer-backup")
|
| 30 |
+
HF_USERNAME = os.environ.get("HF_USERNAME", "")
|
| 31 |
+
DATA_DIR = Path(os.environ.get("DEER_FLOW_HOME", "/app/data"))
|
| 32 |
+
CONFIG_PATH = Path(os.environ.get("DEER_FLOW_CONFIG_PATH", DATA_DIR / "config.yaml"))
|
| 33 |
+
SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "600"))
|
| 34 |
+
|
| 35 |
+
ARCHIVE_NAME = "deerflow-state.tar.gz"
|
| 36 |
+
|
| 37 |
+
# Files/dirs to include in the backup archive
|
| 38 |
+
BACKUP_TARGETS = [
|
| 39 |
+
DATA_DIR / "deerflow.db",
|
| 40 |
+
DATA_DIR / "workspace",
|
| 41 |
+
CONFIG_PATH,
|
| 42 |
+
]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _get_api():
|
| 46 |
+
"""Return authenticated HfApi or raise."""
|
| 47 |
+
if not HF_TOKEN:
|
| 48 |
+
raise RuntimeError("HF_TOKEN not set")
|
| 49 |
+
from huggingface_hub import HfApi
|
| 50 |
+
return HfApi(token=HF_TOKEN)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _resolve_repo_id(api) -> str:
    """Turn BACKUP_REPO into a fully qualified ``owner/name`` repo id.

    A value that already contains a slash is returned untouched. Otherwise the
    owner is HF_USERNAME when set, falling back to the account name reported
    by ``api.whoami()`` for the current token.
    """
    if "/" in BACKUP_REPO:
        return BACKUP_REPO
    # whoami() is only called when no explicit username was configured.
    owner = HF_USERNAME if HF_USERNAME else api.whoami()["name"]
    return f"{owner}/{BACKUP_REPO}"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _ensure_repo(api, repo_id: str):
|
| 65 |
+
"""Create the dataset repo if it doesn't exist."""
|
| 66 |
+
from huggingface_hub import create_repo
|
| 67 |
+
try:
|
| 68 |
+
create_repo(
|
| 69 |
+
repo_id=repo_id,
|
| 70 |
+
repo_type="dataset",
|
| 71 |
+
private=True,
|
| 72 |
+
token=HF_TOKEN,
|
| 73 |
+
exist_ok=True,
|
| 74 |
+
)
|
| 75 |
+
except Exception as exc:
|
| 76 |
+
log.warning("Could not ensure dataset repo: %s", exc)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _make_archive(dest: Path):
    """Pack every existing entry of BACKUP_TARGETS into a gzip tar at ``dest``.

    Member names are stored relative to ``DATA_DIR.parent``, the directory
    that ``_extract_archive`` unpacks into. A target located outside that
    directory (for example a DEER_FLOW_CONFIG_PATH override pointing
    elsewhere) cannot be expressed that way, so it is skipped with a warning
    instead of raising ``ValueError`` and aborting the whole backup.
    """
    root = DATA_DIR.parent
    with tarfile.open(dest, "w:gz") as tar:
        for target in BACKUP_TARGETS:
            if not target.exists():
                continue
            try:
                arcname = target.relative_to(root)
            except ValueError:
                log.warning("Skipping %s — not located under %s", target, root)
                continue
            tar.add(target, arcname=str(arcname))
            log.debug(" + %s", arcname)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _extract_archive(src: Path):
    """Unpack the archive at ``src`` into ``DATA_DIR.parent``.

    Member names were written relative to that directory by ``_make_archive``,
    so extracting there restores the original paths.
    """
    extract_root = DATA_DIR.parent
    with tarfile.open(src, "r:gz") as tar:
        # The archive is downloaded from a remote repo, so do not trust member
        # names blindly: the "data" filter refuses absolute paths, ".."
        # traversal and links that point outside extract_root.
        tar.extractall(path=extract_root, filter="data")
    log.info("Extracted state to %s", extract_root)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def restore():
|
| 99 |
+
"""Download and unpack the latest state archive from HF Dataset."""
|
| 100 |
+
if not HF_TOKEN:
|
| 101 |
+
log.info("No HF_TOKEN — skipping restore.")
|
| 102 |
+
return
|
| 103 |
+
|
| 104 |
+
try:
|
| 105 |
+
api = _get_api()
|
| 106 |
+
repo_id = _resolve_repo_id(api)
|
| 107 |
+
_ensure_repo(api, repo_id)
|
| 108 |
+
|
| 109 |
+
from huggingface_hub import hf_hub_download
|
| 110 |
+
with tempfile.TemporaryDirectory() as tmp:
|
| 111 |
+
try:
|
| 112 |
+
local = hf_hub_download(
|
| 113 |
+
repo_id=repo_id,
|
| 114 |
+
filename=ARCHIVE_NAME,
|
| 115 |
+
repo_type="dataset",
|
| 116 |
+
token=HF_TOKEN,
|
| 117 |
+
local_dir=tmp,
|
| 118 |
+
)
|
| 119 |
+
_extract_archive(Path(local))
|
| 120 |
+
log.info("State restored from %s", repo_id)
|
| 121 |
+
except Exception as exc:
|
| 122 |
+
if "404" in str(exc) or "not found" in str(exc).lower() or "does not exist" in str(exc).lower():
|
| 123 |
+
log.info("No existing backup found in %s — starting fresh.", repo_id)
|
| 124 |
+
else:
|
| 125 |
+
raise
|
| 126 |
+
except Exception as exc:
|
| 127 |
+
log.warning("Restore failed: %s", exc)
|
| 128 |
+
raise
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def sync_once():
    """Pack the current state and upload it to the HF Dataset repo.

    Does nothing when HF_TOKEN is unset. Any failure is logged as a warning
    and swallowed, so a caller never sees an exception from a failed backup.
    """
    if not HF_TOKEN:
        return

    try:
        # Decide whether there is anything to back up by looking at the
        # targets themselves. A freshly written .tar.gz is never 0 bytes,
        # even with no members, so the archive size cannot answer this.
        if not any(target.exists() for target in BACKUP_TARGETS):
            log.info("Nothing to backup — skipping upload.")
            return

        api = _get_api()
        repo_id = _resolve_repo_id(api)
        _ensure_repo(api, repo_id)

        with tempfile.TemporaryDirectory() as tmp:
            archive = Path(tmp) / ARCHIVE_NAME
            _make_archive(archive)

            api.upload_file(
                path_or_fileobj=str(archive),
                path_in_repo=ARCHIVE_NAME,
                repo_id=repo_id,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            size_kb = archive.stat().st_size // 1024
            log.info("State synced to %s (%d KB)", repo_id, size_kb)
    except Exception as exc:
        log.warning("Sync failed: %s", exc)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def loop():
|
| 163 |
+
"""Run sync_once every SYNC_INTERVAL seconds."""
|
| 164 |
+
log.info("Starting periodic sync (interval: %ds)", SYNC_INTERVAL)
|
| 165 |
+
while True:
|
| 166 |
+
time.sleep(SYNC_INTERVAL)
|
| 167 |
+
try:
|
| 168 |
+
sync_once()
|
| 169 |
+
except Exception as exc:
|
| 170 |
+
log.warning("Periodic sync error: %s", exc)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
if __name__ == "__main__":
    # Map each CLI sub-command to its handler; anything else (including no
    # argument at all) prints the module usage text and exits with status 1.
    _handlers = {
        "restore": restore,
        "sync-once": sync_once,
        "loop": loop,
    }
    _requested = sys.argv[1] if len(sys.argv) > 1 else "help"
    _handler = _handlers.get(_requested)
    if _handler is None:
        print(__doc__)
        sys.exit(1)
    _handler()
|
nginx.conf
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
events {
|
| 2 |
+
worker_connections 1024;
|
| 3 |
+
}
|
| 4 |
+
|
| 5 |
+
# Non-root nginx: all paths redirected to /tmp
|
| 6 |
+
pid /tmp/nginx.pid;
|
| 7 |
+
error_log /tmp/nginx-error.log warn;
|
| 8 |
+
|
| 9 |
+
http {
|
| 10 |
+
# Non-root temp dirs
|
| 11 |
+
client_body_temp_path /tmp/nginx-tmp/client;
|
| 12 |
+
proxy_temp_path /tmp/nginx-tmp/proxy;
|
| 13 |
+
fastcgi_temp_path /tmp/nginx-tmp/fastcgi;
|
| 14 |
+
uwsgi_temp_path /tmp/nginx-tmp/uwsgi;
|
| 15 |
+
scgi_temp_path /tmp/nginx-tmp/scgi;
|
| 16 |
+
|
| 17 |
+
access_log /dev/stdout;
|
| 18 |
+
error_log /dev/stderr warn;
|
| 19 |
+
|
| 20 |
+
sendfile on;
|
| 21 |
+
tcp_nopush on;
|
| 22 |
+
tcp_nodelay on;
|
| 23 |
+
keepalive_timeout 65;
|
| 24 |
+
|
| 25 |
+
# ── DeerFlow on HF Spaces ─────────────────────────────────────
|
| 26 |
+
server {
|
| 27 |
+
listen 7860 default_server;
|
| 28 |
+
server_name _;
|
| 29 |
+
|
| 30 |
+
# Allow 100 MB uploads (thread file attachments)
|
| 31 |
+
client_max_body_size 100M;
|
| 32 |
+
|
| 33 |
+
# HF Spaces embeds the app in an iframe — must allow framing.
# X-Frame-Options only defines DENY and SAMEORIGIN; "ALLOWALL" is not a valid
# value. Framing is therefore permitted through CSP frame-ancestors, and any
# X-Frame-Options header sent by an upstream is dropped so it cannot block it.
proxy_hide_header X-Frame-Options;
add_header Content-Security-Policy "frame-ancestors *" always;
|
| 36 |
+
|
| 37 |
+
# CORS: strip upstream headers to avoid duplicates, then re-add
|
| 38 |
+
proxy_hide_header Access-Control-Allow-Origin;
|
| 39 |
+
proxy_hide_header Access-Control-Allow-Methods;
|
| 40 |
+
proxy_hide_header Access-Control-Allow-Headers;
|
| 41 |
+
proxy_hide_header Access-Control-Allow-Credentials;
|
| 42 |
+
|
| 43 |
+
add_header Access-Control-Allow-Origin "*" always;
|
| 44 |
+
add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, PATCH, OPTIONS" always;
|
| 45 |
+
add_header Access-Control-Allow-Headers "*" always;
|
| 46 |
+
|
| 47 |
+
# CORS preflight
|
| 48 |
+
if ($request_method = OPTIONS) {
|
| 49 |
+
return 204;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
# ── LangGraph-compatible API (rewrites /api/langgraph/* → /api/*) ──
|
| 53 |
+
# The backend exposes /api/* natively; the /api/langgraph/ prefix is a
|
| 54 |
+
# public-facing alias used by the Next.js client and LangGraph SDK.
|
| 55 |
+
location /api/langgraph/ {
|
| 56 |
+
rewrite ^/api/langgraph/(.*) /api/$1 break;
|
| 57 |
+
proxy_pass http://127.0.0.1:8001;
|
| 58 |
+
proxy_http_version 1.1;
|
| 59 |
+
proxy_set_header Host $host;
|
| 60 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 61 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 62 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 63 |
+
proxy_set_header Connection "";
|
| 64 |
+
|
| 65 |
+
# SSE / streaming (agent responses are streamed as server-sent events)
|
| 66 |
+
proxy_buffering off;
|
| 67 |
+
proxy_cache off;
|
| 68 |
+
proxy_set_header X-Accel-Buffering no;
|
| 69 |
+
chunked_transfer_encoding on;
|
| 70 |
+
|
| 71 |
+
proxy_connect_timeout 600s;
|
| 72 |
+
proxy_send_timeout 600s;
|
| 73 |
+
proxy_read_timeout 600s;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
# ── Health check ──────────────────────────────────────────
|
| 77 |
+
location = /health {
|
| 78 |
+
proxy_pass http://127.0.0.1:8001/health;
|
| 79 |
+
proxy_http_version 1.1;
|
| 80 |
+
proxy_set_header Host $host;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
# ── API docs (Swagger / ReDoc / OpenAPI) ──────────────────
|
| 84 |
+
location ~ ^/(docs|redoc|openapi\.json)$ {
|
| 85 |
+
proxy_pass http://127.0.0.1:8001;
|
| 86 |
+
proxy_http_version 1.1;
|
| 87 |
+
proxy_set_header Host $host;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
# ── Thread file uploads (large body, no buffering) ────────
|
| 91 |
+
location ~ ^/api/threads/[^/]+/uploads {
|
| 92 |
+
proxy_pass http://127.0.0.1:8001;
|
| 93 |
+
proxy_http_version 1.1;
|
| 94 |
+
proxy_set_header Host $host;
|
| 95 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 96 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 97 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 98 |
+
proxy_request_buffering off;
|
| 99 |
+
client_max_body_size 100M;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
# ── All remaining /api/* routes → backend ─────────────────
|
| 103 |
+
location /api/ {
|
| 104 |
+
proxy_pass http://127.0.0.1:8001;
|
| 105 |
+
proxy_http_version 1.1;
|
| 106 |
+
proxy_set_header Host $host;
|
| 107 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 108 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 109 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 110 |
+
proxy_set_header Connection "";
|
| 111 |
+
|
| 112 |
+
# SSE support for all streaming API routes
|
| 113 |
+
proxy_buffering off;
|
| 114 |
+
proxy_cache off;
|
| 115 |
+
proxy_set_header X-Accel-Buffering no;
|
| 116 |
+
chunked_transfer_encoding on;
|
| 117 |
+
|
| 118 |
+
proxy_connect_timeout 600s;
|
| 119 |
+
proxy_send_timeout 600s;
|
| 120 |
+
proxy_read_timeout 600s;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
# ── All other requests → Next.js frontend ─────────────────
|
| 124 |
+
location / {
|
| 125 |
+
proxy_pass http://127.0.0.1:3000;
|
| 126 |
+
proxy_http_version 1.1;
|
| 127 |
+
proxy_set_header Host $host;
|
| 128 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 129 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 130 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 131 |
+
proxy_set_header Upgrade $http_upgrade;
|
| 132 |
+
proxy_set_header Connection "upgrade";
|
| 133 |
+
proxy_cache_bypass $http_upgrade;
|
| 134 |
+
|
| 135 |
+
proxy_connect_timeout 600s;
|
| 136 |
+
proxy_send_timeout 600s;
|
| 137 |
+
proxy_read_timeout 600s;
|
| 138 |
+
}
|
| 139 |
+
}
|
| 140 |
+
}
|
start.sh
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -euo pipefail
|
| 3 |
+
umask 0077
|
| 4 |
+
|
| 5 |
+
# ════════════════════════════════════════════════════════════════
|
| 6 |
+
# HuggingDeer — DeerFlow on Hugging Face Spaces
|
| 7 |
+
# ════════════════════════════════════════════════════════════════
|
| 8 |
+
|
| 9 |
+
APP_DIR="/app"
|
| 10 |
+
DATA_DIR="${DEER_FLOW_HOME:-/app/data}"
|
| 11 |
+
CONFIG_PATH="${DEER_FLOW_CONFIG_PATH:-$DATA_DIR/config.yaml}"
|
| 12 |
+
BACKEND_PORT="${BACKEND_PORT:-8001}"
|
| 13 |
+
FRONTEND_PORT="${FRONTEND_PORT:-3000}"
|
| 14 |
+
PUBLIC_PORT="${PORT:-7860}"
|
| 15 |
+
SYNC_INTERVAL="${SYNC_INTERVAL:-600}"
|
| 16 |
+
BACKEND_READY_TIMEOUT="${BACKEND_READY_TIMEOUT:-120}"
|
| 17 |
+
FRONTEND_READY_TIMEOUT="${FRONTEND_READY_TIMEOUT:-120}"
|
| 18 |
+
|
| 19 |
+
# Export shell vars so inline Python scripts can read them via os.environ
|
| 20 |
+
export DATA_DIR CONFIG_PATH BACKUP_DATASET_NAME SYNC_INTERVAL
|
| 21 |
+
export DEER_FLOW_HOME="$DATA_DIR"
|
| 22 |
+
export DEER_FLOW_CONFIG_PATH="$CONFIG_PATH"
|
| 23 |
+
export DEER_FLOW_SKILLS_PATH="/app/skills"
|
| 24 |
+
|
| 25 |
+
echo ""
|
| 26 |
+
echo " ╔══════════════════════════════════════════╗"
|
| 27 |
+
echo " ║ 🦌 HuggingDeer — DeerFlow ║"
|
| 28 |
+
echo " ╚══════════════════════════════════════════╝"
|
| 29 |
+
echo ""
|
| 30 |
+
|
| 31 |
+
# ── Required env validation ───────────────────────────────────────
|
| 32 |
+
ERRORS=""
|
| 33 |
+
if [ -z "${LLM_MODEL:-}" ]; then
|
| 34 |
+
ERRORS="${ERRORS} - LLM_MODEL is not set (e.g. openai/gpt-4o, anthropic/claude-sonnet-4-5)\n"
|
| 35 |
+
fi
|
| 36 |
+
if [ -z "${LLM_API_KEY:-}" ]; then
|
| 37 |
+
ERRORS="${ERRORS} - LLM_API_KEY is not set\n"
|
| 38 |
+
fi
|
| 39 |
+
if [ -n "$ERRORS" ]; then
|
| 40 |
+
echo "Missing required secrets:"
|
| 41 |
+
printf "%b" "$ERRORS"
|
| 42 |
+
echo ""
|
| 43 |
+
echo "Add them in HF Spaces → Settings → Secrets"
|
| 44 |
+
exit 1
|
| 45 |
+
fi
|
| 46 |
+
|
| 47 |
+
# ── Setup runtime directories ─────────────────────────────────────
|
| 48 |
+
mkdir -p \
|
| 49 |
+
"$DATA_DIR" \
|
| 50 |
+
"$DATA_DIR/threads" \
|
| 51 |
+
"$DATA_DIR/uploads" \
|
| 52 |
+
"$DATA_DIR/workspace" \
|
| 53 |
+
"$DATA_DIR/logs" \
|
| 54 |
+
/tmp/nginx-tmp/client \
|
| 55 |
+
/tmp/nginx-tmp/proxy \
|
| 56 |
+
/tmp/nginx-tmp/fastcgi \
|
| 57 |
+
/tmp/nginx-tmp/uwsgi \
|
| 58 |
+
/tmp/nginx-tmp/scgi
|
| 59 |
+
|
| 60 |
+
# ── Provider → env var + langchain class mapping ──────────────────
|
| 61 |
+
# Parse LLM_MODEL in format "provider/model-name" (e.g. "openai/gpt-4o")
|
| 62 |
+
LLM_PROVIDER=$(echo "$LLM_MODEL" | cut -d'/' -f1)
|
| 63 |
+
LLM_MODEL_NAME=$(echo "$LLM_MODEL" | cut -d'/' -f2-)
|
| 64 |
+
|
| 65 |
+
# Resolve provider-specific settings
|
| 66 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 67 |
+
API_KEY_FIELD="api_key"
|
| 68 |
+
MODEL_BASE_URL=""
|
| 69 |
+
SUPPORTS_THINKING="false"
|
| 70 |
+
|
| 71 |
+
case "$LLM_PROVIDER" in
|
| 72 |
+
anthropic)
|
| 73 |
+
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-$LLM_API_KEY}"
|
| 74 |
+
LANGCHAIN_CLASS="langchain_anthropic:ChatAnthropic"
|
| 75 |
+
API_KEY_FIELD="api_key"
|
| 76 |
+
SUPPORTS_THINKING="true"
|
| 77 |
+
;;
|
| 78 |
+
google|gemini)
|
| 79 |
+
export GEMINI_API_KEY="${GEMINI_API_KEY:-$LLM_API_KEY}"
|
| 80 |
+
export GOOGLE_API_KEY="${GOOGLE_API_KEY:-$LLM_API_KEY}"
|
| 81 |
+
LANGCHAIN_CLASS="langchain_google_genai:ChatGoogleGenerativeAI"
|
| 82 |
+
API_KEY_FIELD="gemini_api_key"
|
| 83 |
+
LLM_MODEL_NAME="${LLM_MODEL_NAME:-$LLM_PROVIDER}"
|
| 84 |
+
SUPPORTS_THINKING="true"
|
| 85 |
+
;;
|
| 86 |
+
deepseek)
|
| 87 |
+
export DEEPSEEK_API_KEY="${DEEPSEEK_API_KEY:-$LLM_API_KEY}"
|
| 88 |
+
LANGCHAIN_CLASS="deerflow.models.patched_deepseek:PatchedChatDeepSeek"
|
| 89 |
+
API_KEY_FIELD="api_key"
|
| 90 |
+
MODEL_BASE_URL="https://api.deepseek.com/v1"
|
| 91 |
+
SUPPORTS_THINKING="true"
|
| 92 |
+
;;
|
| 93 |
+
openrouter)
    export OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-$LLM_API_KEY}"
    export OPENAI_API_KEY="${OPENAI_API_KEY:-$LLM_API_KEY}"
    LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
    API_KEY_FIELD="api_key"
    MODEL_BASE_URL="https://openrouter.ai/api/v1"
    # OpenRouter model ids carry the upstream vendor prefix
    # (e.g. anthropic/claude-3-5-sonnet). Strip only the leading "openrouter/"
    # routing prefix from LLM_MODEL; assigning the full $LLM_MODEL would yield
    # "openrouter/anthropic/...", which is not the id this comment describes.
    LLM_MODEL_NAME="${LLM_MODEL#openrouter/}"
    ;;
|
| 102 |
+
qwen|dashscope|alibaba)
|
| 103 |
+
export DASHSCOPE_API_KEY="${DASHSCOPE_API_KEY:-$LLM_API_KEY}"
|
| 104 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 105 |
+
API_KEY_FIELD="api_key"
|
| 106 |
+
MODEL_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
| 107 |
+
;;
|
| 108 |
+
moonshot|kimi)
|
| 109 |
+
export MOONSHOT_API_KEY="${MOONSHOT_API_KEY:-$LLM_API_KEY}"
|
| 110 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 111 |
+
API_KEY_FIELD="api_key"
|
| 112 |
+
MODEL_BASE_URL="https://api.moonshot.cn/v1"
|
| 113 |
+
;;
|
| 114 |
+
mistral)
|
| 115 |
+
export MISTRAL_API_KEY="${MISTRAL_API_KEY:-$LLM_API_KEY}"
|
| 116 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 117 |
+
API_KEY_FIELD="api_key"
|
| 118 |
+
MODEL_BASE_URL="https://api.mistral.ai/v1"
|
| 119 |
+
;;
|
| 120 |
+
xai|grok)
|
| 121 |
+
export XAI_API_KEY="${XAI_API_KEY:-$LLM_API_KEY}"
|
| 122 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 123 |
+
API_KEY_FIELD="api_key"
|
| 124 |
+
MODEL_BASE_URL="https://api.x.ai/v1"
|
| 125 |
+
;;
|
| 126 |
+
groq)
|
| 127 |
+
export GROQ_API_KEY="${GROQ_API_KEY:-$LLM_API_KEY}"
|
| 128 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 129 |
+
API_KEY_FIELD="api_key"
|
| 130 |
+
MODEL_BASE_URL="https://api.groq.com/openai/v1"
|
| 131 |
+
;;
|
| 132 |
+
openai|*)
|
| 133 |
+
export OPENAI_API_KEY="${OPENAI_API_KEY:-$LLM_API_KEY}"
|
| 134 |
+
LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
|
| 135 |
+
API_KEY_FIELD="api_key"
|
| 136 |
+
;;
|
| 137 |
+
esac
|
| 138 |
+
|
| 139 |
+
# Custom OpenAI-compatible provider override: a non-empty CUSTOM_BASE_URL
# replaces the class, key field and base URL chosen for the provider.
if [[ -n "${CUSTOM_BASE_URL:-}" ]]; then
  MODEL_BASE_URL="$CUSTOM_BASE_URL"
  LANGCHAIN_CLASS="langchain_openai:ChatOpenAI"
  API_KEY_FIELD="api_key"
  export OPENAI_API_KEY="${OPENAI_API_KEY:-$LLM_API_KEY}"
fi
|
| 146 |
+
|
| 147 |
+
# Make the resolved model settings visible to child processes.
export LLM_PROVIDER LLM_MODEL_NAME
export LANGCHAIN_CLASS API_KEY_FIELD
export MODEL_BASE_URL SUPPORTS_THINKING

# Optional tool keys: export each one, as an empty string when it is unset.
for tool_key in SERPER_API_KEY TAVILY_API_KEY JINA_API_KEY; do
  export "$tool_key=${!tool_key:-}"
done
|
| 151 |
+
|
| 152 |
+
# ── Restore from HF Dataset (if configured) ───────────────────────
# Without HF_TOKEN the restore step is skipped. A failed restore is reported
# but does not stop startup.
if [ -z "${HF_TOKEN:-}" ]; then
  echo "HF_TOKEN not set — running without dataset persistence."
else
  echo "Restoring state from HF Dataset..."
  if ! python3 "$APP_DIR/deer-sync.py" restore; then
    echo "Warning: restore failed, starting fresh."
  fi
fi
|
| 159 |
+
|
| 160 |
+
# ── Generate config.yaml ──────────────────────────────────────────
# Runs an inline Python script that merges the resolved model/tool settings
# (read from the environment) into the config file at $CONFIG_PATH.
echo "Generating config.yaml..."
python3 - <<'PYEOF'
"""Write the DeerFlow config file from environment variables.

Reads:  DATA_DIR, CONFIG_PATH, LLM_MODEL_NAME, LANGCHAIN_CLASS, API_KEY_FIELD
        (required); MODEL_BASE_URL, LLM_API_KEY, SUPPORTS_THINKING,
        SERPER_API_KEY, TAVILY_API_KEY, JINA_API_KEY (optional).
Writes: CONFIG_PATH (YAML, mode 0600).
"""
import os
from pathlib import Path

import yaml

data_dir = Path(os.environ["DATA_DIR"])
config_path = Path(os.environ["CONFIG_PATH"])


def section(cfg, key):
    """Return cfg[key] as a mapping, replacing a missing or null value with {}."""
    value = cfg.get(key)
    if value is None:
        # A YAML key with no value ("sandbox:") loads as None, not {}.
        value = {}
    cfg[key] = value
    return value


# Base config: the existing user config if there is one, otherwise the
# bundled example, otherwise an empty mapping.
if config_path.exists():
    base = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
else:
    example = Path("/app/config.example.yaml")
    if example.exists():
        base = yaml.safe_load(example.read_text(encoding="utf-8")) or {}
    else:
        base = {}

model_name = os.environ["LLM_MODEL_NAME"]
lc_class = os.environ["LANGCHAIN_CLASS"]
api_key_field = os.environ["API_KEY_FIELD"]
base_url = os.environ.get("MODEL_BASE_URL", "")
llm_api_key = os.environ.get("LLM_API_KEY", "")
thinking = os.environ.get("SUPPORTS_THINKING", "false").lower() == "true"

# Build model entry
model_entry = {
    "name": model_name,
    "display_name": model_name,
    "use": lc_class,
    "model": model_name,
}
if llm_api_key:
    # Only record a key that is actually set. Writing an empty string would
    # hand the model class an explicit empty credential; leaving the field
    # out presumably lets it fall back to its provider-specific environment
    # variable (exported by the calling shell) — TODO confirm against the
    # DeerFlow model loader.
    model_entry[api_key_field] = llm_api_key
model_entry.update(
    {
        "request_timeout": 600.0,
        "max_retries": 2,
        "max_tokens": 8192,
    }
)
if base_url:
    model_entry["base_url"] = base_url
if thinking:
    model_entry["supports_thinking"] = True

# Override models section with our single configured model
base["models"] = [model_entry]

# Sandbox: local (no Docker on HF Spaces)
sandbox = section(base, "sandbox")
sandbox["use"] = "deerflow.sandbox.local:LocalSandboxProvider"
sandbox["allow_host_bash"] = False

# Search tools: prefer Serper > Tavily > DuckDuckGo (default)
serper_key = os.environ.get("SERPER_API_KEY", "")
tavily_key = os.environ.get("TAVILY_API_KEY", "")

web_search_tool = {"name": "web_search", "group": "web"}
if serper_key:
    web_search_tool["use"] = "deerflow.community.serper.tools:web_search_tool"
    web_search_tool["max_results"] = 5
    web_search_tool["api_key"] = serper_key
elif tavily_key:
    web_search_tool["use"] = "deerflow.community.tavily.tools:web_search_tool"
    web_search_tool["max_results"] = 5
    web_search_tool["api_key"] = tavily_key
else:
    web_search_tool["use"] = "deerflow.community.ddg_search.tools:web_search_tool"
    web_search_tool["max_results"] = 5


def tool_name(tool):
    """Name of a tool entry, or None when the entry is not a mapping."""
    return tool.get("name") if isinstance(tool, dict) else None


# Preserve existing tool list, replacing web_search entry.
# "or []" also covers a null "tools:" key in the loaded YAML.
existing_tools = base.get("tools") or []
tools = [web_search_tool]
tools.extend(t for t in existing_tools if tool_name(t) != "web_search")
base["tools"] = tools

# Jina AI web_fetch (no key needed for basic usage); added only when the
# config does not already define a web_fetch tool.
jina_key = os.environ.get("JINA_API_KEY", "")
if not any(tool_name(t) == "web_fetch" for t in tools):
    web_fetch_entry = {
        "name": "web_fetch",
        "group": "web",
        "use": "deerflow.community.jina_ai.tools:web_fetch_tool",
        "timeout": 15,
    }
    if jina_key:
        web_fetch_entry["api_key"] = jina_key
    tools.append(web_fetch_entry)

# Persistence: SQLite in data dir. Existing backend/url values are kept.
database = section(base, "database")
database.setdefault("backend", "sqlite")
# Database file lives in DATA_DIR (persisted via HF Dataset sync)
db_path = str(data_dir / "deerflow.db")
database.setdefault("url", f"sqlite+aiosqlite:///{db_path}")

# Skills path
section(base, "skills")["path"] = "/app/skills"

# CORS origins are deliberately not handled here: an os.environ change made
# in this child process would never reach the shell that launched it, so the
# shell has to export CORS_ORIGINS itself.

config_path.parent.mkdir(parents=True, exist_ok=True)
payload = yaml.safe_dump(base, sort_keys=False, allow_unicode=True)
# The file may contain API keys: create it with mode 0600 from the start
# instead of writing it with default permissions and restricting afterwards.
fd = os.open(config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w", encoding="utf-8") as handle:
    handle.write(payload)
# The mode passed to os.open only applies to newly created files; tighten a
# pre-existing file as well.
config_path.chmod(0o600)
print(f"Config written to {config_path}")
PYEOF
|
| 276 |
+
|
| 277 |
+
# ── CORS origins env for backend ─────────────────────────────────
# A CORS_ORIGINS value that is already set and non-empty is kept; otherwise
# the localhost origins are used, plus https://$SPACE_HOST when SPACE_HOST
# is non-empty.
SPACE_HOST="${SPACE_HOST:-}"
default_cors_origins="http://localhost:3000,http://localhost:7860"
if [ -n "$SPACE_HOST" ]; then
  default_cors_origins="$default_cors_origins,https://$SPACE_HOST"
fi
export CORS_ORIGINS="${CORS_ORIGINS:-$default_cors_origins}"
|
| 284 |
+
|
| 285 |
+
# ── Startup summary ───────────────────────────────────────────────
# Work out the search and backup labels first, then print the summary.
search_label="DuckDuckGo (no API key)"
if [ -n "${SERPER_API_KEY:-}" ]; then
  search_label="Serper (Google)"
elif [ -n "${TAVILY_API_KEY:-}" ]; then
  search_label="Tavily"
fi

backup_label="disabled"
if [ -n "${HF_TOKEN:-}" ]; then
  backup_label="${BACKUP_DATASET_NAME:-huggingdeer-backup} (every ${SYNC_INTERVAL}s)"
fi

echo
echo "Model : $LLM_MODEL"
echo "Provider : $LLM_PROVIDER"
echo "Data dir : $DATA_DIR"
echo "Search : $search_label"
echo "Backup : $backup_label"
# The URL line is printed only when SPACE_HOST is non-empty.
if [ -n "$SPACE_HOST" ]; then
  echo "URL : https://$SPACE_HOST"
fi
echo
|
| 306 |
+
|
| 307 |
+
# ── Graceful shutdown ─────────────────────────────────────────────
# Handler for SIGTERM/SIGINT: optionally pushes state to the HF Dataset,
# asks nginx to quit, signals this shell's background jobs, pauses for two
# seconds and exits with status 0.
graceful_shutdown() {
  echo "Shutting down HuggingDeer..."

  if [[ -n "${HF_TOKEN:-}" ]]; then
    echo "Saving state to HF Dataset..."
    if ! python3 "$APP_DIR/deer-sync.py" sync-once; then
      echo "Warning: shutdown sync failed."
    fi
  fi

  # Stop nginx daemon (nginx -s quit = graceful drain); errors are ignored.
  nginx -s quit 2>/dev/null || true

  # Stop background shell jobs (backend, frontend, sync loop).
  # Left unquoted on purpose so each PID becomes its own argument.
  local job_pids
  job_pids="$(jobs -p)"
  kill $job_pids 2>/dev/null || true

  sleep 2
  exit 0
}
trap graceful_shutdown SIGTERM SIGINT
|
| 322 |
+
|
| 323 |
+
# ── Start nginx ───────────────────────────────────────────────────
echo "Starting nginx on port $PUBLIC_PORT..."

# Validate the configuration first. On failure, run the test again without
# silencing stderr so the actual error appears in the container log.
if ! nginx -t 2>/dev/null; then
  echo "nginx config error:"
  nginx -t || true
  exit 1
fi

# Start nginx separately from the config test. With "test && start || report"
# a failed start was reported as a config error, followed by a config test
# that passes.
if ! nginx; then
  echo "nginx failed to start (configuration test passed)."
  exit 1
fi
|
| 331 |
+
|
| 332 |
+
# ── Start backend (uvicorn) ───────────────────────────────────────
# Runs in a background subshell. Combined stdout/stderr is shown on the
# console and appended to backend.log. BACKEND_PID is the subshell's PID.
echo "Starting DeerFlow backend on port $BACKEND_PORT..."
(
  cd "$APP_DIR/backend" || exit 1
  PYTHONPATH=. uv run --no-sync \
    uvicorn app.gateway.app:app \
      --host 127.0.0.1 \
      --port "$BACKEND_PORT" \
      --workers 2 \
    2>&1 | tee -a "$DATA_DIR/logs/backend.log"
) &
BACKEND_PID=$!
|
| 345 |
+
|
| 346 |
+
# Wait for backend to be ready.
# Once per second, for up to BACKEND_READY_TIMEOUT attempts: try to open a
# TCP connection to the backend port; if that fails and the backend job has
# already exited, print the log tail and abort.
echo "Waiting for backend..."
ready=false
attempt=0
while (( attempt < BACKEND_READY_TIMEOUT )); do
  if (echo > "/dev/tcp/127.0.0.1/$BACKEND_PORT") 2>/dev/null; then
    ready=true
    break
  fi

  if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
    echo "Backend process died. Last 30 log lines:"
    echo "────────────────────────────────────────"
    tail -30 "$DATA_DIR/logs/backend.log" || true
    exit 1
  fi

  sleep 1
  attempt=$((attempt + 1))
done

if [ "$ready" != "true" ]; then
  echo "Backend failed to start within ${BACKEND_READY_TIMEOUT}s. Last 30 log lines:"
  tail -30 "$DATA_DIR/logs/backend.log" || true
  exit 1
fi
echo "Backend ready."
|
| 368 |
+
|
| 369 |
+
# ── Start frontend (Next.js) ──────────────────────────────────────
# Runs in a background subshell. Combined stdout/stderr is shown on the
# console and appended to frontend.log. FRONTEND_PID is the subshell's PID.
echo "Starting Next.js frontend on port $FRONTEND_PORT..."
(
  cd "$APP_DIR/frontend" || exit 1
  # Both variables are set for the node process only.
  DEER_FLOW_INTERNAL_GATEWAY_BASE_URL="http://127.0.0.1:$BACKEND_PORT" \
  PORT="$FRONTEND_PORT" \
    node node_modules/.bin/next start -p "$FRONTEND_PORT" \
    2>&1 | tee -a "$DATA_DIR/logs/frontend.log"
) &
FRONTEND_PID=$!
|
| 379 |
+
|
| 380 |
+
# Wait for frontend.
# Once per second, for up to FRONTEND_READY_TIMEOUT attempts: try to open a
# TCP connection to the frontend port; if that fails and the frontend job
# has already exited, print the log tail and abort.
echo "Waiting for frontend..."
ready=false
attempt=0
while (( attempt < FRONTEND_READY_TIMEOUT )); do
  if (echo > "/dev/tcp/127.0.0.1/$FRONTEND_PORT") 2>/dev/null; then
    ready=true
    break
  fi

  if ! kill -0 "$FRONTEND_PID" 2>/dev/null; then
    echo "Frontend process died. Last 30 log lines:"
    echo "────────────────────────────────────────"
    tail -30 "$DATA_DIR/logs/frontend.log" || true
    exit 1
  fi

  sleep 1
  attempt=$((attempt + 1))
done

if [ "$ready" != "true" ]; then
  echo "Frontend failed to start within ${FRONTEND_READY_TIMEOUT}s. Last 30 log lines:"
  tail -30 "$DATA_DIR/logs/frontend.log" || true
  exit 1
fi
echo "Frontend ready."

# Everything is up: print the banner.
echo
echo "HuggingDeer is up ✓ → http://localhost:$PUBLIC_PORT"
echo
|
| 405 |
+
|
| 406 |
+
# ── Periodic HF Dataset sync ──────────────────────────────────────
# Only when HF_TOKEN is set: a background loop that sleeps SYNC_INTERVAL
# seconds and then runs one sync, forever.
if [ -n "${HF_TOKEN:-}" ]; then
  (
    while true; do
      sleep "$SYNC_INTERVAL"
      # Best effort: a failed sync must not end the loop. stderr stays
      # suppressed as before, but the failure is now reported, matching the
      # warnings printed by the restore and shutdown paths.
      if ! python3 "$APP_DIR/deer-sync.py" sync-once 2>/dev/null; then
        echo "Warning: periodic sync failed; retrying in ${SYNC_INTERVAL}s."
      fi
    done
  ) &
fi
|
| 415 |
+
|
| 416 |
+
# ── Wait for backend (primary process) ───────────────────────────
# Block until the backend job ends. A trapped signal interrupts the wait and
# runs the signal handler, which exits by itself; reaching the lines below
# therefore means the backend stopped without being asked to.
backend_status=0
wait "$BACKEND_PID" || backend_status=$?
echo "Backend exited unexpectedly (status $backend_status)."
# The job's status is that of its last pipeline stage (tee) unless pipefail
# is in effect, so it can be 0 even after a crash. Never report success here.
if [ "$backend_status" -eq 0 ]; then
  backend_status=1
fi
exit "$backend_status"
|