File size: 5,858 Bytes
033ca06 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | import os
import re
from pathlib import Path
# Virtual path prefix seen by agents inside the sandbox
VIRTUAL_PATH_PREFIX = "/mnt/user-data"
_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
class Paths:
"""
Centralized path configuration for DeerFlow application data.
Directory layout (host side):
{base_dir}/
βββ memory.json
βββ threads/
βββ {thread_id}/
βββ user-data/ <-- mounted as /mnt/user-data/ inside sandbox
βββ workspace/ <-- /mnt/user-data/workspace/
βββ uploads/ <-- /mnt/user-data/uploads/
βββ outputs/ <-- /mnt/user-data/outputs/
BaseDir resolution (in priority order):
1. Constructor argument `base_dir`
2. DEER_FLOW_HOME environment variable
3. Local dev fallback: cwd/.deer-flow (when cwd is the backend/ dir)
4. Default: $HOME/.deer-flow
"""
def __init__(self, base_dir: str | Path | None = None) -> None:
self._base_dir = Path(base_dir).resolve() if base_dir is not None else None
@property
def base_dir(self) -> Path:
"""Root directory for all application data."""
if self._base_dir is not None:
return self._base_dir
if env_home := os.getenv("DEER_FLOW_HOME"):
return Path(env_home).resolve()
cwd = Path.cwd()
if cwd.name == "backend" or (cwd / "pyproject.toml").exists():
return cwd / ".deer-flow"
return Path.home() / ".deer-flow"
@property
def memory_file(self) -> Path:
"""Path to the persisted memory file: `{base_dir}/memory.json`."""
return self.base_dir / "memory.json"
def thread_dir(self, thread_id: str) -> Path:
"""
Host path for a thread's data: `{base_dir}/threads/{thread_id}/`
This directory contains a `user-data/` subdirectory that is mounted
as `/mnt/user-data/` inside the sandbox.
Raises:
ValueError: If `thread_id` contains unsafe characters (path separators
or `..`) that could cause directory traversal.
"""
if not _SAFE_THREAD_ID_RE.match(thread_id):
raise ValueError(
f"Invalid thread_id {thread_id!r}: only alphanumeric characters, "
"hyphens, and underscores are allowed."
)
return self.base_dir / "threads" / thread_id
def sandbox_work_dir(self, thread_id: str) -> Path:
"""
Host path for the agent's workspace directory.
Host: `{base_dir}/threads/{thread_id}/user-data/workspace/`
Sandbox: `/mnt/user-data/workspace/`
"""
return self.thread_dir(thread_id) / "user-data" / "workspace"
def sandbox_uploads_dir(self, thread_id: str) -> Path:
"""
Host path for user-uploaded files.
Host: `{base_dir}/threads/{thread_id}/user-data/uploads/`
Sandbox: `/mnt/user-data/uploads/`
"""
return self.thread_dir(thread_id) / "user-data" / "uploads"
def sandbox_outputs_dir(self, thread_id: str) -> Path:
"""
Host path for agent-generated artifacts.
Host: `{base_dir}/threads/{thread_id}/user-data/outputs/`
Sandbox: `/mnt/user-data/outputs/`
"""
return self.thread_dir(thread_id) / "user-data" / "outputs"
def sandbox_user_data_dir(self, thread_id: str) -> Path:
"""
Host path for the user-data root.
Host: `{base_dir}/threads/{thread_id}/user-data/`
Sandbox: `/mnt/user-data/`
"""
return self.thread_dir(thread_id) / "user-data"
def ensure_thread_dirs(self, thread_id: str) -> None:
"""Create all standard sandbox directories for a thread."""
self.sandbox_work_dir(thread_id).mkdir(parents=True, exist_ok=True)
self.sandbox_uploads_dir(thread_id).mkdir(parents=True, exist_ok=True)
self.sandbox_outputs_dir(thread_id).mkdir(parents=True, exist_ok=True)
def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
"""Resolve a sandbox virtual path to the actual host filesystem path.
Args:
thread_id: The thread ID.
virtual_path: Virtual path as seen inside the sandbox, e.g.
``/mnt/user-data/outputs/report.pdf``.
Leading slashes are stripped before matching.
Returns:
The resolved absolute host filesystem path.
Raises:
ValueError: If the path does not start with the expected virtual
prefix or a path-traversal attempt is detected.
"""
stripped = virtual_path.lstrip("/")
prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
# Require an exact segment-boundary match to avoid prefix confusion
# (e.g. reject paths like "mnt/user-dataX/...").
if stripped != prefix and not stripped.startswith(prefix + "/"):
raise ValueError(f"Path must start with /{prefix}")
relative = stripped[len(prefix) :].lstrip("/")
base = self.sandbox_user_data_dir(thread_id).resolve()
actual = (base / relative).resolve()
try:
actual.relative_to(base)
except ValueError:
raise ValueError("Access denied: path traversal detected")
return actual
# ── Singleton ────────────────────────────────────────────────────────────
# Process-wide Paths instance; stays None until get_paths() is first called.
_paths: Paths | None = None


def get_paths() -> Paths:
    """Return the shared module-level Paths instance.

    The instance is built with no arguments on the first call and the same
    object is handed back on every later call.
    """
    global _paths
    if _paths is not None:
        return _paths
    _paths = Paths()
    return _paths
|