diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..928b99da082aadddd1cc61601ef0107ac66051f2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,23 @@
+.git
+.gitignore
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.env
+.envrc
+.venv/
+venv/
+workspace/*
+!workspace/.gitkeep
+traces/*
+!traces/.gitkeep
+api_runs/*
+!api_runs/.gitkeep
+runtime/
+tests/
+.codex/
+.idea/
+.vscode/
+.DS_Store
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..416f22dfede2e1febd5e4ac46182af7d0ea6bc58
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,29 @@
+# Required
+API_KEY="your_openai_compatible_key"                     # API key for your OpenAI-compatible LLM provider.
+API_BASE="https://your-openai-compatible-endpoint/v1"    # Base URL for the OpenAI-compatible chat-completions endpoint.
+MODEL_NAME="gpt-5.5"                                     # Main model used by the agent and WebFetch summarization.
+SERPER_KEY_ID="your_serper_key"                          # https://serper.dev/
+JINA_API_KEYS="your_jina_key"                            # https://jina.ai/
+MINERU_TOKEN="your_mineru_token"                         # https://mineru.net/
+
+# Optional
+WORKSPACE_ROOT="./workspace"                             # Default local workspace root when --workspace-root is not provided.
+MAX_LLM_CALL_PER_RUN=100                                 # Maximum chat-completions calls allowed in one agent run.
+MAX_AGENT_ROUNDS=100                                     # Maximum ReAct loop rounds before forced termination.
+MAX_AGENT_RUNTIME_SECONDS=9000                           # Maximum wall-clock runtime per agent run.
+LLM_TIMEOUT_SECONDS=600                                  # Timeout for each chat-completions request.
+LLM_MAX_OUTPUT_TOKENS=10000                              # Maximum output tokens requested from the main model.
+MAX_INPUT_TOKENS=320000                                  # Maximum input-token budget used for runtime token accounting.
+LLM_MAX_RETRIES=10                                       # Maximum retries for transient LLM API failures.
+TEMPERATURE=0.6                                          # Main model sampling temperature.
+TOP_P=0.95                                               # Main model nucleus-sampling top_p.
+PRESENCE_PENALTY=1.1                                     # Main model presence penalty when supported by the provider.
+AUTO_COMPACT_TRIGGER_TOKENS="128k"                       # Context size threshold that triggers automatic memory compaction.
+IMAGE_PART_TOKEN_ESTIMATE=1536                           # Token estimate used for each runtime image_url content part.
+LLM_IMAGE_MAX_EDGE=1568                                  # Maximum image edge length sent to multimodal LLMs.
+LLM_IMAGE_MAX_BYTES=524288                               # Maximum compressed image payload size sent to multimodal LLMs.
+LLM_IMAGE_JPEG_QUALITY=85                                # Initial JPEG quality for runtime image compression.
+DEBUG_AGENT=false                                        # Print verbose agent-loop debug logs.
+DEBUG_SEARCH=false                                       # Print verbose WebSearch debug logs.
+DEBUG_SCHOLAR=false                                      # Print verbose ScholarSearch debug logs.
+DEBUG_VISIT=false                                        # Print verbose WebFetch debug logs.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..0dfbde04c3e70f7cff99b23b752175f63f413854
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,230 @@
+runtime/
+# Local agent artifacts
+AGENTS.md
+workspace/*
+!workspace/.gitkeep
+api_runs/*
+!api_runs/.gitkeep
+traces/*
+!traces/.gitkeep
+/inputs/
+data/
+benchmarks/**/local_*.py
+.idea/
+.vscode/
+.DS_Store
+tests/example_files/pdfs/dummy_document
+.codex
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer, 
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Hugging Face Space runtime artifacts
+runtime/
+/tmp/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..b33a6e138bc9545eeb7a3dc9b91cc4b7f25b53ec
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1 \
+    PORT=7860 \
+    RH_SPACE_RUNS_DIR=/tmp/researchharness_space/runs
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        bash \
+        ca-certificates \
+        curl \
+        git \
+        poppler-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+EXPOSE 7860
+CMD ["python", "app.py"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..daf5f8c17d61d84bda60e70b08250270fdd00eb3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Wanghan Xu
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index e232e376e6b6e5f429880f620536d98b15b35c1f..2513c2e9681fc31c52ab6e35e93a9bb140aa2868 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,66 @@
 ---
 title: ResearchHarness
-emoji: 🌖
-colorFrom: yellow
-colorTo: gray
+emoji: 🚀
+colorFrom: blue
+colorTo: yellow
 sdk: docker
+app_port: 7860
 pinned: false
 license: mit
-short_description: A lightweight, general-purpose harness for tool-using LLM ag
+short_description: Lightweight harness for tool-using LLM agents.
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# ResearchHarness Space
+
+This Space runs the ResearchHarness browser frontend as a lightweight hosted agent UI.
+It reuses the ResearchHarness tool-calling runtime and keeps the hosted mode intentionally simple:
+
+- Users do not choose a local workspace.
+- Each new chat gets an isolated temporary runtime directory.
+- Uploaded images are saved under that chat workspace and also passed to the model when supported.
+- Agent traces and session state are stored beside the temporary workspace.
+- Old workspaces and traces are cleaned periodically so the Space does not grow without bound.
+
+## Required Secrets
+
+Configure these as Hugging Face Space secrets before starting the app:
+
+| Secret | Purpose |
+| --- | --- |
+| `API_KEY` | API key for your OpenAI-compatible LLM provider. |
+| `API_BASE` | OpenAI-compatible `/v1` endpoint. |
+| `MODEL_NAME` | Main model used by ResearchHarness. |
+| `SERPER_KEY_ID` | WebSearch / ScholarSearch key from <https://serper.dev/>. |
+| `JINA_API_KEYS` | WebFetch key from <https://jina.ai/>. |
+| `MINERU_TOKEN` | ReadPDF key from <https://mineru.net/>. |
+
+## Optional Runtime Variables
+
+| Variable | Default | Meaning |
+| --- | --- | --- |
+| `RH_SPACE_RUNS_DIR` | `/tmp/researchharness_space/runs` | Parent directory for temporary per-chat runs. |
+| `RH_SPACE_RETENTION_SECONDS` | `21600` | Delete inactive runs older than this many seconds. |
+| `RH_SPACE_MAX_RUNS` | `40` | Keep at most this many inactive runs. |
+| `RH_SPACE_CLEANUP_INTERVAL_SECONDS` | `900` | Background cleanup interval. |
+| `RH_ROLE_PROMPT_FILES` | empty | Optional role prompt files inside the Space image, separated by `os.pathsep` (`:` on Linux). |
+| `PORT` | `7860` | Port used by Hugging Face Docker Spaces. |
+
+## Runtime Layout
+
+```text
+/tmp/researchharness_space/runs/
+└── run_YYYYMMDD_HHMMSS_<random>/
+    ├── agent_workspace/
+    │   └── inputs/images/        # user-uploaded images, when present
+    └── agent_trace/              # trace JSONL and _session_state.json
+```
+
+The frontend only exposes the chat UI. The workspace path is managed by the server so hosted users cannot browse or select server folders.
+
+## Local Smoke Test
+
+```bash
+python app.py
+```
+
+Then open `http://127.0.0.1:7860`.
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000000000000000000000000000000000000..e9dfc3decf84a36222b2ddb29cf688350ff4cd63
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+v0.0.35
diff --git a/agent_base/__init__.py b/agent_base/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c2221aead94646b08600294b71ff2927f94acf8
--- /dev/null
+++ b/agent_base/__init__.py
@@ -0,0 +1,5 @@
+"""Lightweight trusted-local harness for tool-using research agents."""
+
+from agent_base.base import BaseAgent, agent_role
+
+__all__ = ["BaseAgent", "agent_role"]
diff --git a/agent_base/base.py b/agent_base/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ae38be42e1baef20ad6de8ead48a8c941c6e94c
--- /dev/null
+++ b/agent_base/base.py
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any, Iterable, Optional, Sequence
+
+
+def _normalize_function_list(function_list: Optional[Iterable[str]]) -> Optional[list[str]]:
+    if function_list is None:
+        return None
+    normalized: list[str] = []
+    for raw_name in function_list:
+        name = str(raw_name).strip()
+        if name:
+            normalized.append(name)
+    return normalized
+
+
+def agent_role(
+    *,
+    name: str,  # stored stripped as cls.role_name; a blank name falls back to the class name
+    role_prompt: str = "",  # stored stripped as cls.default_role_prompt
+    function_list: Optional[Iterable[str]] = None,  # normalized into cls.default_function_list; None stays None
+):
+    """
+    Class decorator used by upper-layer frameworks to declare agent defaults.
+
+    This keeps the lower-layer execution loop generic while allowing subclasses
+    to provide role-specific prompt addenda and tool restrictions declaratively.
+    """
+
+    def decorator(cls):  # sets the three class attributes in place and returns the same class
+        cls.role_name = str(name).strip() or cls.__name__
+        cls.default_role_prompt = str(role_prompt).strip()
+        cls.default_function_list = _normalize_function_list(function_list)
+        return cls
+
+    return decorator
+
+
+class BaseAgent(ABC):
+    """Abstract base class for agents built on top of ResearchHarness."""
+
+    role_name: str = "agent"  # replaced on a subclass by the @agent_role decorator
+    default_role_prompt: str = ""  # fallback used by resolve_role_prompt
+    default_function_list: Optional[list[str]] = None  # fallback used by resolve_function_list
+
+    @classmethod
+    def resolve_function_list(cls, function_list: Optional[Sequence[str]]) -> Optional[list[str]]:
+        if function_list is not None:  # an explicit argument overrides the class default, even when empty
+            return _normalize_function_list(function_list) or []
+        default_tools = getattr(cls, "default_function_list", None)
+        if default_tools is None:
+            return None  # nothing passed and nothing declared on the class
+        return list(default_tools)  # copy, so callers cannot mutate the class-level list
+
+    @classmethod
+    def resolve_role_prompt(cls, role_prompt: Optional[str]) -> str:
+        if role_prompt is None:  # only None falls back; an explicit "" is kept as an empty prompt
+            role_prompt = getattr(cls, "default_role_prompt", "")
+        return str(role_prompt or "").strip()
+
+    def should_accept_plaintext_result(
+        self,
+        *,
+        result_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> bool:
+        """
+        Decide whether a plain assistant text reply with no tool calls is terminal.
+
+        The default behavior preserves the original ResearchHarness semantics:
+        any meaningful assistant text without tool calls is accepted as the final
+        result. Upper layers may override this hook to require extra completion
+        artifacts before termination.
+        """
+
+        return True
+
+    def rejected_plaintext_result_message(
+        self,
+        *,
+        result_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> str:
+        """
+        Explain why a plain assistant text reply was not accepted as terminal.
+
+        Returning an empty string falls back to the generic runtime message.
+        """
+
+        return ""
+
+    def should_accept_terminal_error(
+        self,
+        *,
+        error_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> bool:
+        """
+        Decide whether a terminal LLM/runtime error can still be accepted.
+
+        The default behavior is conservative: terminal errors are not accepted.
+        Upper layers may override this hook when benchmark-specific completion
+        artifacts are already present and the remaining assistant text is not
+        semantically important.
+        """
+
+        return False
+
+    def accepted_terminal_error_result_text(
+        self,
+        *,
+        error_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> str:
+        """
+        Provide a synthetic terminal result when a terminal error is accepted.
+
+        Returning an empty string falls back to a generic runtime completion
+        message.
+        """
+
+        return ""
+
+    @abstractmethod
+    def run(self, prompt: str, workspace_root: Optional[str] = None):  # every concrete agent must override this
+        raise NotImplementedError
diff --git a/agent_base/console_utils.py b/agent_base/console_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1afc6a4c1082e35f8c561690b32fb3a53de42ed3
--- /dev/null
+++ b/agent_base/console_utils.py
@@ -0,0 +1,223 @@
+import argparse
+import json
+import os
+from pathlib import Path
+import shutil
+import sys
+import unicodedata
+from typing import Any, Optional
+
+
+ANSI_RESET = "\033[0m"  # SGR reset: ends any active color
+ANSI_COLORS = {  # color key -> ANSI SGR foreground escape, looked up by ConsoleEventPrinter._paint
+    "header": "\033[36m",  # cyan
+    "assistant": "\033[32m",  # green
+    "tool": "\033[33m",  # yellow
+    "runtime": "\033[34m",  # blue
+    "user": "\033[35m",  # magenta
+    "error": "\033[31m",  # red
+}
+
+
+def _char_display_width(char: str) -> int:  # columns counted for one character: 0, 1, or 2
+    if unicodedata.combining(char):  # non-zero combining class: the mark is counted as zero-width
+        return 0
+    if unicodedata.category(char) in {"Cc", "Cf"}:  # control and format characters are counted as zero-width
+        return 0
+    return 2 if unicodedata.east_asian_width(char) in {"F", "W"} else 1  # Fullwidth/Wide count as two columns
+
+
+def _display_width(text: str) -> int:  # sum of per-character column widths; input is passed through str() first
+    return sum(_char_display_width(char) for char in str(text))
+
+
+def _truncate_display(text: str, width: int) -> str:
+    if _display_width(text) <= width:
+        return text
+    suffix = "..."
+    target = max(0, width - _display_width(suffix))
+    out = []
+    used = 0
+    for char in text:
+        char_width = _char_display_width(char)
+        if used + char_width > target:
+            break
+        out.append(char)
+        used += char_width
+    return "".join(out) + suffix
+
+
+def _pad_display(text: str, width: int) -> str:  # right-pad with spaces to `width` columns; never truncates
+    return text + " " * max(0, width - _display_width(text))
+
+
+def _last_soft_break(chars: list[str]) -> int:  # index of the last usable whitespace break, or -1 if none
+    for index in range(len(chars) - 1, 0, -1):  # scan right to left; index 0 is never a candidate
+        if chars[index].isspace() and "".join(chars[:index]).strip():  # need non-blank text before the break
+            return index
+    return -1
+
+
+class ConsoleEventPrinter:
+    def __init__(self, *, model_name: str, workspace_root: Path, prompt: str):
+        self.model_name = model_name
+        self.workspace_root = workspace_root
+        self.prompt = prompt.strip()
+        self._printed_any = False
+        self._use_color = (
+            "NO_COLOR" not in os.environ
+            and os.environ.get("TERM") != "dumb"
+            and (sys.stdout.isatty() or bool(os.environ.get("FORCE_COLOR") or os.environ.get("CLICOLOR_FORCE")))
+        )
+
+    def print_header(self) -> None:
+        self._print_box(
+            "ResearchHarness CLI",
+            f"Model: {self.model_name}\nWorkspace Root: {self.workspace_root}\n\nPrompt:\n{self.prompt}",
+            "header",
+        )
+
+    def reset_rounds(self) -> None:
+        self._printed_any = False
+
+    def _paint(self, text: str, color_key: str) -> str:
+        if not self._use_color:
+            return text
+        return f"{ANSI_COLORS.get(color_key, '')}{text}{ANSI_RESET}"
+
+    def _terminal_width(self) -> int:
+        return max(60, min(110, shutil.get_terminal_size((100, 20)).columns))
+
+    def _wrap_line(self, line: str, width: int) -> list[str]:
+        expanded = line.expandtabs(2)
+        if expanded == "":
+            return [""]
+        chunks: list[str] = []
+        current: list[str] = []
+        current_width = 0
+        for char in expanded:
+            char_width = _char_display_width(char)
+            if current and current_width + char_width > width:
+                break_at = _last_soft_break(current)
+                if break_at > 0:
+                    chunks.append("".join(current[:break_at]).rstrip())
+                    current = list("".join(current[break_at + 1 :]).lstrip())
+                    current_width = _display_width("".join(current))
+                else:
+                    chunks.append("".join(current))
+                    current = []
+                    current_width = 0
+            current.append(char)
+            current_width += char_width
+        if current:
+            chunks.append("".join(current))
+        return chunks or [""]
+
+    def _print_box(self, title: str, body: str, color_key: str = "runtime") -> None:
+        width = self._terminal_width()
+        inner_width = width - 4
+        title_text = f" {_truncate_display(title.strip(), width - 6)} "
+        top = "+" + title_text + "-" * max(0, width - 2 - _display_width(title_text)) + "+"
+        bottom = "+" + "-" * (width - 2) + "+"
+        if self._printed_any:
+            print()
+        print(self._paint(top, color_key))
+        for raw_line in str(body or "").splitlines() or [""]:
+            for line in self._wrap_line(raw_line, inner_width):
+                padded = _pad_display(line, inner_width)
+                print(f"{self._paint('|', color_key)} {padded} {self._paint('|', color_key)}")
+        print(self._paint(bottom, color_key))
+        self._printed_any = True
+
+    def _title(self, label: str, turn_index: int) -> str:
+        return f"{label} | round {turn_index}" if turn_index > 0 else label
+
+    def _format_tool_call(self, tool_name: str, tool_args: Any) -> str:
+        try:
+            tool_args_text = json.dumps(tool_args, ensure_ascii=False, indent=2)
+        except TypeError:
+            tool_args_text = str(tool_args)
+        return f"- {tool_name}\n{tool_args_text}"
+
+    def handle_event(self, row: dict[str, Any]) -> None:
+        role = str(row.get("role", ""))
+        turn_index = int(row.get("turn_index", 0) or 0)
+        text = str(row.get("text", ""))
+        capture_type = str(row.get("capture_type", ""))
+        tool_names = row.get("tool_names") if isinstance(row.get("tool_names"), list) else []
+        tool_arguments = row.get("tool_arguments") if isinstance(row.get("tool_arguments"), list) else []
+        finish_reason = str(row.get("finish_reason", ""))
+        error = str(row.get("error", ""))
+
+        if capture_type and not text.strip():
+            return
+
+        if role == "system":
+            return
+
+        if role == "user":
+            if turn_index == 0:
+                return
+            self._print_box(self._title("Runtime Message", turn_index), text, "user")
+            return
+
+        if role == "assistant":
+            lines: list[str] = []
+            if tool_names:
+                if text.strip():
+                    lines.append(text)
+                else:
+                    suffix = f" finish_reason={finish_reason}" if finish_reason else ""
+                    lines.append(f"(no text; native tool-calls only.{suffix})")
+                lines.append("")
+                lines.append("Assistant Tool Calls:")
+                for idx, tool_name in enumerate(tool_names):
+                    tool_args = tool_arguments[idx] if idx < len(tool_arguments) else {}
+                    lines.append(self._format_tool_call(str(tool_name), tool_args))
+            elif text.strip():
+                lines.append(text)
+            else:
+                suffix = f" finish_reason={finish_reason}" if finish_reason else ""
+                lines.append(f"(empty assistant output.{suffix})")
+            if error:
+                lines.append("")
+                lines.append(f"Assistant Error: {error}")
+            self._print_box(self._title("Assistant", turn_index), "\n".join(lines), "error" if error else "assistant")
+            return
+
+        if role == "tool":
+            tool_name = str(tool_names[0]) if tool_names else "Tool"
+            lines = [text]
+            if error:
+                lines.extend(["", f"{tool_name} Error: {error}"])
+            self._print_box(self._title(f"{tool_name} Result", turn_index), "\n".join(lines), "error" if error else "tool")
+            return
+
+        if role == "runtime":
+            lines = [text]
+            if error:
+                lines.extend(["", f"Runtime Error: {error}"])
+            self._print_box(self._title("Runtime", turn_index), "\n".join(lines), "error" if error else "runtime")
+
+
+def main(argv: Optional[list[str]] = None) -> int:  # demo entry point: prints a header box and one sample event
+    parser = argparse.ArgumentParser(description="Show a minimal example of the CLI console event formatter.")
+    parser.parse_args(argv)  # no options are added to the parser; the parsed result is unused
+    printer = ConsoleEventPrinter(model_name="demo-model", workspace_root=Path("."), prompt="demo question")
+    printer.print_header()
+    printer.handle_event(
+        {
+            "role": "assistant",
+            "turn_index": 1,
+            "text": "",  # empty text plus tool_names exercises the "native tool-calls only" branch
+            "tool_names": ["Read"],
+            "tool_arguments": [{"path": "demo.txt"}],
+            "termination": "",  # not read by handle_event
+            "error": "",
+        }
+    )
+    return 0
+
+
+if __name__ == "__main__":  # run the demo when executed as a script; main()'s return value is the exit status
+    raise SystemExit(main())
diff --git a/agent_base/context_compact.py b/agent_base/context_compact.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dae404f8dd8079fbcafa578523867b75400c6c1
--- /dev/null
+++ b/agent_base/context_compact.py
@@ -0,0 +1,326 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Callable, Optional, Sequence
+
+from agent_base.model_profiles import ModelProfile
+from agent_base.utils import safe_jsonable
+
+
+# Marker text placed in front of every compacted-memory message. compact_messages() writes it
+# and _split_existing_memory_messages() matches on it to recognize earlier summaries, so
+# changing the wording stops previously written summaries from being detected.
+COMPACT_MEMORY_PREFIX = (
+    "Runtime memory summary from earlier turns.\n"
+    "This is compressed context, not ground truth.\n"
+    "The workspace files remain authoritative; re-read any file if exact details matter.\n\n"
+)
+
+
+@dataclass
+class CompactionOutcome:
+    status: str
+    compacted_messages: list[dict[str, Any]]
+    summary_text: str = ""
+    error: str = ""
+    trigger_reason: str = ""
+    prior_token_estimate: int = 0
+    new_token_estimate: int = 0
+    compacted_group_count: int = 0
+    kept_group_count: int = 0
+    existing_memory_text: str = ""
+    summary_request: list[dict[str, Any]] | None = None
+    summary_response: dict[str, Any] | None = None
+    pre_messages: list[dict[str, Any]] | None = None
+    post_messages: list[dict[str, Any]] | None = None
+
+
+def should_compact_messages(
+    *,
+    last_input_tokens: Optional[int],
+    current_token_estimate: int,
+    model_profile: ModelProfile,
+) -> tuple[bool, str]:
+    usage_hit = last_input_tokens is not None and int(last_input_tokens) >= model_profile.compact_trigger_tokens
+    estimate_hit = current_token_estimate >= model_profile.compact_trigger_tokens
+    if usage_hit and estimate_hit:
+        return True, "usage+estimate"
+    if usage_hit:
+        return True, "usage"
+    if estimate_hit:
+        return True, "estimate"
+    return False, ""
+
+
+def compact_messages(
+    *,
+    messages: Sequence[dict[str, Any]],
+    original_prompt_text: str,
+    model_name: str,
+    model_profile: ModelProfile,
+    llm_caller: Callable[..., dict[str, Any]],
+    token_counter: Callable[[Sequence[dict[str, Any]]], int],
+    runtime_deadline: Optional[float] = None,
+) -> CompactionOutcome:
+    safe_messages = [dict(message) for message in messages]
+    if len(safe_messages) <= 2:
+        return CompactionOutcome(
+            status="error",
+            compacted_messages=safe_messages,
+            pre_messages=safe_messages,
+            post_messages=safe_messages,
+            error="context compaction requires at least one conversational turn beyond the initial prompt",
+        )
+
+    prior_token_estimate = token_counter(safe_messages)
+    existing_memory_text, eligible_messages = _split_existing_memory_messages(safe_messages[2:])
+    turn_groups = _turn_groups(eligible_messages)
+    if not turn_groups:
+        return CompactionOutcome(
+            status="error",
+            compacted_messages=safe_messages,
+            prior_token_estimate=prior_token_estimate,
+            existing_memory_text=existing_memory_text,
+            pre_messages=safe_messages,
+            post_messages=safe_messages,
+            error="context compaction found no eligible conversational turns",
+        )
+
+    compacted_groups, recent_groups = _split_turn_groups(turn_groups, model_profile)
+    if not compacted_groups:
+        return CompactionOutcome(
+            status="error",
+            compacted_messages=safe_messages,
+            prior_token_estimate=prior_token_estimate,
+            existing_memory_text=existing_memory_text,
+            pre_messages=safe_messages,
+            post_messages=safe_messages,
+            error="context compaction did not find any older turns to summarize",
+        )
+
+    history_text = _render_history_text(compacted_groups, model_profile)
+    prior_memory_block = ""
+    if existing_memory_text:
+        prior_memory_block = (
+            "Previously compressed memory to preserve and refine:\n"
+            f"{_truncate_summary_text(existing_memory_text, max_chars=max(1200, model_profile.context_window // 3))}\n\n"
+        )
+    summary_request = [
+        {
+            "role": "system",
+            "content": (
+                "You compress older tool-using agent history into short working memory for continued execution. "
+                "Return plain text only. Do not call tools. Do not invent facts."
+            ),
+        },
+        {
+            "role": "user",
+            "content": (
+                "Summarize the earlier conversation history for a tool-using agent.\n\n"
+                f"Original task:\n{original_prompt_text}\n\n"
+                "Write a concise working memory with these sections:\n"
+                "- Goal\n"
+                "- Constraints\n"
+                "- Files and artifacts\n"
+                "- Evidence and results\n"
+                "- Open issues\n"
+                "- Next useful actions\n\n"
+                "Rules:\n"
+                "- Prefer concrete file paths, numeric results, and grounded facts.\n"
+                "- Mention uncertainty when details may need to be re-read from files.\n"
+                "- Merge any prior compressed memory with the newer history below into one refreshed memory.\n"
+                "- Deduplicate repeated sections and do not repeat earlier summaries verbatim.\n"
+                "- The workspace remains authoritative.\n\n"
+                f"{prior_memory_block}"
+                f"Older history to compress:\n{history_text}"
+            ),
+        },
+    ]
+    summary_reply = llm_caller(
+        summary_request,
+        runtime_deadline=runtime_deadline,
+        max_output_tokens=model_profile.compact_summary_max_tokens,
+    )
+    if not isinstance(summary_reply, dict) or summary_reply.get("status") != "ok":
+        error = summary_reply.get("error", "context compaction summary call failed") if isinstance(summary_reply, dict) else str(summary_reply)
+        return CompactionOutcome(
+            status="error",
+            compacted_messages=safe_messages,
+            prior_token_estimate=prior_token_estimate,
+            existing_memory_text=existing_memory_text,
+            summary_request=summary_request,
+            summary_response=safe_jsonable(summary_reply) if isinstance(summary_reply, dict) else {"status": "error", "error": error},
+            pre_messages=safe_messages,
+            post_messages=safe_messages,
+            error=error,
+            compacted_group_count=len(compacted_groups),
+            kept_group_count=len(recent_groups),
+        )
+
+    if summary_reply.get("tool_calls"):
+        return CompactionOutcome(
+            status="error",
+            compacted_messages=safe_messages,
+            prior_token_estimate=prior_token_estimate,
+            existing_memory_text=existing_memory_text,
+            summary_request=summary_request,
+            summary_response=safe_jsonable(summary_reply),
+            pre_messages=safe_messages,
+            post_messages=safe_messages,
+            compacted_group_count=len(compacted_groups),
+            kept_group_count=len(recent_groups),
+            error="context compaction summary call returned tool calls",
+        )
+
+    summary_text = str(summary_reply.get("content", "") or "").strip()
+    if not summary_text:
+        return CompactionOutcome(
+            status="error",
+            compacted_messages=safe_messages,
+            prior_token_estimate=prior_token_estimate,
+            existing_memory_text=existing_memory_text,
+            summary_request=summary_request,
+            summary_response=safe_jsonable(summary_reply),
+            pre_messages=safe_messages,
+            post_messages=safe_messages,
+            compacted_group_count=len(compacted_groups),
+            kept_group_count=len(recent_groups),
+            error="context compaction summary call returned empty text",
+        )
+
+    summary_message = {"role": "user", "content": COMPACT_MEMORY_PREFIX + summary_text}
+    compacted_messages = safe_messages[:2] + [summary_message]
+    for group in recent_groups:
+        compacted_messages.extend(group)
+    new_token_estimate = token_counter(compacted_messages)
+    return CompactionOutcome(
+        status="ok",
+        compacted_messages=compacted_messages,
+        summary_text=summary_text,
+        prior_token_estimate=prior_token_estimate,
+        new_token_estimate=new_token_estimate,
+        compacted_group_count=len(compacted_groups),
+        kept_group_count=len(recent_groups),
+        existing_memory_text=existing_memory_text,
+        summary_request=summary_request,
+        summary_response=safe_jsonable(summary_reply),
+        pre_messages=safe_messages,
+        post_messages=compacted_messages,
+    )
+
+
+def _turn_groups(messages: Sequence[dict[str, Any]]) -> list[list[dict[str, Any]]]:
+    groups: list[list[dict[str, Any]]] = []
+    current_group: list[dict[str, Any]] = []
+    for message in messages:
+        role = str(message.get("role", ""))
+        if role == "assistant" and current_group:
+            groups.append(current_group)
+            current_group = [message]
+            continue
+        current_group.append(message)
+    if current_group:
+        groups.append(current_group)
+    return groups
+
+
+def _split_existing_memory_messages(messages: Sequence[dict[str, Any]]) -> tuple[str, list[dict[str, Any]]]:
+    existing_summaries: list[str] = []
+    remaining_messages: list[dict[str, Any]] = []
+    preserving_summary_prefix = True
+    for message in messages:
+        content = message.get("content", "")
+        if (
+            preserving_summary_prefix
+            and str(message.get("role", "")) == "user"
+            and isinstance(content, str)
+            and content.startswith(COMPACT_MEMORY_PREFIX)
+        ):
+            existing_summaries.append(content[len(COMPACT_MEMORY_PREFIX) :].strip())
+            continue
+        preserving_summary_prefix = False
+        remaining_messages.append(dict(message))
+    merged_summary = "\n\n".join(summary for summary in existing_summaries if summary).strip()
+    return merged_summary, remaining_messages
+
+
+def _split_turn_groups(turn_groups: Sequence[Sequence[dict[str, Any]]], model_profile: ModelProfile) -> tuple[list[list[dict[str, Any]]], list[list[dict[str, Any]]]]:
+    recent_char_budget = max(400, model_profile.recent_history_budget_tokens * 4)
+    recent_groups: list[list[dict[str, Any]]] = []
+    recent_chars = 0
+
+    for group in reversed(turn_groups):
+        rendered = _render_group(group, max_chars_per_message=240)
+        if recent_groups and recent_chars >= recent_char_budget:
+            break
+        recent_groups.insert(0, [dict(message) for message in group])
+        recent_chars += len(rendered)
+        if len(recent_groups) >= 4:
+            break
+
+    if len(recent_groups) >= len(turn_groups):
+        recent_groups = recent_groups[1:]
+    compacted_count = max(0, len(turn_groups) - len(recent_groups))
+    compacted_groups = [[dict(message) for message in group] for group in turn_groups[:compacted_count]]
+    return compacted_groups, recent_groups
+
+
+def _render_history_text(turn_groups: Sequence[Sequence[dict[str, Any]]], model_profile: ModelProfile) -> str:
+    max_history_chars = max(600, min(64000, model_profile.context_window * 2))
+    max_chars_per_message = max(200, min(4000, max_history_chars // 10))
+    parts: list[str] = []
+    used = 0
+    for index, group in enumerate(turn_groups, start=1):
+        rendered = f"[Turn group {index}]\n{_render_group(group, max_chars_per_message=max_chars_per_message)}"
+        if parts and used + len(rendered) > max_history_chars:
+            remaining = max_history_chars - used
+            if remaining > 80:
+                parts.append(rendered[: remaining - 40].rstrip() + "\n...[history truncated]")
+            break
+        parts.append(rendered)
+        used += len(rendered)
+    return "\n\n".join(parts).strip()
+
+
+def _render_group(group: Sequence[dict[str, Any]], *, max_chars_per_message: int) -> str:
+    lines: list[str] = []
+    for message in group:
+        role = str(message.get("role", ""))
+        content = _message_excerpt(message, max_chars=max_chars_per_message)
+        lines.append(f"{role}: {content}")
+    return "\n".join(lines).strip()
+
+
+def _message_excerpt(message: dict[str, Any], *, max_chars: int) -> str:
+    content = message.get("content", "")
+    text: str
+    if isinstance(content, str):
+        text = content
+    elif isinstance(content, list):
+        parts: list[str] = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                parts.append(str(part.get("text", "")))
+            elif isinstance(part, dict) and part.get("type") == "image_url":
+                parts.append("[image_url]")
+            else:
+                parts.append(str(part))
+        text = " ".join(part for part in parts if part)
+    else:
+        text = str(content)
+    tool_calls = message.get("tool_calls")
+    if tool_calls:
+        tool_names = []
+        for tool_call in tool_calls:
+            function_block = tool_call.get("function", {}) if isinstance(tool_call, dict) else {}
+            tool_names.append(str(function_block.get("name", "")))
+        if tool_names:
+            text = (text + "\nTool calls: " + ", ".join(name for name in tool_names if name)).strip()
+    compacted = " ".join(text.split())
+    if len(compacted) <= max_chars:
+        return compacted
+    return compacted[: max_chars - 16].rstrip() + "...[truncated]"
+
+
+def _truncate_summary_text(text: str, *, max_chars: int) -> str:
+    compacted = " ".join(str(text).split())
+    if len(compacted) <= max_chars:
+        return compacted
+    return compacted[: max_chars - 16].rstrip() + "...[truncated]"
diff --git a/agent_base/model_profiles.py b/agent_base/model_profiles.py
new file mode 100644
index 0000000000000000000000000000000000000000..af48572f45282f28be0f756f72ae0e0fc1353954
--- /dev/null
+++ b/agent_base/model_profiles.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Optional
+
+
+@dataclass(frozen=True)
+class ModelProfile:
+    family: str
+    context_window: int
+    output_reserve_tokens: int
+    compact_buffer_tokens: int
+    recent_history_budget_tokens: int
+    compact_summary_max_tokens: int
+    compact_trigger_tokens_override: Optional[int] = None
+
+    @property
+    def compact_trigger_tokens(self) -> int:
+        if self.compact_trigger_tokens_override is not None:
+            return self.compact_trigger_tokens_override
+        return max(256, self.context_window - self.output_reserve_tokens - self.compact_buffer_tokens)
+
+
+def _model_family(model_name: str) -> str:
+    normalized = str(model_name or "").strip().casefold()
+    if "gemini" in normalized:
+        return "gemini"
+    if "claude" in normalized:
+        return "claude"
+    if "deepseek" in normalized:
+        return "deepseek"
+    if "qwen" in normalized:
+        return "qwen"
+    if "glm" in normalized:
+        return "glm"
+    if "gpt" in normalized or "o1" in normalized or "o3" in normalized or "o4" in normalized:
+        return "gpt"
+    return "generic"
+
+
+def resolve_model_profile(
+    model_name: str,
+    *,
+    configured_max_input_tokens: int,
+    configured_max_output_tokens: int,
+    compact_trigger_tokens: Any = None,
+) -> ModelProfile:
+    context_window = max(1024, int(configured_max_input_tokens))
+    output_reserve_tokens = max(128, min(int(configured_max_output_tokens), max(256, context_window // 12)))
+    compact_buffer_tokens = max(64, min(4096, context_window // 20))
+    recent_history_budget_tokens = max(128, min(16384, context_window // 8))
+    compact_summary_max_tokens = max(256, min(2048, context_window // 16))
+    compact_trigger_override = parse_compact_trigger_tokens(compact_trigger_tokens, context_window=context_window)
+
+    family = _model_family(model_name)
+    if family in {"claude", "deepseek", "gemini"}:
+        compact_buffer_tokens = max(compact_buffer_tokens, 1024)
+        recent_history_budget_tokens = max(recent_history_budget_tokens, 1024)
+
+    return ModelProfile(
+        family=family,
+        context_window=context_window,
+        output_reserve_tokens=output_reserve_tokens,
+        compact_buffer_tokens=compact_buffer_tokens,
+        recent_history_budget_tokens=recent_history_budget_tokens,
+        compact_summary_max_tokens=compact_summary_max_tokens,
+        compact_trigger_tokens_override=compact_trigger_override,
+    )
+
+
+def parse_compact_trigger_tokens(value: Any, *, context_window: int) -> Optional[int]:
+    if value is None:
+        return None
+    if isinstance(value, bool):
+        raise ValueError("compact trigger tokens must not be a boolean.")
+    if isinstance(value, int):
+        parsed = value
+    else:
+        text = str(value).strip().casefold()
+        if not text:
+            return None
+        multiplier = 1
+        if text.endswith("k"):
+            multiplier = 1024
+            text = text[:-1].strip()
+        elif text.endswith("m"):
+            multiplier = 1024 * 1024
+            text = text[:-1].strip()
+        text = text.replace("_", "").replace(",", "")
+        parsed = int(text) * multiplier
+    parsed = max(256, parsed)
+    return min(parsed, max(256, int(context_window)))
diff --git a/agent_base/prompt.py b/agent_base/prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..b72276cde991e8d14b070ac43f6aa7311dc8011a
--- /dev/null
+++ b/agent_base/prompt.py
@@ -0,0 +1,106 @@
+import argparse
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+
+# Directory holding the prompt asset files; resolved relative to this module's location.
+PROMPTS_DIR = Path(__file__).resolve().parent / "prompts"
+
+
+@dataclass(frozen=True)
+class PromptAsset:
+    """Immutable registry entry describing one prompt file on disk."""
+
+    name: str  # registry name; matches the key used in PROMPT_ASSETS
+    path: Path  # file that _read_prompt_asset() reads
+    description: str  # one-line summary printed by --list-assets
+
+
+# Registry of known prompt assets, keyed by asset name. _show_asset() and the
+# --list-assets option of main() look assets up here.
+PROMPT_ASSETS = {
+    "system_base": PromptAsset(
+        name="system_base",
+        path=PROMPTS_DIR / "system_base.md",
+        description="Base general-purpose system prompt for the harness.",
+    ),
+    "extractor": PromptAsset(
+        name="extractor",
+        path=PROMPTS_DIR / "extractor.md",
+        description="Goal-directed webpage extraction prompt used by WebFetch.",
+    ),
+}
+
+
+def _read_prompt_asset(asset: PromptAsset) -> str:
+    return asset.path.read_text(encoding="utf-8").strip()
+
+
+# Both prompt files are read eagerly at import time, so importing this module fails if
+# either file cannot be read.
+SYSTEM_PROMPT = _read_prompt_asset(PROMPT_ASSETS["system_base"])
+EXTRACTOR_PROMPT = _read_prompt_asset(PROMPT_ASSETS["extractor"])
+
+
+def _normalize_extra_blocks(blocks: Iterable[str] | None) -> list[str]:
+    normalized: list[str] = []
+    for raw_block in blocks or []:
+        block = str(raw_block or "").strip()
+        if block:
+            normalized.append(block)
+    return normalized
+
+
+def composed_system_prompt(*, current_date: str, extra_blocks: Iterable[str] | None = None) -> str:
+    blocks = [SYSTEM_PROMPT.rstrip()]
+    for block in _normalize_extra_blocks(extra_blocks):
+        blocks.append(block.rstrip())
+    blocks.append(f"Current date: {current_date}")
+    return "\n\n".join(blocks)
+
+
+def _show_asset(name: str) -> str:
+    asset = PROMPT_ASSETS.get(name)
+    if asset is None:
+        valid = ", ".join(sorted(PROMPT_ASSETS))
+        raise ValueError(f"Unknown prompt asset '{name}'. Available assets: {valid}")
+    return _read_prompt_asset(asset)
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Inspect prompt assets.")
+    parser.add_argument("--show-system", action="store_true", help="Print the composed system prompt.")
+    parser.add_argument("--show-extractor", action="store_true", help="Print the extractor prompt.")
+    parser.add_argument("--show-asset", metavar="NAME", help="Print one prompt asset by name.")
+    parser.add_argument("--list-assets", action="store_true", help="List registered prompt assets.")
+    parser.add_argument(
+        "--with-extra-file",
+        action="append",
+        default=[],
+        dest="extra_files",
+        help="Append one extra prompt block file when printing the composed system prompt. May be passed multiple times.",
+    )
+    args = parser.parse_args(argv)
+
+    extra_blocks = [Path(path).read_text(encoding="utf-8") for path in args.extra_files]
+
+    if args.list_assets:
+        for asset in sorted(PROMPT_ASSETS.values(), key=lambda item: item.name):
+            print(f"{asset.name}: {asset.description}")
+        return 0
+
+    if args.show_asset:
+        print(_show_asset(args.show_asset))
+        return 0
+
+    if args.show_system:
+        print(composed_system_prompt(current_date="<DATE>", extra_blocks=extra_blocks))
+        return 0
+
+    if args.show_extractor:
+        print(EXTRACTOR_PROMPT)
+        return 0
+
+    print(f"prompt_asset_dir={PROMPTS_DIR}")
+    print(f"system_prompt_chars={len(composed_system_prompt(current_date='<DATE>', extra_blocks=extra_blocks))}")
+    print(f"extractor_prompt_chars={len(EXTRACTOR_PROMPT)}")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/agent_base/prompts/extractor.md b/agent_base/prompts/extractor.md
new file mode 100644
index 0000000000000000000000000000000000000000..729bff5fc50fb3ec8d9d0d96eca0101ab87ffe1a
--- /dev/null
+++ b/agent_base/prompts/extractor.md
@@ -0,0 +1,19 @@
+Please process the following webpage content and user goal to extract relevant information.
+
+## **Webpage Content** 
+{webpage_content}
+
+## **User Goal**
+{goal}
+
+## **Task Guidelines**
+1. **Content Scanning for Rationale**: Locate the **specific sections/data** directly related to the user's goal within the webpage content.
+2. **Key Extraction for Evidence**: Identify and extract the **most relevant information** from the content. Preserve the most useful original context as fully as practical.
+3. **Summary Output for Summary**: Organize a concise, goal-focused summary with clear logical flow.
+
+## **Output Requirements**
+- Return a single JSON object only.
+- Required keys: `"rational"`, `"evidence"`, `"summary"`.
+- All three fields must always be present.
+- `"evidence"` and `"summary"` must be non-empty strings whenever relevant content exists.
+- If the page is irrelevant or insufficient, still return valid strings explaining that limitation.
diff --git a/agent_base/prompts/system_base.md b/agent_base/prompts/system_base.md
new file mode 100644
index 0000000000000000000000000000000000000000..ed0100587db58a04a481c732286016f73568ac17
--- /dev/null
+++ b/agent_base/prompts/system_base.md
@@ -0,0 +1,232 @@
+You are a capable all-purpose AI assistant. You do far more than simple question answering: you handle complex tasks, investigate problems, work through project-level requests, and support serious research work. Work from evidence, not guesses. Use the available tools deliberately, keep control flow simple, and stop as soon as you have enough verified information to complete the task correctly.
+
+# Role And Operating Principles
+
+## Mission
+
+- Prefer direct evidence over memory or inference.
+- Prefer deterministic local computation over mental arithmetic or paraphrase.
+- Prefer the smallest sufficient tool for the current step.
+- If a tool can verify the exact claim, use it.
+
+## Planning, Memory, And Long-Horizon Work
+
+- For substantial, uncertain, or multi-stage tasks, create a local `plan.md` early in the workspace and keep it updated as the work progresses.
+- Use `plan.md` to track the overall goal, phased goals, the current stage, and concrete acceptance checklists for each stage.
+- In `plan.md`, mark work that is not yet complete as `[ ]`, work that is completed and verified as `[Y]`, and work that cannot currently be completed as `[N]` with a short factual reason.
+- Keep `plan.md` aligned with reality. When evidence changes the plan, update the plan instead of continuing with an outdated plan.
+- For long-running tasks, prefer `Write` to create `plan.md` and `Edit` to keep it current.
+- When ongoing work depends on durable facts that may be easy to forget, maintain a local `memory.md`.
+- Use `memory.md` to store important factual state such as resolved paths, URLs, measurements, assumptions, decisions, blockers, and other verified facts that should persist across the task.
+- Keep `memory.md` compact, factual, and easy to update. Record evidence and decisions, not raw hidden reasoning.
+- Small one-step tasks do not need a full `plan.md` or `memory.md` if they would add overhead without helping execution.
+
+## Exploration And Convergence
+
+- Explore broadly enough at the beginning of a task to identify the real solution path, the relevant files, the relevant evidence, and the main constraints.
+- Once you have enough evidence, converge and execute cleanly instead of reopening every branch.
+- Follow the same pattern inside each phase: early exploration to understand the phase, then focused execution to finish it.
+- Keep exploration purposeful. Use it to reduce uncertainty, compare plausible paths, or verify assumptions.
+- Let `plan.md`, the current acceptance checklist, and newly gathered evidence determine when to continue exploring, when to revise the plan, and when to move forward.
+- Non-interactive or benchmark-style runs:
+  - If `AskUser` is unavailable or forbidden, do not ask follow-up questions.
+  - Make the best independent attempt possible from the prompt, workspace, and tools.
+  - If the task can be answered by reading local files, searching, fetching a page, inspecting images, or running a small computation, make a bounded attempt before saying the information is unavailable.
+  - Keep the attempt proportional to the task; avoid unrelated research, open-ended browsing, or repeated failed tool calls once a short investigation has established the limitation.
+- Interactive runs:
+  - Avoid asking the user before doing ordinary investigation.
+  - Avoid trying indefinitely when a concise clarification would unblock the task.
+  - First make a reasonable bounded attempt using the available workspace and tools.
+  - If key information, preference, or approval is still missing after that attempt, ask one concise clarification with `AskUser`, then continue from the user's answer.
+
+## Truthfulness, Evidence, And Claims
+
+- Anchor your work to actual tool outputs, explicit user input, and deterministic computation.
+- If evidence is missing, gather it or clearly state the limitation.
+- Treat missing outputs, failed commands, and unknowns honestly.
+- Keep claims proportional to the evidence you actually gathered.
+- Prefer an explicit limitation over a polished but unsupported answer.
+- Do not fabricate tool outputs, file contents, experiment results, citations, numeric values, or completion status.
+- Do not claim that a file, report, plot, experiment, or result exists unless you produced it or verified it directly.
+- If the user or task explicitly names a method, framework, protocol, model family, interpretability technique, metric, comparison axis, or ablation, treat that named item as part of the task contract.
+- Do not quietly replace an explicitly named method or protocol with a looser approximation just because a generic analysis is easier.
+- If an explicitly named method may be blocked by a missing library, missing data, or missing capability, verify that limitation early with tools and then state the limitation plainly before you substitute anything.
+
+# Safety And Scope
+
+## Boundaries
+
+- Stay inside the current workspace root.
+- Do not attempt to access secrets, credentials, or sensitive files such as `.env`, SSH keys, cloud credentials, `.git-credentials`, or `.netrc`.
+- Do not run destructive or privilege-oriented commands such as `sudo`, `su`, `shutdown`, `reboot`, disk-formatting commands, or obviously destructive deletion commands.
+- Prefer read-only inspection unless the user explicitly asks for a modification or the task clearly requires one.
+- Use the web tools for external information gathering. Do not use `Bash` or `Terminal*` as a substitute for arbitrary network retrieval.
+
+# Tool Use And Execution
+
+## Native Tool Calling Contract
+
+- Use the API's native tool calling interface when tools are needed. Do not write pseudo-XML, pseudo-tool JSON, or tag-based tool requests in plain text.
+- If a turn includes native tool calls, that turn is a tool-use turn. Any accompanying text is treated as working context, not as the final result.
+- Multiple tool calls in one turn are allowed only when they are independent.
+- If tool B depends on the output of tool A, do not request them in the same turn. Wait for tool A's result first.
+- If the user explicitly names required tools, call those exact tools instead of substituting a different tool.
+- If you are calling tools, that turn is not finished yet. Do not draft, preview, or guess the final result, including candidate field values, partial JSON, or a "likely final result".
+- Keep tool turns structured. Brief text may explain the current tool step, but the tool call itself is the action.
+- When no more tools are needed, return the final result as plain text.
+- If the user requires a strict format such as JSON, output only that payload as the plain final result text.
+- Do not emit legacy protocol tags such as `<tool_call>`, `<tool_response>`, `<think>`, or `<answer>`.
+
+## Tool Selection And Routing
+
+- Use this routing order:
+  - local file discovery by pathname pattern -> `Glob`
+  - local text search across files -> `Grep`
+  - local text / code / data files -> `Read`
+  - local PDF -> `ReadPDF`
+  - local image -> `ReadImage`
+  - local deterministic computation / parsing / transformation -> `Bash`
+  - discover candidate webpages -> `WebSearch`
+  - find paper metadata -> `ScholarSearch`
+  - verify actual page content -> `WebFetch`
+  - ask the human user for essential missing information -> `AskUser`
+  - persistent interactive shell state -> `Terminal*`
+- Search results and scholar results are discovery aids. They are not page-verification evidence by themselves.
+- Prefer `Bash` over `Terminal*` unless persistent interactive shell state is genuinely required.
+
+## Human Clarification Workflow
+
+- Only use `AskUser` if it is available in the current tool list. If it is not available, do not simulate a question in plain text; continue independently and report limitations when necessary.
+- Use `AskUser` only when continuing correctly depends on information, preference, or approval that cannot be determined from the workspace, available tools, or the user's existing instructions.
+- Do not use `AskUser` to avoid ordinary investigation, reading files, running commands, or making a reasonable evidence-backed decision.
+- Ask one concise question at a time. Include brief context when it helps the user answer accurately.
+- After receiving an `AskUser` answer, treat it as explicit user input, continue the task, and preserve the answer in the normal tool trace.
+
+## Workspace And Local File Workflow
+
+- Treat local files as discoverable resources inside the current workspace.
+- If a workspace root was provided for this run, that workspace is the default starting location for `Bash` and `TerminalStart`.
+- That means a first-turn `Bash` command like `ls` should list the workspace root directly.
+- Both relative paths and absolute paths are valid local path inputs.
+- Relative local paths resolve from the current workspace.
+- If a tool returns an absolute path, prefer reusing that exact path in later tool calls instead of reconstructing it.
+- Prefer `Glob` for file discovery by pattern and `Grep` for text search when those tools are sufficient.
+- `Glob` and `Grep` default to the current workspace root.
+- If the local file layout is unclear, explore it directly with `Bash`, for example `pwd`, `ls`, `find`, or `rg --files`.
+- For file-modification tasks, prefer `Write` for initial creation and `Edit` for targeted follow-up changes before verification.
+- Default pattern for local tasks:
+  - explore the workspace only if needed
+  - discover with `Glob` / `Grep` when helpful
+  - inspect with `Read` / `ReadPDF` / `ReadImage`
+  - compute or validate with `Bash`
+  - produce the final result from the actual tool output
+- For PDF tasks, prefer `ReadPDF` before `Bash` whenever the PDF content itself matters.
+- `ReadPDF` can expose both extracted text and extracted local image paths from the PDF parser.
+- If the task asks about a figure, caption, chart, diagram, or text visible inside a local PDF figure:
+  - start with `ReadPDF`
+  - use the extracted text and extracted image paths to identify the relevant figure
+  - then call `ReadImage` on the actual extracted local image file
+  - use `Bash` only for PDF-specific processing that `ReadPDF` does not already provide
+- Do not put `Read` and a path-dependent `Bash` command in the same turn when the Bash command needs the exact resolved path from `Read`.
+- When moving from file tools to `Bash`, prefer the absolute path shown by `Read` / `ReadPDF` or set `workdir` to the correct directory.
+- Do not assume a referenced local file sits in the current directory. If you have not yet seen the resolved path, either wait for `Read` or explore with `Bash`.
+- If a previous `Bash` command failed because it guessed the wrong working directory or used a relative path incorrectly, immediately retry with the exact absolute path from the file tool output.
+- If the user wants a value derived from a local file, do not guess from inspection alone when local computation is cheap. Compute it.
+- If a trusted local PyTorch `.pt` or `.pth` file fails to load because of
+  `weights_only` defaults or missing custom classes, try a compatible recovery
+  path such as `weights_only=False` or explicit safe globals after verifying
+  the file origin inside the workspace.
+
+## Bash Guidance
+
+- Treat `Bash` as the primary local execution tool.
+- Use it for:
+  - short `python3` snippets
+  - `pwd`, `ls`, `find`, `rg`, `git`
+  - parsing CSV / JSON / text
+  - ranking, sorting, aggregating, validating, and formatting
+  - combining outputs from other tools into a deterministic result
+- For temporary Python, prefer a heredoc:
+
+```bash
+python3 - <<'PY'
+print("hello")
+PY
+```
+
+- In Bash Python snippets, print only the values you need, ideally as valid JSON or short deterministic lines.
+- For output-sensitive tasks, make the Bash command print machine-friendly output first, then base the final result on that exact output.
+- Use explicit `timeout` values for heavier commands.
+- When using `Bash` to run temporary Python, keep the script deterministic and print only the values you need.
+- Do not use `Bash` for basic pathname globbing or simple text search when `Glob` or `Grep` already covers the need.
+
+## Web Research Workflow
+
+- If the user asks to visit a page, fetch a page, verify against a page, confirm page content, or explicitly requires `WebFetch`, you must call `WebFetch` before producing the final result.
+- If the user says "search first, then visit the page to verify it" or equivalent, the required pattern is:
+  - search first
+  - fetch the chosen page with `WebFetch`
+  - only then produce the final result
+- Do not treat `WebSearch` or `ScholarSearch` snippets as a substitute for `WebFetch` when page verification is required.
+- If the final result reports a visited URL (for example in a `visited_url` field), it must be a URL that was actually passed to `WebFetch`.
+
+## Terminal Workflow
+
+- In most tasks, do not use `Terminal*`.
+- If the user explicitly requires `Terminal*`, do not substitute `Bash`.
+- Use `Terminal*` only for genuinely stateful shell workflows, such as:
+  - starting a long-running process and polling it later
+  - interacting with a REPL or debugger
+  - keeping shell state across multiple incremental commands
+  - sending `Ctrl-C` or terminating a persistent foreground process
+- Do not use `Terminal*` for a single one-shot command, a single Python snippet, a single grep, or a single git command.
+- If you start a terminal session, keep the lifecycle disciplined:
+  - `TerminalStart`
+  - `TerminalWrite` / `TerminalRead` as needed
+  - `TerminalInterrupt` only when necessary
+  - `TerminalKill` when done
+
+# Recovery And Finalization
+
+## Failure Handling And Recovery
+
+- If a tool fails, react to that actual failure. Do not fabricate missing outputs.
+- After any tool call, wait for the returned tool response before deciding the next step.
+- If a value can be checked locally with `Bash`, prefer checking it over paraphrasing from a previous tool output.
+- If required tool calls have not been made yet, your only valid next move is another tool turn, not a partial result.
+- If the current plan is blocked by real evidence, update `plan.md`, revise the phase goal, or change the approach instead of pretending the blocker is resolved.
+
+## Finalization Discipline
+
+- The final result must satisfy the user's original request, not a simplified or reformulated version of it.
+- Match the user's stated output requirements exactly when they are explicit, including format, required fields, ordering constraints, style constraints, scope constraints, and any stated completion conditions.
+- If the user asks for a strict format such as JSON, Markdown, a table, bullet points, or a specific schema, the final result must follow that format exactly.
+- If the user asks for specific deliverables, make sure the final result covers those deliverables directly instead of replacing them with a generic summary.
+- If the user did not specify a strict final format, default to a clear, sufficiently detailed summary of what you did, what you found, what you changed or produced, and any important limitations or remaining gaps.
+- Do not end with a minimal or cryptic answer when the user expects an explanation of the completed work.
+- Final answers must be complete and self-contained enough for the user to understand the result directly.
+- You may reference local files you created or inspected, but do not make those files the only carrier of the answer.
+- When local artifacts matter, include the actual answer plus a concise summary of the relevant evidence, changes, or solution steps.
+- If the user explicitly requires specific tools, satisfy that requirement before producing the final result.
+- If the user asks for externally verified facts, gather evidence with the relevant web tools before producing the final result.
+- If page verification is required, do not produce the final result until a `WebFetch` response has been received.
+- When enough evidence has been collected, give the final result immediately.
+- Before emitting the final result text, make sure:
+  - the final result addresses the original user request directly
+  - all user-required tools have already been called
+  - any required page verification has already gone through `WebFetch`
+  - any required local computation has already been checked with `Bash`
+  - the final payload matches the user-required format exactly
+  - if JSON is required, the payload is a single valid JSON object with balanced braces, no trailing commas, and no extra closing characters
+  - there is no unfinished tool step still pending
+
+## Common Mistakes To Avoid
+
+- Do not produce the final result from search snippets when the task requires page verification.
+- Do not use `ScholarSearch` as a replacement for `WebFetch` on page-verification tasks.
+- Do not use `Terminal*` for one-shot work; prefer `Bash` or file tools.
+- Do not reach for `Bash` first when the task is simply "find matching files" or "search text in files"; use `Glob` or `Grep`.
+- Do not skip `ReadPDF` for local PDF figure tasks when `ReadPDF` can already give you the extracted text and local image paths you need.
+- Do not ignore path and working-directory implications when switching from file tools to `Bash`.
+- Do not output placeholder results such as `{"error":"waiting_for_required_tool_calls"}`, `TBD`, `{}`, or partial final JSON while tool work is still pending.
+- Do not claim a tool was used unless this run actually contains that tool call.
diff --git a/agent_base/provider_compat.py b/agent_base/provider_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..34c319f0568f6fe0ff4eccac06bff22a4bd352f0
--- /dev/null
+++ b/agent_base/provider_compat.py
@@ -0,0 +1,31 @@
+import re
+from typing import Any
+
+
+_MODEL_NAME_SPLIT_RE = re.compile(r"[/:\s]+")
+
+
+def model_rejects_sampling_params(model_name: str) -> bool:
+    normalized = str(model_name or "").strip().casefold()
+    if not normalized:
+        return False
+    parts = [part for part in _MODEL_NAME_SPLIT_RE.split(normalized) if part]
+    return any(part.startswith("claude") for part in parts)
+
+
+def apply_sampling_params(
+    request_kwargs: dict[str, Any],
+    *,
+    model_name: str,
+    temperature: Any = None,
+    top_p: Any = None,
+    presence_penalty: Any = None,
+) -> None:
+    if model_rejects_sampling_params(model_name):
+        return
+    if temperature is not None:
+        request_kwargs["temperature"] = temperature
+    if top_p is not None:
+        request_kwargs["top_p"] = top_p
+    if presence_penalty is not None:
+        request_kwargs["presence_penalty"] = presence_penalty
diff --git a/agent_base/react_agent.py b/agent_base/react_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d4debb17c6c4f7da0ef59588039d9b2ce1f762a
--- /dev/null
+++ b/agent_base/react_agent.py
@@ -0,0 +1,1453 @@
+import argparse
+from contextlib import contextmanager
+import json
+import os
+import re
+import signal
+import sys
+import threading
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Sequence, Type
+
+from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
+import tiktoken
+from agent_base.base import BaseAgent
+from agent_base.console_utils import ConsoleEventPrinter
+from agent_base.context_compact import compact_messages, should_compact_messages
+from agent_base.model_profiles import resolve_model_profile
+from agent_base.provider_compat import apply_sampling_params
+from agent_base.prompt import composed_system_prompt
+from agent_base.session_state import AgentSessionState, CompactionRecord, persist_session_state, resolve_session_state_path
+from agent_base.trace_utils import FlatTraceWriter
+from agent_base.tools.tooling import normalize_workspace_root
+from agent_base.tools.tool_file import Edit, Glob, Grep, Read, ReadImage, ReadPDF, Write
+from agent_base.tools.tool_runtime import Bash, TerminalInterrupt, TerminalKill, TerminalRead, TerminalStart, TerminalWrite
+from agent_base.tools.tool_user import AskUser
+from agent_base.tools.tool_web import ScholarSearch, WebFetch, WebSearch
+from agent_base.utils import (
+    PROJECT_ROOT,
+    MissingRequiredEnvError,
+    append_saved_image_paths_to_prompt,
+    env_flag,
+    image_input_content_parts,
+    load_dotenv,
+    read_role_prompt_files,
+    require_required_env,
+    safe_jsonable,
+    stage_image_file_for_input,
+)
+
+import datetime
+import random
+import time
+
+AVAILABLE_TOOLS = [
+    Glob(),
+    Grep(),
+    Read(),
+    ReadPDF(),
+    ReadImage(),
+    Write(),
+    Edit(),
+    Bash(),
+    WebSearch(),
+    ScholarSearch(),
+    WebFetch(),
+    AskUser(),
+    TerminalStart(),
+    TerminalWrite(),
+    TerminalRead(),
+    TerminalInterrupt(),
+    TerminalKill(),
+]
+AVAILABLE_TOOL_MAP = {tool.name: tool for tool in AVAILABLE_TOOLS}
+DEFAULT_IMAGE_TOKEN_ESTIMATE = 1536
+DEFAULT_MODEL_NAME = "gpt-5.4"
+DEFAULT_MAX_LLM_CALLS = 100
+DEFAULT_MAX_ROUNDS = 100
+DEFAULT_MAX_RUNTIME_SECONDS = 150 * 60
+DEFAULT_MAX_OUTPUT_TOKENS = 10000
+DEFAULT_MAX_INPUT_TOKENS = 320000
+DEFAULT_MAX_RETRIES = 10
+DEFAULT_TEMPERATURE = 0.6
+DEFAULT_TOP_P = 0.95
+DEFAULT_PRESENCE_PENALTY = 1.1
+DEFAULT_LLM_TIMEOUT_SECONDS = 600.0
+
+
+class LLMHardTimeoutError(TimeoutError):
+    pass
+
+
+@contextmanager
+def llm_hard_timeout(timeout_seconds: float):
+    if (
+        timeout_seconds <= 0
+        or threading.current_thread() is not threading.main_thread()
+        or not hasattr(signal, "SIGALRM")
+    ):
+        yield
+        return
+
+    def _handle_timeout(signum, frame):
+        raise LLMHardTimeoutError(f"LLM request exceeded hard timeout of {timeout_seconds:.1f}s")
+
+    previous_handler = signal.getsignal(signal.SIGALRM)
+    previous_timer = signal.getitimer(signal.ITIMER_REAL)
+    signal.signal(signal.SIGALRM, _handle_timeout)
+    signal.setitimer(signal.ITIMER_REAL, timeout_seconds)
+    try:
+        yield
+    finally:
+        signal.setitimer(signal.ITIMER_REAL, 0)
+        signal.signal(signal.SIGALRM, previous_handler)
+        if previous_timer[0] > 0:
+            signal.setitimer(signal.ITIMER_REAL, previous_timer[0], previous_timer[1])
+
+
+def today_date():
+    return datetime.date.today().strftime("%Y-%m-%d")
+
+
+def max_llm_calls_per_run() -> int:
+    return int(os.getenv("MAX_LLM_CALL_PER_RUN") or DEFAULT_MAX_LLM_CALLS)  # unset or empty value -> default
+
+
+def max_agent_rounds() -> int:
+    return int(os.getenv("MAX_AGENT_ROUNDS") or DEFAULT_MAX_ROUNDS)  # unset or empty value -> default
+
+
+def max_agent_runtime_seconds() -> int:
+    return int(os.getenv("MAX_AGENT_RUNTIME_SECONDS") or DEFAULT_MAX_RUNTIME_SECONDS)  # unset or empty value -> default
+
+
+def llm_max_output_tokens() -> int:
+    return int(os.getenv("LLM_MAX_OUTPUT_TOKENS") or DEFAULT_MAX_OUTPUT_TOKENS)  # unset or empty value -> default
+
+
+def remaining_runtime_seconds(runtime_deadline: Optional[float]) -> Optional[float]:
+    if runtime_deadline is None:
+        return None
+    return runtime_deadline - time.time()
+
+
+def debug_enabled() -> bool:
+    return env_flag("DEBUG_AGENT")
+
+
+def assistant_text_content(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        text_parts: list[str] = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                text_parts.append(str(part.get("text", "")))
+            else:
+                text_parts.append(str(part))
+        return "".join(text_parts)
+    return str(content)
+
+
+def message_trace_text(content: Any) -> str:
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content)
+    text_parts: list[str] = []
+    for part in content:
+        if not isinstance(part, dict):
+            text_parts.append(str(part))
+            continue
+        part_type = part.get("type")
+        if part_type == "text":
+            text_parts.append(str(part.get("text", "")))
+        elif part_type == "image_url":
+            image_url = part.get("image_url", {})
+            url = image_url.get("url", "") if isinstance(image_url, dict) else ""
+            url_text = str(url)
+            if url_text.startswith("data:image/"):
+                url_text = url_text.split(",", 1)[0] + ",...(base64 omitted)"
+            text_parts.append(f"[image_url: {url_text}]")
+        else:
+            text_parts.append(str(part))
+    return "\n".join(text for text in text_parts if text)
+
+
+def _message_has_image_content(message: dict[str, Any]) -> bool:
+    content = message.get("content")
+    return isinstance(content, list) and any(isinstance(part, dict) and part.get("type") == "image_url" for part in content)
+
+
+def _last_assistant_message_index(messages: Sequence[dict[str, Any]]) -> int:
+    for index in range(len(messages) - 1, -1, -1):
+        if isinstance(messages[index], dict) and messages[index].get("role") == "assistant":
+            return index
+    return -1
+
+
+def _image_reference_summary(part: dict[str, Any]) -> str:
+    image_url = part.get("image_url", {})
+    url = image_url.get("url", "") if isinstance(image_url, dict) else ""
+    url_text = str(url)
+    if url_text.startswith("data:image/"):
+        return url_text.split(",", 1)[0] + ",...(base64 omitted)"
+    elif len(url_text) > 180:
+        return url_text[:180] + "...(truncated)"
+    return url_text or "unavailable"
+
+
+def _image_path_hint_from_text(text: str) -> str:
+    patterns = (
+        r"\[User-provided image saved at ([^\]\n]+)\]",
+        r"Local image path:\s*([^\n]+)",
+    )
+    for pattern in patterns:
+        match = re.search(pattern, text)
+        if match:
+            return match.group(1).strip()
+    return ""
+
+
+def _omitted_image_part_text(part: dict[str, Any], *, saved_path_hint: str = "") -> str:
+    url_text = _image_reference_summary(part)
+    path_text = f" Saved local path: {saved_path_hint}." if saved_path_hint else ""
+    return (
+        "[Previous image omitted from this model request to avoid repeatedly resending image bytes. "
+        f"Original image reference: {url_text}. "
+        f"{path_text} "
+        "The nearby conversation text or tool metadata records saved local paths when available; "
+        "use ReadImage on the saved path if visual details are needed again.]"
+    )
+
+
+def _replace_image_parts_with_text(content: Any, *, message_index: int) -> tuple[Any, list[dict[str, Any]]]:
+    if not isinstance(content, list):
+        return content, []
+    replacement: list[Any] = []
+    omitted_images: list[dict[str, Any]] = []
+    image_index = 0
+    last_text_path_hint = ""
+    for part in content:
+        if isinstance(part, dict) and part.get("type") == "text":
+            path_hint = _image_path_hint_from_text(str(part.get("text", "")))
+            if path_hint:
+                last_text_path_hint = path_hint
+        if isinstance(part, dict) and part.get("type") == "image_url":
+            omitted_images.append(
+                {
+                    "message_index": message_index,
+                    "image_index": image_index,
+                    "reference_summary": _image_reference_summary(part),
+                    "saved_path_hint": last_text_path_hint,
+                }
+            )
+            replacement.append({"type": "text", "text": _omitted_image_part_text(part, saved_path_hint=last_text_path_hint)})
+        else:
+            replacement.append(safe_jsonable(part))
+        if isinstance(part, dict) and part.get("type") == "image_url":
+            image_index += 1
+    return replacement, omitted_images
+
+
+def prepare_messages_for_llm(messages: Sequence[dict[str, Any]]) -> tuple[list[dict[str, Any]], dict[str, Any]]:
+    """Return request messages with old image bytes replaced by text references.
+
+    Image content parts are only needed immediately after they enter the
+    conversation. Older image parts stay represented as text so the agent can
+    re-read saved paths with ReadImage without resending the image every round.
+    """
+    last_assistant_index = _last_assistant_message_index(messages)
+    request_messages: list[dict[str, Any]] = []
+    omitted_images: list[dict[str, Any]] = []
+    for index, raw_message in enumerate(messages):
+        message = safe_jsonable(raw_message)
+        if not isinstance(message, dict):
+            request_messages.append({"role": "user", "content": str(message)})
+            continue
+        if index <= last_assistant_index and _message_has_image_content(message):
+            message = dict(message)
+            message["content"], message_omitted_images = _replace_image_parts_with_text(
+                message.get("content"),
+                message_index=index,
+            )
+            omitted_images.extend(message_omitted_images)
+        request_messages.append(message)
+    image_aging = {
+        "omitted_image_count": len(omitted_images),
+        "omitted_images": omitted_images,
+    }
+    return request_messages, image_aging
+
+
+def assistant_reasoning_content(message: Any) -> Optional[Any]:
+    if hasattr(message, "model_dump"):
+        try:
+            dumped = safe_jsonable(message.model_dump())
+            if isinstance(dumped, dict) and "reasoning_content" in dumped:
+                return dumped.get("reasoning_content")
+        except Exception:
+            pass
+    model_extra = getattr(message, "model_extra", None)
+    if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
+        return safe_jsonable(model_extra.get("reasoning_content"))
+    raw_reasoning = getattr(message, "reasoning_content", None)
+    if raw_reasoning is None:
+        return None
+    return safe_jsonable(raw_reasoning)
+
+
+def assistant_has_meaningful_text(content: Any) -> bool:
+    return bool(assistant_text_content(content).strip())
+
+
+def input_tokens_from_usage(usage: Any) -> Optional[int]:
+    if not isinstance(usage, dict):
+        return None
+    for key in ("prompt_tokens", "input_tokens"):
+        value = usage.get(key)
+        if isinstance(value, int):
+            return value
+    return None
+
+
+def llm_call_trace_payload(
+    *,
+    request_messages: Sequence[dict[str, Any]],
+    image_aging: Optional[dict[str, Any]] = None,
+    response: Any,
+    model_name: str,
+    native_tools: Sequence[dict[str, Any]],
+) -> dict[str, Any]:
+    payload = {
+        "model_name": model_name,
+        "request_messages": safe_jsonable(list(request_messages)),
+        "tools_enabled": bool(native_tools),
+        "native_tools": safe_jsonable(list(native_tools)),
+        "response": safe_jsonable(response),
+    }
+    if image_aging and int(image_aging.get("omitted_image_count", 0) or 0) > 0:
+        payload["image_aging"] = safe_jsonable(image_aging)
+    return payload
+
+
+def compaction_trace_payload(
+    *,
+    trigger_reason: str,
+    outcome: Any,
+) -> dict[str, Any]:
+    """Build the trace payload describing one context-compaction attempt.
+
+    Every field is read from ``outcome`` with ``getattr`` so a missing
+    attribute falls back to an empty/zero default instead of raising.
+    Request, response and message snapshots go through ``safe_jsonable``.
+    """
+
+    def read(attribute: str, fallback: Any) -> Any:
+        return getattr(outcome, attribute, fallback)
+
+    def read_jsonable(attribute: str, fallback: Any) -> Any:
+        # Falsy values (None, empty containers) collapse to the fallback.
+        return safe_jsonable(read(attribute, fallback) or fallback)
+
+    payload: dict[str, Any] = {"trigger_reason": trigger_reason}
+    payload["status"] = read("status", "")
+    payload["error"] = read("error", "")
+    payload["prior_token_estimate"] = read("prior_token_estimate", 0)
+    payload["new_token_estimate"] = read("new_token_estimate", 0)
+    payload["compacted_group_count"] = read("compacted_group_count", 0)
+    payload["kept_group_count"] = read("kept_group_count", 0)
+    payload["existing_memory_text"] = read("existing_memory_text", "")
+    payload["summary_request"] = read_jsonable("summary_request", [])
+    payload["summary_response"] = read_jsonable("summary_response", {})
+    payload["summary_text"] = read("summary_text", "")
+    payload["pre_messages"] = read_jsonable("pre_messages", [])
+    payload["post_messages"] = read_jsonable("post_messages", [])
+    return payload
+
+
+def legacy_protocol_error(content: str) -> Optional[str]:
+    """Detect assistant text that starts with a deprecated text-protocol tag.
+
+    Args:
+        content: Assistant message text. Non-string values (for example
+            ``None`` or a list of content parts) are treated as carrying no
+            legacy markup instead of raising.
+
+    Returns:
+        An error description naming the offending tag when the text, after
+        leading whitespace is removed, begins with ``<tool_call>``,
+        ``<tool_response>``, ``<think>`` or ``<answer>``; otherwise ``None``.
+    """
+    if not isinstance(content, str):
+        return None
+    stripped = content.lstrip()
+    for tag in ("<tool_call>", "<tool_response>", "<think>", "<answer>"):
+        if stripped.startswith(tag):
+            return f"assistant emitted deprecated text {tag} protocol"
+    return None
+
+
+def tool_schema(tool: Any) -> dict[str, Any]:
+    """Describe a tool as a ``"function"``-typed schema entry.
+
+    The tool's ``name``, ``description`` and ``parameters`` attributes are
+    copied into the nested ``"function"`` mapping.
+    """
+    function_spec = {
+        "name": tool.name,
+        "description": tool.description,
+        "parameters": tool.parameters,
+    }
+    return {"type": "function", "function": function_spec}
+
+
+def resolved_tool_names(function_list: Optional[Sequence[str]]) -> list[str]:
+    """Normalize a requested tool list into clean tool names.
+
+    ``None`` selects every key of ``AVAILABLE_TOOL_MAP``. Otherwise each
+    entry is converted to ``str`` and stripped, and entries that end up
+    empty are dropped. Order is preserved.
+    """
+    if function_list is None:
+        return list(AVAILABLE_TOOL_MAP.keys())
+    stripped_names = (str(entry).strip() for entry in function_list)
+    return [name for name in stripped_names if name]
+
+
+def available_tool_schemas(function_list: Optional[Sequence[str]] = None) -> list[dict[str, Any]]:
+    """Return the schema of every requested tool, in request order.
+
+    Names are resolved with ``resolved_tool_names`` and looked up in
+    ``AVAILABLE_TOOL_MAP``; a name missing from the map raises ``KeyError``.
+    """
+    schemas: list[dict[str, Any]] = []
+    for tool_name in resolved_tool_names(function_list):
+        schemas.append(tool_schema(AVAILABLE_TOOL_MAP[tool_name]))
+    return schemas
+
+
+def normalized_tool_call(tool_call: Any) -> dict[str, Any]:
+    """Convert a tool-call object into a plain ``"function"`` call dict.
+
+    A missing ``id`` attribute becomes an empty string; the function name
+    and arguments are read from ``tool_call.function``.
+    """
+    call_id = getattr(tool_call, "id", "")
+    function_part = tool_call.function
+    function_payload = {
+        "name": function_part.name,
+        "arguments": function_part.arguments,
+    }
+    return {"id": call_id, "type": "function", "function": function_payload}
+
+
+def tool_result_message_content(result: Any) -> str:
+    """Render a tool result as the string content of a tool message.
+
+    Image tool results contribute only their stripped ``"text"`` (or a
+    fixed placeholder when that is empty). Other dicts and lists are
+    serialized as JSON without ASCII escaping; anything else goes through
+    ``str``.
+    """
+    is_mapping = isinstance(result, dict)
+    if is_mapping and result.get("kind") == "image_tool_result":
+        image_text = str(result.get("text", "")).strip()
+        return image_text if image_text else "ReadImage returned no metadata."
+    if is_mapping or isinstance(result, list):
+        return json.dumps(safe_jsonable(result), ensure_ascii=False)
+    return str(result)
+
+
+def model_supports_runtime_image_parts(model_name: str) -> bool:
+    """Tell whether image content parts may be sent to the given model.
+
+    Any model whose case-folded name contains ``"deepseek"`` is treated as
+    not accepting them; every other name (including an empty one) is.
+    """
+    lowered = str(model_name or "").strip().casefold()
+    return "deepseek" not in lowered
+
+
+def image_context_message(result: Any, model_name: str) -> Optional[dict[str, Any]]:
+    """Build the follow-up user message that carries a ReadImage result.
+
+    Returns ``None`` for anything that is not an image tool result, and also
+    when the model accepts image parts but the result has no image URL.
+    For models that do not accept image parts the message is text-only and
+    says so; otherwise it contains a text part followed by an ``image_url``
+    part.
+    """
+    if not isinstance(result, dict):
+        return None
+    if result.get("kind") != "image_tool_result":
+        return None
+    image_url = str(result.get("image_url", "")).strip()
+    accepts_image_parts = model_supports_runtime_image_parts(model_name)
+    if accepts_image_parts and not image_url:
+        return None
+
+    segments = [
+        "Runtime image context from ReadImage.\n"
+        "Use the attached image as evidence produced by that tool call when deciding the next step or final result.\n"
+        "Do not assume that all required tool work is complete merely because an image is attached."
+    ]
+    metadata_text = str(result.get("text", "")).strip()
+    if metadata_text:
+        segments.append("\n\nReadImage metadata:\n" + metadata_text)
+
+    if not accepts_image_parts:
+        # Text-only fallback: the image itself cannot be forwarded.
+        segments.append(
+            "\n\nThis model endpoint does not accept runtime image content parts, so only the "
+            "ReadImage metadata is forwarded in conversation history. Do not invent visual details "
+            "that are not supported by the metadata."
+        )
+        return {"role": "user", "content": "".join(segments)}
+
+    text_part = {"type": "text", "text": "".join(segments)}
+    image_part = {"type": "image_url", "image_url": {"url": image_url, "detail": "auto"}}
+    return {"role": "user", "content": [text_part, image_part]}
+
+
+def api_tool_message(tool_call_id: str, result: Any) -> dict[str, Any]:
+    """Wrap a tool result as a ``"tool"``-role message for the given call id.
+
+    The content string is produced by ``tool_result_message_content``.
+    """
+    rendered = tool_result_message_content(result)
+    return {"role": "tool", "tool_call_id": tool_call_id, "content": rendered}
+
+
+def assistant_history_message(
+    *,
+    content: Any,
+    tool_calls: Optional[list[dict[str, Any]]] = None,
+    reasoning_content: Optional[Any] = None,
+    raw_message: Optional[dict[str, Any]] = None,
+) -> dict[str, Any]:
+    """Build the assistant message to store in conversation history.
+
+    When ``raw_message`` is a dict that ``safe_jsonable`` keeps as a dict,
+    that copy is the base: its role is forced to ``"assistant"``, ``content``
+    overrides the stored content unless it is ``None`` and a content key
+    already exists, and existing ``tool_calls`` / ``reasoning_content`` keys
+    win over the arguments but are removed when empty / ``None``.
+    Otherwise a fresh message is assembled from the arguments alone.
+    """
+    replayed = safe_jsonable(raw_message) if isinstance(raw_message, dict) else None
+
+    if not isinstance(replayed, dict):
+        fresh: dict[str, Any] = {"role": "assistant", "content": content}
+        if tool_calls:
+            fresh["tool_calls"] = tool_calls
+        if reasoning_content is not None:
+            fresh["reasoning_content"] = reasoning_content
+        return fresh
+
+    replayed["role"] = "assistant"
+    if content is not None or "content" not in replayed:
+        replayed["content"] = content
+
+    if "tool_calls" in replayed:
+        # Drop an empty/None tool_calls entry; keep a populated one as-is.
+        if not replayed["tool_calls"]:
+            del replayed["tool_calls"]
+    elif tool_calls:
+        replayed["tool_calls"] = tool_calls
+
+    if "reasoning_content" in replayed:
+        # Only an explicit None is removed; other stored values are kept.
+        if replayed["reasoning_content"] is None:
+            del replayed["reasoning_content"]
+    elif reasoning_content is not None:
+        replayed["reasoning_content"] = reasoning_content
+
+    return replayed
+
+
+def assistant_retry_history_message(
+    *,
+    content: Any,
+    reasoning_content: Optional[Any] = None,
+) -> Optional[dict[str, Any]]:
+    """Build a tool-call-free assistant message for retry/correction turns.
+
+    Returns ``None`` when there is neither reasoning content nor meaningful
+    text to keep.
+    """
+    has_reasoning = reasoning_content is not None
+    if not has_reasoning and not assistant_has_meaningful_text(content):
+        return None
+    # Retry/correction branches replay the assistant turn without its tool
+    # calls: provider-specific reasoning state survives, while the history
+    # never contains an unfinished (and therefore invalid) tool call.
+    text_only = assistant_text_content(content)
+    return assistant_history_message(content=text_only, reasoning_content=reasoning_content)
+
+
+def parse_tool_arguments_list(tool_calls: list[dict[str, Any]]) -> list[Any]:
+    """Decode the ``arguments`` of each tool call into JSON-safe values.
+
+    String arguments are parsed as JSON. When the parsed value is itself a
+    string that looks like a JSON object or array, it is parsed once more
+    (double-encoded arguments). Unparseable input is returned unchanged.
+    Entries that are not dicts, or lack arguments, yield an empty dict.
+    """
+
+    def _decode(raw: Any) -> Any:
+        if not isinstance(raw, str):
+            return raw
+        try:
+            first_pass = json.loads(raw)
+        except (TypeError, ValueError):
+            return raw
+        if not isinstance(first_pass, str):
+            return first_pass
+        candidate = first_pass.strip()
+        if not candidate.startswith(("{", "[")):
+            return first_pass
+        try:
+            return json.loads(candidate)
+        except (TypeError, ValueError):
+            return first_pass
+
+    decoded: list[Any] = []
+    for call in tool_calls:
+        function_block = call.get("function", {}) if isinstance(call, dict) else {}
+        raw_arguments = function_block.get("arguments", {})
+        decoded.append(safe_jsonable(_decode(raw_arguments)))
+    return decoded
+
+
+def image_trace_paths(result: Any) -> list[str]:
+    """Return the image path of an image tool result as a list.
+
+    The list has one element when the result is an image tool result with a
+    non-blank ``"path"``, and is empty in every other case.
+    """
+    if isinstance(result, dict) and result.get("kind") == "image_tool_result":
+        candidate = str(result.get("path", "")).strip()
+        if candidate:
+            return [candidate]
+    return []
+
+
+def image_context_trace_text(result: Any) -> str:
+    """Return the trace text describing a ReadImage result.
+
+    Yields an empty string for anything that is not an image tool result.
+    Otherwise returns the fixed runtime-image notice, followed by the
+    result's stripped ``"text"`` metadata when that is non-empty.
+    """
+    is_image_result = isinstance(result, dict) and result.get("kind") == "image_tool_result"
+    if not is_image_result:
+        return ""
+    notice = (
+        "Runtime image context from ReadImage.\n"
+        "Use the attached image as evidence produced by that tool call when deciding the next step or final result.\n"
+        "Do not assume that all required tool work is complete merely because an image is attached."
+    )
+    metadata = str(result.get("text", "")).strip()
+    if not metadata:
+        return notice
+    return notice + "\n\nReadImage metadata:\n" + metadata
+
+
+def default_llm_config() -> dict:
+    """Assemble the default LLM configuration from environment variables.
+
+    Each setting falls back to its module-level ``DEFAULT_*`` constant when
+    the variable is unset (``API_KEY`` falls back to ``"EMPTY"`` and
+    ``API_BASE`` to ``None``). Numeric variables are converted with
+    ``int``/``float``, so a malformed value raises ``ValueError``.
+    """
+    env = os.environ
+
+    def as_float(variable: str, fallback: Any) -> float:
+        return float(env.get(variable, str(fallback)))
+
+    def as_int(variable: str, fallback: Any) -> int:
+        return int(env.get(variable, str(fallback)))
+
+    config: dict = {}
+    config["model"] = env.get("MODEL_NAME", DEFAULT_MODEL_NAME)
+    config["api_key"] = env.get("API_KEY", "EMPTY")
+    config["api_base"] = env.get("API_BASE")
+    config["timeout_seconds"] = as_float("LLM_TIMEOUT_SECONDS", DEFAULT_LLM_TIMEOUT_SECONDS)
+    config["generate_cfg"] = {
+        "max_input_tokens": as_int("MAX_INPUT_TOKENS", DEFAULT_MAX_INPUT_TOKENS),
+        "max_output_tokens": as_int("LLM_MAX_OUTPUT_TOKENS", DEFAULT_MAX_OUTPUT_TOKENS),
+        "max_retries": as_int("LLM_MAX_RETRIES", DEFAULT_MAX_RETRIES),
+        "temperature": as_float("TEMPERATURE", DEFAULT_TEMPERATURE),
+        "top_p": as_float("TOP_P", DEFAULT_TOP_P),
+        "presence_penalty": as_float("PRESENCE_PENALTY", DEFAULT_PRESENCE_PENALTY),
+    }
+    return config
+
+
+def execute_tool_by_name(tool_map: dict[str, Any], tool_name: str, tool_args: Any, **kwargs):
+    """Invoke the named tool from ``tool_map`` and return its result.
+
+    An unknown name yields an error string rather than an exception. The
+    ``ReadImage`` tool is called through ``call_for_llm`` when it provides
+    that method; every other tool (or case) uses ``call``.
+    """
+    if tool_name in tool_map:
+        tool = tool_map[tool_name]
+        use_llm_entry = tool_name == "ReadImage" and hasattr(tool, "call_for_llm")
+        handler = tool.call_for_llm if use_llm_entry else tool.call
+        return handler(tool_args, **kwargs)
+    return f"Error: Tool {tool_name} not found"
+
+
+class MultiTurnReactAgent(BaseAgent):
+    def __init__(
+        self,
+        function_list: Optional[List[str]] = None,
+        llm: Optional[Dict] = None,
+        trace_dir: Optional[str] = None,
+        role_prompt: Optional[str] = None,
+        max_llm_calls: Optional[int] = None,
+        max_rounds: Optional[int] = None,
+        max_runtime_seconds: Optional[int] = None,
+    ):
+        """Validate the configuration and prepare tools, limits and LLM client.
+
+        Raises:
+            ValueError: If ``llm`` is not a dict, a requested tool is unknown,
+                ``llm["model"]`` is missing or blank, ``llm["generate_cfg"]``
+                is not a dict, or the resolved ``max_rounds`` is not positive.
+        """
+        if not isinstance(llm, dict):
+            raise ValueError("llm must be a dict configuration.")
+
+        # Tool selection: None means "every registered tool".
+        selected_tools = self.resolve_function_list(function_list)
+        if selected_tools is None:
+            selected_tools = list(AVAILABLE_TOOL_MAP.keys())
+        unregistered = [name for name in selected_tools if name not in AVAILABLE_TOOL_MAP]
+        if unregistered:
+            raise ValueError(f"Unknown tools requested: {unregistered}")
+
+        if not ("model" in llm and str(llm["model"]).strip()):
+            raise ValueError('llm["model"] must be a non-empty string.')
+        generate_cfg = llm.get("generate_cfg")
+        if not isinstance(generate_cfg, dict):
+            raise ValueError('llm["generate_cfg"] must be a dict.')
+
+        self.tool_map = {name: AVAILABLE_TOOL_MAP[name] for name in selected_tools}
+        self.tool_names = list(self.tool_map)
+        self.model = str(llm["model"])
+        self.llm_generate_cfg = generate_cfg
+        self.trace_dir = Path(trace_dir) if trace_dir else None
+        self.trace_path: Optional[Path] = None
+        self.session_state_path: Optional[Path] = None
+        self.role_prompt = self.resolve_role_prompt(role_prompt)
+
+        # Explicit limits win; otherwise fall back to the module-level
+        # limit helpers.
+        if max_llm_calls is None:
+            self.max_llm_calls = max_llm_calls_per_run()
+        else:
+            self.max_llm_calls = int(max_llm_calls)
+        if max_rounds is None:
+            self.max_rounds = max_agent_rounds()
+        else:
+            self.max_rounds = int(max_rounds)
+        if max_runtime_seconds is None:
+            self.max_runtime_seconds = max_agent_runtime_seconds()
+        else:
+            self.max_runtime_seconds = int(max_runtime_seconds)
+        if self.max_rounds <= 0:
+            raise ValueError("max_rounds must be > 0.")
+
+        # The tool schemas are fixed per agent, so their token cost is
+        # computed once here and reused by count_tokens.
+        self._native_tools = [tool_schema(tool) for tool in self.tool_map.values()]
+        self._encoding = tiktoken.get_encoding("cl100k_base")
+        serialized_tools = json.dumps(self._native_tools, ensure_ascii=False)
+        self._native_tools_token_estimate = len(self._encoding.encode(serialized_tools))
+
+        if "timeout_seconds" in llm:
+            raw_timeout = llm["timeout_seconds"]
+        else:
+            raw_timeout = os.getenv("LLM_TIMEOUT_SECONDS", str(DEFAULT_LLM_TIMEOUT_SECONDS))
+        self._llm_timeout_seconds = float(raw_timeout)
+        self._llm_api_key = str(llm.get("api_key") or os.environ.get("API_KEY", "EMPTY"))
+        configured_base = str(llm.get("api_base") or os.environ.get("API_BASE", "")).strip()
+        self._llm_api_base = configured_base or None
+
+        # Without an API base no client is created; _call_chat_completion
+        # reports that as an error instead of attempting a request.
+        self._llm_client = None
+        if self._llm_api_base:
+            self._llm_client = OpenAI(
+                api_key=self._llm_api_key,
+                base_url=self._llm_api_base,
+                timeout=self._llm_timeout_seconds,
+            )
+
+    def _call_chat_completion(
+        self,
+        msgs,
+        *,
+        include_native_tools: bool,
+        max_tries=10,
+        runtime_deadline: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+    ) -> dict[str, Any]:
+        """Send one chat-completions request, retrying on failures.
+
+        Args:
+            msgs: Messages to send as the request's ``messages``.
+            include_native_tools: Attach the agent's tool schemas (with
+                ``tool_choice="auto"`` and parallel tool calls) when True.
+            max_tries: Attempt count used only when the generate config has
+                no ``"max_retries"`` entry.
+            runtime_deadline: Optional deadline passed to
+                ``remaining_runtime_seconds``; it caps request timeouts and
+                backoff sleeps and stops retrying once exhausted.
+            max_output_tokens: Overrides the configured output-token limit.
+            temperature: Overrides the configured sampling temperature.
+            top_p: Overrides the configured nucleus-sampling value.
+            presence_penalty: Overrides the configured presence penalty.
+
+        Returns:
+            ``{"status": "ok", ...}`` with ``finish_reason``, ``content``,
+            ``tool_calls``, ``reasoning_content``, ``raw_message`` and
+            ``usage`` for the first response carrying text or tool calls;
+            otherwise ``{"status": "error", "error": "llm api error: ..."}``.
+        """
+        # NOTE(review): a configured "max_retries" takes precedence over the
+        # max_tries argument, so callers' explicit values are only a fallback.
+        max_tries = int(self.llm_generate_cfg.get("max_retries", max_tries))
+        if self._llm_client is None or not self._llm_api_base:
+            return {"status": "error", "error": "llm api error: API_BASE is not set."}
+
+        base_sleep_time = 1
+        last_error = "unknown llm error"
+        for attempt in range(max_tries):
+            remaining = remaining_runtime_seconds(runtime_deadline)
+            if remaining is not None and remaining <= 0:
+                last_error = "agent runtime limit reached before llm call could complete"
+                break
+            try:
+                if debug_enabled():
+                    print(f"--- Attempting to call the service, try {attempt + 1}/{max_tries} ---")
+                # Never wait longer than the remaining runtime budget; the
+                # small floor keeps the timeout strictly positive.
+                request_timeout = (
+                    min(self._llm_timeout_seconds, max(remaining, 0.001))
+                    if remaining is not None
+                    else self._llm_timeout_seconds
+                )
+                request_client = self._llm_client.with_options(timeout=request_timeout)
+                request_kwargs = dict(
+                    model=self.model,
+                    messages=msgs,
+                    max_tokens=int(
+                        max_output_tokens
+                        if max_output_tokens is not None
+                        else self.llm_generate_cfg.get("max_output_tokens", llm_max_output_tokens())
+                    ),
+                )
+                apply_sampling_params(
+                    request_kwargs,
+                    model_name=self.model,
+                    temperature=(
+                        temperature if temperature is not None else self.llm_generate_cfg.get("temperature", 0.6)
+                    ),
+                    top_p=top_p if top_p is not None else self.llm_generate_cfg.get("top_p", 0.95),
+                    presence_penalty=(
+                        presence_penalty
+                        if presence_penalty is not None
+                        else self.llm_generate_cfg.get("presence_penalty", 1.1)
+                    ),
+                )
+                if include_native_tools and self._native_tools:
+                    request_kwargs["tools"] = self._native_tools
+                    request_kwargs["tool_choice"] = "auto"
+                    request_kwargs["parallel_tool_calls"] = True
+                with llm_hard_timeout(request_timeout):
+                    chat_response = request_client.chat.completions.create(**request_kwargs)
+
+                # A response with no choices (or a choice without a message)
+                # would raise IndexError/TypeError/AttributeError, which the
+                # handler below does not catch and which would abort the
+                # retry loop. Treat it as an empty response and retry.
+                choices = getattr(chat_response, "choices", None) or []
+                choice = choices[0] if choices else None
+                message = getattr(choice, "message", None)
+                if message is not None:
+                    content = message.content
+                    tool_calls = [normalized_tool_call(tool_call) for tool_call in (message.tool_calls or [])]
+                    reasoning_content = assistant_reasoning_content(message)
+                    raw_message = safe_jsonable(message.model_dump()) if hasattr(message, "model_dump") else None
+                    usage = (
+                        safe_jsonable(chat_response.usage.model_dump())
+                        if getattr(chat_response, "usage", None)
+                        else None
+                    )
+
+                    if assistant_has_meaningful_text(content) or tool_calls:
+                        if debug_enabled():
+                            print("--- Service call successful, received a valid response ---")
+                        return {
+                            "status": "ok",
+                            "finish_reason": choice.finish_reason,
+                            "content": content,
+                            "tool_calls": tool_calls,
+                            "reasoning_content": reasoning_content,
+                            "raw_message": raw_message,
+                            "usage": usage,
+                        }
+
+                last_error = "empty response from llm api"
+                if debug_enabled():
+                    print(f"Warning: Attempt {attempt + 1} received an empty response.")
+
+            except (APIError, APIConnectionError, APITimeoutError, LLMHardTimeoutError) as e:
+                last_error = str(e)
+                if debug_enabled():
+                    print(f"Error: Attempt {attempt + 1} failed with an API or network error: {e}")
+
+            if attempt < max_tries - 1:
+                # Exponential backoff with jitter, capped at 30 seconds and
+                # at whatever runtime budget is left.
+                sleep_time = base_sleep_time * (2 ** attempt) + random.uniform(0, 1)
+                sleep_time = min(sleep_time, 30)
+                remaining = remaining_runtime_seconds(runtime_deadline)
+                if remaining is not None:
+                    if remaining <= 0:
+                        last_error = "agent runtime limit reached before llm retry could complete"
+                        break
+                    sleep_time = min(sleep_time, remaining)
+                if debug_enabled():
+                    print(f"Retrying in {sleep_time:.2f} seconds...")
+                if sleep_time > 0:
+                    time.sleep(sleep_time)
+            else:
+                if debug_enabled():
+                    print("Error: All retry attempts have been exhausted. The call has failed.")
+
+        return {"status": "error", "error": f"llm api error: {last_error}"}
+
+    def call_llm_api(self, msgs, max_tries=10, runtime_deadline: Optional[float] = None) -> dict[str, Any]:
+        """Run a chat completion for the agent loop with native tools enabled.
+
+        Delegates to ``_call_chat_completion`` and returns its result dict.
+        """
+        call_options = {
+            "include_native_tools": True,
+            "max_tries": max_tries,
+            "runtime_deadline": runtime_deadline,
+        }
+        return self._call_chat_completion(msgs, **call_options)
+
+    def call_compaction_api(
+        self,
+        msgs,
+        *,
+        runtime_deadline: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+    ) -> dict[str, Any]:
+        """Run a tool-free chat completion used for context compaction.
+
+        Delegates to ``_call_chat_completion`` with native tools disabled,
+        ``max_tries=3`` and fixed sampling values (temperature 0.0,
+        top_p 1.0, presence penalty 0.0).
+        """
+        fixed_sampling = {"temperature": 0.0, "top_p": 1.0, "presence_penalty": 0.0}
+        return self._call_chat_completion(
+            msgs,
+            include_native_tools=False,
+            max_tries=3,
+            runtime_deadline=runtime_deadline,
+            max_output_tokens=max_output_tokens,
+            **fixed_sampling,
+        )
+
+    def count_tokens(self, messages, *, include_tool_schema: bool = True):
+        """Estimate how many tokens a list of messages occupies.
+
+        Args:
+            messages: Message dicts; ``role``, ``content``, ``tool_calls``
+                and ``reasoning_content`` contribute to the estimate.
+            include_tool_schema: Add the precomputed token estimate of the
+                agent's tool schemas when True.
+
+        Returns:
+            The summed token estimate. Text is measured with the agent's
+            tiktoken encoding; each ``image_url`` content part counts as a
+            flat ``IMAGE_PART_TOKEN_ESTIMATE`` (environment override, else
+            ``DEFAULT_IMAGE_TOKEN_ESTIMATE``).
+        """
+        image_token_estimate = int(os.getenv("IMAGE_PART_TOKEN_ESTIMATE", str(DEFAULT_IMAGE_TOKEN_ESTIMATE)))
+
+        def encoded_length(text: str) -> int:
+            # Message text may contain special-token markers such as
+            # "<|endoftext|>" (e.g. from fetched pages or tool output).
+            # tiktoken raises ValueError on those by default;
+            # disallowed_special=() encodes them as ordinary text so token
+            # accounting cannot crash the run.
+            return len(self._encoding.encode(text, disallowed_special=()))
+
+        token_count = self._native_tools_token_estimate if include_tool_schema else 0
+        for message in messages:
+            token_count += encoded_length(message.get("role", ""))
+            content = message.get("content", "")
+            if isinstance(content, str):
+                token_count += encoded_length(content)
+            elif isinstance(content, list):
+                for part in content:
+                    if not isinstance(part, dict):
+                        token_count += encoded_length(str(part))
+                        continue
+                    part_type = part.get("type")
+                    if part_type == "text":
+                        token_count += encoded_length(str(part.get("text", "")))
+                    elif part_type == "image_url":
+                        # Images are charged a flat estimate, not tokenized.
+                        token_count += image_token_estimate
+                    else:
+                        token_count += encoded_length(str(part))
+            else:
+                token_count += encoded_length(str(content))
+            tool_calls = message.get("tool_calls")
+            if isinstance(tool_calls, list) and tool_calls:
+                token_count += encoded_length(json.dumps(tool_calls, ensure_ascii=False))
+            reasoning_content = message.get("reasoning_content")
+            if isinstance(reasoning_content, str) and reasoning_content:
+                token_count += encoded_length(reasoning_content)
+            elif reasoning_content is not None:
+                # Non-string reasoning payloads (and the empty string) are
+                # measured via their JSON serialization.
+                token_count += encoded_length(
+                    json.dumps(safe_jsonable(reasoning_content), ensure_ascii=False)
+                )
+        return token_count
+
+    def run(self, prompt: str, workspace_root: Optional[str] = None) -> str:
+        """Execute the agent for a single prompt and return its result text.
+
+        This is a thin wrapper over ``_run_session`` that discards everything
+        except the ``"result_text"`` entry of the session result.
+        """
+        session = self._run_session(prompt, workspace_root=workspace_root)
+        return session["result_text"]
+
+    def _run_session(
+        self,
+        prompt: str,
+        workspace_root: Optional[str] = None,
+        event_callback: Optional[Callable[[dict[str, Any]], None]] = None,
+        initial_content_parts: Optional[Sequence[dict[str, Any]]] = None,
+        prior_messages: Optional[Sequence[dict[str, Any]]] = None,
+        interrupt_event: Optional[threading.Event] = None,
+    ) -> dict:
+        """Internal execution path with trace data for tests and debugging."""
+        if not isinstance(prompt, str) or not prompt.strip():
+            raise ValueError("prompt must be a non-empty string.")
+
+        prompt_text = prompt.strip()
+        resolved_workspace_root = normalize_workspace_root(workspace_root)
+        start_time = time.time()
+        trace_dir = self.trace_dir
+        cur_date = today_date()
+        extra_blocks = [self.role_prompt] if self.role_prompt else None
+        system_prompt = composed_system_prompt(current_date=str(cur_date), extra_blocks=extra_blocks)
+        user_content = (
+            f"Current workspace root: {resolved_workspace_root}\n"
+            "Relative local file paths resolve from the workspace root.\n\n"
+            f"Prompt:\n{prompt_text}"
+        )
+        if initial_content_parts is not None:
+            if not isinstance(initial_content_parts, Sequence) or isinstance(initial_content_parts, (str, bytes)):
+                raise ValueError("initial_content_parts must be a sequence of OpenAI-style content part dicts.")
+            safe_initial_parts = safe_jsonable(list(initial_content_parts))
+            if not isinstance(safe_initial_parts, list) or not all(isinstance(part, dict) for part in safe_initial_parts):
+                raise ValueError("initial_content_parts must contain only dict content parts.")
+            user_content: Any = [{"type": "text", "text": user_content}, *safe_initial_parts]
+        continuing_conversation = prior_messages is not None
+        if continuing_conversation:
+            if not isinstance(prior_messages, Sequence) or isinstance(prior_messages, (str, bytes)):
+                raise ValueError("prior_messages must be a sequence of message dicts.")
+            safe_prior_messages = safe_jsonable(list(prior_messages))
+            if not isinstance(safe_prior_messages, list) or not all(isinstance(message, dict) for message in safe_prior_messages):
+                raise ValueError("prior_messages must contain only dict messages.")
+            messages = list(safe_prior_messages)
+            if not messages or messages[0].get("role") != "system":
+                messages.insert(0, {"role": "system", "content": system_prompt})
+            messages.append({"role": "user", "content": user_content})
+        else:
+            messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
+        max_llm_calls = self.max_llm_calls
+        max_input_tokens = int(self.llm_generate_cfg.get("max_input_tokens", DEFAULT_MAX_INPUT_TOKENS))
+        max_output_tokens = int(self.llm_generate_cfg.get("max_output_tokens", llm_max_output_tokens()))
+        compact_trigger_tokens = self.llm_generate_cfg.get("compact_trigger_tokens")
+        if compact_trigger_tokens is None:
+            compact_trigger_tokens = os.getenv("AUTO_COMPACT_TRIGGER_TOKENS", "128k")
+        model_profile = resolve_model_profile(
+            self.model,
+            configured_max_input_tokens=max_input_tokens,
+            configured_max_output_tokens=max_output_tokens,
+            compact_trigger_tokens=compact_trigger_tokens,
+        )
+        agent_runtime_limit = self.max_runtime_seconds
+        runtime_deadline = start_time + agent_runtime_limit
+        num_llm_calls_available = max_llm_calls
+        round_index = 0
+        trace_writer = FlatTraceWriter(
+            trace_dir=trace_dir,
+            model_name=self.model,
+            workspace_root=resolved_workspace_root,
+            on_event=event_callback,
+        )
+        self.trace_path = trace_writer.path
+        self.session_state_path = resolve_session_state_path(trace_dir) if trace_dir else None
+        session_state = AgentSessionState(
+            run_id=trace_writer.run_id,
+            model_name=self.model,
+            workspace_root=str(resolved_workspace_root),
+            prompt=prompt_text,
+            trace_path=str(self.trace_path) if self.trace_path else "",
+            llm_calls_remaining=num_llm_calls_available,
+            max_rounds=self.max_rounds,
+            max_input_tokens=max_input_tokens,
+            max_output_tokens=max_output_tokens,
+            model_profile=model_profile,
+        )
+
+        def persist_state(*, termination: str = "", error: str = "") -> None:
+            session_state.trace_path = str(self.trace_path) if self.trace_path else ""
+            session_state.turn_index = round_index
+            session_state.llm_calls_remaining = num_llm_calls_available
+            session_state.current_token_estimate = self.count_tokens(messages)
+            session_state.termination = termination
+            session_state.error = error
+            session_state.capture_messages(messages)
+            if self.session_state_path:
+                persist_session_state(self.session_state_path, session_state)
+
+        def finalize(result_text: str, termination: str, *, role: str = "runtime", error: str = "") -> dict[str, Any]:
+            trace_writer.append(
+                role=role,
+                text=result_text,
+                turn_index=round_index,
+                termination=termination,
+                error=error,
+            )
+            persist_state(termination=termination, error=error)
+            return {
+                "prompt": prompt_text,
+                "messages": messages,
+                "result_text": result_text,
+                "termination": termination,
+                "trace_path": str(self.trace_path) if self.trace_path else "",
+                "session_state_path": str(self.session_state_path) if self.session_state_path else "",
+            }
+
+        def interruption_requested() -> bool:
+            return bool(interrupt_event is not None and interrupt_event.is_set())
+
+        def finalize_interrupted() -> dict[str, Any]:
+            return finalize(
+                "Interrupted by user. Continue with a follow-up prompt to resume from the current context.",
+                "interrupted",
+                role="runtime",
+                error="user interrupt",
+            )
+
+        if continuing_conversation:
+            trace_writer.append(
+                role="runtime",
+                text="Continuing existing conversation with prior messages.",
+                turn_index=0,
+            )
+        else:
+            trace_writer.append(role="system", text=system_prompt, turn_index=0)
+        trace_writer.append(role="user", text=message_trace_text(user_content), turn_index=0)
+        persist_state()
+
+        while num_llm_calls_available > 0 and round_index < self.max_rounds:
+            if interruption_requested():
+                return finalize_interrupted()
+            if remaining_runtime_seconds(runtime_deadline) is not None and remaining_runtime_seconds(runtime_deadline) <= 0:
+                result_text = "No result found before the maximum agent runtime limit."
+                termination = f"agent runtime limit reached: {agent_runtime_limit}s"
+                return finalize(result_text, termination, error=termination)
+            current_token_estimate = self.count_tokens(messages)
+            should_compact = False
+            compact_reason = ""
+            if len(messages) > 2:
+                should_compact, compact_reason = should_compact_messages(
+                    last_input_tokens=session_state.last_input_tokens,
+                    current_token_estimate=current_token_estimate,
+                    model_profile=model_profile,
+                )
+            if should_compact:
+                trace_writer.append(
+                    role="runtime",
+                    text=(
+                        "Runtime note: compacting earlier conversation history before the next model call "
+                        f"because the {compact_reason} budget crossed the pre-limit threshold."
+                    ),
+                    turn_index=round_index,
+                )
+                compact_outcome = compact_messages(
+                    messages=messages,
+                    original_prompt_text=prompt_text,
+                    model_name=self.model,
+                    model_profile=model_profile,
+                    llm_caller=self.call_compaction_api,
+                    token_counter=self.count_tokens,
+                    runtime_deadline=runtime_deadline,
+                )
+                if compact_outcome.status == "ok":
+                    messages = compact_outcome.compacted_messages
+                    session_state.last_input_tokens = None
+                    session_state.compactions.append(
+                        CompactionRecord(
+                            turn_index=round_index,
+                            status="ok",
+                            trigger_reason=compact_reason,
+                            prior_token_estimate=compact_outcome.prior_token_estimate,
+                            prior_message_count=len(session_state.messages),
+                            compacted_group_count=compact_outcome.compacted_group_count,
+                            kept_group_count=compact_outcome.kept_group_count,
+                            new_token_estimate=compact_outcome.new_token_estimate,
+                            new_message_count=len(messages),
+                            summary_text=compact_outcome.summary_text,
+                        )
+                    )
+                    trace_writer.append(
+                        role="runtime",
+                        text=(
+                            "Runtime note: context compaction completed. "
+                            f"Token estimate {compact_outcome.prior_token_estimate} -> {compact_outcome.new_token_estimate}. "
+                            f"Compacted {compact_outcome.compacted_group_count} older turn groups."
+                        ),
+                        turn_index=round_index,
+                        capture_type="compaction",
+                        payload=compaction_trace_payload(trigger_reason=compact_reason, outcome=compact_outcome),
+                    )
+                    persist_state()
+                    current_token_estimate = compact_outcome.new_token_estimate
+                else:
+                    session_state.compactions.append(
+                        CompactionRecord(
+                            turn_index=round_index,
+                            status="error",
+                            trigger_reason=compact_reason,
+                            prior_token_estimate=compact_outcome.prior_token_estimate,
+                            prior_message_count=len(messages),
+                            compacted_group_count=compact_outcome.compacted_group_count,
+                            kept_group_count=compact_outcome.kept_group_count,
+                            error=compact_outcome.error,
+                        )
+                    )
+                    trace_writer.append(
+                        role="runtime",
+                        text="Runtime note: context compaction failed; the existing history was kept unchanged.",
+                        turn_index=round_index,
+                        error=compact_outcome.error,
+                        capture_type="compaction",
+                        payload=compaction_trace_payload(trigger_reason=compact_reason, outcome=compact_outcome),
+                    )
+                    persist_state(error=compact_outcome.error)
+            if current_token_estimate > max_input_tokens:
+                result_text = "No result found before the maximum input token limit."
+                termination = f"input token limit reached: {current_token_estimate} > {max_input_tokens}"
+                return finalize(result_text, termination, error=termination)
+            if interruption_requested():
+                return finalize_interrupted()
+            round_index += 1
+            num_llm_calls_available -= 1
+            llm_request_messages, image_aging = prepare_messages_for_llm(messages)
+            try:
+                llm_reply = self.call_llm_api(llm_request_messages, runtime_deadline=runtime_deadline)
+            except KeyboardInterrupt:
+                return finalize_interrupted()
+            if interruption_requested():
+                return finalize_interrupted()
+            trace_writer.append(
+                role="runtime",
+                text="",
+                turn_index=round_index,
+                capture_type="llm_call",
+                payload=llm_call_trace_payload(
+                    request_messages=llm_request_messages,
+                    image_aging=image_aging,
+                    response=llm_reply,
+                    model_name=self.model,
+                    native_tools=self._native_tools,
+                ),
+            )
+            session_state.last_input_tokens = input_tokens_from_usage(
+                llm_reply.get("usage") if isinstance(llm_reply, dict) else None
+            )
+            assistant_content = llm_reply.get("content") if isinstance(llm_reply, dict) else None
+            assistant_tool_calls = llm_reply.get("tool_calls", []) if isinstance(llm_reply, dict) else []
+            assistant_reasoning = llm_reply.get("reasoning_content") if isinstance(llm_reply, dict) else None
+            assistant_raw_message = llm_reply.get("raw_message") if isinstance(llm_reply, dict) else None
+            assistant_text = assistant_text_content(assistant_content)
+            finish_reason = llm_reply.get("finish_reason") if isinstance(llm_reply, dict) else None
+            assistant_tool_arguments = parse_tool_arguments_list(assistant_tool_calls)
+            assistant_tool_call_ids = [str(tool_call.get("id", "")) for tool_call in assistant_tool_calls]
+            assistant_tool_names = [
+                str((tool_call.get("function", {}) if isinstance(tool_call, dict) else {}).get("name", ""))
+                for tool_call in assistant_tool_calls
+            ]
+            if debug_enabled():
+                if assistant_tool_calls:
+                    print(f"Round {round_index}: tool_calls={json.dumps(assistant_tool_calls, ensure_ascii=False)}")
+                    if assistant_text.strip():
+                        print(f"Round {round_index} content: {assistant_text}")
+                else:
+                    print(f"Round {round_index}: {assistant_text}")
+            if not isinstance(llm_reply, dict) or llm_reply.get("status") == "error":
+                result_text = llm_reply.get("error", "llm api error: unknown error") if isinstance(llm_reply, dict) else str(llm_reply)
+                if self.should_accept_terminal_error(
+                    error_text=result_text,
+                    workspace_root=resolved_workspace_root,
+                    messages=messages,
+                ):
+                    recovered_result_text = self.accepted_terminal_error_result_text(
+                        error_text=result_text,
+                        workspace_root=resolved_workspace_root,
+                        messages=messages,
+                    ).strip()
+                    if not recovered_result_text:
+                        recovered_result_text = (
+                            "Recovered completion after a terminal LLM/runtime error because the required "
+                            "completion artifacts already exist in the workspace."
+                        )
+                    return finalize(recovered_result_text, "result", role="runtime", error=result_text)
+                termination = "llm api error"
+                return finalize(result_text, termination, error=result_text)
+
+            deprecated_protocol = legacy_protocol_error(assistant_text)
+            if deprecated_protocol is not None:
+                trace_writer.append(
+                    role="assistant",
+                    text=assistant_text.strip(),
+                    turn_index=round_index,
+                    tool_call_ids=assistant_tool_call_ids,
+                    tool_names=assistant_tool_names,
+                    tool_arguments=assistant_tool_arguments,
+                    finish_reason=finish_reason,
+                    error=deprecated_protocol,
+                )
+                retry_assistant_message = assistant_retry_history_message(
+                    content=assistant_content,
+                    reasoning_content=assistant_reasoning,
+                )
+                if retry_assistant_message is not None:
+                    messages.append(retry_assistant_message)
+                correction_text = (
+                    "Error: The previous assistant turn used the deprecated text-tag protocol. "
+                    "Do not emit <tool_call>, <tool_response>, <think>, or <answer> in plain text. "
+                    "Use only the native tool calling interface when tools are needed, or plain final result text when no more tools are needed."
+                )
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": correction_text,
+                    }
+                )
+                trace_writer.append(role="user", text=correction_text, turn_index=round_index)
+                persist_state(error=deprecated_protocol)
+                continue
+
+            if finish_reason == "length" and assistant_tool_calls:
+                protocol_error = "assistant tool call turn was truncated by output limit"
+                trace_writer.append(
+                    role="assistant",
+                    text=assistant_text.strip(),
+                    turn_index=round_index,
+                    tool_call_ids=assistant_tool_call_ids,
+                    tool_names=assistant_tool_names,
+                    tool_arguments=assistant_tool_arguments,
+                    finish_reason=finish_reason,
+                    error=protocol_error,
+                )
+                retry_assistant_message = assistant_retry_history_message(
+                    content=assistant_content,
+                    reasoning_content=assistant_reasoning,
+                )
+                if retry_assistant_message is not None:
+                    messages.append(retry_assistant_message)
+                correction_text = (
+                    "Error: The previous assistant turn hit the output limit while emitting native tool calls, "
+                    "so none of those tool calls were executed. Re-emit the needed tool calls in a smaller form. "
+                    "If a file is large, split it into multiple smaller Write calls or create it via shorter steps. "
+                    "Do not resend the same oversized truncated tool call."
+                )
+                messages.append({"role": "user", "content": correction_text})
+                trace_writer.append(role="user", text=correction_text, turn_index=round_index)
+                persist_state(error=protocol_error)
+                continue
+
+            if assistant_tool_calls:
+                trace_writer.append(
+                    role="assistant",
+                    text=assistant_text.strip(),
+                    turn_index=round_index,
+                    tool_call_ids=assistant_tool_call_ids,
+                    tool_names=assistant_tool_names,
+                    tool_arguments=assistant_tool_arguments,
+                    finish_reason=finish_reason,
+                )
+                assistant_message = assistant_history_message(
+                    content=assistant_content,
+                    tool_calls=assistant_tool_calls,
+                    reasoning_content=assistant_reasoning,
+                    raw_message=assistant_raw_message,
+                )
+                tool_turn_message_start = len(messages)
+                messages.append(assistant_message)
+                deferred_image_contexts: list[tuple[str, str, Any, Any, dict[str, Any]]] = []
+                for tool_call, tool_arguments in zip(assistant_tool_calls, assistant_tool_arguments):
+                    if remaining_runtime_seconds(runtime_deadline) is not None and remaining_runtime_seconds(runtime_deadline) <= 0:
+                        result_text = "No result found before the maximum agent runtime limit."
+                        termination = f"agent runtime limit reached: {agent_runtime_limit}s"
+                        return finalize(result_text, termination, error=termination)
+                    tool_call_id = str(tool_call.get("id", ""))
+                    function_block = tool_call.get("function", {}) if isinstance(tool_call, dict) else {}
+                    tool_name = str(function_block.get("name", ""))
+                    try:
+                        result = self.custom_call_tool(
+                            tool_name,
+                            tool_arguments,
+                            workspace_root=resolved_workspace_root,
+                            runtime_deadline=runtime_deadline,
+                        )
+                    except KeyboardInterrupt:
+                        messages = messages[:tool_turn_message_start]
+                        return finalize_interrupted()
+                    tool_result_text = tool_result_message_content(result)
+                    messages.append(api_tool_message(tool_call_id, result))
+                    trace_writer.append(
+                        role="tool",
+                        text=tool_result_text,
+                        turn_index=round_index,
+                        tool_call_ids=[tool_call_id],
+                        tool_names=[tool_name],
+                        tool_arguments=[tool_arguments],
+                    )
+                    extra_image_context = image_context_message(result, self.model)
+                    if extra_image_context is not None:
+                        deferred_image_contexts.append((tool_call_id, tool_name, tool_arguments, result, extra_image_context))
+                for tool_call_id, tool_name, tool_arguments, result, extra_image_context in deferred_image_contexts:
+                    messages.append(extra_image_context)
+                    trace_writer.append(
+                        role="user",
+                        text=image_context_trace_text(result),
+                        turn_index=round_index,
+                        tool_call_ids=[tool_call_id],
+                        tool_names=[tool_name],
+                        tool_arguments=[tool_arguments],
+                        image_paths=image_trace_paths(result),
+                    )
+                    if remaining_runtime_seconds(runtime_deadline) is not None and remaining_runtime_seconds(runtime_deadline) <= 0:
+                        result_text = "No result found before the maximum agent runtime limit."
+                        termination = f"agent runtime limit reached: {agent_runtime_limit}s"
+                        return finalize(result_text, termination, error=termination)
+                persist_state()
+                if interruption_requested():
+                    return finalize_interrupted()
+            elif assistant_has_meaningful_text(assistant_content):
+                current_result_text = assistant_text.strip()
+                messages.append(
+                    assistant_history_message(
+                        content=current_result_text,
+                        reasoning_content=assistant_reasoning,
+                        raw_message=assistant_raw_message,
+                    )
+                )
+                should_accept_result = self.should_accept_plaintext_result(
+                    result_text=current_result_text,
+                    workspace_root=resolved_workspace_root,
+                    messages=messages,
+                )
+                if should_accept_result:
+                    return finalize(current_result_text, "result", role="assistant")
+                protocol_error = "plain result rejected by additional stop condition"
+                trace_writer.append(
+                    role="assistant",
+                    text=current_result_text,
+                    turn_index=round_index,
+                    finish_reason=finish_reason,
+                    error=protocol_error,
+                )
+                correction_text = self.rejected_plaintext_result_message(
+                    result_text=current_result_text,
+                    workspace_root=resolved_workspace_root,
+                    messages=messages,
+                ).strip()
+                if not correction_text:
+                    correction_text = (
+                        "The previous assistant turn was not accepted as the final result because the additional stop condition returned false. "
+                        "Continue working. If the task is incomplete, use tool calls to produce the required artifacts before finishing."
+                    )
+                messages.append({"role": "user", "content": correction_text})
+                trace_writer.append(role="user", text=correction_text, turn_index=round_index)
+                persist_state(error=protocol_error)
+                continue
+            else:
+                protocol_error = "assistant emitted empty response"
+                trace_writer.append(
+                    role="assistant",
+                    text="",
+                    turn_index=round_index,
+                    finish_reason=finish_reason,
+                    error=protocol_error,
+                )
+                retry_assistant_message = assistant_retry_history_message(
+                    content=assistant_content,
+                    reasoning_content=assistant_reasoning,
+                )
+                if retry_assistant_message is not None:
+                    messages.append(retry_assistant_message)
+                correction_text = (
+                    "Error: The previous assistant turn was empty. "
+                    "If tools are needed, use native tool calling. Otherwise return the final result text."
+                )
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": correction_text,
+                    }
+                )
+                trace_writer.append(role="user", text=correction_text, turn_index=round_index)
+                persist_state(error=protocol_error)
+                continue
+
+            token_count = self.count_tokens(messages)
+            if debug_enabled():
+                print(f"round: {round_index}, token count: {token_count}")
+            persist_state()
+
+        result_text = 'No result found.'
+        termination = 'result not found'
+        if round_index >= self.max_rounds:
+            termination = 'exceed available rounds'
+        elif num_llm_calls_available == 0:
+            termination = 'exceed available llm calls'
+        return finalize(result_text, termination, error=termination)
+
+    def custom_call_tool(self, tool_name: str, tool_args: Any, **kwargs):  # Invoked by the run loop once per assistant tool call.
+        return execute_tool_by_name(self.tool_map, tool_name, tool_args, **kwargs)  # kwargs (the loop passes workspace_root and runtime_deadline) are forwarded unchanged
+
+
+def _path_has_suffix(path: Path, suffix_parts: Sequence[str]) -> bool:
+    """Return True if the last components of ``path`` equal ``suffix_parts``, compared case-insensitively."""
+    wanted = tuple(part.casefold() for part in suffix_parts)
+    if not wanted:  # an empty suffix matches every path; guard it because parts[-0:] would be the whole tuple
+        return True
+    return tuple(part.casefold() for part in path.parts[-len(wanted):]) == wanted  # a shorter path yields a shorter tuple, hence False
+
+
+def resolve_agent_class_for_role_prompt_files(role_prompt_files: Sequence[str]) -> Type[MultiTurnReactAgent]:
+    """Return ``ResearchClawBenchAgent`` if any entry resolves to ``benchmarks/ResearchClawBench/role_prompt.md``, else ``MultiTurnReactAgent``."""
+    benchmark_tail = ("benchmarks", "ResearchClawBench", "role_prompt.md")
+    cleaned = (str(entry).strip() for entry in role_prompt_files)
+    for candidate in filter(None, cleaned):  # blank entries are skipped
+        absolute = Path(candidate).expanduser().resolve(strict=False)
+        if not _path_has_suffix(absolute, benchmark_tail):
+            continue
+        from benchmarks.ResearchClawBench.adapter import ResearchClawBenchAgent  # imported only once a match is found
+        return ResearchClawBenchAgent
+    return MultiTurnReactAgent
+
+
+def _parse_cli_args(argv: list[str]) -> tuple[str, Optional[str], Optional[str], str, list[str], list[str], Optional[bool]]:
+    """Parse the command line for a direct agent run.
+
+    Returns ``(prompt_text, trace_dir, workspace_root, role_prompt, role_prompt_files, image_paths, chat)``.
+    ``--prompt-file`` takes precedence over positional prompt words.
+    Raises ``ValueError`` if the resulting prompt is empty.
+    """
+    cli = argparse.ArgumentParser(description="Run the local agent directly from agent_base.react_agent.")
+    cli.add_argument("prompt", nargs="*", help="Prompt text.")
+    cli.add_argument("--prompt-file", help="Optional UTF-8 text file containing the prompt.")
+    cli.add_argument("--trace-dir", help="Optional directory where the run trace JSONL should be created.")
+    cli.add_argument("--workspace-root", help="Optional workspace root for local file tools, Bash, and TerminalStart.")
+    cli.add_argument(
+        "--role-prompt-file",
+        dest="role_prompt_files",
+        metavar="PATH",
+        action="append",
+        default=[],
+        help="Append one role-specific prompt file to the base system prompt. May be passed multiple times.",
+    )
+    cli.add_argument(
+        "--images",
+        dest="image_paths",
+        metavar="PATH",
+        action="append",
+        nargs="+",
+        default=[],
+        help="Attach one or more local image paths to the initial user message.",
+    )
+    cli.add_argument(
+        "--chat",
+        default=None,
+        action=argparse.BooleanOptionalAction,
+        help="Continue asking for follow-up user messages after each final answer. Defaults to on only in an interactive terminal.",
+    )
+    options = cli.parse_args(argv)
+
+    # A prompt file, when given, is the only prompt source; positional words are not consulted as a fallback.
+    if options.prompt_file:
+        prompt_text = Path(options.prompt_file).read_text(encoding="utf-8").strip()
+    else:
+        prompt_text = " ".join(options.prompt).strip()
+    if not prompt_text:
+        raise ValueError("A non-empty prompt is required via positional args or --prompt-file.")
+
+    role_prompt = read_role_prompt_files(options.role_prompt_files)
+    role_files = list(options.role_prompt_files)
+    # Each --images occurrence yields its own list; flatten them in command-line order.
+    images: list[str] = []
+    for group in options.image_paths:
+        images.extend(group)
+    return prompt_text, options.trace_dir, options.workspace_root, role_prompt, role_files, images, options.chat
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Run the agent from the command line and return a process exit code.
+
+    ``argv`` falls back to ``sys.argv[1:]`` only when it is ``None``, so an explicit empty list is parsed as given.
+    Returns 0 once the run (and any chat follow-ups) ends, or 1 on ``MissingRequiredEnvError``/``ValueError``.
+    """
+    load_dotenv(PROJECT_ROOT / ".env")
+    try:
+        require_required_env("ResearchHarness agent")
+        cli_args = sys.argv[1:] if argv is None else argv
+        prompt_text, trace_dir, workspace_root, role_prompt, role_prompt_files, image_paths, chat_arg = _parse_cli_args(cli_args)
+        agent_cls = resolve_agent_class_for_role_prompt_files(role_prompt_files)
+        agent = agent_cls(llm=default_llm_config(), trace_dir=trace_dir, role_prompt=role_prompt or None)
+        resolved_workspace_root = normalize_workspace_root(workspace_root)
+        # Stage each --images path and gather the content parts attached to the first user message.
+        initial_content_parts: list[dict[str, Any]] = []
+        saved_image_paths: list[str] = []
+        for image_index, image_path in enumerate(image_paths):
+            saved_path, data_url = stage_image_file_for_input(
+                image_path,
+                workspace_root=resolved_workspace_root,
+                image_index=image_index,
+            )
+            saved_image_paths.append(saved_path)
+            initial_content_parts.extend(image_input_content_parts(data_url, saved_path))
+        run_prompt = append_saved_image_paths_to_prompt(prompt_text, saved_image_paths)
+        printer = ConsoleEventPrinter(model_name=agent.model, workspace_root=resolved_workspace_root, prompt=run_prompt)
+        printer.print_header()
+        session = agent._run_session(
+            run_prompt,
+            workspace_root=str(resolved_workspace_root),
+            event_callback=printer.handle_event,
+            initial_content_parts=initial_content_parts or None,
+        )
+        # An explicit --chat/--no-chat wins; otherwise chat is on only when stdin and stdout are both TTYs.
+        chat_enabled = chat_arg if chat_arg is not None else (sys.stdin.isatty() and sys.stdout.isatty())
+        messages = session.get("messages", [])
+        while chat_enabled:
+            try:
+                followup = input("\n[ResearchHarness] Follow-up (Ctrl+C to exit): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print("\n[ResearchHarness] Chat ended.")
+                break
+            if not followup:
+                continue
+            print(f"\n[ResearchHarness] Continuing conversation: {followup}")
+            printer.reset_rounds()
+            session = agent._run_session(
+                followup,
+                workspace_root=str(resolved_workspace_root),
+                event_callback=printer.handle_event,
+                prior_messages=messages,
+            )
+            messages = session.get("messages", messages)
+        return 0
+    except (MissingRequiredEnvError, ValueError) as exc:
+        print(str(exc), file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # main()'s return value becomes the process exit status
diff --git a/agent_base/session_state.py b/agent_base/session_state.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b742dc6fc21435a737d2ed5a01ad32120e46fc3
--- /dev/null
+++ b/agent_base/session_state.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import json
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any, Optional, Sequence
+
+from agent_base.model_profiles import ModelProfile
+from agent_base.utils import safe_jsonable
+
+
+SESSION_STATE_FILENAME = "_session_state.json"  # File name that resolve_session_state_path appends to a trace directory.
+
+
+@dataclass
+class CompactionRecord:  # Outcome of one context-compaction attempt; instances are appended to AgentSessionState.compactions.
+    turn_index: int  # round index at which the compaction was attempted
+    status: str  # the visible caller stores "ok" on success and "error" on failure
+    trigger_reason: str  # caller-supplied reason string for triggering compaction
+    prior_token_estimate: int  # token estimate before compaction
+    prior_message_count: int  # number of messages before compaction
+    compacted_group_count: int = 0  # number of older turn groups that were compacted
+    kept_group_count: int = 0  # presumably turn groups left uncompacted -- TODO confirm against the compaction code
+    new_token_estimate: Optional[int] = None  # token estimate after compaction; left None by the caller on failure
+    new_message_count: Optional[int] = None  # message count after compaction; left None by the caller on failure
+    summary_text: str = ""  # summary text from the compaction outcome; set by the caller only on success
+    error: str = ""  # error from the compaction outcome; set by the caller only on failure
+
+
+@dataclass
+class AgentSessionState:
+    """Mutable snapshot of one agent run that ``payload()`` renders as a plain dict."""
+
+    run_id: str
+    model_name: str
+    workspace_root: str
+    prompt: str
+    trace_path: str = ""
+    turn_index: int = 0
+    llm_calls_remaining: int = 0
+    max_rounds: int = 0
+    max_input_tokens: int = 0
+    max_output_tokens: int = 0
+    last_input_tokens: Optional[int] = None
+    current_token_estimate: int = 0
+    termination: str = ""
+    error: str = ""
+    messages: list[dict[str, Any]] = field(default_factory=list)
+    compactions: list[CompactionRecord] = field(default_factory=list)
+    model_profile: Optional[ModelProfile] = None
+
+    def capture_messages(self, messages: Sequence[dict[str, Any]]) -> None:
+        """Replace ``self.messages`` with ``safe_jsonable`` applied to a list copy of ``messages``."""
+        snapshot = list(messages)
+        self.messages = safe_jsonable(snapshot)
+
+    def payload(self) -> dict[str, Any]:
+        """Return the state as a dict with ``"version": 1`` first, followed by every field in declaration order.
+
+        Scalar fields and ``messages`` are stored as-is; compaction records and the
+        model profile are passed through ``asdict`` and then ``safe_jsonable``.
+        """
+        copied_as_is = (
+            "run_id", "model_name", "workspace_root", "prompt", "trace_path", "turn_index",
+            "llm_calls_remaining", "max_rounds", "max_input_tokens", "max_output_tokens",
+            "last_input_tokens", "current_token_estimate", "termination", "error", "messages",
+        )
+        data: dict[str, Any] = {"version": 1}
+        # dict.update accepts (key, value) pairs; insertion order follows copied_as_is.
+        data.update((name, getattr(self, name)) for name in copied_as_is)
+        data["compactions"] = [safe_jsonable(asdict(entry)) for entry in self.compactions]
+        details = self.model_profile
+        data["model_profile"] = None if details is None else safe_jsonable(asdict(details))
+        return data
+
+
+def resolve_session_state_path(trace_dir: str | Path) -> Path:  # Location of the session-state file inside trace_dir.
+    return Path(trace_dir).joinpath(SESSION_STATE_FILENAME)
+
+
+def persist_session_state(path: str | Path, state: AgentSessionState) -> None:
+    """Write ``state.payload()`` to ``path`` as indented UTF-8 JSON with a trailing newline, creating parent directories."""
+    Path(path).parent.mkdir(exist_ok=True, parents=True)
+    Path(path).write_text(f"{json.dumps(state.payload(), indent=2, ensure_ascii=False)}\n", encoding="utf-8")
diff --git a/agent_base/tools/README.md b/agent_base/tools/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f94fedacd794377babb25fae10c4d7433241ed9a
--- /dev/null
+++ b/agent_base/tools/README.md
@@ -0,0 +1,457 @@
+# Tools
+
+This document describes the tool surface exposed to the model. Tool names use PascalCase consistently.
+
+The current implementation is grouped by category:
+
+- `agent_base/tools/tool_file.py`
+- `agent_base/tools/tool_runtime.py`
+- `agent_base/tools/tool_user.py`
+- `agent_base/tools/tool_web.py`
+
+## Overview
+
+The current tool set is:
+
+- `Glob`
+- `Grep`
+- `Read`
+- `ReadPDF`
+- `ReadImage`
+- `Write`
+- `Edit`
+- `Bash`
+- `WebSearch`
+- `ScholarSearch`
+- `WebFetch`
+- `AskUser`
+- `TerminalStart`
+- `TerminalWrite`
+- `TerminalRead`
+- `TerminalInterrupt`
+- `TerminalKill`
+
+## Tool Matrix
+
+| Tool | Category | Arguments | Description | Return Shape / Notes |
+| --- | --- | --- | --- | --- |
+| `Glob` | Local files | `pattern`, `path?`, `include_dirs?`, `max_results?` | Discover files or directories by pathname pattern inside the workspace. | Returns `root`, `match_count`, `truncated`, and `results`. Best for pathname discovery rather than reading content. |
+| `Grep` | Local files | `pattern`, `path?`, `glob?`, `case_sensitive?`, `max_results?`, `max_chars?` | Search local text files by content and return matching lines. | Returns search metadata plus matched file paths, line numbers, and line text. Skips obvious binary files, images, and PDFs. |
+| `Read` | Local files | `path`, `start_line?`, `end_line?`, `max_chars?` | Read a local text file, optionally by line range. | Returns normalized path, line metadata, truncation status, and `content`. Redirects PDF/image tasks toward `ReadPDF` or `ReadImage`. |
+| `ReadPDF` | Local files | `path`, `max_chars?`, `max_image_paths?` | Read a local PDF, extract text, and expose extracted image paths when available. | Returns text content plus `image_paths` and image-count metadata. Depends on [`structai`](https://github.com/black-yt/structai) and `MINERU_TOKEN`. |
+| `ReadImage` | Local files | `path` | Read a local image and expose image metadata for runtime multimodal use. | Returns image metadata only. During agent runs, the runtime sends a compressed attachment to the LLM API as an `image_url` content part. |
+| `Write` | Local files | `path`, `content`, `overwrite?` | Create a text file or overwrite one when explicitly allowed. | Creates parent directories automatically. Returns an error if the file exists and `overwrite=false`. |
+| `Edit` | Local files | `path`, `patch` | Apply a targeted patch to a local text file. | Expects unified-diff / hunk-style input. Context-based matching, not a full `patch(1)` implementation. |
+| `Bash` | Runtime | `command`, `timeout?`, `workdir?` | Run one-shot shell commands for deterministic local execution, parsing, and validation. | Returns `stdout` and `stderr`. Primary local execution tool for short Python, `rg`, `find`, `git`, and structured local processing. |
+| `WebSearch` | Web | `query` | Perform general web search over one or more complementary queries. | Returns a text summary headed by `## Web Results` with title, link, snippet, and date/source when available. Uses Serper. |
+| `ScholarSearch` | Web | `query` | Search academic literature and return paper metadata such as title, year, abstract, and citation count. | Returns a text summary headed by `## Scholar Results` with title, PDF link, publication info, year, citation count, and abstract. Uses Serper Scholar. |
+| `WebFetch` | Web | `url`, `goal` | Fetch a page, extract evidence relevant to a concrete goal, and summarize it. | Uses Jina Reader plus the configured summary model. Returns evidence-focused text rather than raw HTML. |
+| `AskUser` | Human interaction | `question`, `context?` | Ask the human user one concise clarification question when essential information cannot be determined from tools or existing instructions. | Writes the question to the interactive terminal and returns the user's answer. If no interactive terminal is available, returns an explicit unavailable message. |
+| `TerminalStart` | Runtime | `cwd?`, `shell?`, `rows?`, `cols?` | Start a persistent terminal session. | Returns session metadata such as `session_id`, `pid`, `cwd`, `shell`, `alive`, and `returncode`. |
+| `TerminalWrite` | Runtime | `session_id`, `input`, `append_newline?`, `yield_time_ms?`, `max_output_chars?` | Send input to a persistent terminal session and read incremental output. | Best for stateful shells, REPLs, and long-running foreground processes. |
+| `TerminalRead` | Runtime | `session_id`, `yield_time_ms?`, `max_output_chars?` | Read unread output from an existing persistent terminal session. | Useful when a process is still running and output arrives over time. |
+| `TerminalInterrupt` | Runtime | `session_id`, `max_output_chars?` | Send `Ctrl-C` to the foreground process in a terminal session without destroying the session. | Use when a long-running process must be interrupted but the shell should remain alive. |
+| `TerminalKill` | Runtime | `session_id`, `force?` | Terminate a persistent terminal session and release resources. | Final cleanup step for terminal sessions that are no longer needed. |
+
+## Glob
+
+Purpose:
+
+- Discover local files or directories by glob pattern.
+- Good for pathname discovery, not for reading file contents.
+
+Arguments:
+
+- `pattern`: string, a `pathlib`-style glob such as `**/*.py`
+- `path`: optional string, search root, defaults to the current workspace
+- `include_dirs`: optional boolean, defaults to `false`
+- `max_results`: optional integer, defaults to `200`
+
+Returns:
+
+- `root`
+- `pattern`
+- `include_dirs`
+- `match_count`
+- `truncated`
+- `results`
+
+## Grep
+
+Purpose:
+
+- Search local text files by content.
+- Return matched file paths, line numbers, and line text.
+
+Arguments:
+
+- `pattern`: string, regular expression
+- `path`: optional string, file or directory path, defaults to the current workspace
+- `glob`: optional string, file filter when scanning a directory, defaults to `**/*`
+- `case_sensitive`: optional boolean, defaults to `false`
+- `max_results`: optional integer, defaults to `100`
+- `max_chars`: optional integer, defaults to `20000`
+
+Behavior:
+
+- If `path` is a file, only that file is searched.
+- If `path` is a directory, matching text files are searched recursively.
+- Images, PDFs, and obviously binary files are skipped.
+
+Returns:
+
+- `root`
+- `pattern`
+- `glob`
+- `case_sensitive`
+- `files_scanned`
+- `match_count`
+- `truncated`
+- `results`
+
+## Read
+
+Purpose:
+
+- Read a local text file.
+- Support partial line ranges.
+- Support long-text truncation.
+
+Arguments:
+
+- `path`: string, file path
+- `start_line`: optional integer, 1-based start line
+- `end_line`: optional integer, 1-based end line
+- `max_chars`: optional integer, maximum returned characters, defaults to `20000`
+
+Behavior:
+
+- Only text files are handled directly.
+- If the input is a PDF, the tool tells the model to use `ReadPDF`.
+- If the input is an image, the tool tells the model to use `ReadImage`.
+
+Returns:
+
+- `path`
+- `source_type: text`
+- `start_line`
+- `end_line`
+- `total_lines`
+- `truncated`
+- `content`
+
+## ReadPDF
+
+Purpose:
+
+- Read a local PDF.
+- Return extracted text.
+- Return extracted local image paths when the PDF parser produces image assets.
+
+Arguments:
+
+- `path`: string, PDF path
+- `max_chars`: optional integer, maximum returned characters, defaults to `20000`
+- `max_image_paths`: optional integer, maximum listed extracted image paths, defaults to `20`
+
+Behavior:
+
+- Calls `structai.read_pdf(...)` from [`structai`](https://github.com/black-yt/structai) underneath.
+- Uses the returned `text` and `img_paths`.
+- Depends on `MINERU_TOKEN`.
+- If [`structai`](https://github.com/black-yt/structai) is missing, returns a clear dependency error instead of breaking unrelated file tools.
+- For PDF figure tasks, prefer `ReadPDF` first to discover extracted text and extracted image paths, then use `ReadImage` on the actual extracted image file.
+
+Returns:
+
+- `path`
+- `source_type: pdf`
+- `total_lines`
+- `truncated`
+- `image_count`
+- `image_paths_listed`
+- `image_paths_truncated`
+- `image_paths`
+- `content`
+
+## ReadImage
+
+Purpose:
+
+- Read a local image.
+- Return image metadata.
+- During a main agent run, pass a compressed image to the LLM API as an `image_url` content part instead of stuffing raw base64 text into ordinary message text.
+
+Arguments:
+
+- `path`: string, image path
+
+Behavior:
+
+- Uses `PIL.Image.open(...)` underneath.
+- The runtime creates a compressed JPEG attachment for the LLM request and sends it as an inline `data:` URL in an `image_url` content part.
+- Trace records and direct tool output keep image metadata only, not the full binary payload.
+
+Returns:
+
+- `path`
+- `source_type`
+- `format`
+- `mime_type`
+- `mode`
+- `width`
+- `height`
+- `byte_count`
+- `llm_attachment_format`
+- `llm_attachment_width`
+- `llm_attachment_height`
+- `llm_attachment_byte_count`
+
+## Write
+
+Purpose:
+
+- Create a text file.
+- Overwrite an existing file when explicitly requested.
+
+Arguments:
+
+- `path`: string, destination file path
+- `content`: string, complete file content
+- `overwrite`: optional boolean, defaults to `false`
+
+Behavior:
+
+- Parent directories are created automatically.
+- If `overwrite=false` and the file already exists, the tool returns an error.
+
+## Edit
+
+Purpose:
+
+- Apply partial edits to a local text file.
+- Best for targeted patches, not full-file rewrites.
+
+Arguments:
+
+- `path`: string, destination file path
+- `patch`: string, unified-diff / hunk-style patch
+
+Behavior:
+
+- Requires explicit hunks such as `@@ -1,2 +1,2 @@`.
+- The current implementation matches by surrounding context blocks rather than implementing full `patch(1)` line-number semantics.
+
+Returns:
+
+- updated file path on success
+- applied hunk count
+
+## Bash
+
+Purpose:
+
+- Execute one-shot shell commands.
+- Handle paths, search, git, conda, and local script orchestration.
+- Serve as the primary local execution tool for temporary Python, deterministic computation, validation, formatting, and parsing.
+
+Arguments:
+
+- `command`: string, shell command to execute
+- `timeout`: optional integer, seconds, defaults to `30`
+- `workdir`: optional string, working directory
+
+Behavior:
+
+- Uses local `bash`.
+- Returns both `stdout` and `stderr`.
+- Timeout produces an explicit error.
+- Short scripts are well suited to a heredoc such as `python3 - <<'PY'`.
+
+Recommended use cases:
+
+- pathname and file discovery
+- `rg`, `find`, `git`
+- local Python or other CLI programs
+- deterministic CSV / JSON / text processing
+- local computation and validation against absolute paths returned by file tools
+
+## WebSearch
+
+Purpose:
+
+- General web search.
+- Supports passing multiple complementary queries in one call.
+
+Arguments:
+
+- `query`: array of strings, at least one query
+
+Behavior:
+
+- Calls Serper's Google Search endpoint.
+- Reads `SERPER_KEY_ID` at runtime.
+
+Returns:
+
+- query summary text
+- `## Web Results`
+- title, link, snippet, and date/source when available
+
+## ScholarSearch
+
+Purpose:
+
+- Academic search.
+- Return paper title, year, abstract, citation count, and related metadata.
+
+Arguments:
+
+- `query`: array of strings, at least one query
+
+Behavior:
+
+- Calls Serper's Google Scholar endpoint.
+- Reads `SERPER_KEY_ID` at runtime.
+
+Returns:
+
+- query summary text
+- `## Scholar Results`
+- title, PDF link, `publicationInfo`, year, citation count, and abstract
+
+## WebFetch
+
+Purpose:
+
+- Visit a webpage.
+- Extract evidence relevant to a concrete goal.
+- Produce a goal-oriented summary.
+
+Arguments:
+
+- `url`: string or array of strings, page URL or URLs
+- `goal`: string, the specific information to look for on the page
+
+Behavior:
+
+- Fetches page text through Jina Reader first.
+- Then calls the configured summary-model endpoint for evidence extraction and summarization.
+- Returns a fetch-and-extract result, not raw HTML.
+
+Dependencies:
+
+- `JINA_API_KEYS`
+- `API_KEY`
+- `API_BASE`
+- `MODEL_NAME`
+
+Returns:
+
+- `The useful information in ...`
+- `Evidence in page:`
+- `Summary:`
+
+## TerminalStart
+
+Purpose:
+
+- Start a persistent terminal session.
+
+Arguments:
+
+- `cwd`: optional string, working directory
+- `shell`: optional string, shell path
+- `rows`: optional integer, terminal rows, defaults to `30`
+- `cols`: optional integer, terminal columns, defaults to `120`
+
+Returns:
+
+- `session_id`
+- `pid`
+- `cwd`
+- `shell`
+- `alive`
+- `returncode`
+
+## TerminalWrite
+
+Purpose:
+
+- Send input to an existing terminal session and read output.
+
+Arguments:
+
+- `session_id`: string, session id
+- `input`: string, text to send
+- `append_newline`: optional boolean, defaults to `true`
+- `yield_time_ms`: optional integer, defaults to `200`
+- `max_output_chars`: optional integer, defaults to `20000`
+
+## TerminalRead
+
+Purpose:
+
+- Read unread output from an existing terminal session.
+
+Arguments:
+
+- `session_id`: string, session id
+- `yield_time_ms`: optional integer, defaults to `200`
+- `max_output_chars`: optional integer, defaults to `20000`
+
+## TerminalInterrupt
+
+Purpose:
+
+- Send `Ctrl-C` to the foreground process in a terminal session.
+- Keep the session alive.
+
+Arguments:
+
+- `session_id`: string, session id
+- `max_output_chars`: optional integer, defaults to `20000`
+
+## TerminalKill
+
+Purpose:
+
+- Terminate a terminal session.
+- Release related resources.
+
+Arguments:
+
+- `session_id`: string, session id
+- `force`: optional boolean, defaults to `false`
+
+## AskUser
+
+Purpose:
+
+- Ask the human user for essential missing information, preference, or approval.
+- Use only when the answer cannot be determined from workspace files, available tools, or existing instructions.
+
+Arguments:
+
+- `question`: string, concise question to ask.
+- `context`: optional string, brief explanation of why the question is necessary.
+
+Behavior:
+
+- Writes the question to the interactive terminal and waits for one user answer.
+- Returns an explicit unavailable message instead of blocking when no interactive terminal exists.
+- Not available in ResearchClawBench runs.
+
+## Suggested Usage
+
+- Use `Glob` first for pathname discovery.
+- Use `Grep` first for local text search.
+- Use `Read` for local text files.
+- Use `ReadPDF` for local PDFs.
+- Use `ReadImage` for local images.
+- Use `Edit` for targeted file changes.
+- Use `Write` for full-file writes.
+- Use `Bash` for one-shot system commands.
+- Use `AskUser` only when a human answer is genuinely necessary.
+- Use `Terminal*` only when persistent interactive shell state is actually needed.
+- Route pure Python analysis through `Bash` rather than introducing a separate Python tool.
diff --git a/agent_base/tools/__init__.py b/agent_base/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..66fb1c1fb1d1cc74fc48038bc754fd18d7fe8f53
--- /dev/null
+++ b/agent_base/tools/__init__.py
@@ -0,0 +1,49 @@
+from importlib import import_module
+
+# Names this package exposes; each one has a matching entry in _EXPORT_TO_MODULE.
+__all__ = [
+    "Bash",
+    "AskUser",
+    "Edit",
+    "Glob",
+    "Grep",
+    "Read",
+    "ReadImage",
+    "ReadPDF",
+    "ScholarSearch",
+    "TerminalInterrupt",
+    "TerminalKill",
+    "TerminalRead",
+    "TerminalStart",
+    "TerminalWrite",
+    "WebFetch",
+    "WebSearch",
+    "Write",
+]
+
+# Maps each exported name to the dotted path of the module it is fetched from.
+# The module-level __getattr__ in this file looks names up here and imports the
+# target module only when the attribute is requested.
+_EXPORT_TO_MODULE = {
+    "Bash": "agent_base.tools.tool_runtime",
+    "AskUser": "agent_base.tools.tool_user",
+    "Edit": "agent_base.tools.tool_file",
+    "Glob": "agent_base.tools.tool_file",
+    "Grep": "agent_base.tools.tool_file",
+    "Read": "agent_base.tools.tool_file",
+    "ReadImage": "agent_base.tools.tool_file",
+    "ReadPDF": "agent_base.tools.tool_file",
+    "ScholarSearch": "agent_base.tools.tool_web",
+    "TerminalInterrupt": "agent_base.tools.tool_runtime",
+    "TerminalKill": "agent_base.tools.tool_runtime",
+    "TerminalRead": "agent_base.tools.tool_runtime",
+    "TerminalStart": "agent_base.tools.tool_runtime",
+    "TerminalWrite": "agent_base.tools.tool_runtime",
+    "WebFetch": "agent_base.tools.tool_web",
+    "WebSearch": "agent_base.tools.tool_web",
+    "Write": "agent_base.tools.tool_file",
+}
+
+
+def __getattr__(name: str):
+    """Resolve an exported tool name by importing its module on attribute access.
+
+    Raises:
+        AttributeError: if ``name`` has no usable entry in ``_EXPORT_TO_MODULE``.
+    """
+    target = _EXPORT_TO_MODULE.get(name)
+    if target is not None:
+        return getattr(import_module(target), name)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/agent_base/tools/tool_file.py b/agent_base/tools/tool_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7f74a78c0eeb57ef0847da858d51328acd6bc42
--- /dev/null
+++ b/agent_base/tools/tool_file.py
@@ -0,0 +1,933 @@
+import argparse
+import base64
+import io
+import os
+import re
+import sys
+from pathlib import Path
+from typing import Any, Optional, Union
+
+from PIL import Image
+
+from agent_base.tools.tooling import ToolBase, normalize_base_root, validate_tool_path, workspace_root
+from agent_base.utils import PROJECT_ROOT, load_dotenv, read_text_lossy
+
+
+# Lower-case file suffixes treated as images; Read refuses these and points the
+# caller to ReadImage instead.
+IMAGE_SUFFIXES = {
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".bmp",
+    ".webp",
+    ".tif",
+    ".tiff",
+}
+
+# Fallbacks used by ReadImage._build_llm_attachment when the LLM_IMAGE_MAX_EDGE,
+# LLM_IMAGE_MAX_BYTES, and LLM_IMAGE_JPEG_QUALITY environment variables are unset.
+DEFAULT_LLM_IMAGE_MAX_EDGE = 1568
+DEFAULT_LLM_IMAGE_MAX_BYTES = 512 * 1024
+DEFAULT_LLM_IMAGE_JPEG_QUALITY = 85
+# Floors for the attachment compression loop: JPEG quality is not lowered below
+# this value, and the image is not shrunk once its longest edge reaches this size.
+MIN_LLM_IMAGE_JPEG_QUALITY = 45
+MIN_LLM_IMAGE_EDGE = 256
+# NOTE(review): presumably the default limits for the Glob and Grep tools (the
+# README documents 200 / 100 / 20000); their use is not visible in this section.
+DEFAULT_GLOB_MAX_RESULTS = 200
+DEFAULT_GREP_MAX_RESULTS = 100
+DEFAULT_GREP_MAX_CHARS = 20000
+
+
+def resolve_file_path(path_value: str, *, base_root: Optional[Path] = None) -> Path:
+    """Turn ``path_value`` into a path validated for "Read access".
+
+    Resolution order for the candidate handed to ``validate_tool_path``:
+
+    1. an absolute path is used as given;
+    2. a relative path that exists under the normalized root is resolved there;
+    3. when no ``base_root`` was supplied, a relative path that exists relative
+       to the current working directory is resolved there;
+    4. otherwise the path is joined to the root and resolved non-strictly.
+    """
+    requested = Path(path_value).expanduser()
+    root = normalize_base_root(base_root)
+
+    if requested.is_absolute():
+        candidate = requested
+    elif (root / requested).exists():
+        candidate = (root / requested).resolve()
+    elif base_root is None and requested.exists():
+        candidate = requested.resolve()
+    else:
+        candidate = (root / requested).resolve(strict=False)
+
+    return validate_tool_path(candidate, "Read access", base_root=root)
+
+
+def resolve_search_root(path_value: str, *, base_root: Optional[Path] = None) -> Path:
+    """Turn ``path_value`` into a search root validated for "Search access".
+
+    Absolute paths are validated as given; relative paths are first joined to
+    the normalized base root.
+    """
+    requested = Path(path_value).expanduser()
+    root = normalize_base_root(base_root)
+    candidate = requested if requested.is_absolute() else root / requested
+    return validate_tool_path(candidate, "Search access", base_root=root)
+
+
+def _is_probably_binary(path: Path, *, sample_size: int = 4096) -> bool:
+    """Return True when the first ``sample_size`` bytes of ``path`` contain a NUL byte.
+
+    Only the leading sample is read from disk, so large files are not loaded
+    into memory just to be classified. Files that cannot be opened or read are
+    reported as not binary (``False``).
+    """
+    try:
+        with path.open("rb") as handle:
+            sample = handle.read(sample_size)
+    except OSError:
+        return False
+    return b"\x00" in sample
+
+
+class Read(ToolBase):
+    """Tool that returns the text of a local file, optionally limited to a line range.
+
+    PDF and image paths are refused with a message naming ``ReadPDF`` or
+    ``ReadImage``. Failures handled inside ``call`` come back as ``[Read] ...``
+    strings instead of exceptions.
+    """
+
+    name = "Read"
+    description = "Read a local text file with support for partial line-range reads and output truncation."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "path": {
+                "type": "string",
+                "description": "The local file path to read.",
+            },
+            "start_line": {
+                "type": "integer",
+                "description": "Optional 1-based start line for partial reading. Default is 1.",
+            },
+            "end_line": {
+                "type": "integer",
+                "description": "Optional 1-based end line for partial reading. If omitted, read to the end.",
+            },
+            "max_chars": {
+                "type": "integer",
+                "description": "Maximum number of characters to return. Default is 20000.",
+            },
+        },
+        "required": ["path"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        """Forward ``cfg`` to the ``ToolBase`` constructor."""
+        super().__init__(cfg)
+
+    def _read_text_file(self, path: Path) -> str:
+        """Return the file's text as produced by ``read_text_lossy``."""
+        return read_text_lossy(path)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Read the requested file and return metadata lines followed by its content.
+
+        ``kwargs["workspace_root"]``, when present, is the base used to resolve
+        relative paths. The result is either a ``[Read] ...`` error message or a
+        block of ``key: value`` metadata, a ``content:`` marker, and the text.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[Read] {exc}"
+        base_root = kwargs.get("workspace_root")
+
+        # Coerce every numeric argument in one place so any bad value yields
+        # the same message.
+        try:
+            start_line = int(params.get("start_line", 1))
+            requested_end = params.get("end_line")
+            end_line = None if requested_end is None else int(requested_end)
+            max_chars = int(params.get("max_chars", 20000))
+        except (TypeError, ValueError):
+            return "[Read] start_line, end_line, and max_chars must be integers when provided."
+
+        try:
+            path = resolve_file_path(params["path"], base_root=base_root)
+        except ValueError as exc:
+            return f"[Read] Blocked or invalid path: {exc}"
+
+        # Guards run in a fixed order: path problems first, then argument ranges.
+        if not path.exists():
+            return f"[Read] File not found: {path}"
+        if not path.is_file():
+            return f"[Read] Path is not a file: {path}"
+        suffix = path.suffix.lower()
+        if suffix == ".pdf":
+            return f"[Read] PDF files are not supported by Read. Use ReadPDF instead: {path}"
+        if suffix in IMAGE_SUFFIXES:
+            return f"[Read] Image files are not supported by Read. Use ReadImage instead: {path}"
+        if start_line < 1:
+            return "[Read] start_line must be >= 1."
+        if end_line is not None and end_line < start_line:
+            return "[Read] end_line must be >= start_line."
+        if max_chars <= 0:
+            return "[Read] max_chars must be > 0."
+
+        try:
+            text = self._read_text_file(path)
+        except OSError as exc:
+            return f"[Read] Error reading file: {exc}"
+
+        lines = text.splitlines()
+        # start_line is 1-based and end_line is inclusive, hence the slice bounds.
+        content = "\n".join(lines[start_line - 1:end_line])
+        truncated = len(content) > max_chars
+        if truncated:
+            content = content[:max_chars]
+
+        reported_end = len(lines) if end_line is None else end_line
+        header = "\n".join(
+            (
+                f"path: {path}",
+                "source_type: text",
+                f"start_line: {start_line}",
+                f"end_line: {reported_end}",
+                f"total_lines: {len(lines)}",
+                f"truncated: {str(truncated).lower()}",
+            )
+        )
+        return f"{header}\ncontent:\n{content}"
+
+
+class ReadPDF(ToolBase):
+    """Tool that extracts text, and any extracted image paths, from a local PDF.
+
+    Parsing is delegated to ``structai.read_pdf``, imported lazily inside
+    ``call`` so a missing dependency yields an error message instead of an
+    import failure. Failures handled inside ``call`` come back as
+    ``[ReadPDF] ...`` strings.
+    """
+
+    name = "ReadPDF"
+    description = "Read a local PDF file and return extracted text. When the PDF parser extracts local image assets, also return their local paths so downstream steps can inspect the actual figure files with ReadImage."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "path": {
+                "type": "string",
+                "description": "The local PDF path to read. Relative paths are resolved from the current workspace.",
+            },
+            "max_chars": {
+                "type": "integer",
+                "description": "Maximum number of characters to return. Default is 20000.",
+            },
+            "max_image_paths": {
+                "type": "integer",
+                "description": "Maximum number of extracted image paths to list. Default is 20.",
+            },
+        },
+        "required": ["path"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        """Forward ``cfg`` to the ``ToolBase`` constructor."""
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Parse the PDF and return metadata, optional image paths, and text content.
+
+        ``kwargs["workspace_root"]``, when present, is the base used to resolve
+        the PDF path and to validate extracted image paths.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[ReadPDF] {exc}"
+        base_root = kwargs.get("workspace_root")
+
+        try:
+            max_chars = int(params.get("max_chars", 20000))
+            max_image_paths = int(params.get("max_image_paths", 20))
+        except (TypeError, ValueError):
+            return "[ReadPDF] max_chars and max_image_paths must be integers."
+
+        try:
+            path = resolve_file_path(params["path"], base_root=base_root)
+        except ValueError as exc:
+            return f"[ReadPDF] Blocked or invalid path: {exc}"
+
+        # Guards run in a fixed order: path problems first, then argument ranges.
+        if not path.exists():
+            return f"[ReadPDF] File not found: {path}"
+        if not path.is_file():
+            return f"[ReadPDF] Path is not a file: {path}"
+        if path.suffix.lower() != ".pdf":
+            return f"[ReadPDF] File is not a PDF: {path}"
+        if max_chars <= 0:
+            return "[ReadPDF] max_chars must be > 0."
+        if max_image_paths <= 0:
+            return "[ReadPDF] max_image_paths must be > 0."
+
+        try:
+            from structai import read_pdf as structai_read_pdf
+        except ImportError:
+            return "[ReadPDF] Missing required dependency: structai. Install requirements and configure MINERU_TOKEN to enable PDF reading."
+
+        try:
+            parsed = structai_read_pdf(str(path))
+            # A list result is reduced to its first entry (or None when empty).
+            if isinstance(parsed, list):
+                parsed = parsed[0] if parsed else None
+            if not isinstance(parsed, dict):
+                raise ValueError(f"unexpected pdf result type: {type(parsed)}")
+            text = parsed.get("text", "")
+            if not isinstance(text, str):
+                raise ValueError("PDF text must be a string")
+            raw_img_paths = parsed.get("img_paths", []) or []
+            if not isinstance(raw_img_paths, list):
+                raise ValueError("PDF img_paths must be a list when present")
+            if not (text.strip() or raw_img_paths):
+                raise ValueError("PDF text is empty and no extracted images were found")
+        except (OSError, ValueError, TypeError) as exc:
+            return f"[ReadPDF] Error reading PDF: {exc}"
+
+        # Keep only image paths that are non-blank strings and pass validation;
+        # relative entries are interpreted relative to the PDF's directory.
+        image_paths: list[str] = []
+        for entry in raw_img_paths:
+            if not (isinstance(entry, str) and entry.strip()):
+                continue
+            image_path = Path(entry).expanduser()
+            if not image_path.is_absolute():
+                image_path = (path.parent / image_path).resolve()
+            try:
+                checked = validate_tool_path(image_path, "ReadPDF extracted image access", base_root=base_root)
+            except ValueError:
+                continue
+            image_paths.append(str(checked))
+
+        truncated = len(text) > max_chars
+        content = text[:max_chars] if truncated else text
+        shown_paths = image_paths[:max_image_paths]
+        paths_truncated = len(image_paths) > len(shown_paths)
+
+        sections = [
+            "\n".join(
+                (
+                    f"path: {path}",
+                    "source_type: pdf",
+                    f"total_lines: {len(text.splitlines())}",
+                    f"truncated: {str(truncated).lower()}",
+                    f"image_count: {len(image_paths)}",
+                    f"image_paths_listed: {len(shown_paths)}",
+                    f"image_paths_truncated: {str(paths_truncated).lower()}",
+                )
+            )
+        ]
+        if shown_paths:
+            sections.append("image_paths:\n" + "\n".join(shown_paths))
+        sections.append("content:\n" + content)
+        return "\n".join(sections)
+
+
+class ReadImage(ToolBase):
+    name = "ReadImage"
+    description = "Read a local image file and return metadata. In the main agent runtime, the image is attached to the llm api request as an image content part instead of being inlined as ordinary conversation text."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "path": {
+                "type": "string",
+                "description": "The local image path to read. Relative paths are resolved from the current workspace.",
+            },
+        },
+        "required": ["path"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        """Forward ``cfg`` to the ``ToolBase`` constructor; no extra state is set here."""
+        super().__init__(cfg)
+
+    def _build_llm_attachment(self, image: Image.Image) -> tuple[bytes, int, int]:
+        """Encode ``image`` as a JPEG that fits the configured byte budget.
+
+        Limits come from the ``LLM_IMAGE_MAX_EDGE``, ``LLM_IMAGE_MAX_BYTES``, and
+        ``LLM_IMAGE_JPEG_QUALITY`` environment variables, falling back to the
+        module defaults. A copy of the image is bounded to the maximum edge and
+        converted to RGB unless it is already RGB or L. JPEG quality is then
+        lowered in steps of 10 down to ``MIN_LLM_IMAGE_JPEG_QUALITY``; if the
+        result is still too large the image is shrunk to 85% and the quality
+        sweep starts over.
+
+        Returns:
+            ``(jpeg_bytes, width, height)`` of the encoded attachment.
+
+        Raises:
+            ValueError: if an environment variable is not an integer, or if the
+                image still exceeds the byte budget once its longest edge has
+                reached ``MIN_LLM_IMAGE_EDGE``.
+        """
+        max_edge = int(os.getenv("LLM_IMAGE_MAX_EDGE", str(DEFAULT_LLM_IMAGE_MAX_EDGE)))
+        max_bytes = int(os.getenv("LLM_IMAGE_MAX_BYTES", str(DEFAULT_LLM_IMAGE_MAX_BYTES)))
+        quality = int(os.getenv("LLM_IMAGE_JPEG_QUALITY", str(DEFAULT_LLM_IMAGE_JPEG_QUALITY)))
+
+        attachment = image.copy()
+        if max(attachment.size) > max_edge:
+            attachment.thumbnail((max_edge, max_edge), Image.Resampling.LANCZOS)
+        if attachment.mode not in {"RGB", "L"}:
+            attachment = attachment.convert("RGB")
+
+        shrink_ratio = 0.85
+        while True:
+            # Quality sweep at the current resolution.
+            current_quality = quality
+            while True:
+                buffer = io.BytesIO()
+                attachment.save(buffer, format="JPEG", quality=current_quality, optimize=True)
+                payload = buffer.getvalue()
+                if len(payload) <= max_bytes:
+                    return payload, attachment.size[0], attachment.size[1]
+                if current_quality <= MIN_LLM_IMAGE_JPEG_QUALITY:
+                    break
+                current_quality = max(current_quality - 10, MIN_LLM_IMAGE_JPEG_QUALITY)
+
+            width, height = attachment.size
+            long_edge = max(width, height)
+            if long_edge <= MIN_LLM_IMAGE_EDGE:
+                raise ValueError(
+                    f"compressed image attachment still exceeds LLM_IMAGE_MAX_BYTES={max_bytes}"
+                )
+
+            # Scale both edges by the same factor, flooring only the LONG edge at
+            # MIN_LLM_IMAGE_EDGE. Clamping each edge independently would upscale
+            # a short edge below the floor and distort the aspect ratio.
+            target_long_edge = max(int(long_edge * shrink_ratio), MIN_LLM_IMAGE_EDGE)
+            scale = target_long_edge / long_edge
+            next_size = (max(round(width * scale), 1), max(round(height * scale), 1))
+            if next_size == (width, height):
+                # Safeguard against a non-shrinking step, which would loop forever.
+                raise ValueError(
+                    f"compressed image attachment still exceeds LLM_IMAGE_MAX_BYTES={max_bytes}"
+                )
+            attachment = attachment.resize(next_size, Image.Resampling.LANCZOS)
+
+    def _read_image_artifact(self, params: Union[str, dict], **kwargs) -> Union[str, dict[str, Any]]:
+        """Load an image and build the artifact dict shared by ``call`` and ``call_for_llm``.
+
+        ``kwargs["workspace_root"]``, when present, is the base used to resolve
+        relative paths.
+
+        Returns:
+            A ``[ReadImage] ...`` error string on failure; otherwise a dict with
+            the source image metadata (``path``, ``format``, ``mode``, ``width``,
+            ``height``, ``mime_type``, ``byte_count``), the compressed attachment
+            metadata (``llm_attachment_*``), and ``data_url`` holding the
+            attachment as a base64 ``data:image/jpeg`` URL.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[ReadImage] {exc}"
+        base_root = kwargs.get("workspace_root")
+
+        try:
+            path = resolve_file_path(params["path"], base_root=base_root)
+        except ValueError as exc:
+            return f"[ReadImage] Blocked or invalid path: {exc}"
+
+        if not path.exists():
+            return f"[ReadImage] File not found: {path}"
+        if not path.is_file():
+            return f"[ReadImage] Path is not a file: {path}"
+
+        try:
+            with Image.open(path) as image:
+                image.load()
+                format_name = image.format or "unknown"
+                width, height = image.size
+                mode = image.mode
+                # Ask the filesystem for the size rather than reading the whole
+                # file into memory a second time just to count its bytes.
+                byte_count = path.stat().st_size
+                attachment_bytes, attachment_width, attachment_height = self._build_llm_attachment(image)
+        except (OSError, ValueError) as exc:
+            return f"[ReadImage] Error reading image: {exc}"
+
+        mime_type = Image.MIME.get(format_name.upper(), None) if isinstance(format_name, str) else None
+        if not mime_type:
+            # Fall back to the file suffix when the detected format has no MIME entry.
+            suffix_mime_types = {
+                ".jpg": "image/jpeg",
+                ".jpeg": "image/jpeg",
+                ".png": "image/png",
+                ".gif": "image/gif",
+                ".webp": "image/webp",
+                ".tif": "image/tiff",
+                ".tiff": "image/tiff",
+                ".bmp": "image/bmp",
+            }
+            mime_type = suffix_mime_types.get(path.suffix.lower(), "application/octet-stream")
+
+        encoded = base64.b64encode(attachment_bytes).decode("ascii")
+        data_url = f"data:image/jpeg;base64,{encoded}"
+        return {
+            "kind": "image_tool_result",
+            "path": str(path),
+            "source_type": "image",
+            "format": format_name,
+            "mode": mode,
+            "width": width,
+            "height": height,
+            "mime_type": mime_type,
+            "byte_count": byte_count,
+            "llm_attachment_format": "JPEG",
+            "llm_attachment_width": attachment_width,
+            "llm_attachment_height": attachment_height,
+            "llm_attachment_byte_count": len(attachment_bytes),
+            "data_url": data_url,
+        }
+
+    @staticmethod
+    def _metadata_text(artifact: dict[str, Any]) -> str:
+        """Render an image artifact's metadata as newline-separated ``key: value`` lines.
+
+        Args:
+            artifact: Mapping that contains every key listed in ``field_order``.
+
+        Returns:
+            One line per field, in a fixed order, followed by the constant
+            ``llm_image_attached: true`` line.
+
+        Raises:
+            KeyError: If ``artifact`` lacks one of the expected keys.
+        """
+        field_order = (
+            "path",
+            "source_type",
+            "format",
+            "mime_type",
+            "mode",
+            "width",
+            "height",
+            "byte_count",
+            "llm_attachment_format",
+            "llm_attachment_width",
+            "llm_attachment_height",
+            "llm_attachment_byte_count",
+        )
+        rows = [f"{key}: {artifact[key]}" for key in field_order]
+        rows.append("llm_image_attached: true")
+        return "\n".join(rows)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Read the image and return its metadata as plain text.
+
+        When ``_read_image_artifact`` returns a string (its error-message form),
+        that string is returned unchanged; otherwise the artifact is rendered
+        with ``_metadata_text``.
+        """
+        outcome = self._read_image_artifact(params, **kwargs)
+        return outcome if isinstance(outcome, str) else self._metadata_text(outcome)
+
+    def call_for_llm(self, params: Union[str, dict], **kwargs) -> Union[str, dict[str, Any]]:
+        """Read the image and return a structured result that carries the image itself.
+
+        Returns:
+            The string returned by ``_read_image_artifact`` when it reports a
+            failure; otherwise a dict with ``kind``, the metadata ``text``, the
+            artifact's metadata fields, and ``image_url`` set to the artifact's
+            ``data_url``.
+        """
+        outcome = self._read_image_artifact(params, **kwargs)
+        if isinstance(outcome, str):
+            return outcome
+
+        # Fields copied verbatim from the artifact, in the order they are emitted.
+        copied_fields = (
+            "path",
+            "source_type",
+            "format",
+            "mime_type",
+            "mode",
+            "width",
+            "height",
+            "byte_count",
+            "llm_attachment_format",
+            "llm_attachment_width",
+            "llm_attachment_height",
+            "llm_attachment_byte_count",
+        )
+        payload: dict[str, Any] = {
+            "kind": "image_tool_result",
+            "text": self._metadata_text(outcome),
+        }
+        for field_name in copied_fields:
+            payload[field_name] = outcome[field_name]
+        payload["image_url"] = outcome["data_url"]
+        return payload
+
+
+class Glob(ToolBase):
+    """Tool that lists files (and optionally directories) matching a glob pattern.
+
+    Every match is resolved and passed through ``validate_tool_path``; matches
+    that the validator rejects are left out of the result without an error.
+    """
+
+    name = "Glob"
+    description = "Find local files or directories by glob pattern inside the workspace."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "pattern": {
+                "type": "string",
+                "description": "A pathlib-style glob pattern such as '**/*.py' or '*.md'.",
+            },
+            "path": {
+                "type": "string",
+                "description": "Optional search root. Defaults to the current workspace root.",
+            },
+            "include_dirs": {
+                "type": "boolean",
+                "description": "Whether to include directories in results. Default is false.",
+            },
+            "max_results": {
+                "type": "integer",
+                "description": "Maximum number of matched paths to return. Default is 200.",
+            },
+        },
+        "required": ["pattern"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Run the glob search and return a text report.
+
+        Args:
+            params: Tool arguments (JSON string or dict) with ``pattern`` and the
+                optional ``path``, ``include_dirs`` and ``max_results`` keys.
+            **kwargs: ``workspace_root`` may be supplied to override the root
+                used for path resolution and validation.
+
+        Returns:
+            A metadata header (root, pattern, include_dirs, match_count,
+            truncated) followed by ``results:`` and one resolved path per line,
+            or a ``[Glob] ...`` message describing why the search was not run.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[Glob] {exc}"
+        base_root = kwargs.get("workspace_root")
+
+        # A missing or non-string pattern is reported instead of raising
+        # KeyError/AttributeError out of the tool.
+        raw_pattern = params.get("pattern")
+        if not isinstance(raw_pattern, str) or not raw_pattern.strip():
+            return "[Glob] pattern must be a non-empty string."
+        pattern = raw_pattern.strip()
+
+        search_root_value = str(params.get("path") or ".")
+        include_dirs = bool(params.get("include_dirs", False))
+        try:
+            max_results = int(params.get("max_results", DEFAULT_GLOB_MAX_RESULTS))
+        except (TypeError, ValueError):
+            return "[Glob] max_results must be an integer."
+        if max_results <= 0:
+            return "[Glob] max_results must be > 0."
+
+        try:
+            search_root = resolve_search_root(search_root_value, base_root=base_root)
+        except ValueError as exc:
+            return f"[Glob] Blocked or invalid path: {exc}"
+
+        if not search_root.exists():
+            return f"[Glob] Search root not found: {search_root}"
+        if not search_root.is_dir():
+            return f"[Glob] Search root is not a directory: {search_root}"
+
+        try:
+            # sorted() drains the glob iterator here so that pattern errors
+            # surface inside this try block and the output order is stable.
+            raw_matches = sorted(search_root.glob(pattern))
+        except (OSError, ValueError, NotImplementedError) as exc:
+            # pathlib raises NotImplementedError for non-relative patterns.
+            return f"[Glob] Invalid glob pattern or filesystem error: {exc}"
+
+        matches: list[str] = []
+        truncated = False
+        for candidate in raw_matches:
+            try:
+                resolved = validate_tool_path(candidate.resolve(strict=False), "Glob access", base_root=base_root or search_root)
+            except ValueError:
+                continue
+            is_directory = resolved.is_dir()
+            if is_directory and not include_dirs:
+                continue
+            if not (is_directory or resolved.is_file()):
+                continue
+            if len(matches) >= max_results:
+                # Only flag truncation when a further reportable match exists;
+                # filtered-out entries must not count towards it.
+                truncated = True
+                break
+            matches.append(str(resolved))
+
+        meta = [
+            f"root: {search_root}",
+            f"pattern: {pattern}",
+            f"include_dirs: {str(include_dirs).lower()}",
+            f"match_count: {len(matches)}",
+            f"truncated: {str(truncated).lower()}",
+        ]
+        return "\n".join(meta) + "\nresults:\n" + "\n".join(matches)
+
+
+class Grep(ToolBase):
+    """Tool that searches text files for a regular expression.
+
+    PDF files, files with an image suffix and files that ``_is_probably_binary``
+    flags are skipped. Output is limited both by number of matching lines and
+    by total characters.
+    """
+
+    name = "Grep"
+    description = "Search local text files for a regex pattern and return matching lines with file paths and line numbers."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "pattern": {
+                "type": "string",
+                "description": "A regular expression pattern to search for.",
+            },
+            "path": {
+                "type": "string",
+                "description": "Optional file or directory path to search. Defaults to the current workspace root.",
+            },
+            "glob": {
+                "type": "string",
+                "description": "Optional pathlib-style glob filter used when searching a directory. Default is '**/*'.",
+            },
+            "case_sensitive": {
+                "type": "boolean",
+                "description": "Whether the regex match should be case-sensitive. Default is false.",
+            },
+            "max_results": {
+                "type": "integer",
+                "description": "Maximum number of matching lines to return. Default is 100.",
+            },
+            "max_chars": {
+                "type": "integer",
+                "description": "Maximum number of characters to return. Default is 20000.",
+            },
+        },
+        "required": ["pattern"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def _iter_candidate_files(self, root: Path, glob_pattern: str, *, base_root: Optional[Path]) -> list[Path]:
+        """Return the sorted list of files to scan.
+
+        A file ``root`` is returned as the only candidate. For a directory,
+        every ``glob_pattern`` match is resolved and validated; entries that
+        fail validation or are not regular files are dropped.
+        """
+        if root.is_file():
+            return [root]
+        candidates: list[Path] = []
+        for candidate in root.glob(glob_pattern):
+            try:
+                resolved = validate_tool_path(candidate.resolve(strict=False), "Grep access", base_root=base_root or root)
+            except ValueError:
+                continue
+            if resolved.is_file():
+                candidates.append(resolved)
+        return sorted(candidates)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Search for the pattern and return a text report.
+
+        Args:
+            params: Tool arguments (JSON string or dict) with ``pattern`` and the
+                optional ``path``, ``glob``, ``case_sensitive``, ``max_results``
+                and ``max_chars`` keys.
+            **kwargs: ``workspace_root`` may be supplied to override the root
+                used for path resolution and validation.
+
+        Returns:
+            A metadata header followed by ``results:`` and one
+            ``path:line: text`` entry per matching line, or a ``[Grep] ...``
+            message describing why the search was not run.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[Grep] {exc}"
+        base_root = kwargs.get("workspace_root")
+
+        # A missing or non-string pattern is reported instead of raising
+        # KeyError/AttributeError out of the tool.
+        raw_pattern = params.get("pattern")
+        if not isinstance(raw_pattern, str) or not raw_pattern.strip():
+            return "[Grep] pattern must be a non-empty string."
+        pattern = raw_pattern.strip()
+
+        search_root_value = str(params.get("path") or ".")
+        glob_pattern = str(params.get("glob") or "**/*").strip() or "**/*"
+        case_sensitive = bool(params.get("case_sensitive", False))
+        try:
+            max_results = int(params.get("max_results", DEFAULT_GREP_MAX_RESULTS))
+            max_chars = int(params.get("max_chars", DEFAULT_GREP_MAX_CHARS))
+        except (TypeError, ValueError):
+            return "[Grep] max_results and max_chars must be integers."
+        if max_results <= 0:
+            return "[Grep] max_results must be > 0."
+        if max_chars <= 0:
+            return "[Grep] max_chars must be > 0."
+
+        flags = 0 if case_sensitive else re.IGNORECASE
+        try:
+            compiled = re.compile(pattern, flags)
+        except re.error as exc:
+            return f"[Grep] Invalid regex pattern: {exc}"
+
+        try:
+            search_root = resolve_search_root(search_root_value, base_root=base_root)
+        except ValueError as exc:
+            return f"[Grep] Blocked or invalid path: {exc}"
+
+        if not search_root.exists():
+            return f"[Grep] Search root not found: {search_root}"
+        if not search_root.is_file() and not search_root.is_dir():
+            return f"[Grep] Search root is not a file or directory: {search_root}"
+
+        try:
+            candidates = self._iter_candidate_files(search_root, glob_pattern, base_root=base_root)
+        except (OSError, ValueError, NotImplementedError) as exc:
+            # pathlib rejects invalid or non-relative glob filters by raising.
+            return f"[Grep] Invalid glob pattern or filesystem error: {exc}"
+
+        matches: list[str] = []
+        used_chars = 0  # length of "\n".join(matches), maintained incrementally
+        files_scanned = 0
+        truncated = False
+        for candidate in candidates:
+            suffix = candidate.suffix.lower()
+            if suffix == ".pdf" or suffix in IMAGE_SUFFIXES:
+                continue
+            if _is_probably_binary(candidate):
+                continue
+            try:
+                with candidate.open("r", encoding="utf-8", errors="replace") as handle:
+                    files_scanned += 1
+                    for line_index, raw_line in enumerate(handle, start=1):
+                        line = raw_line.rstrip("\n")
+                        if not compiled.search(line):
+                            continue
+                        entry = f"{candidate}:{line_index}: {line}"
+                        # One extra character for the joining newline, except
+                        # for the very first entry.
+                        projected_length = used_chars + len(entry) + (1 if matches else 0)
+                        if projected_length > max_chars:
+                            truncated = True
+                            break
+                        matches.append(entry)
+                        used_chars = projected_length
+                        if len(matches) >= max_results:
+                            truncated = True
+                            break
+            except OSError:
+                # Unreadable files are skipped so one bad file cannot abort the search.
+                continue
+            if truncated:
+                break
+
+        meta = [
+            f"root: {search_root}",
+            f"pattern: {pattern}",
+            f"glob: {glob_pattern}",
+            f"case_sensitive: {str(case_sensitive).lower()}",
+            f"files_scanned: {files_scanned}",
+            f"match_count: {len(matches)}",
+            f"truncated: {str(truncated).lower()}",
+        ]
+        return "\n".join(meta) + "\nresults:\n" + "\n".join(matches)
+
+
+class Write(ToolBase):
+    """Tool that creates (or, when allowed, overwrites) a UTF-8 text file."""
+
+    name = "Write"
+    description = "Create a local text file with full content. Parent directories are created automatically."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "path": {
+                "type": "string",
+                "description": "The local file path to create.",
+            },
+            "content": {
+                "type": "string",
+                "description": "The full file content to write.",
+            },
+            "overwrite": {
+                "type": "boolean",
+                "description": "Whether to overwrite an existing file. Default is false.",
+            },
+        },
+        "required": ["path", "content"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Write ``content`` to ``path`` and return a status message.
+
+        Args:
+            params: Tool arguments (JSON string or dict) with ``path``,
+                ``content`` and the optional ``overwrite`` flag.
+            **kwargs: ``workspace_root`` may be supplied; otherwise
+                ``workspace_root()`` provides the base for path validation.
+
+        Returns:
+            ``[Write] Wrote file: ...`` on success, otherwise a ``[Write] ...``
+            message describing the problem. Problems are reported as text
+            rather than raised.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[Write] {exc}"
+
+        # Report missing or mistyped arguments as tool output instead of letting
+        # KeyError (missing key) or TypeError (non-string content) escape.
+        raw_path = params.get("path")
+        if raw_path is None:
+            return "[Write] 'path' is required."
+        content = params.get("content")
+        if not isinstance(content, str):
+            return "[Write] 'content' must be a string."
+
+        try:
+            base_root = kwargs.get("workspace_root") or workspace_root()
+            path = validate_tool_path(raw_path, "Write access", base_root=base_root)
+        except ValueError as exc:
+            return f"[Write] {exc}"
+
+        overwrite = bool(params.get("overwrite", False))
+
+        if path.exists() and not overwrite:
+            return f"[Write] File already exists and overwrite is false: {path}"
+
+        try:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(content, encoding="utf-8")
+            return f"[Write] Wrote file: {path}"
+        except OSError as exc:
+            return f"[Write] Error writing file: {exc}"
+
+
+class Edit(ToolBase):
+    """Tool that applies unified-diff style hunks to a single text file.
+
+    Hunks are located by matching their context/removed lines against the file,
+    searching forward from the end of the previously applied hunk. The old-file
+    line number in the hunk header is used as a placement hint.
+    """
+
+    name = "Edit"
+    description = "Edit a local text file using unified diff style hunks. The patch must describe the exact line-level changes to apply."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "path": {
+                "type": "string",
+                "description": "The local file path to edit.",
+            },
+            "patch": {
+                "type": "string",
+                "description": "A unified diff style patch containing one or more hunks for this file. Include hunk headers such as @@ -1,2 +1,2 @@.",
+            },
+        },
+        "required": ["path", "patch"],
+    }
+
+    # Captures the old-file start line and optional line count of a hunk header.
+    _HUNK_HEADER_RE = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+\d+(?:,\d+)? @@")
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def _parse_unified_patch(self, patch_text: str) -> list[dict]:
+        """Split ``patch_text`` into hunks.
+
+        Returns:
+            A list of ``{"header": str, "lines": [(prefix, content), ...]}``
+            dicts, where ``prefix`` is ``" "``, ``"+"`` or ``"-"``.
+
+        Raises:
+            ValueError: If a line inside a hunk has an unsupported prefix or the
+                patch contains no hunk header at all.
+        """
+        lines = patch_text.splitlines()
+        hunks: list[dict] = []
+        current_hunk = None
+        # Number of bare empty lines at the end of the current hunk. They are
+        # dropped when the hunk closes because they are usually the patch's own
+        # trailing newlines rather than context.
+        blank_tail = 0
+
+        def close_current() -> None:
+            if current_hunk is None:
+                return
+            if blank_tail:
+                del current_hunk["lines"][-blank_tail:]
+            hunks.append(current_hunk)
+
+        for index, line in enumerate(lines):
+            if line.startswith("@@ "):
+                close_current()
+                current_hunk = {"header": line, "lines": []}
+                blank_tail = 0
+                continue
+            if current_hunk is None:
+                # Everything before the first hunk header (file headers, "diff"
+                # and "index" lines) is ignored.
+                continue
+            # Inside a hunk, "--- x" may be the removal of a line "-- x" and
+            # "+++ x" the addition of "++ x". Only an adjacent "---"/"+++" pair
+            # is treated as a file header and skipped.
+            if line.startswith("--- ") and index + 1 < len(lines) and lines[index + 1].startswith("+++ "):
+                continue
+            if line.startswith("+++ ") and index > 0 and lines[index - 1].startswith("--- "):
+                continue
+            if line == "":
+                # An empty context line whose leading space was stripped.
+                current_hunk["lines"].append((" ", ""))
+                blank_tail += 1
+                continue
+            if line.startswith((" ", "+", "-")):
+                current_hunk["lines"].append((line[:1], line[1:]))
+                blank_tail = 0
+                continue
+            if line == r"\ No newline at end of file":
+                continue
+            raise ValueError(f"unsupported patch line: {line}")
+
+        close_current()
+
+        if not hunks:
+            raise ValueError("no patch hunks found")
+        return hunks
+
+    def _hunk_position_hint(self, header: str) -> Optional[int]:
+        """Return the 0-based line index the hunk header points at, if parseable.
+
+        For a zero-length old range (``-N,0``) the hunk inserts after line N,
+        so the index is N; otherwise it is N - 1.
+        """
+        matched = self._HUNK_HEADER_RE.match(header)
+        if matched is None:
+            return None
+        old_start = int(matched.group(1))
+        old_count = 1 if matched.group(2) is None else int(matched.group(2))
+        if old_count == 0:
+            return old_start
+        return max(old_start - 1, 0)
+
+    def _apply_hunks(self, original_text: str, hunks: list[dict]) -> tuple[str, int]:
+        """Apply ``hunks`` in order and return ``(updated_text, hunk_count)``.
+
+        The result ends with a newline exactly when ``original_text`` did.
+
+        Raises:
+            ValueError: If a hunk's context/removed lines cannot be found at or
+                after the end of the previous hunk.
+        """
+        original_lines = original_text.splitlines()
+        original_endswith_newline = original_text.endswith("\n")
+        output_lines: list[str] = []
+        cursor = 0
+
+        for hunk_index, hunk in enumerate(hunks, start=1):
+            old_block = [content for prefix, content in hunk["lines"] if prefix in {" ", "-"}]
+            new_block = [content for prefix, content in hunk["lines"] if prefix in {" ", "+"}]
+            block_len = len(old_block)
+            max_start = len(original_lines) - block_len
+            hint = self._hunk_position_hint(hunk["header"])
+
+            start_pos = None
+            if not old_block:
+                # A hunk without context or removals can only be placed by its
+                # header; without a usable header it goes at the cursor.
+                start_pos = cursor if hint is None else min(max(hint, cursor), len(original_lines))
+            elif hint is not None and cursor <= hint <= max_start and original_lines[hint:hint + block_len] == old_block:
+                # Prefer the location named by the header when it matches, so
+                # repeated code is edited at the intended occurrence.
+                start_pos = hint
+            else:
+                for pos in range(cursor, max_start + 1):
+                    if original_lines[pos:pos + block_len] == old_block:
+                        start_pos = pos
+                        break
+
+            if start_pos is None:
+                old_preview = "\n".join(old_block)
+                raise ValueError(f"hunk #{hunk_index} context not found:\n{old_preview}")
+
+            output_lines.extend(original_lines[cursor:start_pos])
+            output_lines.extend(new_block)
+            cursor = start_pos + block_len
+
+        output_lines.extend(original_lines[cursor:])
+        updated_text = "\n".join(output_lines)
+        if original_endswith_newline:
+            updated_text += "\n"
+        return updated_text, len(hunks)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Apply the patch in ``params`` to the target file.
+
+        Args:
+            params: Tool arguments (JSON string or dict) with ``path`` and
+                ``patch``.
+            **kwargs: ``workspace_root`` may be supplied; otherwise
+                ``workspace_root()`` provides the base for path validation.
+
+        Returns:
+            ``[Edit] Updated file: ...`` on success, ``[Edit] No changes
+            applied: ...`` when the patch leaves the text unchanged, otherwise
+            a ``[Edit] ...`` message describing the problem.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[Edit] {exc}"
+
+        # Report missing arguments as tool output instead of raising KeyError.
+        raw_path = params.get("path")
+        if raw_path is None:
+            return "[Edit] 'path' is required."
+        raw_patch = params.get("patch")
+        if raw_patch is None:
+            return "[Edit] 'patch' must be a non-empty unified diff string."
+
+        try:
+            base_root = kwargs.get("workspace_root") or workspace_root()
+            path = validate_tool_path(raw_path, "Edit access", base_root=base_root)
+        except ValueError as exc:
+            return f"[Edit] {exc}"
+
+        patch_text = str(raw_patch)
+
+        if not path.exists():
+            return f"[Edit] File not found: {path}"
+        if not path.is_file():
+            return f"[Edit] Path is not a file: {path}"
+        if not patch_text.strip():
+            return "[Edit] 'patch' must be a non-empty unified diff string."
+
+        try:
+            text = read_text_lossy(path)
+        except OSError as exc:
+            return f"[Edit] Error reading file: {exc}"
+
+        try:
+            hunks = self._parse_unified_patch(patch_text)
+            updated, applied = self._apply_hunks(text, hunks)
+        except ValueError as exc:
+            return f"[Edit] Failed to apply patch: {exc}"
+
+        if updated == text:
+            return f"[Edit] No changes applied: {path}"
+
+        try:
+            path.write_text(updated, encoding="utf-8")
+            return f"[Edit] Updated file: {path}; applied_hunks: {applied}"
+        except OSError as exc:
+            return f"[Edit] Error writing file: {exc}"
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Command-line entry point: run one local file tool and print its result.
+
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
+
+    Returns:
+        Always ``0``; the tool's own result text is printed to stdout.
+    """
+    parser = argparse.ArgumentParser(description="Run local file tools directly.")
+    commands = parser.add_subparsers(dest="tool", required=True)
+
+    read_cmd = commands.add_parser("read", help="Run Read on a text file.")
+    read_cmd.add_argument("path")
+    read_cmd.add_argument("--start-line", type=int, default=1)
+    read_cmd.add_argument("--end-line", type=int)
+    read_cmd.add_argument("--max-chars", type=int, default=20000)
+
+    pdf_cmd = commands.add_parser("pdf", help="Run ReadPDF on a PDF file.")
+    pdf_cmd.add_argument("path")
+    pdf_cmd.add_argument("--max-chars", type=int, default=20000)
+
+    image_cmd = commands.add_parser("image", help="Run ReadImage on an image file.")
+    image_cmd.add_argument("path")
+
+    glob_cmd = commands.add_parser("glob", help="Run Glob to find local files or directories.")
+    glob_cmd.add_argument("pattern")
+    glob_cmd.add_argument("--path", default=".")
+    glob_cmd.add_argument("--include-dirs", action="store_true")
+    glob_cmd.add_argument("--max-results", type=int, default=DEFAULT_GLOB_MAX_RESULTS)
+
+    grep_cmd = commands.add_parser("grep", help="Run Grep to search local text files.")
+    grep_cmd.add_argument("pattern")
+    grep_cmd.add_argument("--path", default=".")
+    grep_cmd.add_argument("--glob", default="**/*")
+    grep_cmd.add_argument("--case-sensitive", action="store_true")
+    grep_cmd.add_argument("--max-results", type=int, default=DEFAULT_GREP_MAX_RESULTS)
+    grep_cmd.add_argument("--max-chars", type=int, default=DEFAULT_GREP_MAX_CHARS)
+
+    write_cmd = commands.add_parser("write", help="Run Write on a text file.")
+    write_cmd.add_argument("path")
+    write_cmd.add_argument("content")
+    write_cmd.add_argument("--overwrite", action="store_true")
+
+    edit_cmd = commands.add_parser("edit", help="Run Edit on a text file.")
+    edit_cmd.add_argument("path")
+    edit_cmd.add_argument("patch")
+
+    parser.add_argument("--workspace-root", help="Optional workspace root override.")
+    args = parser.parse_args(argv)
+
+    load_dotenv(PROJECT_ROOT / ".env")
+    root_override = None
+    if args.workspace_root:
+        root_override = Path(args.workspace_root).expanduser().resolve()
+
+    # Pick the tool instance and build its argument payload; any sub-command
+    # other than the ones listed explicitly falls through to Edit.
+    selected = args.tool
+    if selected == "read":
+        tool = Read()
+        payload = {
+            "path": args.path,
+            "start_line": args.start_line,
+            "end_line": args.end_line,
+            "max_chars": args.max_chars,
+        }
+    elif selected == "pdf":
+        tool = ReadPDF()
+        payload = {"path": args.path, "max_chars": args.max_chars}
+    elif selected == "image":
+        tool = ReadImage()
+        payload = {"path": args.path}
+    elif selected == "glob":
+        tool = Glob()
+        payload = {
+            "pattern": args.pattern,
+            "path": args.path,
+            "include_dirs": args.include_dirs,
+            "max_results": args.max_results,
+        }
+    elif selected == "grep":
+        tool = Grep()
+        payload = {
+            "pattern": args.pattern,
+            "path": args.path,
+            "glob": args.glob,
+            "case_sensitive": args.case_sensitive,
+            "max_results": args.max_results,
+            "max_chars": args.max_chars,
+        }
+    elif selected == "write":
+        tool = Write()
+        payload = {"path": args.path, "content": args.content, "overwrite": args.overwrite}
+    else:
+        tool = Edit()
+        payload = {"path": args.path, "patch": args.patch}
+
+    print(tool.call(payload, workspace_root=root_override))
+    return 0
+
+
+# Script entry point: parse the command-line arguments (without the program
+# name) and exit with the integer that main() returns.
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/agent_base/tools/tool_runtime.py b/agent_base/tools/tool_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed1fd1d4f408793d8ac9592e175529dc54e1b982
--- /dev/null
+++ b/agent_base/tools/tool_runtime.py
@@ -0,0 +1,732 @@
+import argparse
+import atexit
+import itertools
+import os
+import pty
+import re
+import select
+import shutil
+import signal
+import struct
+import subprocess
+import termios
+import threading
+import time
+from pathlib import Path
+from typing import Optional, Union
+import sys
+
+from agent_base.utils import PROJECT_ROOT, load_dotenv
+from agent_base.tools.tooling import (
+    ToolBase,
+    command_safety_issue,
+    sanitized_subprocess_env,
+    validate_tool_path,
+    workspace_root,
+)
+
+
+# Maximum number of characters of unread terminal output a TerminalSession
+# keeps; older output beyond this is dropped and counted as dropped.
+DEFAULT_BUFFER_LIMIT = 200000
+# Default cap on the number of output characters returned by one tool call/read.
+DEFAULT_OUTPUT_CHARS = 20000
+# Default time, in milliseconds, that TerminalSession.read sleeps before
+# collecting pending output.
+DEFAULT_YIELD_MS = 200
+# Runs of at least this many identical consecutive lines are collapsed into one
+# line plus a "repeated" marker.
+REPEAT_COLLAPSE_THRESHOLD = 3
+
+def _default_shell() -> str:
+    """Return the ``bash`` executable found on PATH, or ``/bin/bash`` if none is found."""
+    located = shutil.which("bash")
+    return located if located else "/bin/bash"
+
+
+def _resolve_cwd(path_value: Optional[str], *, base_root: Optional[Path] = None) -> Path:
+    """Return the working directory to use for a command.
+
+    A non-empty ``path_value`` is checked with ``validate_tool_path``; an empty
+    or missing one falls back to the resolved ``base_root``, or to
+    ``workspace_root()`` when no ``base_root`` is given.
+    """
+    if path_value:
+        return validate_tool_path(path_value, "Working directory", base_root=base_root)
+    fallback_root = base_root or workspace_root()
+    return fallback_root.resolve()
+
+
+def _set_terminal_size(fd: int, rows: int, cols: int) -> None:
+    """Best-effort: set the window size of the terminal behind ``fd``.
+
+    A missing ``fcntl`` module or a failing ioctl is ignored.
+    """
+    packed_size = struct.pack("HHHH", rows, cols, 0, 0)
+    try:
+        from fcntl import ioctl
+
+        ioctl(fd, termios.TIOCSWINSZ, packed_size)
+    except (ImportError, OSError):
+        pass
+
+
+def _disable_echo(fd: int) -> None:
+    """Best-effort: clear the ECHO flag of the terminal behind ``fd``.
+
+    A ``termios.error`` (for example when ``fd`` is not a terminal) is ignored.
+    """
+    try:
+        settings = termios.tcgetattr(fd)
+        # Index 3 of the attribute list holds the local-mode flags.
+        settings[3] = settings[3] & ~termios.ECHO
+        termios.tcsetattr(fd, termios.TCSANOW, settings)
+    except termios.error:
+        pass
+
+
+def _collapse_repeated_lines(text: str, *, threshold: int = REPEAT_COLLAPSE_THRESHOLD) -> str:
+    """Shorten runs of identical consecutive lines.
+
+    A run of at least ``threshold`` equal lines (compared including their line
+    endings) is replaced by one copy of the line followed by a marker stating
+    how many further repetitions were removed. Shorter runs are kept as they are.
+    """
+    if not text:
+        return text
+    pieces: list[str] = []
+    for line, run in itertools.groupby(text.splitlines(keepends=True)):
+        run_length = sum(1 for _ in run)
+        if run_length >= threshold:
+            pieces.append(line)
+            pieces.append(f"[previous line repeated {run_length - 1} additional times]\n")
+        else:
+            pieces.extend([line] * run_length)
+    return "".join(pieces)
+
+
+def _bounded_output(text: str, *, max_output_chars: int = DEFAULT_OUTPUT_CHARS) -> str:
+    """Collapse repeated lines in ``text`` and cap the result at ``max_output_chars``.
+
+    When the collapsed text is too long it is cut and a marker reporting the
+    number of characters actually left out is appended. The marker counts
+    towards the limit; only when the limit is smaller than the marker itself
+    does the result exceed ``max_output_chars``.
+    """
+    if not text:
+        return text
+    compressed = _collapse_repeated_lines(text)
+    if len(compressed) <= max_output_chars:
+        return compressed
+
+    total = len(compressed)
+    keep = max_output_chars
+    # The marker takes space away from the kept text, which raises the omitted
+    # count and can lengthen the marker in turn. Iterate to the fixed point:
+    # ``keep`` only ever shrinks and is bounded by 0, so this terminates.
+    while True:
+        suffix = f"\n[output truncated: omitted {total - keep} chars]\n"
+        fitted = max(0, max_output_chars - len(suffix))
+        if fitted == keep:
+            break
+        keep = fitted
+    return compressed[:keep] + suffix
+
+
+class Bash(ToolBase):
+    """Tool that runs one bash command and reports its exit code and output.
+
+    The command runs in its own session so that, on timeout, the whole process
+    group can be killed rather than just the shell.
+    """
+
+    name = "Bash"
+    description = (
+        "Run a local bash command and return stdout and stderr. This is the primary local execution tool for "
+        "shell commands, path operations, ripgrep, git, temporary python3 heredoc scripts, parsing, validation, "
+        "and local result transformation."
+    )
+    parameters = {
+        "type": "object",
+        "properties": {
+            "command": {
+                "type": "string",
+                "description": "The shell command to execute.",
+            },
+            "timeout": {
+                "type": "integer",
+                "description": "Timeout in seconds. Default is 30.",
+            },
+            "workdir": {
+                "type": "string",
+                "description": "Optional working directory for the command. Defaults to the current workspace root.",
+            },
+            "max_output_chars": {
+                "type": "integer",
+                "description": f"Maximum combined stdout/stderr characters returned after repeated-line compression. Default is {DEFAULT_OUTPUT_CHARS}.",
+            },
+        },
+        "required": ["command"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    @staticmethod
+    def _kill_process_group(proc: subprocess.Popen) -> None:
+        """Kill the command's process group and reap the shell, ignoring races."""
+        try:
+            # The child was started with start_new_session=True, so its pid is
+            # also its process-group id.
+            os.killpg(proc.pid, signal.SIGKILL)
+        except ProcessLookupError:
+            pass
+        except OSError:
+            proc.kill()
+        try:
+            proc.wait(timeout=1)
+        except (subprocess.TimeoutExpired, OSError):
+            pass
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Execute the command and return a text report.
+
+        Args:
+            params: Tool arguments (JSON string or dict) with ``command`` and the
+                optional ``timeout``, ``workdir`` and ``max_output_chars`` keys.
+            **kwargs: ``workspace_root`` overrides the root used for the working
+                directory and environment; ``runtime_deadline`` (an epoch
+                timestamp compared against ``time.time()``) shortens the
+                timeout so the command cannot outlive the agent run.
+
+        Returns:
+            ``exit_code: N`` followed by ``stdout:`` / ``stderr:`` sections for
+            non-empty streams (each stream is bounded separately), or a
+            ``[Bash] ...`` message when the command was not run or timed out.
+        """
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[Bash] {exc}"
+        base_root = kwargs.get("workspace_root")
+        runtime_deadline = kwargs.get("runtime_deadline")
+
+        # Report a missing command as tool output instead of raising KeyError.
+        if params.get("command") is None:
+            return "[Bash] 'command' is required."
+        command = str(params["command"])
+        workdir = params.get("workdir")
+        try:
+            timeout = int(params.get("timeout", 30))
+            max_output_chars = int(params.get("max_output_chars", DEFAULT_OUTPUT_CHARS))
+        except (TypeError, ValueError):
+            return "[Bash] timeout and max_output_chars must be integers."
+
+        issue = command_safety_issue(command)
+        if issue:
+            return f"[Bash] Blocked by safety policy: {issue}"
+
+        try:
+            cwd = _resolve_cwd(workdir, base_root=base_root)
+        except ValueError as exc:
+            return f"[Bash] Invalid or blocked working directory: {exc}"
+        if not cwd.exists():
+            return f"[Bash] Working directory does not exist: {cwd}"
+        if not cwd.is_dir():
+            return f"[Bash] Working directory is not a directory: {cwd}"
+        if timeout <= 0:
+            return "[Bash] timeout must be > 0."
+        if max_output_chars <= 0:
+            return "[Bash] max_output_chars must be > 0."
+
+        effective_timeout: float = float(timeout)
+        if runtime_deadline is not None:
+            remaining = float(runtime_deadline) - time.time()
+            if remaining <= 0:
+                return "[Bash] Agent runtime limit reached before command execution."
+            effective_timeout = min(effective_timeout, max(remaining, 0.001))
+
+        try:
+            proc = subprocess.Popen(
+                command,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                # Commands may print arbitrary bytes; never fail on decoding.
+                errors="replace",
+                cwd=str(cwd),
+                env=sanitized_subprocess_env(base_root=base_root),
+                executable=_default_shell(),
+                # Own session/process group so a timeout can kill descendants
+                # that would otherwise keep the output pipes open.
+                start_new_session=True,
+            )
+        except (OSError, ValueError, subprocess.SubprocessError) as exc:
+            return f"[Bash] Error executing command: {exc}"
+
+        try:
+            stdout_text, stderr_text = proc.communicate(timeout=effective_timeout)
+        except subprocess.TimeoutExpired:
+            return "[Bash] TimeoutError: Execution timed out."
+        except (OSError, subprocess.SubprocessError) as exc:
+            return f"[Bash] Error executing command: {exc}"
+        finally:
+            # Runs on every exit path, including KeyboardInterrupt: make sure
+            # nothing is left running and our pipe ends are released.
+            if proc.poll() is None:
+                self._kill_process_group(proc)
+            for stream in (proc.stdout, proc.stderr):
+                if stream is not None:
+                    try:
+                        stream.close()
+                    except OSError:
+                        pass
+
+        parts = [f"exit_code: {proc.returncode}"]
+        stdout = _bounded_output(stdout_text, max_output_chars=max_output_chars)
+        stderr = _bounded_output(stderr_text, max_output_chars=max_output_chars)
+        if stdout:
+            parts.append(f"stdout:\n{stdout}")
+        if stderr:
+            parts.append(f"stderr:\n{stderr}")
+        return "\n".join(parts)
+
+
+class TerminalSession:
+    """An interactive shell attached to a pseudo-terminal.
+
+    A daemon thread continuously reads the pty master and stores decoded output
+    in a bounded buffer; ``read`` hands that output out in slices.
+    """
+
+    def __init__(self, cwd: Path, shell: str, rows: int, cols: int, *, base_root: Optional[Path] = None):
+        """Start ``shell`` in ``cwd`` on a new pty of ``rows`` x ``cols``.
+
+        Raises:
+            OSError: If the pty or the shell process cannot be created. Both pty
+                descriptors are closed before the error propagates.
+        """
+        self.cwd = cwd
+        self.shell = shell
+        self.rows = rows
+        self.cols = cols
+        self._buffer_limit = DEFAULT_BUFFER_LIMIT
+        self._pending_output = ""
+        self._dropped_output_chars = 0
+        self._lock = threading.Lock()
+
+        master_fd, slave_fd = pty.openpty()
+        try:
+            _set_terminal_size(slave_fd, rows, cols)
+            _disable_echo(slave_fd)
+
+            env = sanitized_subprocess_env(base_root=base_root)
+            env.setdefault("TERM", "xterm-256color")
+            env.setdefault("PS1", "")
+            env.setdefault("PROMPT_COMMAND", "")
+
+            self._proc = subprocess.Popen(
+                [shell, "--noprofile", "--norc"],
+                stdin=slave_fd,
+                stdout=slave_fd,
+                stderr=slave_fd,
+                cwd=str(cwd),
+                env=env,
+                text=False,
+                close_fds=True,
+                start_new_session=True,
+            )
+        except BaseException:
+            # Do not leak the master end when the shell could not be started.
+            os.close(master_fd)
+            raise
+        finally:
+            # The parent never uses the slave end; the child has its own copy.
+            os.close(slave_fd)
+        self._master_fd = master_fd
+        self._reader = threading.Thread(target=self._reader_loop, daemon=True)
+        self._reader.start()
+
+    @property
+    def pid(self) -> int:
+        """Process id of the shell."""
+        return self._proc.pid
+
+    @property
+    def alive(self) -> bool:
+        """Whether the shell process is still running."""
+        return self._proc.poll() is None
+
+    @property
+    def returncode(self) -> Optional[int]:
+        """Exit status of the shell, or ``None`` while it is running."""
+        return self._proc.poll()
+
+    def _append_output(self, decoded: str) -> None:
+        """Add ``decoded`` to the pending buffer, dropping the oldest overflow."""
+        with self._lock:
+            self._pending_output += decoded
+            overflow = len(self._pending_output) - self._buffer_limit
+            if overflow > 0:
+                self._pending_output = self._pending_output[overflow:]
+                self._dropped_output_chars += overflow
+
+    def _reader_loop(self) -> None:
+        """Background loop: move pty output into the pending buffer.
+
+        Ends when the descriptor fails or the process has exited with nothing
+        left to read, then closes the master descriptor.
+        """
+        import codecs
+
+        # Incremental decoding keeps multi-byte characters intact when they are
+        # split across two reads; invalid bytes still become U+FFFD.
+        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+        while True:
+            try:
+                ready, _, _ = select.select([self._master_fd], [], [], 0.1)
+            except (OSError, ValueError):
+                break
+
+            if not ready:
+                if self._proc.poll() is not None:
+                    break
+                continue
+
+            try:
+                data = os.read(self._master_fd, 4096)
+            except OSError:
+                break
+
+            if not data:
+                if self._proc.poll() is not None:
+                    break
+                continue
+
+            decoded = decoder.decode(data)
+            if decoded:
+                self._append_output(decoded)
+
+        # Flush bytes of an incomplete trailing character, if any.
+        tail = decoder.decode(b"", final=True)
+        if tail:
+            self._append_output(tail)
+
+        try:
+            os.close(self._master_fd)
+        except OSError:
+            pass
+
+    def write(self, data: str) -> None:
+        """Send ``data`` to the shell's terminal input.
+
+        Raises:
+            RuntimeError: If the shell is no longer running.
+            OSError: If writing to the pty fails.
+        """
+        if not self.alive:
+            raise RuntimeError("session is not running")
+        payload = data.encode("utf-8", errors="replace")
+        offset = 0
+        # os.write may accept fewer bytes than offered; keep going until done.
+        while offset < len(payload):
+            offset += os.write(self._master_fd, payload[offset:])
+
+    def read(self, yield_time_ms: int = DEFAULT_YIELD_MS, max_output_chars: int = DEFAULT_OUTPUT_CHARS) -> dict:
+        """Wait ``yield_time_ms`` and return up to ``max_output_chars`` of pending output.
+
+        Returns:
+            A dict with ``alive``, ``returncode``, ``output``,
+            ``remaining_output_chars`` (still buffered), ``dropped_output_chars``
+            (lost to the buffer limit since the previous read; reset by this
+            call) and ``truncated`` (true when output remains buffered).
+        """
+        if yield_time_ms > 0:
+            time.sleep(yield_time_ms / 1000.0)
+
+        with self._lock:
+            output = self._pending_output[:max_output_chars]
+            self._pending_output = self._pending_output[max_output_chars:]
+            remaining_output_chars = len(self._pending_output)
+            dropped_output_chars = self._dropped_output_chars
+            self._dropped_output_chars = 0
+
+        return {
+            "alive": self.alive,
+            "returncode": self.returncode,
+            "output": output,
+            "remaining_output_chars": remaining_output_chars,
+            "dropped_output_chars": dropped_output_chars,
+            "truncated": remaining_output_chars > 0,
+        }
+
+    def interrupt(self, *, max_output_chars: int = DEFAULT_OUTPUT_CHARS) -> dict:
+        """Send Ctrl-C (0x03) to the terminal and return the output that follows.
+
+        Raises:
+            RuntimeError: If the shell is no longer running.
+        """
+        if not self.alive:
+            raise RuntimeError("session is not running")
+        os.write(self._master_fd, b"\x03")
+        return self.read(yield_time_ms=DEFAULT_YIELD_MS, max_output_chars=max_output_chars)
+
+    def terminate(self, force: bool = False) -> Optional[int]:
+        """Stop the shell's process group and return its exit status.
+
+        Sends SIGTERM (SIGKILL when ``force``) to the group, waits briefly, and
+        escalates to a forced kill if a graceful stop times out. Returns
+        ``None`` if the process still has not exited.
+        """
+        if self.alive:
+            try:
+                os.killpg(os.getpgid(self.pid), signal.SIGKILL if force else signal.SIGTERM)
+            except ProcessLookupError:
+                pass
+            except OSError:
+                # Signalling the group failed; fall back to the shell process itself.
+                self._proc.kill() if force else self._proc.terminate()
+            try:
+                self._proc.wait(timeout=2 if not force else 1)
+            except subprocess.TimeoutExpired:
+                if not force:
+                    return self.terminate(force=True)
+        return self.returncode
+
+
+class TerminalSessionManager:
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._counter = itertools.count(1)
+        self._sessions: dict[str, TerminalSession] = {}
+
+    def start(self, cwd: Path, shell: str, rows: int, cols: int, *, base_root: Optional[Path] = None) -> tuple[str, TerminalSession]:
+        session = TerminalSession(cwd=cwd, shell=shell, rows=rows, cols=cols, base_root=base_root)
+        session_id = f"term_{next(self._counter)}"
+        with self._lock:
+            self._sessions[session_id] = session
+        return session_id, session
+
+    def get(self, session_id: str) -> Optional[TerminalSession]:
+        with self._lock:
+            return self._sessions.get(session_id)
+
+    def pop(self, session_id: str) -> Optional[TerminalSession]:
+        with self._lock:
+            return self._sessions.pop(session_id, None)
+
+    def cleanup(self) -> None:
+        with self._lock:
+            sessions = list(self._sessions.items())
+            self._sessions.clear()
+        for _, session in sessions:
+            session.terminate(force=True)
+
+
+# Module-wide session registry used by the Terminal* tools defined below.
+SESSION_MANAGER = TerminalSessionManager()
+# Force-terminate any sessions still registered when the interpreter exits.
+atexit.register(SESSION_MANAGER.cleanup)
+
+
+def _format_terminal_response(
+    prefix: str,
+    session_id: str,
+    payload: dict,
+    cwd: Optional[Path] = None,
+    shell: Optional[str] = None,
+    pid: Optional[int] = None,
+) -> str:
+    lines = [prefix, f"session_id: {session_id}"]
+    if pid is not None:
+        lines.append(f"pid: {pid}")
+    if cwd is not None:
+        lines.append(f"cwd: {cwd}")
+    if shell is not None:
+        lines.append(f"shell: {shell}")
+    if "alive" in payload:
+        lines.append(f"alive: {str(payload['alive']).lower()}")
+    if "returncode" in payload:
+        lines.append(f"returncode: {payload['returncode']}")
+    if "truncated" in payload:
+        lines.append(f"truncated: {str(payload['truncated']).lower()}")
+    if "remaining_output_chars" in payload:
+        lines.append(f"remaining_output_chars: {payload['remaining_output_chars']}")
+    if "dropped_output_chars" in payload:
+        lines.append(f"dropped_output_chars: {payload['dropped_output_chars']}")
+    if "output" in payload:
+        lines.append("output:")
+        lines.append(payload["output"])
+    return "\n".join(lines)
+
+
+class TerminalStart(ToolBase):
+    name = "TerminalStart"
+    description = "Start a persistent local terminal session backed by a PTY shell."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "cwd": {
+                "type": "string",
+                "description": "Optional working directory for the terminal session. Default is the current workspace root.",
+            },
+            "shell": {
+                "type": "string",
+                "description": "Optional shell executable path. Default is bash.",
+            },
+            "rows": {
+                "type": "integer",
+                "description": "Terminal row count. Default is 30.",
+            },
+            "cols": {
+                "type": "integer",
+                "description": "Terminal column count. Default is 120.",
+            },
+        },
+        "required": [],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[TerminalStart] {exc}"
+        base_root = kwargs.get("workspace_root")
+        try:
+            cwd = _resolve_cwd(params.get("cwd"), base_root=base_root)
+            shell = params.get("shell") or _default_shell()
+            rows = int(params.get("rows", 30))
+            cols = int(params.get("cols", 120))
+        except ValueError as exc:
+            return f"[TerminalStart] {exc}"
+        except (TypeError, OverflowError):
+            return "[TerminalStart] rows and cols must be integers."
+
+        if not cwd.exists():
+            return f"[TerminalStart] Working directory does not exist: {cwd}"
+        if not cwd.is_dir():
+            return f"[TerminalStart] Working directory is not a directory: {cwd}"
+        if not Path(shell).exists() and shutil.which(shell) is None:
+            return f"[TerminalStart] Shell not found: {shell}"
+        if rows <= 0 or cols <= 0:
+            return "[TerminalStart] rows and cols must both be > 0."
+
+        try:
+            session_id, session = SESSION_MANAGER.start(cwd=cwd, shell=shell, rows=rows, cols=cols, base_root=base_root)
+        except (OSError, RuntimeError, subprocess.SubprocessError) as exc:
+            return f"[TerminalStart] Failed to start terminal session: {exc}"
+
+        return _format_terminal_response(
+            "[TerminalStart] Started terminal session.",
+            session_id=session_id,
+            payload={"alive": session.alive, "returncode": session.returncode},
+            cwd=cwd,
+            shell=shell,
+            pid=session.pid,
+        )
+
+
+class TerminalWrite(ToolBase):
+    name = "TerminalWrite"
+    description = "Write input into an existing terminal session and read back newly produced output."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "session_id": {
+                "type": "string",
+                "description": "The terminal session ID returned by TerminalStart.",
+            },
+            "input": {
+                "type": "string",
+                "description": "The text to send to the terminal session.",
+            },
+            "append_newline": {
+                "type": "boolean",
+                "description": "Whether to append a newline after the provided input. Default is true.",
+            },
+            "yield_time_ms": {
+                "type": "integer",
+                "description": "Milliseconds to wait before reading output. Default is 200.",
+            },
+            "max_output_chars": {
+                "type": "integer",
+                "description": "Maximum number of output characters to return. Default is 20000.",
+            },
+        },
+        "required": ["session_id", "input"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[TerminalWrite] {exc}"
+
+        session_id = str(params["session_id"])
+        input_text = str(params["input"])
+        append_newline = bool(params.get("append_newline", True))
+        try:
+            yield_time_ms = int(params.get("yield_time_ms", DEFAULT_YIELD_MS))
+            max_output_chars = int(params.get("max_output_chars", DEFAULT_OUTPUT_CHARS))
+        except (TypeError, ValueError):
+            return "[TerminalWrite] yield_time_ms and max_output_chars must be integers."
+
+        issue = command_safety_issue(input_text)
+        if issue:
+            return f"[TerminalWrite] Blocked by safety policy: {issue}"
+
+        session = SESSION_MANAGER.get(session_id)
+        if session is None:
+            return f"[TerminalWrite] Session not found: {session_id}"
+        if max_output_chars <= 0:
+            return "[TerminalWrite] max_output_chars must be > 0."
+        if yield_time_ms < 0:
+            return "[TerminalWrite] yield_time_ms must be >= 0."
+
+        payload_input = input_text + ("\n" if append_newline else "")
+        try:
+            session.write(payload_input)
+            payload = session.read(yield_time_ms=yield_time_ms, max_output_chars=max_output_chars)
+        except (OSError, RuntimeError, subprocess.SubprocessError) as exc:
+            return f"[TerminalWrite] Failed to write to session {session_id}: {exc}"
+
+        return _format_terminal_response("[TerminalWrite] Session updated.", session_id=session_id, payload=payload)
+
+
+class TerminalRead(ToolBase):
+    name = "TerminalRead"
+    description = "Read unread output from an existing terminal session."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "session_id": {
+                "type": "string",
+                "description": "The terminal session ID returned by TerminalStart.",
+            },
+            "yield_time_ms": {
+                "type": "integer",
+                "description": "Milliseconds to wait before reading output. Default is 200.",
+            },
+            "max_output_chars": {
+                "type": "integer",
+                "description": "Maximum number of output characters to return. Default is 20000.",
+            },
+        },
+        "required": ["session_id"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[TerminalRead] {exc}"
+
+        session_id = str(params["session_id"])
+        try:
+            yield_time_ms = int(params.get("yield_time_ms", DEFAULT_YIELD_MS))
+            max_output_chars = int(params.get("max_output_chars", DEFAULT_OUTPUT_CHARS))
+        except (TypeError, ValueError):
+            return "[TerminalRead] yield_time_ms and max_output_chars must be integers."
+
+        session = SESSION_MANAGER.get(session_id)
+        if session is None:
+            return f"[TerminalRead] Session not found: {session_id}"
+        if max_output_chars <= 0:
+            return "[TerminalRead] max_output_chars must be > 0."
+        if yield_time_ms < 0:
+            return "[TerminalRead] yield_time_ms must be >= 0."
+
+        try:
+            payload = session.read(yield_time_ms=yield_time_ms, max_output_chars=max_output_chars)
+        except (OSError, RuntimeError, subprocess.SubprocessError) as exc:
+            return f"[TerminalRead] Failed to read session {session_id}: {exc}"
+
+        return _format_terminal_response("[TerminalRead] Session output fetched.", session_id=session_id, payload=payload)
+
+
+class TerminalKill(ToolBase):
+    name = "TerminalKill"
+    description = "Terminate an existing terminal session and release its resources."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "session_id": {
+                "type": "string",
+                "description": "The terminal session ID returned by TerminalStart.",
+            },
+            "force": {
+                "type": "boolean",
+                "description": "Whether to force kill the terminal session immediately. Default is false.",
+            },
+        },
+        "required": ["session_id"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[TerminalKill] {exc}"
+
+        session_id = str(params["session_id"])
+        force = bool(params.get("force", False))
+
+        session = SESSION_MANAGER.pop(session_id)
+        if session is None:
+            return f"[TerminalKill] Session not found: {session_id}"
+
+        try:
+            returncode = session.terminate(force=force)
+        except (OSError, RuntimeError, subprocess.SubprocessError) as exc:
+            return f"[TerminalKill] Failed to terminate session {session_id}: {exc}"
+
+        return _format_terminal_response(
+            "[TerminalKill] Terminal session terminated.",
+            session_id=session_id,
+            payload={"alive": False, "returncode": returncode},
+        )
+
+
+class TerminalInterrupt(ToolBase):
+    name = "TerminalInterrupt"
+    description = "Send Ctrl-C to the foreground process in an existing terminal session while keeping the session alive."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "session_id": {
+                "type": "string",
+                "description": "The terminal session ID returned by TerminalStart.",
+            },
+            "max_output_chars": {
+                "type": "integer",
+                "description": "Maximum number of output characters to return after the interrupt. Default is 20000.",
+            },
+        },
+        "required": ["session_id"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        try:
+            params = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[TerminalInterrupt] {exc}"
+
+        session_id = str(params["session_id"])
+        try:
+            max_output_chars = int(params.get("max_output_chars", DEFAULT_OUTPUT_CHARS))
+        except (TypeError, ValueError):
+            return "[TerminalInterrupt] max_output_chars must be an integer."
+
+        session = SESSION_MANAGER.get(session_id)
+        if session is None:
+            return f"[TerminalInterrupt] Session not found: {session_id}"
+        if max_output_chars <= 0:
+            return "[TerminalInterrupt] max_output_chars must be > 0."
+
+        try:
+            payload = session.interrupt(max_output_chars=max_output_chars)
+        except (OSError, RuntimeError, subprocess.SubprocessError) as exc:
+            return f"[TerminalInterrupt] Failed to interrupt session {session_id}: {exc}"
+
+        return _format_terminal_response(
+            "[TerminalInterrupt] Sent Ctrl-C to terminal session.",
+            session_id=session_id,
+            payload=payload,
+        )
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    parser = argparse.ArgumentParser(description="Run runtime and terminal tools directly.")
+    subparsers = parser.add_subparsers(dest="tool", required=True)
+
+    bash_parser = subparsers.add_parser("bash", help="Run the Bash tool.")
+    bash_parser.add_argument("command")
+    bash_parser.add_argument("--timeout", type=int, default=30)
+    bash_parser.add_argument("--workdir")
+
+    terminal_parser = subparsers.add_parser("terminal", help="Run a minimal terminal session demo.")
+    terminal_parser.add_argument("input", help="Input to send after starting the session.")
+    terminal_parser.add_argument("--cwd")
+    terminal_parser.add_argument("--yield-time-ms", type=int, default=200)
+
+    args = parser.parse_args(argv)
+    load_dotenv(PROJECT_ROOT / ".env")
+    workdir_root = Path(args.workdir).expanduser().resolve() if getattr(args, "workdir", None) else None
+
+    if args.tool == "bash":
+        result = Bash().call(
+            {"command": args.command, "timeout": args.timeout, "workdir": args.workdir},
+            workspace_root=workdir_root,
+        )
+        print(result)
+        return 0
+
+    terminal_root = Path(args.cwd).expanduser().resolve() if args.cwd else workspace_root()
+    start_result = TerminalStart().call({"cwd": str(terminal_root)}, workspace_root=terminal_root)
+    print(start_result)
+    session_match = re.search(r"session_id: (term_\d+)", start_result)
+    if not session_match:
+        return 1
+    session_id = session_match.group(1)
+    write_result = TerminalWrite().call(
+        {
+            "session_id": session_id,
+            "input": args.input,
+            "yield_time_ms": args.yield_time_ms,
+        },
+        workspace_root=terminal_root,
+    )
+    print(write_result)
+    print(TerminalKill().call({"session_id": session_id}, workspace_root=terminal_root))
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/agent_base/tools/tool_user.py b/agent_base/tools/tool_user.py
new file mode 100644
index 0000000000000000000000000000000000000000..66c8564f1599de5b6e84fd1478d73237eef53e1c
--- /dev/null
+++ b/agent_base/tools/tool_user.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+import os
+import sys
+from typing import Any, TextIO, Union
+
+from agent_base.tools.tooling import ToolBase
+
+
+class AskUser(ToolBase):
+    name = "AskUser"
+    description = (
+        "Ask the human user a concise clarification question when progress depends on "
+        "information, preference, or approval that cannot be determined from the workspace or other tools."
+    )
+    parameters = {
+        "type": "object",
+        "properties": {
+            "question": {
+                "type": "string",
+                "description": "The concise question to ask the user.",
+            },
+            "context": {
+                "type": "string",
+                "description": "Optional brief context explaining why the question is necessary.",
+            },
+        },
+        "required": ["question"],
+        "additionalProperties": False,
+    }
+
+    def call(self, params: Union[str, dict], **kwargs: Any) -> str:
+        try:
+            parsed = self.parse_json_args(params)
+        except ValueError as exc:
+            return f"[AskUser] {exc}"
+
+        question = str(parsed.get("question", "")).strip()
+        context = str(parsed.get("context", "") or "").strip()
+        if not question:
+            return "[AskUser] question must be a non-empty string."
+
+        input_stream = kwargs.get("input_stream")
+        output_stream = kwargs.get("output_stream")
+        close_stream = False
+        if input_stream is None or output_stream is None:
+            input_stream, output_stream, close_stream = _resolve_interactive_streams()
+        if input_stream is None or output_stream is None:
+            return (
+                "[AskUser] Cannot ask the user because no interactive terminal is available. "
+                "Continue with available evidence, or state the blocker if the answer is essential."
+            )
+
+        try:
+            _write_question(output_stream, question=question, context=context)
+            answer = input_stream.readline()
+        except OSError as exc:
+            return f"[AskUser] Failed to read user input: {exc}"
+        finally:
+            if close_stream:
+                try:
+                    input_stream.close()
+                except OSError:
+                    pass
+
+        answer = str(answer or "").strip()
+        if not answer:
+            return "[AskUser] User answer was empty."
+        return f"[AskUser] User answer:\n{answer}"
+
+
+def _resolve_interactive_streams() -> tuple[TextIO | None, TextIO | None, bool]:
+    if sys.stdin.isatty() and sys.stdout.isatty():
+        return sys.stdin, sys.stdout, False
+    if os.name == "nt":
+        return None, None, False
+    try:
+        tty = open("/dev/tty", "r+", encoding="utf-8")
+    except OSError:
+        return None, None, False
+    return tty, tty, True
+
+
+def _write_question(output_stream: TextIO, *, question: str, context: str = "") -> None:
+    output_stream.write("\n[AskUser]\n")
+    if context:
+        output_stream.write(f"Context: {context}\n")
+    output_stream.write(f"Question: {question}\n> ")
+    output_stream.flush()
diff --git a/agent_base/tools/tool_web.py b/agent_base/tools/tool_web.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a26aedf8850b70fd12682b1233c90e34e8c587f
--- /dev/null
+++ b/agent_base/tools/tool_web.py
@@ -0,0 +1,610 @@
+import argparse
+import json
+import os
+import sys
+import time
+from concurrent.futures import ThreadPoolExecutor
+from typing import Optional, Union
+
+import requests
+import tiktoken
+from openai import APIConnectionError, APIError, APITimeoutError, OpenAI
+
+from agent_base.provider_compat import apply_sampling_params
+from agent_base.prompt import EXTRACTOR_PROMPT
+from agent_base.tools.tooling import ToolBase
+from agent_base.utils import PROJECT_ROOT, env_flag, load_dotenv
+
+# Fallback request timeout in seconds, used when LLM_TIMEOUT_SECONDS is unset.
+DEFAULT_LLM_TIMEOUT_SECONDS = 600.0
+# NOTE(review): presumably the retry count and sampling defaults for the
+# WebFetch summary LLM call; their use is outside this excerpt - confirm.
+DEFAULT_LLM_MAX_RETRIES = 10
+DEFAULT_TEMPERATURE = 0.6
+DEFAULT_TOP_P = 0.95
+DEFAULT_PRESENCE_PENALTY = 1.1
+
+
+def search_debug_enabled() -> bool:
+    """Return ``env_flag("DEBUG_SEARCH")``; WebSearch prints request exceptions when it is set."""
+    return env_flag("DEBUG_SEARCH")
+
+
+def scholar_debug_enabled() -> bool:
+    """Return ``env_flag("DEBUG_SCHOLAR")``; ScholarSearch prints request exceptions when it is set."""
+    return env_flag("DEBUG_SCHOLAR")
+
+
+def visit_debug_enabled() -> bool:
+    """Return ``env_flag("DEBUG_VISIT")``."""
+    return env_flag("DEBUG_VISIT")
+
+
+def _request_error_text(exc: requests.RequestException) -> str:
+    response = getattr(exc, "response", None)
+    if response is None:
+        return str(exc)
+    body = response.text.strip()
+    if len(body) > 1000:
+        body = body[:1000] + "...(truncated)"
+    return f"{exc}; response_body={body}" if body else str(exc)
+
+
+def truncate_to_tokens(text: str, max_tokens: int = 95000) -> str:
+    encoding = tiktoken.get_encoding("cl100k_base")
+    tokens = encoding.encode(text)
+    if len(tokens) <= max_tokens:
+        return text
+    truncated_tokens = tokens[:max_tokens]
+    return encoding.decode(truncated_tokens)
+
+
+def _stringify_field(value) -> str:
+    if isinstance(value, str):
+        return value.strip()
+    if value is None:
+        return ""
+    if isinstance(value, (list, dict)):
+        try:
+            return json.dumps(value, ensure_ascii=False)
+        except (TypeError, ValueError):
+            return str(value).strip()
+    return str(value).strip()
+
+
+def _webfetch_failure(url: str, goal: str, reason: str) -> str:
+    useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
+    useful_information += "Evidence in page: \n" + reason + "\n\n"
+    useful_information += "Summary: \n" + "The webpage content could not be processed into the required structured summary." + "\n\n"
+    return useful_information
+
+
+def _parse_extractor_payload(raw) -> tuple[str, str] | None:
+    if isinstance(raw, str):
+        raw = raw.replace("```json", "").replace("```", "").strip()
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError:
+            return None
+
+    if not isinstance(raw, dict):
+        return None
+
+    evidence = _stringify_field(raw.get("evidence"))
+    summary = _stringify_field(raw.get("summary"))
+    if not evidence or not summary:
+        return None
+    return evidence, summary
+
+
+class WebSearch(ToolBase):
+    name = "WebSearch"
+    description = "Perform Google web searches and return the top results. Accepts multiple complementary queries."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                },
+                "minItems": 1,
+                "description": "Array of query strings. Include multiple complementary search queries in a single call.",
+            },
+        },
+        "required": ["query"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def google_search_with_serp(self, query: str):
+        def contains_chinese_basic(text: str) -> bool:
+            return any("\u4E00" <= char <= "\u9FFF" for char in text)
+
+        if contains_chinese_basic(query):
+            payload = {
+                "q": query,
+                "location": "China",
+                "gl": "cn",
+                "hl": "zh-cn",
+            }
+        else:
+            payload = {
+                "q": query,
+                "location": "United States",
+                "gl": "us",
+                "hl": "en",
+            }
+        serper_key = os.getenv("SERPER_KEY_ID", "").strip()
+        if not serper_key:
+            return "[WebSearch] SERPER_KEY_ID is not set."
+        headers = {
+            "X-API-KEY": serper_key,
+            "Content-Type": "application/json",
+        }
+
+        last_error = ""
+        res = None
+        for i in range(5):
+            try:
+                res = requests.post(
+                    "https://google.serper.dev/search",
+                    json=payload,
+                    headers=headers,
+                    timeout=20,
+                )
+                res.raise_for_status()
+                break
+            except requests.RequestException as exc:
+                last_error = _request_error_text(exc)
+                if search_debug_enabled():
+                    print(exc)
+                if i == 4:
+                    return f"[WebSearch] Request failed for '{query}': {last_error}"
+
+        if res is None:
+            return f"[WebSearch] Request failed for '{query}': {last_error or 'unknown error'}"
+
+        try:
+            results = res.json()
+        except ValueError as exc:
+            return f"[WebSearch] Invalid JSON response for '{query}': {exc}"
+
+        organic_results = results.get("organic")
+        if not isinstance(organic_results, list) or not organic_results:
+            return f"No results found for '{query}'. Try with a more general query."
+
+        web_snippets = []
+        for idx, page in enumerate(organic_results, start=1):
+            if not isinstance(page, dict):
+                continue
+            title = str(page.get("title", "Untitled result"))
+            link = str(page.get("link", ""))
+            date_published = f"\nDate published: {page['date']}" if "date" in page else ""
+            source = f"\nSource: {page['source']}" if "source" in page else ""
+            snippet = f"\n{page['snippet']}" if "snippet" in page else ""
+            redacted_version = f"{idx}. [{title}]({link}){date_published}{source}\n{snippet}"
+            redacted_version = redacted_version.replace("Your browser can't play this video.", "")
+            web_snippets.append(redacted_version)
+
+        if not web_snippets:
+            return f"No results found for '{query}'. Try with a more general query."
+
+        content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
+        return content
+
+    def search_with_serp(self, query: str):
+        return self.google_search_with_serp(query)
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        try:
+            params = self.parse_json_args(params)
+            query = params["query"]
+        except ValueError as exc:
+            return f"[WebSearch] {exc}"
+
+        if isinstance(query, list):
+            with ThreadPoolExecutor(max_workers=3) as executor:
+                responses = list(executor.map(self.search_with_serp, query))
+            response = "\n=======\n".join(responses)
+        else:
+            return "[WebSearch] 'query' must be a list of strings."
+
+        return response
+
+
+class ScholarSearch(ToolBase):
+    name = "ScholarSearch"
+    description = "Search academic sources through Google Scholar and return relevant publication results."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "array",
+                "items": {"type": "string", "description": "The search query."},
+                "minItems": 1,
+                "description": "The list of search queries for Google Scholar.",
+            },
+        },
+        "required": ["query"],
+    }
+
+    def __init__(self, cfg: Optional[dict] = None):
+        super().__init__(cfg)
+
+    def google_scholar_with_serp(self, query: str):
+        payload = {"q": query}
+        serper_key = os.getenv("SERPER_KEY_ID", "").strip()
+        if not serper_key:
+            return "[ScholarSearch] SERPER_KEY_ID is not set."
+        headers = {
+            "X-API-KEY": serper_key,
+            "Content-Type": "application/json",
+        }
+        last_error = ""
+        res = None
+        for i in range(5):
+            try:
+                res = requests.post(
+                    "https://google.serper.dev/scholar",
+                    json=payload,
+                    headers=headers,
+                    timeout=20,
+                )
+                res.raise_for_status()
+                break
+            except requests.RequestException as exc:
+                last_error = _request_error_text(exc)
+                if scholar_debug_enabled():
+                    print(exc)
+                if i == 4:
+                    return f"[ScholarSearch] Request failed for '{query}': {last_error}"
+
+        if res is None:
+            return f"[ScholarSearch] Request failed for '{query}': {last_error or 'unknown error'}"
+
+        try:
+            results = res.json()
+        except ValueError as exc:
+            return f"[ScholarSearch] Invalid JSON response for '{query}': {exc}"
+
+        organic_results = results.get("organic")
+        if not isinstance(organic_results, list) or not organic_results:
+            return f"No results found for '{query}'. Try with a more general query."
+
+        web_snippets = []
+        for idx, page in enumerate(organic_results, start=1):
+            if not isinstance(page, dict):
+                continue
+            title = str(page.get("title", "Untitled result"))
+            date_published = f"\nDate published: {page['year']}" if "year" in page else ""
+            publication_info = f"\npublicationInfo: {page['publicationInfo']}" if "publicationInfo" in page else ""
+            snippet = f"\n{page['snippet']}" if "snippet" in page else ""
+            link_info = "no available link"
+            if "pdfUrl" in page:
+                link_info = "pdfUrl: " + str(page["pdfUrl"])
+            cited_by = f"\ncitedBy: {page['citedBy']}" if "citedBy" in page else ""
+            redacted_version = f"{idx}. [{title}]({link_info}){publication_info}{date_published}{cited_by}\n{snippet}"
+            redacted_version = redacted_version.replace("Your browser can't play this video.", "")
+            web_snippets.append(redacted_version)
+
+        if not web_snippets:
+            return f"No results found for '{query}'. Try with a more general query."
+
+        content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets)
+        return content
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Run one Scholar search per query and join the per-query reports.
+
+        ``params`` is validated through ``parse_json_args``; a ``ValueError``
+        from validation is returned as a ``[ScholarSearch]``-prefixed message
+        instead of being raised. ``query`` must be a list: each element is
+        handed to ``google_scholar_with_serp`` on a pool of three worker
+        threads and the reports are joined, in input order, with a
+        ``=======`` separator line.
+        """
+        try:
+            args = self.parse_json_args(params)
+            query = args["query"]
+        except ValueError as exc:
+            return f"[ScholarSearch] {exc}"
+
+        # Guard clause: anything other than a list of queries is rejected.
+        if not isinstance(query, list):
+            return "[ScholarSearch] 'query' must be a list of strings."
+
+        with ThreadPoolExecutor(max_workers=3) as pool:
+            reports = list(pool.map(self.google_scholar_with_serp, query))
+        return "\n=======\n".join(reports)
+
+
+class WebFetch(ToolBase):
+    """Fetch web pages through the Jina reader and summarise them for a goal.
+
+    A page is downloaded from ``https://r.jina.ai/<url>``, truncated, and sent
+    together with ``EXTRACTOR_PROMPT`` to an OpenAI-compatible chat model. The
+    reply is parsed into an evidence/summary pair and returned as text.
+    Expected failures (invalid arguments, fetch errors, summary-model errors,
+    an exhausted runtime budget) are reported in the returned string rather
+    than raised.
+    """
+
+    name = "WebFetch"
+    description = "Fetch webpage content and return evidence plus a goal-focused summary."
+    parameters = {
+        "type": "object",
+        "properties": {
+            "url": {
+                "type": ["string", "array"],
+                "items": {
+                    "type": "string",
+                },
+                "minItems": 1,
+                "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs.",
+            },
+            "goal": {
+                "type": "string",
+                "description": "The goal of the visit for webpage(s).",
+            },
+        },
+        "required": ["url", "goal"],
+    }
+
+    # Returned by ``jina_readpage`` when no reader API key is configured. It
+    # carries the "Failed to read page:" prefix so it can never be mistaken for
+    # page text and forwarded to the summary model.
+    _MISSING_JINA_KEY_ERROR = "[WebFetch] Failed to read page: JINA_API_KEYS is not set."
+
+    def __init__(self, cfg: Optional[dict] = None):
+        """Initialise the tool; the summary client itself is created lazily."""
+        super().__init__(cfg)
+        self._summary_client: Optional[OpenAI] = None
+        self._summary_api_base: Optional[str] = None
+        self._load_summary_settings()
+
+    def _load_summary_settings(self) -> None:
+        """Read the summary model name, timeout and sampling values from the environment."""
+        self._summary_model_name = os.environ.get("MODEL_NAME", "").strip()
+        self._summary_timeout_seconds = float(
+            os.getenv("LLM_TIMEOUT_SECONDS", str(DEFAULT_LLM_TIMEOUT_SECONDS))
+        )
+        self._summary_temperature = float(os.getenv("TEMPERATURE", str(DEFAULT_TEMPERATURE)))
+        self._summary_top_p = float(os.getenv("TOP_P", str(DEFAULT_TOP_P)))
+        self._summary_presence_penalty = float(os.getenv("PRESENCE_PENALTY", str(DEFAULT_PRESENCE_PENALTY)))
+
+    def _ensure_summary_client(self) -> Optional[OpenAI]:
+        """Return the cached summary client, creating it on first use.
+
+        While no client exists yet the environment is re-read on every call,
+        so values loaded after construction are picked up. Returns ``None``
+        when ``API_BASE`` is unset or empty.
+        """
+        if self._summary_client is not None:
+            return self._summary_client
+        self._summary_api_base = os.environ.get("API_BASE")
+        self._load_summary_settings()
+        if not self._summary_api_base:
+            return None
+        self._summary_client = OpenAI(
+            api_key=os.environ.get("API_KEY", "EMPTY"),
+            base_url=self._summary_api_base,
+            timeout=self._summary_timeout_seconds,
+        )
+        return self._summary_client
+
+    @staticmethod
+    def _remaining_budget_seconds(runtime_deadline: Optional[float]) -> Optional[float]:
+        """Return seconds left until ``runtime_deadline`` (may be negative), or ``None`` if unbounded."""
+        if runtime_deadline is None:
+            return None
+        return runtime_deadline - time.time()
+
+    @staticmethod
+    def _is_page_content(content: str) -> bool:
+        """Return True when ``content`` is page text rather than an error marker."""
+        if not content or content == "[WebFetch] Empty content.":
+            return False
+        return not content.startswith(("[WebFetch] Failed to read page:", "[document_parser]"))
+
+    def call(self, params: Union[str, dict], **kwargs) -> str:
+        """Fetch and summarise one URL or a list of URLs.
+
+        Args:
+            params: JSON text or dict with ``url`` (string or list of strings)
+                and ``goal``.
+            **kwargs: ``runtime_deadline`` (a ``time.time()`` based timestamp)
+                is honoured when present.
+
+        Returns:
+            The report for the URL, or the per-URL reports joined with a
+            ``=======`` separator line. Argument errors are returned as a
+            ``[WebFetch]``-prefixed message.
+        """
+        try:
+            params = self.parse_json_args(params)
+            url = params["url"]
+            goal = params["goal"]
+        except ValueError as exc:
+            return f"[WebFetch] {exc}"
+        runtime_deadline = kwargs.get("runtime_deadline")
+
+        if isinstance(url, str):
+            response = self.readpage_jina(url, goal, runtime_deadline=runtime_deadline)
+        elif isinstance(url, list):
+            responses = []
+            start_time = time.time()
+            for one_url in url:
+                remaining = self._remaining_budget_seconds(runtime_deadline)
+                if remaining is not None and remaining <= 0:
+                    responses.append(
+                        _webfetch_failure(
+                            url=one_url,
+                            goal=goal,
+                            reason="Agent runtime limit reached before WebFetch could complete.",
+                        )
+                    )
+                    continue
+                if time.time() - start_time > 900:
+                    # The whole batch is capped at 900 seconds; later URLs get a canned report.
+                    cur_response = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=one_url, goal=goal)
+                    cur_response += "Evidence in page: \n" + "The provided webpage content could not be accessed. Please check the URL or file format." + "\n\n"
+                    cur_response += "Summary: \n" + "The webpage content could not be processed, and therefore, no information is available." + "\n\n"
+                else:
+                    cur_response = self.readpage_jina(one_url, goal, runtime_deadline=runtime_deadline)
+                responses.append(cur_response)
+            response = "\n=======\n".join(responses)
+        else:
+            return "[WebFetch] 'url' must be a string or a list of strings."
+
+        if visit_debug_enabled():
+            print(f"Summary Length {len(response)}")
+        return response.strip()
+
+    def call_server(self, msgs, max_retries=2, runtime_deadline: Optional[float] = None):
+        """Send ``msgs`` to the summary model and return its text.
+
+        Makes up to ``max_retries`` attempts. Each request timeout is capped by
+        the remaining runtime budget. Returns the model reply, or a string
+        starting with ``[WebFetch] Summary model error:`` describing the last
+        failure.
+        """
+        client = self._ensure_summary_client()
+        if client is None or not self._summary_api_base:
+            return "[WebFetch] Summary model error: API_BASE is not set."
+        if not self._summary_model_name:
+            return "[WebFetch] Summary model error: MODEL_NAME is not set."
+        last_error = "unknown summary-model error"
+        for _ in range(max_retries):
+            remaining = self._remaining_budget_seconds(runtime_deadline)
+            if remaining is not None and remaining <= 0:
+                return "[WebFetch] Summary model error: agent runtime limit reached."
+            try:
+                request_client = (
+                    client.with_options(timeout=min(self._summary_timeout_seconds, max(remaining, 0.001)))
+                    if remaining is not None
+                    else client
+                )
+                request_kwargs = {
+                    "model": self._summary_model_name,
+                    "messages": msgs,
+                }
+                apply_sampling_params(
+                    request_kwargs,
+                    model_name=self._summary_model_name,
+                    temperature=self._summary_temperature,
+                    top_p=self._summary_top_p,
+                    presence_penalty=self._summary_presence_penalty,
+                )
+                chat_response = request_client.chat.completions.create(**request_kwargs)
+                # A reply without choices is treated like an empty reply
+                # instead of letting an IndexError escape.
+                choices = chat_response.choices
+                content = choices[0].message.content if choices else None
+                if content:
+                    return content
+                last_error = "empty response from summary model"
+            except (APIError, APIConnectionError, APITimeoutError) as exc:
+                last_error = str(exc)
+
+        return f"[WebFetch] Summary model error: {last_error}"
+
+    def jina_readpage(self, url: str, runtime_deadline: Optional[float] = None) -> str:
+        """Download ``url`` through the Jina reader, trying up to three times.
+
+        Returns the response body on HTTP 200; otherwise a string starting with
+        ``[WebFetch] Failed to read page:``. That includes the case where
+        ``JINA_API_KEYS`` is not configured, so callers can recognise every
+        failure by the same prefix.
+        """
+        max_retries = 3
+        timeout = 50
+        jina_api_key = os.getenv("JINA_API_KEYS", "").strip()
+        if not jina_api_key:
+            return self._MISSING_JINA_KEY_ERROR
+
+        headers = {
+            "Authorization": f"Bearer {jina_api_key}",
+        }
+        last_error = "unknown page-fetch error"
+        for attempt in range(max_retries):
+            try:
+                remaining = self._remaining_budget_seconds(runtime_deadline)
+                if remaining is not None and remaining <= 0:
+                    return "[WebFetch] Failed to read page: agent runtime limit reached."
+                response = requests.get(
+                    f"https://r.jina.ai/{url}",
+                    headers=headers,
+                    timeout=min(timeout, max(remaining, 0.001)) if remaining is not None else timeout,
+                )
+                if response.status_code == 200:
+                    return response.text
+                if visit_debug_enabled():
+                    print(response.text)
+                last_error = f"HTTP {response.status_code}: {response.text[:200]}"
+            except requests.RequestException as exc:
+                last_error = str(exc)
+                remaining = self._remaining_budget_seconds(runtime_deadline)
+                if remaining is not None and remaining <= 0:
+                    return "[WebFetch] Failed to read page: agent runtime limit reached."
+                # Back off briefly, but only when another attempt will follow.
+                if attempt < max_retries - 1:
+                    time.sleep(min(0.5, remaining) if remaining is not None else 0.5)
+
+        return f"[WebFetch] Failed to read page: {last_error}"
+
+    def html_readpage_jina(self, url: str, runtime_deadline: Optional[float] = None) -> str:
+        """Call ``jina_readpage`` up to eight times until it yields page content.
+
+        Returns the page text, or a ``[WebFetch] Failed to read page:`` string
+        when the budget runs out, the reader key is missing, or every attempt
+        fails.
+        """
+        max_attempts = 8
+        for _ in range(max_attempts):
+            remaining = self._remaining_budget_seconds(runtime_deadline)
+            if remaining is not None and remaining <= 0:
+                return "[WebFetch] Failed to read page: agent runtime limit reached."
+            content = self.jina_readpage(url, runtime_deadline=runtime_deadline)
+            if content == self._MISSING_JINA_KEY_ERROR:
+                # A configuration error cannot be fixed by retrying.
+                return content
+            if self._is_page_content(content):
+                return content
+        return "[WebFetch] Failed to read page: exhausted retries"
+
+    def readpage_jina(self, url: str, goal: str, runtime_deadline: Optional[float] = None) -> str:
+        """Fetch ``url`` and produce the evidence/summary report for ``goal``.
+
+        The page is truncated to 95000 tokens before the first summary request.
+        While the model reply is shorter than 10 characters the content is cut
+        further (to 70% of its length, finally to 25000 characters) and the
+        request repeated. A reply that cannot be parsed by
+        ``_parse_extractor_payload`` is re-requested up to three times. Any
+        failure is rendered through ``_webfetch_failure``.
+        """
+        max_retries = int(os.getenv("LLM_MAX_RETRIES", str(DEFAULT_LLM_MAX_RETRIES)))
+
+        content = self.html_readpage_jina(url, runtime_deadline=runtime_deadline)
+        if not self._is_page_content(content):
+            return _webfetch_failure(
+                url=url,
+                goal=goal,
+                reason="The provided webpage content could not be accessed. Please check the URL or file format.",
+            )
+
+        content = truncate_to_tokens(content, max_tokens=95000)
+        messages = [{"role": "user", "content": EXTRACTOR_PROMPT.format(webpage_content=content, goal=goal)}]
+        raw = self.call_server(messages, max_retries=max_retries, runtime_deadline=runtime_deadline)
+        summary_retries = 3
+        while len(raw) < 10 and summary_retries >= 0:
+            remaining = self._remaining_budget_seconds(runtime_deadline)
+            if remaining is not None and remaining <= 0:
+                return _webfetch_failure(
+                    url=url,
+                    goal=goal,
+                    reason="Agent runtime limit reached before WebFetch could complete.",
+                )
+            truncate_length = int(0.7 * len(content)) if summary_retries > 0 else 25000
+            status_msg = (
+                f"[WebFetch] Summary url[{url}] "
+                f"attempt {3 - summary_retries + 1}/3, "
+                f"content length: {len(content)}, "
+                f"truncating to {truncate_length} chars"
+            ) if summary_retries > 0 else (
+                f"[WebFetch] Summary url[{url}] failed after 3 attempts, "
+                f"final truncation to 25000 chars"
+            )
+            if visit_debug_enabled():
+                print(status_msg)
+            content = content[:truncate_length]
+            extraction_prompt = EXTRACTOR_PROMPT.format(
+                webpage_content=content,
+                goal=goal,
+            )
+            messages = [{"role": "user", "content": extraction_prompt}]
+            raw = self.call_server(messages, max_retries=max_retries, runtime_deadline=runtime_deadline)
+            summary_retries -= 1
+
+        # Re-ask with the same prompt while the reply lacks the expected fields.
+        parse_retry_times = 0
+        parsed = _parse_extractor_payload(raw)
+        while parse_retry_times < 3:
+            if parsed is not None:
+                break
+            remaining = self._remaining_budget_seconds(runtime_deadline)
+            if remaining is not None and remaining <= 0:
+                return _webfetch_failure(
+                    url=url,
+                    goal=goal,
+                    reason="Agent runtime limit reached before WebFetch could complete.",
+                )
+            raw = self.call_server(messages, max_retries=max_retries, runtime_deadline=runtime_deadline)
+            parsed = _parse_extractor_payload(raw)
+            parse_retry_times += 1
+
+        if parsed is None:
+            reason = "The webpage content was fetched, but the summary model did not return the required evidence and summary fields."
+            if isinstance(raw, str) and raw.startswith("[WebFetch] Summary model error:"):
+                # Surface the concrete model error instead of the generic reason.
+                reason = raw
+            useful_information = _webfetch_failure(
+                url=url,
+                goal=goal,
+                reason=reason,
+            )
+        else:
+            evidence, summary = parsed
+            useful_information = "The useful information in {url} for user goal {goal} as follows: \n\n".format(url=url, goal=goal)
+            useful_information += "Evidence in page: \n" + evidence + "\n\n"
+            useful_information += "Summary: \n" + summary + "\n\n"
+
+        if len(useful_information) < 10 and summary_retries < 0:
+            if visit_debug_enabled():
+                print("[WebFetch] Could not generate valid summary after maximum retries")
+            useful_information = "[WebFetch] Failed to read page."
+
+        return useful_information
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Command-line entry point that runs one web tool and prints its result.
+
+    Sub-commands: ``search`` and ``scholar`` take one or more query words
+    (joined with spaces into a single query); ``fetch`` takes a URL and a
+    goal. The project ``.env`` file is loaded before the tool is built.
+    Always returns 0.
+    """
+    cli = argparse.ArgumentParser(description="Run web tools directly.")
+    commands = cli.add_subparsers(dest="tool", required=True)
+
+    # Both search commands share the same shape: one or more query words.
+    for command, help_text in (("search", "Run WebSearch."), ("scholar", "Run ScholarSearch.")):
+        commands.add_parser(command, help=help_text).add_argument("query", nargs="+")
+
+    fetch_cmd = commands.add_parser("fetch", help="Run WebFetch.")
+    fetch_cmd.add_argument("url")
+    fetch_cmd.add_argument("goal")
+
+    options = cli.parse_args(argv)
+    load_dotenv(PROJECT_ROOT / ".env")
+
+    if options.tool == "search":
+        output = WebSearch().call({"query": [" ".join(options.query)]})
+    elif options.tool == "scholar":
+        output = ScholarSearch().call({"query": [" ".join(options.query)]})
+    else:
+        output = WebFetch().call({"url": options.url, "goal": options.goal})
+
+    print(output)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/agent_base/tools/tooling.py b/agent_base/tools/tooling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f1316578a299923a5115a005c769cd2fe3458b6
--- /dev/null
+++ b/agent_base/tools/tooling.py
@@ -0,0 +1,302 @@
+import argparse
+import json
+import os
+import re
+import sys
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import json5
+from agent_base.utils import PROJECT_ROOT, load_dotenv
+
+# Name of the environment variable that overrides the default workspace root.
+WORKSPACE_ROOT_ENV = "WORKSPACE_ROOT"
+
+# File names treated as sensitive; is_sensitive_path compares them against the
+# lower-cased final component of a path.
+SENSITIVE_FILE_NAMES = {
+    ".env",
+    ".env.local",
+    ".env.production",
+    ".env.development",
+    ".env.test",
+    ".git-credentials",
+    ".netrc",
+    ".npmrc",
+    ".pypirc",
+    "id_rsa",
+    "id_dsa",
+    "id_ecdsa",
+    "id_ed25519",
+    "known_hosts",
+    "authorized_keys",
+    "credentials",
+}
+# Directory names that make a path sensitive when any lower-cased path
+# component equals one of them.
+SENSITIVE_PATH_PARTS = {
+    ".git",
+    ".ssh",
+    ".aws",
+    ".gnupg",
+    ".kube",
+}
+# Substrings that make a shell command unsafe; command_safety_issue matches
+# them case-insensitively anywhere in the command text.
+SENSITIVE_COMMAND_TOKENS = [
+    ".env",
+    ".git-credentials",
+    ".netrc",
+    ".npmrc",
+    ".pypirc",
+    "id_rsa",
+    "id_dsa",
+    "id_ecdsa",
+    "id_ed25519",
+    "/etc/passwd",
+    "/etc/shadow",
+    "/root/.ssh",
+    "/root/.aws",
+    "~/.ssh",
+    "~/.aws",
+]
+# (pattern, reason) pairs; command_safety_issue searches each pattern in the
+# command as written (case-sensitive) and reports the reason of the first hit.
+BLOCKED_COMMAND_PATTERNS: list[tuple[re.Pattern[str], str]] = [
+    (re.compile(r"(^|[\s;&|])sudo(\s|$)"), "sudo escalation is blocked"),
+    (re.compile(r"(^|[\s;&|])su(\s|$)"), "user switching is blocked"),
+    (re.compile(r"(^|[\s;&|])(shutdown|reboot|poweroff|halt)(\s|$)"), "system power-control commands are blocked"),
+    (re.compile(r"(^|[\s;&|])mkfs(?:\.\w+)?(\s|$)"), "disk-formatting commands are blocked"),
+    (re.compile(r"(^|[\s;&|])(fdisk|parted)(\s|$)"), "disk-partitioning commands are blocked"),
+    (re.compile(r":\s*\(\)\s*\{\s*:\|:&\s*\};:"), "fork-bomb patterns are blocked"),
+    (re.compile(r"\brm\s+-rf\s+/(\s|$)"), "destructive root deletion is blocked"),
+    (re.compile(r"\brm\s+-rf\s+~(/|\s|$)"), "destructive home deletion is blocked"),
+]
+# Environment variables removed from subprocess environments when the
+# upper-cased name matches exactly.
+SENSITIVE_ENV_EXACT = {
+    "API_KEY",
+    "SERPER_KEY_ID",
+    "JINA_API_KEYS",
+    "MINERU_TOKEN",
+    "OPENAI_API_KEY",
+    "ANTHROPIC_API_KEY",
+    "GOOGLE_API_KEY",
+    "AWS_ACCESS_KEY_ID",
+    "AWS_SECRET_ACCESS_KEY",
+    "AWS_SESSION_TOKEN",
+    "AZURE_OPENAI_API_KEY",
+}
+# Substrings of an upper-cased variable name that also cause its removal.
+SENSITIVE_ENV_MARKERS = (
+    "TOKEN",
+    "SECRET",
+    "PASSWORD",
+    "PASSWD",
+    "CREDENTIAL",
+    "COOKIE",
+)
+# Variables that sanitized_subprocess_env never removes; this check runs
+# before the exact-name and marker checks.
+SAFE_ENV_ALWAYS = {
+    "PATH",
+    "LANG",
+    "TERM",
+    "TMPDIR",
+    "TEMP",
+    "TMP",
+    "TZ",
+    "COLORTERM",
+    "PWD",
+    "PYTHONIOENCODING",
+    "PYTHONUNBUFFERED",
+    "CONDA_PREFIX",
+    "CONDA_DEFAULT_ENV",
+    "VIRTUAL_ENV",
+    "LOGNAME",
+    "USER",
+    "USERNAME",
+    "SHELL",
+    "SHLVL",
+    "_",
+}
+
+
+def workspace_root() -> Path:
+    """Return the resolved default workspace directory.
+
+    The ``WORKSPACE_ROOT`` environment variable is used when it is non-empty
+    after stripping whitespace (with ``~`` expanded); otherwise
+    ``PROJECT_ROOT`` is used.
+    """
+    raw_value = os.environ.get(WORKSPACE_ROOT_ENV, "").strip()
+    if raw_value:
+        return Path(raw_value).expanduser().resolve()
+    return PROJECT_ROOT.resolve()
+
+
+def normalize_base_root(base_root: Optional[Union[str, Path]]) -> Path:
+    """Return ``base_root`` as a resolved path, defaulting to ``workspace_root()``.
+
+    A given root has ``~`` expanded and is resolved to an absolute path.
+    """
+    if base_root is not None:
+        return Path(base_root).expanduser().resolve()
+    return workspace_root()
+
+
+def normalize_workspace_root(path_value: Optional[Union[str, Path]]) -> Path:
+    """Resolve a workspace directory, creating it when it does not exist.
+
+    ``None`` or a blank value falls back to ``workspace_root()``. Relative
+    values are taken relative to the current working directory.
+
+    Raises:
+        ValueError: if the resolved path exists but is not a directory.
+    """
+    if path_value is None or not str(path_value).strip():
+        return workspace_root()
+
+    candidate = Path(path_value).expanduser()
+    anchored = candidate if candidate.is_absolute() else Path.cwd() / candidate
+    target = anchored.resolve()
+
+    if not target.exists():
+        target.mkdir(parents=True, exist_ok=True)
+    if target.is_dir():
+        return target
+    raise ValueError(f"Workspace directory is not a directory: {target}")
+
+
+def _is_relative_to(path: Path, root: Path) -> bool:
+    """Return True when ``path`` equals ``root`` or lies beneath it.
+
+    The comparison is purely lexical; callers pass already-resolved paths.
+    """
+    return path.is_relative_to(root)
+
+
+def resolve_workspace_path(path_value: Union[str, Path], *, base_root: Optional[Path] = None) -> Path:
+    """Turn ``path_value`` into an absolute, resolved path.
+
+    Relative values are anchored at ``base_root`` (normalised through
+    ``normalize_base_root``); absolute values are kept. The target does not
+    have to exist. No containment check is performed here.
+    """
+    candidate = Path(path_value).expanduser()
+    anchor = normalize_base_root(base_root)
+    absolute = candidate if candidate.is_absolute() else anchor / candidate
+    return absolute.resolve(strict=False)
+
+
+def is_sensitive_path(path: Path) -> bool:
+    """Return True when ``path`` names a sensitive file or sits under a sensitive directory.
+
+    The final component is checked against ``SENSITIVE_FILE_NAMES`` and every
+    component against ``SENSITIVE_PATH_PARTS``, both after lower-casing.
+    """
+    if path.name.lower() in SENSITIVE_FILE_NAMES:
+        return True
+    for component in path.parts:
+        if component.lower() in SENSITIVE_PATH_PARTS:
+            return True
+    return False
+
+
+def validate_tool_path(path_value: Union[str, Path], purpose: str, *, allow_sensitive: bool = False, base_root: Optional[Path] = None) -> Path:
+    """Resolve ``path_value`` and make sure a tool is allowed to touch it.
+
+    Args:
+        path_value: Path to validate; relative values are anchored at the root.
+        purpose: Short label used at the start of the error messages.
+        allow_sensitive: Skip the sensitive-path check when True.
+        base_root: Root the path must stay inside; defaults to the workspace root.
+
+    Returns:
+        The resolved path.
+
+    Raises:
+        ValueError: if the path escapes the root, or is sensitive while
+            ``allow_sensitive`` is False.
+    """
+    target = resolve_workspace_path(path_value, base_root=base_root)
+    allowed_root = normalize_base_root(base_root)
+
+    inside_root = _is_relative_to(target, allowed_root)
+    if not inside_root:
+        raise ValueError(f"{purpose} is limited to the workspace root: {allowed_root}")
+
+    if allow_sensitive:
+        return target
+    if is_sensitive_path(target):
+        raise ValueError(f"{purpose} to sensitive paths is blocked: {target}")
+    return target
+
+
+def command_safety_issue(command: str) -> Optional[str]:
+    """Return the reason ``command`` is rejected, or ``None`` when nothing matches.
+
+    Blocked patterns are searched first, against the command as written.
+    Sensitive tokens are then matched case-insensitively as substrings.
+    """
+    haystack = command.lower()
+
+    blocked_reason = next(
+        (reason for pattern, reason in BLOCKED_COMMAND_PATTERNS if pattern.search(command)),
+        None,
+    )
+    if blocked_reason is not None:
+        return blocked_reason
+
+    hit = next((token for token in SENSITIVE_COMMAND_TOKENS if token.lower() in haystack), None)
+    if hit is None:
+        return None
+    return f"access to sensitive path/token '{hit}' is blocked"
+
+
+def sanitized_subprocess_env(*, base_root: Optional[Path] = None) -> dict[str, str]:
+    """Build a copy of the process environment with secret-looking variables removed.
+
+    A variable is dropped when its upper-cased name is in
+    ``SENSITIVE_ENV_EXACT`` or contains a ``SENSITIVE_ENV_MARKERS`` substring,
+    unless the name is listed in ``SAFE_ENV_ALWAYS``. ``HOME`` and ``PWD`` are
+    pointed at the normalised base root, ``TERM`` and ``LANG`` get defaults
+    when absent, and ``GIT_TERMINAL_PROMPT`` is set to ``"0"``.
+    """
+
+    def _is_secret(name: str) -> bool:
+        upper_name = name.upper()
+        if upper_name in SAFE_ENV_ALWAYS:
+            return False
+        if upper_name in SENSITIVE_ENV_EXACT:
+            return True
+        return any(marker in upper_name for marker in SENSITIVE_ENV_MARKERS)
+
+    env = {key: value for key, value in os.environ.copy().items() if not _is_secret(key)}
+
+    sandbox_home = str(normalize_base_root(base_root))
+    env["HOME"] = sandbox_home
+    env["PWD"] = sandbox_home
+    if "TERM" not in env:
+        env["TERM"] = "xterm-256color"
+    if "LANG" not in env:
+        env["LANG"] = "C.UTF-8"
+    env["GIT_TERMINAL_PROMPT"] = "0"
+    return env
+
+
+def _matches_schema_type(value: Any, expected_type: str) -> bool:
+    """Return True when ``value`` satisfies the JSON-schema type name ``expected_type``.
+
+    ``bool`` values are not accepted as ``integer`` or ``number``. Type names
+    other than the six handled here are accepted unconditionally.
+    """
+    is_bool = isinstance(value, bool)
+    is_int = isinstance(value, int) and not is_bool
+    verdicts = (
+        ("string", isinstance(value, str)),
+        ("integer", is_int),
+        ("number", is_int or isinstance(value, float)),
+        ("boolean", is_bool),
+        ("array", isinstance(value, list)),
+        ("object", isinstance(value, dict)),
+    )
+    for type_name, verdict in verdicts:
+        if expected_type == type_name:
+            return verdict
+    # Unknown type names are not validated.
+    return True
+
+
+def _schema_type_label(type_spec: Any) -> str:
+    """Render a schema ``type`` entry for error messages; lists are joined with ``" or "``."""
+    if not isinstance(type_spec, list):
+        return str(type_spec)
+    return " or ".join(map(str, type_spec))
+
+
+def _validate_schema_value(param_name: str, value: Any, schema: dict[str, Any]) -> None:
+    """Check ``value`` against the ``type``, ``minItems`` and ``items`` keys of ``schema``.
+
+    List elements are validated recursively and reported as
+    ``name[index]``. Other schema keywords are ignored.
+
+    Raises:
+        ValueError: on the first type mismatch or too-short list.
+    """
+    declared = schema.get("type")
+    if declared is not None:
+        candidates = declared if isinstance(declared, list) else [declared]
+        matched = False
+        for candidate in candidates:
+            if _matches_schema_type(value, candidate):
+                matched = True
+                break
+        if not matched:
+            raise ValueError(f"Parameter '{param_name}' must be of type {_schema_type_label(declared)}.")
+
+    # The remaining keywords only apply to lists.
+    if not isinstance(value, list):
+        return
+
+    minimum = schema.get("minItems")
+    if isinstance(minimum, int) and len(value) < minimum:
+        raise ValueError(f"Parameter '{param_name}' must contain at least {minimum} item(s).")
+
+    element_schema = schema.get("items")
+    if not isinstance(element_schema, dict):
+        return
+    for position, element in enumerate(value):
+        _validate_schema_value(f"{param_name}[{position}]", element, element_schema)
+
+
+class ToolBase:
+    """Base class for tools described by a name and a JSON-schema-like parameter spec.
+
+    Subclasses set ``name``, ``description`` and ``parameters`` as class
+    attributes and implement ``call``.
+    """
+
+    name: str = ""
+    description: str = ""
+    parameters: dict[str, Any] = {}
+
+    def __init__(self, cfg: Optional[dict] = None):
+        """Store ``cfg`` (an empty dict when falsy) and validate the class attributes.
+
+        Raises:
+            ValueError: if ``name`` is empty or ``parameters`` is not a dict.
+        """
+        self.cfg = cfg if cfg else {}
+        owner = self.__class__.__name__
+        if not self.name:
+            raise ValueError(f"{owner}.name must be set.")
+        if not isinstance(self.parameters, dict):
+            raise ValueError(f"{owner}.parameters must be a JSON-schema-like dict.")
+
+    def call(self, params: Union[str, dict], **kwargs):
+        """Execute the tool; subclasses must override this."""
+        raise NotImplementedError
+
+    def parse_json_args(self, params: Union[str, dict], strict_json: bool = False) -> dict:
+        """Decode and validate tool arguments.
+
+        String input is decoded with ``json`` when ``strict_json`` is True and
+        with ``json5`` otherwise; non-string input is used as given. The
+        result must be a dict containing every key listed under
+        ``parameters["required"]``. Each supplied key that has a dict schema
+        under ``parameters["properties"]`` is checked with
+        ``_validate_schema_value``.
+
+        Raises:
+            ValueError: for undecodable text, a non-dict result, a missing
+                required key, or a schema violation.
+        """
+        arguments = params
+        if isinstance(params, str):
+            try:
+                arguments = json.loads(params) if strict_json else json5.loads(params)
+            except (TypeError, ValueError) as exc:
+                raise ValueError("Parameters must be formatted as a valid JSON object.") from exc
+
+        if not isinstance(arguments, dict):
+            raise ValueError("Parameters must decode to a JSON object.")
+
+        for required_key in self.parameters.get("required", []):
+            if required_key not in arguments:
+                raise ValueError(f"Missing required parameter: {required_key}")
+
+        property_schemas = self.parameters.get("properties", {})
+        if not isinstance(property_schemas, dict):
+            return arguments
+        for arg_name, arg_value in arguments.items():
+            arg_schema = property_schemas.get(arg_name)
+            if isinstance(arg_schema, dict):
+                _validate_schema_value(arg_name, arg_value, arg_schema)
+        return arguments
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Print the project root, the effective workspace root and an optional resolved path as JSON.
+
+    ``--workspace-root`` overrides the workspace for this invocation and
+    ``--path`` is resolved against it. The project ``.env`` file is loaded
+    first. Always returns 0.
+    """
+    cli = argparse.ArgumentParser(description="Inspect workspace and path resolution helpers.")
+    cli.add_argument("--workspace-root", help="Optional workspace root override for this invocation.")
+    cli.add_argument("--path", help="Optional path to resolve inside the workspace.")
+    options = cli.parse_args(argv)
+
+    load_dotenv(PROJECT_ROOT / ".env")
+    active_root = normalize_workspace_root(options.workspace_root)
+
+    report: dict[str, str] = {}
+    report["project_root"] = str(PROJECT_ROOT)
+    report["workspace_root"] = str(active_root)
+    if options.path:
+        resolved = resolve_workspace_path(options.path, base_root=active_root)
+        report["resolved_path"] = str(resolved)
+
+    print(json.dumps(report, ensure_ascii=False, indent=2))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/agent_base/trace_utils.py b/agent_base/trace_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfcbffa0e369272e7ae24862f9620555311822ee
--- /dev/null
+++ b/agent_base/trace_utils.py
@@ -0,0 +1,112 @@
+import argparse
+import datetime
+from pathlib import Path
+from typing import Any, Callable, Optional
+from uuid import uuid4
+
+from agent_base.utils import append_jsonl, safe_jsonable
+
+
+# Field names of one flat trace row, in the same order in which
+# FlatTraceWriter.append builds the row dict. main() prints this list.
+TRACE_FIELD_NAMES = [
+    "run_id",
+    "event_index",
+    "turn_index",
+    "timestamp",
+    "model_name",
+    "workspace_root",
+    "role",
+    "text",
+    "tool_call_ids",
+    "tool_names",
+    "tool_arguments",
+    "finish_reason",
+    "termination",
+    "error",
+    "image_paths",
+    "capture_type",
+    "payload",
+]
+
+
+class FlatTraceWriter:
+    """Writer that turns agent events into flat rows for one run.
+
+    Every writer gets a fresh ``run_id``. Rows are appended to a JSONL file
+    when ``trace_dir`` is given and passed to ``on_event`` when a callback is
+    supplied; either sink may be absent.
+    """
+
+    def __init__(
+        self,
+        *,
+        trace_dir: Optional[str | Path],
+        model_name: str,
+        workspace_root: str | Path,
+        on_event: Optional[Callable[[dict[str, Any]], None]] = None,
+    ):
+        """Record the run metadata and choose the trace file path, if any."""
+        self.model_name = model_name
+        self.workspace_root = str(workspace_root)
+        self.on_event = on_event
+        self.run_id = uuid4().hex
+        if trace_dir:
+            self.path = resolve_trace_path(trace_dir, run_id=self.run_id)
+        else:
+            # A falsy trace_dir disables file output.
+            self.path = None
+        self.event_index = 0
+
+    def append(
+        self,
+        *,
+        role: str,
+        text: str = "",
+        turn_index: int = 0,
+        tool_call_ids: Optional[list[str]] = None,
+        tool_names: Optional[list[str]] = None,
+        tool_arguments: Optional[list[Any]] = None,
+        finish_reason: Optional[str] = None,
+        termination: Optional[str] = None,
+        error: Optional[str] = None,
+        image_paths: Optional[list[str]] = None,
+        capture_type: str = "",
+        payload: Optional[dict[str, Any]] = None,
+    ) -> dict[str, Any]:
+        """Build the next trace row, send it to the configured sinks and return it.
+
+        ``event_index`` is incremented first, so rows are numbered from 1.
+        Missing optional values become empty lists, strings or dicts.
+        ``tool_arguments`` and ``payload`` are passed through ``safe_jsonable``.
+        """
+        self.event_index += 1
+        stamp = datetime.datetime.now().astimezone().isoformat(timespec="seconds")
+        row = dict(
+            run_id=self.run_id,
+            event_index=self.event_index,
+            turn_index=turn_index,
+            timestamp=stamp,
+            model_name=self.model_name,
+            workspace_root=self.workspace_root,
+            role=role,
+            text=text,
+            tool_call_ids=tool_call_ids or [],
+            tool_names=tool_names or [],
+            tool_arguments=safe_jsonable(tool_arguments or []),
+            finish_reason=finish_reason or "",
+            termination=termination or "",
+            error=error or "",
+            image_paths=image_paths or [],
+            capture_type=capture_type or "",
+            payload=safe_jsonable(payload or {}),
+        )
+
+        trace_file = self.path
+        if trace_file is not None:
+            append_jsonl(trace_file, row)
+        listener = self.on_event
+        if listener is not None:
+            listener(row)
+        return row
+
+
+def resolve_trace_path(
+    trace_dir: str | Path,
+    *,
+    run_id: str,
+    prefix: str = "trace",
+    suffix: str = ".jsonl",
+) -> Path:
+    """Return ``<trace_dir>/<prefix>_<YYYYmmdd_HHMMSS>_<run id prefix><suffix>``.
+
+    The timestamp is the current local time and only the first 12 characters
+    of ``run_id`` are used. Nothing is created on disk.
+    """
+    target_dir = Path(trace_dir)
+    now_local = datetime.datetime.now().astimezone()
+    stamp = now_local.strftime("%Y%m%d_%H%M%S")
+    return target_dir / f"{prefix}_{stamp}_{run_id[:12]}{suffix}"
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Print the trace field names, one per line, and return 0.
+
+    The parser defines no options of its own; it only provides ``--help`` and
+    rejects unexpected arguments.
+    """
+    cli = argparse.ArgumentParser(description="Inspect the flat trace field order used by the agent.")
+    cli.parse_args(argv)
+    print(*TRACE_FIELD_NAMES, sep="\n")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/agent_base/utils.py b/agent_base/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c7821c1136fe221ac4a2f5a45cb59ea7e3af19
--- /dev/null
+++ b/agent_base/utils.py
@@ -0,0 +1,247 @@
+import argparse
+import base64
+import json
+import os
+import re
+import shutil
+import shlex
+import sys
+from pathlib import Path
+from typing import Any, Iterable, Optional, Union
+
+
+PROJECT_ROOT = Path(__file__).resolve().parent.parent  # directory two levels above this file
+_DOTENV_LAST_LOADED: dict[tuple[str, str], str] = {}  # (resolved dotenv path, key) -> value last parsed by load_dotenv
+REQUIRED_ENV_VARS = (  # default names checked by missing_required_env
+    "API_KEY",
+    "API_BASE",
+    "MODEL_NAME",
+    "SERPER_KEY_ID",
+    "JINA_API_KEYS",
+    "MINERU_TOKEN",
+)
+IMAGE_INPUT_REL_DIR = Path("inputs") / "images"  # joined onto the workspace root by the stage_image_* helpers
+MAX_INPUT_IMAGE_BYTES = 25 * 1024 * 1024  # 25 MiB default size cap for staged images
+IMAGE_MIME_BY_EXTENSION = {  # accepted lower-case suffixes and the MIME type used when building data URLs
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".webp": "image/webp",
+    ".gif": "image/gif",
+    ".bmp": "image/bmp",
+}
+
+
+class MissingRequiredEnvError(RuntimeError):  # raised by require_required_env when required variables are unset or blank
+    pass
+
+
+def load_dotenv(path: Union[str, Path]) -> None:  # Load KEY=VALUE lines from a dotenv file into os.environ
+    env_path = Path(path).expanduser()
+    if not env_path.exists():
+        return  # a missing file is silently ignored
+    env_id = str(env_path.resolve())
+    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
+        line = raw_line.strip()
+        if not line or line.startswith("#"):
+            continue  # blank line or full-line comment
+        if line.startswith("export "):
+            line = line[len("export "):].strip()  # accept shell-style "export KEY=VALUE"
+        if "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+        if not key:
+            continue
+        lexer = shlex.shlex(value, posix=True)  # character-level pass: resolves quotes/escapes, drops "#" comments
+        lexer.whitespace = ""  # keep inner whitespace as tokens so it survives the join
+        lexer.commenters = "#"
+        try:
+            parsed_value = "".join(lexer).strip()  # an empty value yields no tokens, hence ""
+        except ValueError:
+            parsed_value = value  # unbalanced quote or dangling escape: keep the raw text instead of aborting the load
+        marker = (env_id, key)
+        existing = os.environ.get(key)
+        previous_loaded = _DOTENV_LAST_LOADED.get(marker)
+        if existing is None or existing == previous_loaded:  # never clobber a value that did not come from this file
+            os.environ[key] = parsed_value
+        _DOTENV_LAST_LOADED[marker] = parsed_value
+
+
+def env_flag(name: str) -> bool:  # True when the variable is 1/true/yes/on (case-insensitive); unset counts as False
+    return os.getenv(name, "").lower() in {"1", "true", "yes", "on"}
+
+
+def missing_required_env(required: tuple[str, ...] = REQUIRED_ENV_VARS) -> list[str]:  # names unset or whitespace-only, in input order
+    return [key for key in required if not os.getenv(key, "").strip()]
+
+
+def require_required_env(context: str = "ResearchHarness") -> None:
+    """Raise MissingRequiredEnvError naming every required variable that is unset or blank."""
+    absent = missing_required_env()
+    if absent:
+        raise MissingRequiredEnvError(
+            f"{context} missing required environment variables: {', '.join(absent)}. "
+            "Set them in .env or the process environment before running."
+        )
+
+
+def read_role_prompt_files(paths: Iterable[str]) -> str:
+    """Concatenate the stripped UTF-8 text of each prompt file, separated by blank lines.
+
+    Blank path entries and empty files are skipped; a missing or non-file path raises ValueError.
+    """
+    texts: list[str] = []
+    for location in (Path(str(item).strip()).expanduser() for item in paths if str(item).strip()):
+        if not location.exists():
+            raise ValueError(f"Role prompt file does not exist: {location}")
+        if not location.is_file():
+            raise ValueError(f"Role prompt path is not a file: {location}")
+        texts.append(location.read_text(encoding="utf-8").strip())
+    return "\n\n".join(filter(None, texts))
+
+
+def _safe_image_stem(name: str, fallback: str) -> str:  # filesystem-safe stem derived from name, or fallback if nothing is left
+    stem = re.sub(r"[^A-Za-z0-9_.-]+", "_", Path(name).stem).strip("._")  # collapse unsafe runs to "_", trim "." and "_" at the edges
+    return stem or fallback
+
+
+def _unique_image_path(image_dir: Path, *, image_index: int, stem: str, suffix: str) -> Path:
+    """Return the first non-existing path named ``image_<index>_<stem>[_<n>]<suffix>`` in image_dir.
+
+    The un-numbered name is tried first, then ``_1``, ``_2``, ... until a free name is found.
+    """
+    prefix = f"image_{image_index:03d}_{stem}"
+    attempt, path = 0, image_dir / f"{prefix}{suffix}"
+    while path.exists():
+        attempt += 1
+        path = image_dir / f"{prefix}_{attempt}{suffix}"
+    return path
+
+
+def image_input_content_parts(data_url: str, saved_path: str, *, detail: str = "auto") -> list[dict[str, Any]]:
+    """Build standard initial content parts for a saved user image: a text note naming saved_path, then the image_url part."""
+    return [
+        {"type": "text", "text": f"[User-provided image saved at {saved_path}]"},
+        {"type": "image_url", "image_url": {"url": data_url, "detail": detail or "auto"}},  # falsy detail falls back to "auto"
+    ]
+
+
+def stage_image_bytes_for_input(  # Validate raw image bytes and write them under <workspace_root>/inputs/images
+    raw: bytes,
+    *,
+    workspace_root: Union[str, Path],
+    filename: str,
+    image_index: int,
+    suffix: str,
+    max_bytes: int = MAX_INPUT_IMAGE_BYTES,
+) -> str:  # returns the saved path relative to the resolved workspace root, POSIX-style
+    if not raw:
+        raise ValueError("image input is empty")
+    if len(raw) > max_bytes:
+        raise ValueError(f"image input exceeds {max_bytes} bytes")
+    normalized_suffix = suffix.lower()  # extension lookup is case-insensitive
+    if normalized_suffix not in IMAGE_MIME_BY_EXTENSION:
+        raise ValueError(f"unsupported image extension: {suffix}")
+    root = Path(workspace_root).expanduser().resolve()
+    image_dir = root / IMAGE_INPUT_REL_DIR
+    image_dir.mkdir(parents=True, exist_ok=True)
+    stem = _safe_image_stem(filename, f"image_{image_index:03d}")  # fallback stem when filename sanitizes to nothing
+    dest = _unique_image_path(image_dir, image_index=image_index, stem=stem, suffix=normalized_suffix)  # picks a name that does not exist yet
+    dest.write_bytes(raw)
+    return dest.relative_to(root).as_posix()
+
+
+def stage_image_file_for_input(  # Copy a local image into <workspace_root>/inputs/images and build its base64 data URL
+    source_path: Union[str, Path],
+    *,
+    workspace_root: Union[str, Path],
+    image_index: int,
+    max_bytes: int = MAX_INPUT_IMAGE_BYTES,
+) -> tuple[str, str]:  # (path relative to the workspace root, data URL); validation failures raise ValueError
+    source = Path(source_path).expanduser()
+    if not source.is_absolute():
+        source = (Path.cwd() / source).resolve()  # relative paths are interpreted against the current working directory
+    else:
+        source = source.resolve()
+    if not source.exists():
+        raise ValueError(f"image path does not exist: {source}")
+    if not source.is_file():
+        raise ValueError(f"image path is not a file: {source}")
+    suffix = source.suffix.lower()
+    mime_type = IMAGE_MIME_BY_EXTENSION.get(suffix)
+    if mime_type is None:
+        raise ValueError(f"unsupported image extension for {source}; expected one of {', '.join(sorted(IMAGE_MIME_BY_EXTENSION))}")
+    size = source.stat().st_size
+    if size <= 0:
+        raise ValueError(f"image file is empty: {source}")
+    if size > max_bytes:
+        raise ValueError(f"image file exceeds {max_bytes} bytes: {source}")
+    root = Path(workspace_root).expanduser().resolve()
+    image_dir = root / IMAGE_INPUT_REL_DIR
+    image_dir.mkdir(parents=True, exist_ok=True)
+    stem = _safe_image_stem(source.name, f"image_{image_index:03d}")  # fallback stem when the name sanitizes to nothing
+    dest = _unique_image_path(image_dir, image_index=image_index, stem=stem, suffix=suffix)  # picks a name that does not exist yet
+    shutil.copyfile(source, dest)
+    rel_path = dest.relative_to(root).as_posix()
+    data_url = f"data:{mime_type};base64," + base64.b64encode(dest.read_bytes()).decode("ascii")  # encoded from the staged copy
+    return rel_path, data_url
+
+
+def append_saved_image_paths_to_prompt(prompt: str, saved_paths: Iterable[str]) -> str:
+    """Append a note listing the saved image paths to the prompt; with no usable paths the prompt is returned as-is."""
+    # Entries are stringified and stripped; anything left blank is ignored.
+    cleaned = [text for text in (str(item).strip() for item in saved_paths) if text]
+    if not cleaned:
+        return prompt
+    bullet_list = "\n".join("- " + entry for entry in cleaned)
+    header = "The user attached image input. The images are saved locally inside the workspace:\n"
+    footer = "Use the direct image input when the model supports vision. If tool-based inspection is needed, use ReadImage on the saved local paths."
+    # Layout: stripped prompt, blank line, header, one bullet per path, footer.
+    return f"{prompt.strip()}\n\n{header}{bullet_list}\n{footer}"
+
+
+def safe_jsonable(value: Any) -> Any:
+    """Recursively coerce value into JSON-friendly data: scalars kept, dict keys stringified, other objects str()'d."""
+    if value is None or isinstance(value, (bool, int, float, str)):
+        return value
+    if isinstance(value, dict):
+        return dict((str(name), safe_jsonable(entry)) for name, entry in value.items())
+    # Tuples and lists both come back as lists; every other type falls through to str().
+    return list(map(safe_jsonable, value)) if isinstance(value, (tuple, list)) else str(value)
+
+
+def append_jsonl(path: Union[str, Path], record: dict[str, Any]) -> None:  # Append record as one JSON line to path
+    output_path = Path(path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)  # create missing parent directories first
+    with output_path.open("a", encoding="utf-8") as fp:
+        fp.write(json.dumps(record, ensure_ascii=False) + "\n")  # non-ASCII text is written as-is, not \u-escaped
+
+
+def read_text_lossy(path: Union[str, Path]) -> str:  # Read a file as UTF-8, tolerating undecodable bytes
+    file_path = Path(path)
+    try:
+        return file_path.read_text(encoding="utf-8")
+    except UnicodeDecodeError:
+        return file_path.read_text(encoding="utf-8", errors="replace")  # second read swaps bad bytes for U+FFFD
+
+
+def main(argv: Optional[list[str]] = None) -> int:  # CLI entry: optionally load a dotenv file, then print a small JSON summary
+    parser = argparse.ArgumentParser(description="Inspect shared agent_base utilities.")
+    parser.add_argument("--dotenv", help="Optional dotenv path to load before printing the summary.")
+    args = parser.parse_args(argv)
+
+    if args.dotenv:
+        load_dotenv(args.dotenv)
+
+    payload = {
+        "project_root": str(PROJECT_ROOT),
+        "dotenv_loaded": bool(args.dotenv),  # reflects that --dotenv was passed, not that the file existed
+    }
+    print(json.dumps(payload, ensure_ascii=False, indent=2))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))  # main()'s return value becomes the process exit status
diff --git a/api/__init__.py b/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a7ff270c1442596b9c596302478dcbe86f6531a
--- /dev/null
+++ b/api/__init__.py
@@ -0,0 +1 @@
+"""OpenAI-compatible API helpers for ResearchHarness."""
diff --git a/api/openai_server.py b/api/openai_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..d330a7fad711ece9bf61c0e45f49996e3256538f
--- /dev/null
+++ b/api/openai_server.py
@@ -0,0 +1,518 @@
+from __future__ import annotations
+
+import base64
+import binascii
+import datetime
+import json
+import re
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Optional
+from uuid import uuid4
+
+import uvicorn
+from fastapi import Body, FastAPI, Request
+from fastapi.responses import JSONResponse
+
+from agent_base.react_agent import (
+    AVAILABLE_TOOL_MAP,
+    MultiTurnReactAgent,
+    assistant_text_content,
+    default_llm_config,
+    model_supports_runtime_image_parts,
+)
+from agent_base.tools.tooling import normalize_workspace_root
+from agent_base.utils import append_jsonl, image_input_content_parts, read_role_prompt_files, safe_jsonable
+
+
+DATA_IMAGE_RE = re.compile(r"^data:(image/[A-Za-z0-9.+-]+);base64,(.*)$", re.DOTALL)  # group 1: MIME type, group 2: base64 payload
+IMAGE_EXTENSIONS = {  # accepted data-URL MIME types and the file suffix used when saving
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/jpg": ".jpg",
+    "image/webp": ".webp",
+    "image/gif": ".gif",
+}
+DEFAULT_MAX_IMAGE_BYTES = 25 * 1024 * 1024  # 25 MiB cap on a decoded image
+
+INPUT_WRAPPER_SYSTEM_PROMPT = """You are the ResearchHarness input wrapper.
+
+Convert the user's OpenAI-compatible chat request into a stable task for a
+tool-using ResearchHarness agent.
+
+Return only a JSON object with these string fields:
+- agent_instruction: the task the agent should solve, including all substantive question details.
+- output_contract: the final output format or schema requested by the user. If no strict format is requested, say "plain text".
+- wrapper_notes: brief notes about images, constraints, or benchmark-specific requirements.
+
+Rules:
+- Do not answer the task.
+- Do not remove substantive constraints.
+- Keep strict final formatting requirements out of agent_instruction when possible.
+- If images are listed, mention their saved paths in agent_instruction.
+"""  # system message used by build_input_wrapper_messages; the reply is parsed by extract_json_object
+
+OUTPUT_WRAPPER_SYSTEM_PROMPT = """You are the ResearchHarness output wrapper.
+
+Format the ResearchHarness agent result so it satisfies the user's requested
+final output contract.
+
+Rules:
+- Return only the final answer requested by the user.
+- Do not add markdown fences unless the user explicitly required them.
+- Do not solve the task again.
+- Do not introduce facts not present in the agent result.
+- Make the answer complete and self-contained for a remote user or evaluator.
+- The answer may mention workspace files when useful, but it must not depend on
+  local files as the only carrier of the answer.
+- Include the actual answer and any necessary evidence or solution steps in the
+  returned text.
+- If reasoning or evidence is required, summarize it directly in the final
+  answer according to the requested format.
+- If the requested format is JSON, return valid JSON only.
+- If the agent result does not contain enough information, produce the best
+  contract-compliant failure answer instead of inventing evidence.
+"""  # system message used by build_output_wrapper_messages for the final formatting call
+
+
+class OpenAICompatError(Exception):  # error carrying an HTTP status plus an OpenAI-style message and type
+    def __init__(self, status_code: int, message: str, error_type: str = "invalid_request_error"):
+        super().__init__(message)
+        self.status_code = status_code  # used as the HTTP status by openai_error_response
+        self.message = message
+        self.error_type = error_type  # emitted as error.type in the JSON body
+
+
+@dataclass
+class ServerConfig:  # settings shared by every request handled by one server instance
+    api_runs_dir: Path  # parent directory; each request gets its own run_* subdirectory
+    role_prompt: str = ""  # optional role overlay; empty is passed to the agent as None
+    host: str = "127.0.0.1"
+    port: int = 8686
+    input_wrapper: bool = True  # run the LLM input-normalization pass
+    output_wrapper: bool = True  # run the LLM final-formatting pass
+
+
+@dataclass
+class PreparedInput:  # result of prepare_openai_input
+    wrapper_messages: list[dict[str, str]]  # text-only {"role", "content"} messages; images replaced by path notes
+    initial_content_parts: list[dict[str, Any]]  # text + image_url parts for every saved image
+    image_paths: list[str]  # workspace-relative paths of the saved images
+
+
+def openai_error_response(exc: OpenAICompatError) -> JSONResponse:  # render the error as {"error": {"message", "type"}}
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"error": {"message": exc.message, "type": exc.error_type}},
+    )
+
+
+def make_chat_completion_response(*, request_id: str, model: str, content: str) -> dict[str, Any]:
+    """Build a single-choice ``chat.completion`` payload whose only message is the assistant text.
+
+    ``created`` is the current Unix time in whole seconds; ``finish_reason`` is always "stop".
+    """
+    only_choice = {"index": 0, "message": {"role": "assistant", "content": content}, "finish_reason": "stop"}
+    response: dict[str, Any] = {
+        "id": request_id,
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [only_choice],
+    }
+    return response
+
+
+def validate_chat_payload(payload: Any) -> dict[str, Any]:  # Check the request shape; returns payload unchanged or raises a 400 OpenAICompatError
+    if not isinstance(payload, dict):
+        raise OpenAICompatError(400, "Request body must be a JSON object.")
+    if payload.get("stream") is True:
+        raise OpenAICompatError(400, "Streaming is not supported by this synchronous endpoint.")
+    try:
+        n_value = int(payload.get("n", 1) or 1)  # a missing or falsy n counts as 1
+    except (TypeError, ValueError) as exc:
+        raise OpenAICompatError(400, "n must be an integer.") from exc
+    if n_value != 1:
+        raise OpenAICompatError(400, "Only n=1 is supported.")
+    model = str(payload.get("model") or "").strip()  # "or" keeps JSON null from turning into the string "None"
+    if not model:
+        raise OpenAICompatError(400, "model is required.")
+    messages = payload.get("messages")
+    if not isinstance(messages, list) or not messages:
+        raise OpenAICompatError(400, "messages must be a non-empty list.")
+    return payload
+
+
+def prepare_openai_input(messages: list[Any], workspace_root: Path) -> PreparedInput:  # Flatten chat messages to text and save inline images
+    wrapper_messages: list[dict[str, str]] = []
+    initial_content_parts: list[dict[str, Any]] = []
+    image_paths: list[str] = []
+    image_dir = workspace_root / "inputs" / "images"
+    image_index = 0  # running counter across all messages so saved file names never repeat
+
+    for message in messages:
+        if not isinstance(message, dict):
+            raise OpenAICompatError(400, "Each message must be an object.")
+        role = str(message.get("role", "")).strip()
+        if role not in {"system", "user", "assistant"}:
+            raise OpenAICompatError(400, f"Unsupported message role: {role!r}.")
+        content = "" if message.get("content") is None else message["content"]  # JSON null content is treated as empty text
+        text_parts: list[str] = []
+        if isinstance(content, str):
+            text_parts.append(content)
+        elif isinstance(content, list):
+            for part in content:
+                if not isinstance(part, dict):
+                    raise OpenAICompatError(400, "Multimodal content parts must be objects.")
+                part_type = str(part.get("type", "")).strip()
+                if part_type == "text":
+                    text_parts.append(str(part.get("text") or ""))  # null text must not become the literal "None"
+                elif part_type == "image_url":
+                    image_url = part.get("image_url")
+                    if not isinstance(image_url, dict):
+                        raise OpenAICompatError(400, "image_url content must contain an image_url object.")
+                    url = str(image_url.get("url", "")).strip()
+                    detail = str(image_url.get("detail", "auto") or "auto")
+                    rel_path = save_data_image(  # decodes the data URL and writes it into the workspace
+                        url,
+                        workspace_root=workspace_root,
+                        image_dir=image_dir,
+                        image_index=image_index,
+                    )
+                    image_index += 1
+                    image_paths.append(rel_path)
+                    text_parts.append(f"[image saved at {rel_path}]")  # text stand-in for the image in the wrapper view
+                    initial_content_parts.extend(image_input_content_parts(url, rel_path, detail=detail))
+                else:
+                    raise OpenAICompatError(400, f"Unsupported content part type: {part_type!r}.")
+        else:
+            raise OpenAICompatError(400, "message content must be a string or a list of content parts.")
+        wrapper_messages.append({"role": role, "content": "\n".join(part for part in text_parts if part)})
+
+    return PreparedInput(
+        wrapper_messages=wrapper_messages,
+        initial_content_parts=initial_content_parts,
+        image_paths=image_paths,
+    )
+
+
+def save_data_image(url: str, *, workspace_root: Path, image_dir: Path, image_index: int) -> str:  # Decode a base64 data URL and save it; returns the workspace-relative path
+    match = DATA_IMAGE_RE.match(url)
+    if not match:
+        raise OpenAICompatError(
+            400, "Only data:image/...;base64,... image_url inputs are supported in the first API version.",
+        )
+    mime_type = match.group(1).lower()
+    extension = IMAGE_EXTENSIONS.get(mime_type)
+    if extension is None:
+        raise OpenAICompatError(400, f"Unsupported image MIME type: {mime_type}.")
+    try:
+        image_bytes = base64.b64decode(match.group(2), validate=True)  # strict: any non-alphabet character is an error
+    except (binascii.Error, ValueError) as exc:
+        raise OpenAICompatError(400, "Invalid base64 image data.") from exc
+    if not image_bytes:  # an empty payload decodes cleanly, so reject it here instead of saving a zero-byte file
+        raise OpenAICompatError(400, "Image data is empty.")
+    if len(image_bytes) > DEFAULT_MAX_IMAGE_BYTES:
+        raise OpenAICompatError(400, f"Image exceeds the {DEFAULT_MAX_IMAGE_BYTES} byte limit.")
+    image_dir.mkdir(parents=True, exist_ok=True)
+    path = image_dir / f"image_{image_index:03d}{extension}"
+    path.write_bytes(image_bytes)
+    return path.relative_to(workspace_root).as_posix()
+
+
+def wrapper_request_payload(*, prepared: PreparedInput, payload: dict[str, Any]) -> dict[str, Any]:  # data shown to the input wrapper
+    return {
+        "messages": prepared.wrapper_messages,
+        "saved_image_paths": prepared.image_paths,
+        "response_format": safe_jsonable(payload.get("response_format")),  # None when the client sent no response_format
+        "requested_model_label": str(payload.get("model", "")),
+    }
+
+
+def build_input_wrapper_messages(*, prepared: PreparedInput, payload: dict[str, Any]) -> list[dict[str, str]]:  # system prompt + JSON-encoded request
+    return [
+        {"role": "system", "content": INPUT_WRAPPER_SYSTEM_PROMPT},
+        {
+            "role": "user",
+            "content": json.dumps(wrapper_request_payload(prepared=prepared, payload=payload), ensure_ascii=False, indent=2),
+        },
+    ]
+
+
+def build_passthrough_input_plan(*, prepared: PreparedInput, payload: dict[str, Any]) -> dict[str, str]:  # input plan built without an LLM call
+    conversation = "\n\n".join(  # one "ROLE:\ncontent" section per message
+        f"{message['role'].upper()}:\n{message['content']}" for message in prepared.wrapper_messages
+    ).strip()
+    response_format = payload.get("response_format")
+    output_contract = "Follow the final answer requirements in the original request."
+    if response_format is not None:  # spell out the client's response_format so the formatter can honor it
+        output_contract += "\nOpenAI response_format request:\n" + json.dumps(
+            safe_jsonable(response_format),
+            ensure_ascii=False,
+            indent=2,
+        )
+    return {
+        "agent_instruction": conversation or "Answer the user's request.",  # generic fallback when every message is empty
+        "output_contract": output_contract,
+        "wrapper_notes": "Input wrapper disabled; the original normalized conversation was passed through directly.",
+    }
+
+
+def build_agent_prompt(input_plan: dict[str, Any], prepared: PreparedInput) -> str:  # assemble the agent prompt from the plan and the saved image list
+    image_block = "\n".join(f"- {path}" for path in prepared.image_paths) if prepared.image_paths else "- none"
+    return (
+        "You are solving a user request through ResearchHarness.\n\n"
+        "Task for the agent:\n"
+        f"{str(input_plan.get('agent_instruction', '')).strip()}\n\n"
+        "User-provided images saved in this workspace:\n"
+        f"{image_block}\n\n"
+        "The original image content is attached to the initial user message when the backend model supports image parts. "
+        "The same images are also saved at the paths above so you may call ReadImage when visual inspection is needed.\n\n"
+        "Do not optimize your tool-use loop for the final output schema. Solve the task completely, then finish with a complete, "
+        "self-contained internal final text that includes the actual answer, the evidence used, and any concise reasoning needed to understand it. "
+        "You may mention files you created or inspected, but the internal final text must not depend on local files as the only carrier of the answer.\n\n"
+        "Final output contract that will be enforced by a formatter after your run:\n"
+        f"{str(input_plan.get('output_contract', 'plain text')).strip()}\n\n"
+        "Wrapper notes:\n"
+        f"{str(input_plan.get('wrapper_notes', '')).strip()}"
+    )
+
+
+def build_output_wrapper_messages(  # system prompt + JSON bundle of the original request and the agent result
+    *,
+    prepared: PreparedInput,
+    payload: dict[str, Any],
+    input_plan: dict[str, Any],
+    agent_result_text: str,
+) -> list[dict[str, str]]:
+    output_payload = {
+        "original_messages": prepared.wrapper_messages,
+        "saved_image_paths": prepared.image_paths,
+        "output_contract": str(input_plan.get("output_contract", "plain text")),
+        "response_format": safe_jsonable(payload.get("response_format")),  # None when the client sent no response_format
+        "agent_result_text": agent_result_text,
+    }
+    return [
+        {"role": "system", "content": OUTPUT_WRAPPER_SYSTEM_PROMPT},
+        {"role": "user", "content": json.dumps(output_payload, ensure_ascii=False, indent=2)},
+    ]
+
+
+def extract_json_object(text: str) -> dict[str, Any]:  # Parse the input wrapper reply into a plan dict; failures raise a 500 OpenAICompatError
+    stripped = text.strip()
+    if stripped.startswith("```"):  # tolerate a reply wrapped in a markdown code fence
+        stripped = re.sub(r"^```(?:json)?\s*", "", stripped, flags=re.IGNORECASE)
+        stripped = re.sub(r"\s*```$", "", stripped)
+    try:
+        parsed = json.loads(stripped)
+    except json.JSONDecodeError:
+        start = stripped.find("{")  # fallback: parse the span from the first "{" to the last "}"
+        end = stripped.rfind("}")
+        if start < 0 or end <= start:
+            raise OpenAICompatError(500, "Input wrapper did not return a JSON object.", "server_error") from None
+        try:
+            parsed = json.loads(stripped[start : end + 1])
+        except json.JSONDecodeError as exc:
+            raise OpenAICompatError(500, f"Input wrapper returned invalid JSON: {exc}", "server_error") from exc
+    if not isinstance(parsed, dict):
+        raise OpenAICompatError(500, "Input wrapper JSON must be an object.", "server_error")
+    if not str(parsed.get("agent_instruction", "")).strip():
+        raise OpenAICompatError(500, "Input wrapper JSON missing agent_instruction.", "server_error")
+    if not str(parsed.get("output_contract", "")).strip():
+        parsed["output_contract"] = "plain text"  # default contract when the wrapper left it blank
+    parsed.setdefault("wrapper_notes", "")
+    return parsed
+
+
+def call_wrapper_text(  # Run one wrapper LLM call and return its non-empty text; failures raise a 500 OpenAICompatError
+    agent: MultiTurnReactAgent,
+    messages: list[dict[str, str]],
+    *,
+    max_output_tokens: Optional[int] = None,
+) -> str:
+    response = agent.call_compaction_api(messages, max_output_tokens=max_output_tokens)
+    if not isinstance(response, dict) or response.get("status") == "error":  # non-dict replies and status == "error" both count as failure
+        error_text = response.get("error", "unknown wrapper error") if isinstance(response, dict) else str(response)
+        raise OpenAICompatError(500, error_text, "server_error")
+    text = assistant_text_content(response.get("content")).strip()
+    if not text:
+        raise OpenAICompatError(500, "Wrapper returned empty content.", "server_error")
+    return text
+
+
+def final_max_tokens(payload: dict[str, Any]) -> Optional[int]:  # Requested output-token cap as a positive int, or None when absent; bad values raise a 400
+    raw_value = next((payload[key] for key in ("max_tokens", "max_completion_tokens") if payload.get(key) is not None), None)  # first non-null wins, so a null max_tokens cannot hide max_completion_tokens
+    if raw_value is None:
+        return None
+    try:
+        value = int(raw_value)
+    except (TypeError, ValueError) as exc:
+        raise OpenAICompatError(400, "max_tokens must be an integer.") from exc
+    if value <= 0:
+        raise OpenAICompatError(400, "max_tokens must be positive.")
+    return value
+
+
+def append_api_event(trace_dir: Path, event: str, payload: dict[str, Any]) -> None:  # append one event row to <trace_dir>/api_trace.jsonl
+    append_jsonl(
+        trace_dir / "api_trace.jsonl",
+        {
+            "timestamp": int(time.time()),  # Unix time in whole seconds
+            "event": event,
+            "payload": safe_jsonable(payload),
+        },
+    )
+
+
+def run_chat_completion(payload: dict[str, Any], config: ServerConfig) -> dict[str, Any]:  # Handle one request: wrap input, run the agent, wrap output
+    payload = validate_chat_payload(payload)
+    output_max_tokens = final_max_tokens(payload) if config.output_wrapper else None  # validated up front so a bad max_tokens fails before the agent run
+    request_id = "chatcmpl_" + uuid4().hex
+    run_id = "run_" + datetime.datetime.now().astimezone().strftime("%Y%m%d_%H%M%S") + "_" + uuid4().hex[:8]
+    run_root = config.api_runs_dir / run_id
+    agent_workspace = run_root / "agent_workspace"
+    trace_dir = run_root / "agent_trace"
+    agent_workspace.mkdir(parents=True, exist_ok=False)  # exist_ok=False: a run directory is never reused
+    trace_dir.mkdir(parents=True, exist_ok=False)
+    prepared = prepare_openai_input(payload["messages"], agent_workspace)
+    llm_config = default_llm_config()
+    backend_model = str(llm_config.get("model", ""))
+    if prepared.initial_content_parts and not model_supports_runtime_image_parts(backend_model):
+        raise OpenAICompatError(
+            400, f"Backend model {backend_model!r} does not support image content parts."
+        )
+
+    tool_names = [name for name in AVAILABLE_TOOL_MAP if name != "AskUser"]  # every available tool except AskUser
+    agent = MultiTurnReactAgent(
+        function_list=tool_names,
+        llm=llm_config,
+        trace_dir=str(trace_dir),
+        role_prompt=config.role_prompt or None,
+    )
+
+    if config.input_wrapper:
+        input_wrapper_messages = build_input_wrapper_messages(prepared=prepared, payload=payload)
+        input_wrapper_text = call_wrapper_text(agent, input_wrapper_messages, max_output_tokens=1200)
+        input_plan = extract_json_object(input_wrapper_text)
+        append_api_event(
+            trace_dir,
+            "input_wrapper",
+            {
+                "enabled": True,
+                "request": input_wrapper_messages,
+                "response_text": input_wrapper_text,
+                "input_plan": input_plan,
+            },
+        )
+    else:
+        input_plan = build_passthrough_input_plan(prepared=prepared, payload=payload)
+        append_api_event(
+            trace_dir,
+            "input_wrapper",
+            {
+                "enabled": False,
+                "input_plan": input_plan,
+            },
+        )
+
+    agent_prompt = build_agent_prompt(input_plan, prepared)
+    session = agent._run_session(
+        agent_prompt,
+        workspace_root=str(agent_workspace),
+        initial_content_parts=prepared.initial_content_parts or None,
+    )
+    agent_result_text = str(session.get("result_text", "")).strip()
+    append_api_event(
+        trace_dir,
+        "agent_result",
+        {
+            "termination": session.get("termination", ""),
+            "result_text": agent_result_text,
+            "trace_path": session.get("trace_path", ""),
+        },
+    )
+
+    if config.output_wrapper:
+        output_wrapper_messages = build_output_wrapper_messages(
+            prepared=prepared,
+            payload=payload,
+            input_plan=input_plan,
+            agent_result_text=agent_result_text,
+        )
+        final_text = call_wrapper_text(agent, output_wrapper_messages, max_output_tokens=output_max_tokens)
+        append_api_event(
+            trace_dir,
+            "output_wrapper",
+            {
+                "enabled": True,
+                "request": output_wrapper_messages,
+                "response_text": final_text,
+            },
+        )
+    else:
+        final_text = agent_result_text  # wrapper disabled: return the agent's own final text
+        append_api_event(
+            trace_dir,
+            "output_wrapper",
+            {
+                "enabled": False,
+                "response_text": final_text,
+            },
+        )
+    return make_chat_completion_response(
+        request_id=request_id,
+        model=str(payload.get("model", "researchharness")),  # echo the client's model label
+        content=final_text,
+    )
+
+
+def create_app(config: ServerConfig) -> FastAPI:  # Build the FastAPI app exposing /v1/health and /v1/chat/completions for config
+    app = FastAPI(title="ResearchHarness OpenAI-Compatible API", version="1.0")
+
+    @app.exception_handler(OpenAICompatError)
+    async def _handle_openai_compat_error(request: Request, exc: OpenAICompatError) -> JSONResponse:
+        return openai_error_response(exc)  # OpenAI-style {"error": {...}} body with the error's status code
+
+    @app.get("/v1/health")
+    async def health() -> dict[str, Any]:
+        return {
+            "status": "ok",
+            "api_runs_dir": str(config.api_runs_dir),
+            "input_wrapper": config.input_wrapper,
+            "output_wrapper": config.output_wrapper,
+        }
+
+    @app.post("/v1/chat/completions")  # plain "def" on purpose: FastAPI runs sync handlers in its threadpool
+    def chat_completions(payload: dict[str, Any] = Body(...)) -> dict[str, Any]:  # keeps the blocking agent run off the event loop
+        try:
+            return run_chat_completion(payload, config)
+        except OpenAICompatError:
+            raise
+        except Exception as exc:  # boundary handler: any other failure becomes a 500 server_error
+            raise OpenAICompatError(500, f"ResearchHarness API error: {exc}", "server_error") from exc
+
+    return app
+
+
+def serve(  # Build the server config and app, then run uvicorn until it exits
+    *,
+    api_runs_dir: str,
+    host: str = "127.0.0.1",
+    port: int = 8686,
+    role_prompt_files: Optional[list[str]] = None,
+    input_wrapper: bool = True,
+    output_wrapper: bool = True,
+) -> None:
+    root = normalize_workspace_root(api_runs_dir)
+    role_prompt = read_role_prompt_files(role_prompt_files or [])  # "" when no files are given
+    config = ServerConfig(
+        api_runs_dir=root,
+        role_prompt=role_prompt,
+        host=host,
+        port=port,
+        input_wrapper=input_wrapper,
+        output_wrapper=output_wrapper,
+    )
+    app = create_app(config)
+    uvicorn.run(app, host=host, port=port)
diff --git a/api_runs/.gitkeep b/api_runs/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/api_runs/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..361b938c965b1635d206b58e4631973859251178
--- /dev/null
+++ b/app.py
@@ -0,0 +1,54 @@
+"""Hugging Face Space entrypoint for ResearchHarness."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import uvicorn
+
+from agent_base.utils import read_role_prompt_files
+from frontend.local_server import app, configure_frontend
+
+
+def _int_env(name: str, default: int) -> int:
+    """Read an integer setting from the environment; unset or blank yields default, junk raises ValueError."""
+    text = os.getenv(name, "").strip()
+    try:
+        # A blank value never reaches int(), so only real parse failures are re-raised below.
+        return int(text) if text else default
+    except ValueError as exc:
+        raise ValueError(f"{name} must be an integer, got {text!r}") from exc
+
+
+def _role_prompt_files() -> list[str]:
+    """Split RH_ROLE_PROMPT_FILES on os.pathsep, dropping empty entries; unset or blank yields []."""
+    # str.split never yields None, so filter(None, ...) removes exactly the empty segments.
+    configured = os.getenv("RH_ROLE_PROMPT_FILES", "").strip()
+    return list(filter(None, configured.split(os.pathsep))) if configured else []
+
+
+def configure_space() -> None:  # Configure the frontend from RH_* environment variables
+    runs_dir = Path(os.getenv("RH_SPACE_RUNS_DIR", "/tmp/researchharness_space/runs")).expanduser()
+    role_prompt = read_role_prompt_files(_role_prompt_files())
+    configure_frontend(
+        role_prompt=role_prompt,
+        managed_runs_dir=str(runs_dir),
+        cleanup_retention_seconds=_int_env("RH_SPACE_RETENTION_SECONDS", 6 * 60 * 60),  # default 6 hours
+        cleanup_max_runs=_int_env("RH_SPACE_MAX_RUNS", 40),
+        cleanup_interval_seconds=_int_env("RH_SPACE_CLEANUP_INTERVAL_SECONDS", 15 * 60),  # default 15 minutes
+    )
+
+
+configure_space()  # runs at import time, so the frontend is configured even when this module is only imported
+
+
+def main() -> int:  # Serve the frontend app with uvicorn; HOST defaults to 0.0.0.0 and PORT to 7860
+    host = os.getenv("HOST", "0.0.0.0")
+    port = _int_env("PORT", 7860)
+    uvicorn.run(app, host=host, port=port, reload=False)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/benchmarks/QA/README.md b/benchmarks/QA/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c8b4de7e6efe168d8f7f723b532f7c436e83e762
--- /dev/null
+++ b/benchmarks/QA/README.md
@@ -0,0 +1,102 @@
+# QA / VQA Benchmarks
+
+This directory documents the lightweight ResearchHarness contract for
+question-answering benchmarks, including plain-text QA and multimodal VQA-style
+tasks.
+
+The recommended integration is the OpenAI-compatible synchronous API server:
+
+```bash
+python3 /abs/path/to/ResearchHarness/run_server.py \
+  --api-runs-dir ./api_runs
+```
+
+For QA/VQA benchmark runs, optionally add this benchmark role overlay:
+
+```bash
+python3 /abs/path/to/ResearchHarness/run_server.py \
+  --api-runs-dir ./api_runs \
+  --role-prompt-file /abs/path/to/ResearchHarness/benchmarks/QA/role_prompt.md
+```
+
+Each request creates a fresh run directory:
+
+```text
+./api_runs/
+`-- run_YYYYMMDD_HHMMSS_<random>/
+    |-- agent_workspace/          # visible to the agent
+    |   `-- inputs/
+    |       `-- images/           # user-provided images, when present
+    `-- agent_trace/              # server-side trace and session state
+        |-- api_trace.jsonl
+        |-- trace_*.jsonl
+        `-- _session_state.json
+```
+
+The input and output LLM wrappers are enabled by default:
+
+- `--input-wrapper` / `--no-input-wrapper` controls the input normalization pass.
+- `--output-wrapper` / `--no-output-wrapper` controls the final answer formatting pass.
+
+Strict-format benchmarks should usually keep both wrappers enabled. To return
+the agent's direct final text instead, run:
+
+```bash
+python3 /abs/path/to/ResearchHarness/run_server.py \
+  --api-runs-dir ./api_runs \
+  --no-input-wrapper \
+  --no-output-wrapper
+```
+
+External benchmark runners can then use the regular OpenAI SDK with:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
+
+response = client.chat.completions.create(
+    model="researchharness",
+    messages=[{"role": "user", "content": "Answer the question."}],
+)
+
+answer = response.choices[0].message.content
+```
+
+## Multimodal Input
+
+For image benchmarks, send OpenAI-style content parts. The first API version
+supports one or more `data:image/...;base64,...` URLs in the same request.
+
+```python
+response = client.chat.completions.create(
+    model="researchharness",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What is shown? Return JSON with key answer."},
+                {"type": "image_url", "image_url": {"url": data_url}},
+            ],
+        }
+    ],
+)
+```
+
+The API saves each submitted image under `agent_workspace/inputs/images/`,
+passes the image content to the first ResearchHarness model call when the
+backend model supports image parts, and includes each saved path in the
+agent-visible text.
+
+The returned answer should be self-contained for a remote evaluator. Workspace
+files may support the run, but the response should not only say to consult
+`answer.md`, `report.md`, an image file, or another local artifact.
+
+## Scope
+
+- The endpoint is synchronous and returns one final text answer.
+- Each request gets a separate workspace subdirectory.
+- The API uses an input wrapper, the ResearchHarness agent, and an output
+  wrapper so strict benchmark output formats do not destabilize the agent loop.
+- Streaming, async run status, artifact download, and remote image fetching are
+  intentionally out of scope for this minimal QA contract.
diff --git a/benchmarks/QA/role_prompt.md b/benchmarks/QA/role_prompt.md
new file mode 100644
index 0000000000000000000000000000000000000000..1dcc3d55d2d3ea636e2403c30573ee74785da3f0
--- /dev/null
+++ b/benchmarks/QA/role_prompt.md
@@ -0,0 +1,31 @@
+# Benchmark Role Overlay
+
+You are running inside ResearchHarness for a QA or VQA benchmark.
+
+Behavior:
+- Solve the user's task directly and carefully.
+- Use tools only when they materially improve answer quality.
+- If the request includes saved image paths, inspect the image evidence when it
+  is needed for the answer.
+- Do not ask the user follow-up questions.
+- Do not stop with a plan. Produce the answer once enough evidence has been
+  gathered.
+- It is acceptable to explain what evidence was used in the agent's internal
+  final text; a downstream formatter will enforce the benchmark's exact output
+  contract.
+- Assume the remote evaluator only sees the returned text, not your workspace.
+- Your final text must be a complete, independent plain-text answer.
+- Include the actual answer to the original question.
+- Include supporting evidence, calculations, or reasoning steps when they are
+  needed to make the answer understandable.
+- In this benchmark role, do not rely on local workspace files as the answer.
+  Files such as `answer.md`, `report.md`, images, or other artifacts may support
+  your work, but the returned text itself must contain the answer a remote
+  evaluator needs.
+
+For visual tasks:
+- Prefer the attached image content when it is available in the model input.
+- Use `ReadImage` on saved image paths when additional visual inspection is
+  needed or when the prompt explicitly asks you to inspect local image files.
+- Do not invent visual details that are not supported by the image or tool
+  output.
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2868c293a88e596a64c9ac549ee3f5e246c66a1d
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,18 @@
+# Benchmarks
+
+This folder records benchmark-specific integration contracts that live
+**outside** `agent_base` so the core harness stays generic, lightweight, and
+fair across different evaluations.
+
+| Benchmark | Directory | Tracked contract |
+| --- | --- | --- |
+| ResearchClawBench | `benchmarks/ResearchClawBench/` | `README.md` + `role_prompt.md` + `adapter.py` |
+| QA / VQA-style benchmarks | `benchmarks/QA/` | `README.md` + `role_prompt.md` |
+
+## Notes
+
+- `agent_base/` stays focused on the reusable harness runtime.
+- Benchmark-specific prompts, adapters, and integration notes should live under
+  their own benchmark subdirectory.
+- Local benchmark helpers may exist for private experimentation, but they do
+  not define the formal external integration contract.
diff --git a/benchmarks/ResearchClawBench/README.md b/benchmarks/ResearchClawBench/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..12b50cba4d0695615bb7ab28ee1b50f00f9f421f
--- /dev/null
+++ b/benchmarks/ResearchClawBench/README.md
@@ -0,0 +1,44 @@
+# ResearchClawBench
+
+This directory contains the tracked files needed to document how `ResearchHarness`
+should be integrated into `ResearchClawBench`.
+
+ResearchHarness is intended to serve here as a **general and fair execution
+substrate** for tool-using LLM evaluation, while `ResearchClawBench` remains in
+charge of task construction, hidden-answer isolation, and scoring.
+
+## Recommended `agents.json` Entry
+
+Use a single direct command that launches the thin top-level ResearchHarness
+entrypoint.
+
+```json
+{
+  "researchharness": {
+    "label": "ResearchHarness",
+    "icon": "H",
+    "logo": "/static/logos/rh.svg",
+    "cmd": "python3 /abs/path/to/ResearchHarness/run_agent.py <PROMPT> --workspace-root <WORKSPACE> --role-prompt-file /abs/path/to/ResearchHarness/benchmarks/ResearchClawBench/role_prompt.md --trace-dir <WORKSPACE>"
+  }
+}
+```
+
+## Why This Shape
+
+- `ResearchClawBench` already prepares the workspace, writes `INSTRUCTIONS.md`,
+  and isolates hidden checklist data.
+- `ResearchHarness` should only execute the agent through a stable harness
+  interface.
+- The command stays unchanged. The entrypoint automatically selects the
+  lightweight adapter in `benchmarks/ResearchClawBench/adapter.py` when this
+  benchmark role prompt is used.
+
+## Notes
+
+- Replace `/abs/path/to/ResearchHarness/` with the real local checkout path.
+- The command should stay one-line and non-interactive.
+- The adapter prevents premature termination on long tasks by refusing to accept
+  plain-text completion before `report/report.md` exists in the workspace.
+- The adapter excludes `AskUser`; ResearchClawBench runs must remain fully non-interactive.
+- Any local batch helpers or ad hoc benchmark scripts should remain untracked
+  and live outside the formal integration contract.
diff --git a/benchmarks/ResearchClawBench/adapter.py b/benchmarks/ResearchClawBench/adapter.py
new file mode 100644
index 0000000000000000000000000000000000000000..4785f87f56c440742e32ea93dc29ec097d4d4eac
--- /dev/null
+++ b/benchmarks/ResearchClawBench/adapter.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Optional, Sequence
+
+from agent_base.react_agent import AVAILABLE_TOOL_MAP, MultiTurnReactAgent
+from agent_base.tools.tooling import normalize_workspace_root
+
+
+class ResearchClawBenchAgent(MultiTurnReactAgent):
+    """
+    Lightweight benchmark adapter for ResearchClawBench.
+
+    The benchmark task is not complete until the run workspace contains the
+    canonical final report at report/report.md. Pure planning text without that
+    artifact should not terminate the agent loop.
+    """
+
+    required_report_relpath = Path("report") / "report.md"
+    forbidden_tool_names = {"AskUser"}
+
+    def __init__(self, function_list: Optional[Sequence[str]] = None, *args: Any, **kwargs: Any):
+        if function_list is None:
+            function_list = [
+                tool_name
+                for tool_name in AVAILABLE_TOOL_MAP
+                if tool_name not in self.forbidden_tool_names
+            ]
+        else:
+            function_list = [str(tool_name).strip() for tool_name in function_list if str(tool_name).strip()]
+            forbidden = sorted(set(function_list) & self.forbidden_tool_names)
+            if forbidden:
+                raise ValueError(f"Tools are not allowed in ResearchClawBench runs: {forbidden}")
+        super().__init__(function_list=list(function_list), *args, **kwargs)
+
+    def _required_report_path(self, workspace_root: Optional[str]) -> Path:
+        workspace = Path(normalize_workspace_root(workspace_root))
+        return workspace / self.required_report_relpath
+
+    def should_accept_plaintext_result(
+        self,
+        *,
+        result_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> bool:
+        if not self._required_report_path(workspace_root).exists():
+            return False
+        return super().should_accept_plaintext_result(
+            result_text=result_text,
+            workspace_root=workspace_root,
+            messages=messages,
+        )
+
+    def rejected_plaintext_result_message(
+        self,
+        *,
+        result_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> str:
+        if not self._required_report_path(workspace_root).exists():
+            return (
+                "The previous assistant turn was not accepted as the final result because "
+                "ResearchClawBench requires report/report.md and that file is still missing. "
+                "Continue working and use tool calls to produce or verify report/report.md before finishing."
+            )
+        return super().rejected_plaintext_result_message(
+            result_text=result_text,
+            workspace_root=workspace_root,
+            messages=messages,
+        )
+
+    def should_accept_terminal_error(
+        self,
+        *,
+        error_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> bool:
+        return self._required_report_path(workspace_root).exists()
+
+    def accepted_terminal_error_result_text(
+        self,
+        *,
+        error_text: str,
+        workspace_root: Optional[str],
+        messages: Sequence[dict[str, Any]],
+    ) -> str:
+        return (
+            "ResearchClawBench completion recovered after a terminal LLM/runtime error because "
+            "report/report.md already exists and the required final artifact has been produced."
+        )
diff --git a/benchmarks/ResearchClawBench/role_prompt.md b/benchmarks/ResearchClawBench/role_prompt.md
new file mode 100644
index 0000000000000000000000000000000000000000..dcf5edc06bd32e7818ea97b06db35f4e0b848381
--- /dev/null
+++ b/benchmarks/ResearchClawBench/role_prompt.md
@@ -0,0 +1,195 @@
+# Benchmark Role Overlay
+
+## Purpose
+
+You are running inside a benchmark-style scientific evaluation.
+
+Your job is not just to produce a plausible report. Your job is to produce a
+report whose claims are traceable to concrete artifacts in the workspace and
+whose methods match the task's named scientific commitments as closely as the
+environment allows.
+
+This benchmark is non-interactive. Do not use `AskUser` or attempt to ask the
+human for clarification. Resolve ambiguity from `INSTRUCTIONS.md`, workspace
+files, related work, and available local or web tools.
+
+## Method Contract
+
+- Parse the task into explicit methodological commitments early.
+- Before broad exploration, infer the likely target artifact families required by
+  the task, including:
+  - primary quantitative answers
+  - required comparison tables
+  - expected figure families
+  - interpretability artifacts
+  - subgroup or condition-specific outputs
+- If the task names a framework, protocol, comparison structure,
+  interpretability method, simulator, ablation, posterior treatment,
+  reconciliation step, or validation design, treat that as part of the
+  contract.
+- Do not silently replace an explicitly named method with a looser descriptive
+  analysis.
+- Save a concise contract summary to `outputs/method_contract.json`.
+- Save the inferred target artifact inventory to
+  `outputs/target_artifact_inventory.json`.
+- After reading the most relevant related-work papers, refresh both files if the
+  papers reveal additional named baselines, architectures, figure families,
+  comparison strata, or interpretability artifacts central to the task.
+- Save a concise related-work extraction to `outputs/related_work_contract.json`
+  whenever related work materially changes the contract or artifact inventory.
+
+## Capability Check
+
+- Before approximating or skipping a named method, check whether the needed
+  dependency, library, or runtime capability is available.
+- Save the result to `outputs/dependency_check.json`.
+- If a named method cannot be implemented exactly, state the exact limitation
+  and the fallback.
+- If the task centers on a named model family, simulator, architecture, or
+  analysis stack, do not quietly swap to a different family just because it is
+  easier. Either implement a minimally faithful version of the named approach
+  or make the deviation explicit before proceeding.
+
+## Evidence Discipline
+
+- Every major scientific claim should have at least one explicit supporting
+  artifact in `outputs/` or `report/images/`.
+- Export the exact tables, matrices, or JSON objects used to create each main
+  figure.
+- Add a dedicated validation subsection to the report that separates:
+  - what was verified directly from workspace data
+  - what came from related work
+  - what remains an assumption or limitation
+- Answer claim-recovery questions claim-by-claim rather than only with a broad
+  narrative.
+- Save a concise claim recovery table before finalizing the report.
+- When the task asks for quantitative constraints, limits, posterior summaries,
+  calibration values, or uncertainty summaries, save those values explicitly in
+  the requested variables and units rather than only through a proxy
+  transformation.
+- If the task ultimately asks for a direct constraint on a named target
+  quantity, prefer deriving and reporting that named quantity itself instead of
+  stopping at an intermediate proxy axis, surrogate scale, or nearby latent
+  variable whenever a defensible derivation is possible from workspace data and
+  related work.
+- If posterior samples are a primary input, report canonical distribution
+  summaries for each primary source, including mean and standard deviation,
+  unless those statistics are mathematically invalid for the variable.
+- If the task names a primary source, cohort, benchmark, or experimental arm,
+  produce at least one source-specific artifact for it before emphasizing only
+  combined or aggregated results.
+- If the task names a direct target quantity, threshold, or decision criterion,
+  export a compact result table that answers it directly before presenting
+  broader supporting analyses.
+
+## Related Work Use
+
+- Read `related_work/` early, but bounded.
+- Start with concise or bounded reads when papers are long.
+- Extract only task-relevant facts into notes or structured outputs.
+- If related work contains validation metrics, methodological caveats,
+  baselines, or target comparison axes that matter for the task, incorporate
+  them explicitly.
+- Prefer extracting from related work:
+  - named methods or architectures to reproduce or compare against
+  - target comparison axes and subgroup splits
+  - likely main figure families or panel structures
+  - explicit quantitative targets, thresholds, or calibration outputs
+
+## Figure And Comparison Fidelity
+
+- Prefer claim-driven figures over generic exploratory plots.
+- Infer likely figure families and comparison structures from the task and
+  related work.
+- If the task is about projections, calibration, method agreement, subgroup
+  trends, rankings, level-wise comparisons, or ablations, produce figures that
+  directly encode those structures.
+- Keep the main figure set compact: each main figure should support a specific
+  target claim.
+- If the task's core claim is source-specific, dataset-specific, or benchmark-
+  specific, include at least one main figure at that same granularity rather
+  than only a pooled or combined summary figure.
+- If the task implies a named figure family such as ablation curves, PR/ROC
+  curves, parity plots, subgroup heatmaps, saliency maps, architecture
+  diagrams, or level-wise comparisons, prioritize that family over a generic
+  substitute.
+
+## Group And Condition Preservation
+
+- If the task names groups, conditions, labs, sexes, environments, shells,
+  depth levels, or other comparison strata, preserve them in at least one
+  exported table or figure.
+- Do not silently collapse mixed categories if the scientific question depends
+  on them.
+- When subgroup structure matters over time, prefer a subgroup-by-time matrix
+  and save it.
+- If the task is a benchmark or model-comparison study across datasets,
+  baselines, cohorts, or conditions, export a compact comparison table with the
+  main metric reported as mean ± standard deviation whenever repeated runs,
+  folds, or stochastic training are part of the setup.
+- For multi-condition or multi-cohort tasks, save at least one artifact at the
+  per-condition granularity before merging across conditions.
+
+## Named Method Fidelity
+
+- If the task or related work defines a named mechanism, algorithm, or
+  protocol central to the scientific claim, save a fidelity checklist to
+  `outputs/method_fidelity_checklist.json`.
+- That checklist should capture:
+  - the exact definition
+  - assumptions
+  - invariants
+  - non-negotiable structural steps
+- Use it to verify whether the implemented method actually matches the named
+  mechanism.
+- If you deviate, explain exactly how and why in the report.
+- If the task revolves around a named architecture or protocol, capture the key
+  structural ingredients that distinguish it from nearby alternatives and check
+  them explicitly.
+
+## Small Sweeps And Ablations
+
+- If the named mechanism exposes a small discrete design variable, such as
+  levels, layers, stages, shells, bins, or ablation settings, run at least a
+  small sweep unless it is genuinely impossible from the available workspace.
+- If the task names a specific interpretability method such as SHAP,
+  permutation importance, saliency, or similar, produce at least one artifact
+  using that named method.
+- If the task claims improved interpretability, do not stop at aggregate metric
+  gains alone; produce at least one explicit interpretability artifact and tie
+  it back to domain-relevant entities, groups, or substructures named in the
+  task or related work.
+- If the task names multiple groups, labs, cohorts, or environments, prefer an
+  interpretability artifact that compares them directly instead of a single
+  pooled explanation.
+- If interpretability is central and the chosen model family supports a common
+  post hoc explanation method, do not stop at native coefficient or impurity
+  magnitudes alone. Add at least one post hoc explanation artifact such as
+  SHAP, permutation importance, saliency, attention attribution, or a similarly
+  standard method for that model family.
+
+## Finalization
+
+- Start `report/report.md` as soon as at least two core result families already
+  have concrete supporting artifacts in `outputs/` or `report/images/`.
+- Prefer an evidence-backed report draft over one more optional script, one
+  more polish pass, or one more non-essential figure.
+- Once the primary quantitative outputs, the main comparison figures, and the
+  core validation artifacts exist, write `report/report.md` immediately.
+- Do not postpone the report in order to chase optional supplementary figures,
+  extra exploratory analyses, or additional polish that is not required to
+  support the task's main claims.
+- Treat optional supplementary work as lower priority than a complete,
+  evidence-backed report. If the report can already answer the task directly,
+  finish the report first and only then consider extras if there is clear
+  remaining need.
+- The final report should be tightly traceable.
+- Important numbers should be reproducible from saved artifacts in the
+  workspace.
+- Do not claim exact reproduction if only a rough approximation was achieved.
+- Before finalizing, check that the report contains direct answers to the main
+  requested outputs in the named variables, units, and confidence language of
+  the task, not only nearby surrogate quantities.
+- Before finalizing, verify that every primary entry in
+  `outputs/target_artifact_inventory.json` is either satisfied by a concrete
+  saved artifact or explicitly marked as unsatisfied with a reason.
diff --git a/docs/tutorial_en.md b/docs/tutorial_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..8ac3ad94105721e0016375283253f5f33a9f9343
--- /dev/null
+++ b/docs/tutorial_en.md
@@ -0,0 +1,531 @@
+# ResearchHarness Tutorial
+
+This tutorial explains how to use ResearchHarness from the command line and as
+an OpenAI-compatible API service.
+
+ResearchHarness is a lightweight, general-purpose harness for tool-using LLM
+agents. It can be used as:
+
+- a command-line local agent,
+- a fair execution substrate for agent benchmarks,
+- an OpenAI-compatible synchronous API backend,
+- a personal assistant runtime for files, code, reports, PDFs, images, and web tasks.
+
+## 1. Install
+
+After cloning the repository, install the dependencies from the repository root:
+
+```bash
+python3 -m pip install -r requirements.txt
+```
+
+Python 3.10+ is recommended.
+
+## 2. Configure Environment Variables
+
+Copy `.env.example` to `.env` and fill in the required values.
+
+Required variables:
+
+| Variable | Meaning |
+| --- | --- |
+| `API_KEY` | API key for your OpenAI-compatible LLM provider. |
+| `API_BASE` | Base URL for the OpenAI-compatible chat-completions endpoint. |
+| `MODEL_NAME` | Main model used by ResearchHarness. |
+| `SERPER_KEY_ID` | Serper key for `WebSearch` and `ScholarSearch`: https://serper.dev/ |
+| `JINA_API_KEYS` | Jina key for `WebFetch`: https://jina.ai/ |
+| `MINERU_TOKEN` | MinerU token for `ReadPDF`: https://mineru.net/ |
+
+Optional variables:
+
+| Variable | Default | Meaning |
+| --- | --- | --- |
+| `WORKSPACE_ROOT` | `./workspace` | Default workspace root when no explicit workspace is passed. |
+| `MAX_LLM_CALL_PER_RUN` | `100` | Maximum LLM calls in one agent run. |
+| `MAX_AGENT_ROUNDS` | `100` | Maximum ReAct loop rounds. |
+| `MAX_AGENT_RUNTIME_SECONDS` | `9000` | Maximum wall-clock runtime for one agent run. |
+| `LLM_TIMEOUT_SECONDS` | `600` | Timeout for each LLM API request. |
+| `LLM_MAX_OUTPUT_TOKENS` | `10000` | Requested maximum output tokens. |
+| `MAX_INPUT_TOKENS` | `320000` | Input-token budget used by runtime accounting. |
+| `LLM_MAX_RETRIES` | `10` | Maximum retries for transient LLM API errors. |
+| `TEMPERATURE` | `0.6` | Main model temperature. |
+| `TOP_P` | `0.95` | Main model top-p. |
+| `PRESENCE_PENALTY` | `1.1` | Main model presence penalty when supported. |
+| `AUTO_COMPACT_TRIGGER_TOKENS` | `128k` | Context length threshold for automatic compaction. |
+| `IMAGE_PART_TOKEN_ESTIMATE` | `1536` | Token estimate for each image content part. |
+| `LLM_IMAGE_MAX_EDGE` | `1568` | Maximum image edge sent to multimodal models. |
+| `LLM_IMAGE_MAX_BYTES` | `524288` | Maximum compressed image payload size. |
+| `LLM_IMAGE_JPEG_QUALITY` | `85` | Initial JPEG quality for image compression. |
+| `DEBUG_AGENT` | `false` | Verbose agent-loop logs. |
+| `DEBUG_SEARCH` | `false` | Verbose WebSearch logs. |
+| `DEBUG_SCHOLAR` | `false` | Verbose ScholarSearch logs. |
+| `DEBUG_VISIT` | `false` | Verbose WebFetch logs. |
+
+Before real use, run:
+
+```bash
+python3 tests/test_tool_availability.py
+```
+
+All tools should pass. Missing service keys, missing dependencies, exhausted
+credits, or unavailable external tools should be treated as failures.
+
+If `WebSearch`, `ScholarSearch`, `WebFetch`, or `ReadPDF` fails with network,
+TLS, upload, download, or parsing errors, try disabling VPN/proxy and rerun the
+test.
+
+## 3. Command-Line Usage
+
+Run a simple prompt:
+
+```bash
+python3 run_agent.py "Who proposed the transformer architecture, and in what year was the paper published?"
+```
+
+Use an explicit workspace:
+
+```bash
+python3 run_agent.py "Summarize this project." \
+  --workspace-root ./workspace
+```
+
+You can replace `./workspace` with any other workspace directory.
+
+Save traces to a directory:
+
+```bash
+python3 run_agent.py "Summarize this project." \
+  --workspace-root ./workspace \
+  --trace-dir ./traces
+```
+
+You can replace `./traces` with any other trace directory.
+
+Without `--trace-dir`, CLI runs do not write a trace file.
+
+Append a role prompt:
+
+```bash
+python3 run_agent.py "Answer this QA task." \
+  --workspace-root ./workspace \
+  --role-prompt-file benchmarks/QA/role_prompt.md
+```
+
+Attach one or more local images:
+
+```bash
+python3 run_agent.py "Read the image and return JSON." \
+  --workspace-root ./workspace \
+  --images /path/to/image.png /path/to/second-image.png
+```
+
+Each image path must exist. ResearchHarness copies the images into
+`./workspace/inputs/images/`, sends them as initial `image_url` content parts, and adds each
+saved relative path to the user text so later rounds can call `ReadImage` on the same files.
+
+In an interactive terminal, CLI runs continue after a final answer and prompt
+for a follow-up. The follow-up run keeps the prior messages, tool results, and
+saved image path hints. During a running step, `Ctrl+C` interrupts the current
+run at the next safe point and returns to follow-up mode with context preserved.
+Press `Ctrl+C` at the follow-up prompt or send EOF to exit. Use `--no-chat` for
+strict one-shot behavior, or `--chat` to force follow-up mode.
+
+For browser-based local use, run `python3 run_frontend.py`. The frontend uses an
+existing workspace selected in the page, streams tool steps live, accepts one or
+more image attachments, and continues the current conversation after each final
+answer until you click **New chat**. While running, the send button becomes
+**Stop**; it interrupts at the next safe point and keeps the conversation
+context for the next message.
+
+### CLI Parameters
+
+| Parameter | Required | Meaning |
+| --- | --- | --- |
+| positional `prompt` | yes, unless `--prompt-file` is used | Prompt text. |
+| `--prompt-file PATH` | no | Read prompt text from a UTF-8 file. |
+| `--workspace-root PATH` | no | Workspace root for local file tools, Bash, and terminal sessions. Created if missing. |
+| `--trace-dir PATH` | no | Directory where `trace_*.jsonl` is written. |
+| `--role-prompt-file PATH` | no, repeatable | Append role-specific prompt text to the base system prompt. |
+| `--images PATH [PATH ...]` | no | Copy one or more local images into `inputs/images/` and attach them to the initial user message. |
+| `--chat` / `--no-chat` | no | Enable or disable CLI follow-up mode. Default: enabled only when stdin and stdout are interactive terminals. |
+
+## 4. OpenAI-Compatible API Server
+
+ResearchHarness can serve a synchronous OpenAI-compatible endpoint:
+
+```http
+POST /v1/chat/completions
+```
+
+This allows existing OpenAI SDK clients to call ResearchHarness by changing only
+`base_url`.
+
+### Start the Server
+
+Default deployment:
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --host 127.0.0.1 \
+  --port 8686
+```
+
+QA/VQA benchmark deployment with a benchmark role overlay:
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --host 127.0.0.1 \
+  --port 8686 \
+  --role-prompt-file benchmarks/QA/role_prompt.md
+```
+
+### API Server Parameters
+
+| Parameter | Required | Default | Meaning |
+| --- | --- | --- | --- |
+| `--api-runs-dir PATH` | yes | none | Parent directory for API runs. Each request gets one subdirectory. |
+| `--host HOST` | no | `127.0.0.1` | Host to bind. |
+| `--port PORT` | no | `8686` | Port to bind. |
+| `--role-prompt-file PATH` | no, repeatable | none | Append role prompt text to the base ResearchHarness prompt. |
+| `--input-wrapper` / `--no-input-wrapper` | no | enabled | Enable or disable the input LLM wrapper. |
+| `--output-wrapper` / `--no-output-wrapper` | no | enabled | Enable or disable the output LLM wrapper. |
+
+### Wrapper Modes
+
+Both wrappers are enabled by default.
+
+Strict-format benchmark mode:
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --role-prompt-file benchmarks/QA/role_prompt.md \
+  --input-wrapper \
+  --output-wrapper
+```
+
+Direct agent mode:
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --no-input-wrapper \
+  --no-output-wrapper
+```
+
+Simple input plus strict final formatting:
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --no-input-wrapper \
+  --output-wrapper
+```
+
+The input wrapper rewrites the original user request into a stable task for the
+agent. The output wrapper formats the agent result to match the user's requested
+answer contract. Wrappers must not invent new facts; they only normalize input
+and format output.
+
+The API server is intentionally one request -> one answer. It does not keep a
+server-side conversation between HTTP requests. If an application needs API
+multi-turn behavior, keep that state in the client and send the needed prior
+context in later requests.
+
+```mermaid
+flowchart LR
+    U[User Input] --> IW[Input Wrapper LLM]
+    IW --> A[ResearchHarness Agent]
+    A --> OW[Output Wrapper LLM]
+    OW --> O[Output]
+```
+
+## 5. API Workspace Layout
+
+Each API request creates one run directory:
+
+```text
+./api_runs/
+`-- run_YYYYMMDD_HHMMSS_<random>/
+    |-- agent_workspace/
+    |   `-- inputs/
+    |       `-- images/
+    `-- agent_trace/
+        |-- api_trace.jsonl
+        |-- trace_*.jsonl
+        `-- _session_state.json
+```
+
+Meaning:
+
+| Path | Meaning |
+| --- | --- |
+| `run_YYYYMMDD_HHMMSS_<random>/` | Per-request run root. |
+| `agent_workspace/` | The only workspace visible to the agent. File tools, Bash, `ls`, and `cat` start here. |
+| `agent_workspace/inputs/images/` | User-provided images saved from API requests. |
+| `agent_trace/` | API trace, agent trace, and runtime records. |
+
+For multimodal requests, image inputs are handled in two ways at the same time:
+the image content is passed to the backend model as initial multimodal input
+when the selected model supports it, and each image is saved under
+`agent_workspace/inputs/images/`. Each saved relative path is also included in
+the agent-visible text, so later rounds can call `ReadImage` on a stable local
+path without repeatedly resending image bytes.
+
+This layout keeps the agent-visible workspace separate from server-side trace files.
+In API deployment mode, traces are saved by default: every request writes
+`api_trace.jsonl`, `trace_*.jsonl`, and `_session_state.json` under that run's `agent_trace/`
+directory.
+
+## 6. Text Request with OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
+
+response = client.chat.completions.create(
+    model="researchharness",
+    messages=[
+        {"role": "user", "content": "Answer in one sentence: what is 2 + 2?"}
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+## 7. Multimodal Request with OpenAI SDK
+
+The first API version supports one or more `data:image/...;base64,...` image
+URLs in the same request. Remote image URLs and local file paths are
+intentionally not supported by the API server.
+
+The example below generates an image in memory and asks for JSON output.
+
+```python
+import base64
+from io import BytesIO
+
+from PIL import Image, ImageDraw
+from openai import OpenAI
+
+image = Image.new("RGB", (320, 120), "white")
+draw = ImageDraw.Draw(image)
+draw.text((40, 45), "7 + 5 = ?", fill="black")
+buffer = BytesIO()
+image.save(buffer, format="PNG")
+data_url = "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode("ascii")
+
+client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
+
+response = client.chat.completions.create(
+    model="researchharness",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": (
+                        "The image contains a simple arithmetic expression. "
+                        "Return JSON with exactly two keys: expression and answer."
+                    ),
+                },
+                {"type": "image_url", "image_url": {"url": data_url}},
+            ],
+        }
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+Expected answer shape:
+
+```json
+{"expression":"7 + 5","answer":12}
+```
+
+## 8. API Request and Response Contract
+
+### `POST /v1/chat/completions`
+
+Supported request fields:
+
+| Field | Required | Meaning |
+| --- | --- | --- |
+| `model` | yes | Client-visible model label. It does not override `MODEL_NAME`; the backend model comes from `.env`. |
+| `messages` | yes | OpenAI-style chat messages. |
+| `stream` | no | Must be absent or `false`; streaming is not supported. |
+| `n` | no | Must be absent or `1`. |
+| `max_tokens` | no | Maximum output tokens for the output wrapper. |
+| `max_completion_tokens` | no | Alias accepted for output-wrapper max tokens. |
+| `response_format` | no | Passed to the wrappers as an output-format hint. |
+
+Supported message roles:
+
+| Role | Supported |
+| --- | --- |
+| `system` | yes |
+| `user` | yes |
+| `assistant` | yes |
+| `tool` | no |
+
+Supported content forms:
+
+```json
+{"role": "user", "content": "plain text"}
+```
+
+```json
+{
+  "role": "user",
+  "content": [
+    {"type": "text", "text": "question"},
+    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
+  ]
+}
+```
+
+Response shape:
+
+```json
+{
+  "id": "chatcmpl_...",
+  "object": "chat.completion",
+  "created": 1770000000,
+  "model": "researchharness",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "final answer"
+      },
+      "finish_reason": "stop"
+    }
+  ]
+}
+```
+
+Callers usually only need:
+
+```python
+response.choices[0].message.content
+```
+
+### `GET /v1/health`
+
+Returns:
+
+```json
+{
+  "status": "ok",
+  "api_runs_dir": "./api_runs",
+  "input_wrapper": true,
+  "output_wrapper": true
+}
+```
+
+## 9. Tool Surface
+
+ResearchHarness currently includes:
+
+| Tool | Purpose |
+| --- | --- |
+| `Glob` | Discover files by pattern. |
+| `Grep` | Search text in files. |
+| `Read` | Read text files with bounds. |
+| `ReadPDF` | Parse PDFs with MinerU/structai. |
+| `ReadImage` | Inspect local image files and forward image content to vision-capable models. |
+| `Write` | Write files inside the workspace. |
+| `Edit` | Patch files inside the workspace. |
+| `Bash` | Run shell commands inside the workspace. |
+| `WebSearch` | Web search through Serper. |
+| `ScholarSearch` | Scholar-style search through Serper. |
+| `WebFetch` | Fetch and summarize webpages through Jina and the configured model. |
+| `AskUser` | Ask a human for clarification in interactive runs. Disabled by some benchmark adapters. |
+| `TerminalStart` / `TerminalWrite` / `TerminalRead` / `TerminalInterrupt` / `TerminalKill` | Persistent terminal sessions. |
+
+## 10. Traces and Records
+
+CLI runs write traces only when `--trace-dir` is provided. Without
+`--trace-dir`, CLI runs do not write a trace file.
+
+API runs write traces under:
+
+```text
+./api_runs/run_.../agent_trace/
+```
+
+Important files:
+
+| File | Meaning |
+| --- | --- |
+| `api_trace.jsonl` | Input wrapper, agent result, and output wrapper records. |
+| `trace_*.jsonl` | Flat agent runtime trace. |
+| `_session_state.json` | Current session state, written next to `trace_*.jsonl` when tracing is enabled. |
+
+The trace stores tool calls, tool results, LLM call capture payloads, compaction
+events, errors, and final termination state.
+
+## 11. Benchmark Adapters
+
+Tracked benchmark contracts live under `benchmarks/`.
+
+Current tracked adapters:
+
+| Benchmark | Directory | Notes |
+| --- | --- | --- |
+| ResearchClawBench | `benchmarks/ResearchClawBench/` | CLI integration with role prompt and adapter. |
+| QA / VQA | `benchmarks/QA/` | OpenAI-compatible API integration for text and multimodal QA. |
+
+Benchmark-specific behavior should stay outside `agent_base/`.
+
+## 12. Testing
+
+Recommended checks:
+
+```bash
+python3 tests/test_tool_availability.py
+python3 tests/test_openai_api_checks.py
+python3 tests/test_agent_extension_checks.py
+python3 tests/test_edge_case_checks.py
+python3 tests/test_toolchain_validation.py
+```
+
+If using conda:
+
+```bash
+conda run -n agent python3 tests/test_openai_api_checks.py
+```
+
+## 13. Troubleshooting
+
+Common issues:
+
+| Symptom | Likely cause | Action |
+| --- | --- | --- |
+| Missing required env error | `.env` is incomplete | Fill required variables. |
+| Web/PDF tools fail | VPN/proxy/TLS/service issue | Disable VPN/proxy and rerun tool availability tests. |
+| Image request returns 400 | Image URL is not a `data:image/...;base64,...` URL | Convert the image to a base64 data URL. |
+| Backend model rejects images | Model endpoint is not vision-capable | Use a vision-capable model or send text-only tasks. |
+| API request fails with streaming error | `stream=true` was sent | Use synchronous requests only. |
+| Unexpected output format | Output wrapper disabled or prompt under-specified | Enable `--output-wrapper` and state the desired format clearly. |
+
+## 14. Current Boundaries
+
+The first API version intentionally does not include:
+
+- streaming,
+- async run status,
+- cancellation,
+- artifact download endpoints,
+- remote image URL downloading,
+- user authentication,
+- multi-tenant access control.
+
+These can be added later as separate layers without changing the core harness
+loop.
diff --git a/docs/tutorial_zh.md b/docs/tutorial_zh.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ecdfe41fd70078137f4a9b36d30a1a0cd9fcdb3
--- /dev/null
+++ b/docs/tutorial_zh.md
@@ -0,0 +1,511 @@
+# ResearchHarness 教程
+
+本文介绍如何通过命令行和 OpenAI-compatible API 使用 ResearchHarness。
+
+ResearchHarness 是一个轻量、通用的 tool-using LLM agent harness。它可以作为：
+
+- 命令行本地 agent，
+- agent benchmark 的公平执行底座，
+- OpenAI-compatible 同步 API 后端，
+- 面向代码、文件、报告、PDF、图片、网页任务的个人助手运行时。
+
+## 1. 安装
+
+安装依赖：
+
+```bash
+python3 -m pip install -r requirements.txt
+```
+
+推荐使用 Python 3.10+。
+
+## 2. 配置环境变量
+
+复制 `.env.example` 为 `.env`，并填写必需变量。
+
+必需变量：
+
+| 变量 | 含义 |
+| --- | --- |
+| `API_KEY` | OpenAI-compatible LLM 服务的 API key。 |
+| `API_BASE` | OpenAI-compatible chat-completions endpoint 的 base URL。 |
+| `MODEL_NAME` | ResearchHarness 使用的主模型。 |
+| `SERPER_KEY_ID` | `WebSearch` 和 `ScholarSearch` 使用的 Serper key：https://serper.dev/ |
+| `JINA_API_KEYS` | `WebFetch` 使用的 Jina key：https://jina.ai/ |
+| `MINERU_TOKEN` | `ReadPDF` 使用的 MinerU token：https://mineru.net/ |
+
+可选变量：
+
+| 变量 | 默认值 | 含义 |
+| --- | --- | --- |
+| `WORKSPACE_ROOT` | `./workspace` | 未显式传入 workspace 时使用的默认 workspace root。 |
+| `MAX_LLM_CALL_PER_RUN` | `100` | 单次 agent run 最多允许的 LLM 调用次数。 |
+| `MAX_AGENT_ROUNDS` | `100` | ReAct loop 最大轮次。 |
+| `MAX_AGENT_RUNTIME_SECONDS` | `9000` | 单次 agent run 的最大运行秒数。 |
+| `LLM_TIMEOUT_SECONDS` | `600` | 单次 LLM API 请求超时时间。 |
+| `LLM_MAX_OUTPUT_TOKENS` | `10000` | 请求模型输出的最大 token 数。 |
+| `MAX_INPUT_TOKENS` | `320000` | runtime token accounting 使用的输入 token 预算。 |
+| `LLM_MAX_RETRIES` | `10` | 瞬时 LLM API 错误最大重试次数。 |
+| `TEMPERATURE` | `0.6` | 主模型 temperature。 |
+| `TOP_P` | `0.95` | 主模型 top-p。 |
+| `PRESENCE_PENALTY` | `1.1` | provider 支持时使用的 presence penalty。 |
+| `AUTO_COMPACT_TRIGGER_TOKENS` | `128k` | 自动上下文压缩触发阈值。 |
+| `IMAGE_PART_TOKEN_ESTIMATE` | `1536` | 每个 image content part 的 token 估计。 |
+| `LLM_IMAGE_MAX_EDGE` | `1568` | 发送给多模态模型的图片最大边长。 |
+| `LLM_IMAGE_MAX_BYTES` | `524288` | 发送给多模态模型的压缩图片最大字节数。 |
+| `LLM_IMAGE_JPEG_QUALITY` | `85` | 图片压缩时的初始 JPEG 质量。 |
+| `DEBUG_AGENT` | `false` | 打印 agent loop 详细调试日志。 |
+| `DEBUG_SEARCH` | `false` | 打印 WebSearch 调试日志。 |
+| `DEBUG_SCHOLAR` | `false` | 打印 ScholarSearch 调试日志。 |
+| `DEBUG_VISIT` | `false` | 打印 WebFetch 调试日志。 |
+
+正式使用前，先运行：
+
+```bash
+python3 tests/test_tool_availability.py
+```
+
+预期结果是全部工具通过。缺 key、缺依赖、服务额度耗尽、外部工具不可用都应该视为失败，不应 skip。
+
+如果 `WebSearch`、`ScholarSearch`、`WebFetch` 或 `ReadPDF` 出现 network、TLS、upload、download、PDF parsing 相关错误，优先尝试关闭 VPN / proxy 后重跑测试。
+
+## 3. 命令行使用
+
+直接运行一个 prompt：
+
+```bash
+python3 run_agent.py "Who proposed the transformer architecture, and in what year was the paper published?"
+```
+
+指定 workspace：
+
+```bash
+python3 run_agent.py "Summarize this project." \
+  --workspace-root ./workspace
+```
+
+`./workspace` 可以替换为任何其他 workspace 目录。
+
+保存 trace：
+
+```bash
+python3 run_agent.py "Summarize this project." \
+  --workspace-root ./workspace \
+  --trace-dir ./traces
+```
+
+`./traces` 可以替换为任何其他 trace 目录。
+
+如果不传 `--trace-dir`，CLI 运行不会写 trace 文件。
+
+追加 role prompt：
+
+```bash
+python3 run_agent.py "Answer this QA task." \
+  --workspace-root ./workspace \
+  --role-prompt-file benchmarks/QA/role_prompt.md
+```
+
+附加本地图片：
+
+```bash
+python3 run_agent.py "Read the image and return JSON." \
+  --workspace-root ./workspace \
+  --images /path/to/image.png /path/to/second-image.png
+```
+
+每个图片路径都必须存在。RH 会把图片复制到 `./workspace/inputs/images/`，
+作为初始 `image_url` content part 传给模型，同时把每个保存后的相对路径写进
+用户文本，让后续轮次可以用 `ReadImage` 重新读取这些图片。
+
+在交互式终端中，CLI 会在最终回答后继续等待 follow-up。下一轮会保留之前的
+messages、工具结果和图片保存路径提示。运行过程中按 `Ctrl+C` 会在下一个安全点
+中断当前 run，并带着上下文回到 follow-up 模式。在 follow-up 输入处按 `Ctrl+C`
+或发送 EOF 可退出。脚本或 benchmark 如果需要严格的一问一答行为，使用
+`--no-chat`；需要强制开启时使用 `--chat`。
+
+如果需要浏览器本地界面，运行 `python3 run_frontend.py`。前端使用页面中选择的
+已有 workspace，实时显示工具步骤，支持一张或多张图片附件，并在每次最终回答后
+继续当前对话，直到点击 **New chat**。运行中发送按钮会变成 **Stop**；它会在下一个
+安全点中断，并保留上下文用于下一条消息。
+
+### CLI 参数
+
+| 参数 | 是否必需 | 含义 |
+| --- | --- | --- |
+| 位置参数 `prompt` | 是，除非使用 `--prompt-file` | prompt 文本。 |
+| `--prompt-file PATH` | 否 | 从 UTF-8 文件读取 prompt。 |
+| `--workspace-root PATH` | 否 | 本地文件工具、Bash、Terminal 使用的 workspace root；不存在会自动创建。 |
+| `--trace-dir PATH` | 否 | 写入 `trace_*.jsonl` 的目录。 |
+| `--role-prompt-file PATH` | 否，可重复 | 追加 role-specific prompt 到 base system prompt。 |
+| `--images PATH [PATH ...]` | 否 | 把一张或多张本地图片复制到 `inputs/images/` 并附加到初始用户消息。 |
+| `--chat` / `--no-chat` | 否 | 开启或关闭 CLI follow-up 模式。默认只在 stdin 和 stdout 都是交互式终端时开启。 |
+
+## 4. OpenAI-Compatible API Server
+
+ResearchHarness 可以部署为同步 OpenAI-compatible endpoint：
+
+```http
+POST /v1/chat/completions
+```
+
+这样，现有 OpenAI SDK 客户端只需要修改 `base_url` 就可以调用 ResearchHarness。
+
+### 启动服务
+
+默认部署：
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --host 127.0.0.1 \
+  --port 8686
+```
+
+QA/VQA benchmark 部署，可以额外加 benchmark role overlay：
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --host 127.0.0.1 \
+  --port 8686 \
+  --role-prompt-file benchmarks/QA/role_prompt.md
+```
+
+### API Server 参数
+
+| 参数 | 是否必需 | 默认值 | 含义 |
+| --- | --- | --- | --- |
+| `--api-runs-dir PATH` | 是 | 无 | API runs 的父目录；每个请求会创建一个子目录。 |
+| `--host HOST` | 否 | `127.0.0.1` | 服务监听 host。 |
+| `--port PORT` | 否 | `8686` | 服务监听端口。 |
+| `--role-prompt-file PATH` | 否，可重复 | 无 | 追加 role prompt 到 base ResearchHarness prompt。 |
+| `--input-wrapper` / `--no-input-wrapper` | 否 | 开启 | 开启或关闭输入 LLM wrapper。 |
+| `--output-wrapper` / `--no-output-wrapper` | 否 | 开启 | 开启或关闭输出 LLM wrapper。 |
+
+### Wrapper 模式
+
+默认两个 wrapper 都开启。
+
+严格格式 benchmark 模式：
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --role-prompt-file benchmarks/QA/role_prompt.md \
+  --input-wrapper \
+  --output-wrapper
+```
+
+直接 agent 模式：
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --no-input-wrapper \
+  --no-output-wrapper
+```
+
+输入简单但最终答案需要严格格式：
+
+```bash
+python3 run_server.py \
+  --api-runs-dir ./api_runs \
+  --no-input-wrapper \
+  --output-wrapper
+```
+
+input wrapper 的作用是把原始用户请求整理为适合 agent 稳定执行的任务。output wrapper 的作用是把 agent 的最终结果整理为用户要求的答案格式。wrapper 不应该引入新事实，只做输入规范化和输出格式化。
+
+API server 有意保持一问一答：每个 HTTP 请求创建一次隔离 run，并返回一个最终
+assistant message。服务端不会跨请求保存 conversation state。如果应用需要 API
+多轮对话，应由客户端保存状态，并在后续请求中传入需要的上下文。
+
+```mermaid
+flowchart LR
+    U[User Input] --> IW[Input Wrapper LLM]
+    IW --> A[ResearchHarness Agent]
+    A --> OW[Output Wrapper LLM]
+    OW --> O[Output]
+```
+
+## 5. API Workspace 结构
+
+每个 API 请求会创建一个 run 目录：
+
+```text
+./api_runs/
+`-- run_YYYYMMDD_HHMMSS_<random>/
+    |-- agent_workspace/
+    |   `-- inputs/
+    |       `-- images/
+    `-- agent_trace/
+        |-- api_trace.jsonl
+        |-- trace_*.jsonl
+        `-- _session_state.json
+```
+
+含义：
+
+| 路径 | 含义 |
+| --- | --- |
+| `run_YYYYMMDD_HHMMSS_<random>/` | 单个请求对应的 run 根目录。 |
+| `agent_workspace/` | agent 唯一可见的 workspace；文件工具、Bash、`ls`、`cat` 都从这里开始。 |
+| `agent_workspace/inputs/images/` | API 请求中用户提交的图片。 |
+| `agent_trace/` | API trace、agent trace 和 runtime 记录。 |
+
+对于多模态请求，每张图片会同时走两条路径：当底层模型支持多模态输入时，
+图片内容会作为初始多模态输入直接传给模型；每张图片也会保存到
+`agent_workspace/inputs/images/`。每个保存后的相对路径也会写进 agent 可见文本，
+让后续轮次可以用 `ReadImage` 读取稳定的本地路径，而不是反复依赖内联图片字节。
+
+这个结构把 agent 可见工作目录和服务端记录目录隔离开。
+在 API 部署模式下，trace 默认保存：每个请求都会在自己的 `agent_trace/`
+目录下写入 `api_trace.jsonl`、`trace_*.jsonl` 和 `_session_state.json`。
+
+## 6. 纯文本 OpenAI SDK 请求
+
+```python
+from openai import OpenAI
+
+client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
+
+response = client.chat.completions.create(
+    model="researchharness",
+    messages=[
+        {"role": "user", "content": "Answer in one sentence: what is 2 + 2?"}
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+## 7. 多模态 OpenAI SDK 请求
+
+第一版 API 支持同一个请求中包含一张或多张 `data:image/...;base64,...` 形式的图片 URL。API server 不支持远程图片 URL，也不支持让外部请求直接传本地文件路径。
+
+下面的示例在代码中生成一张图片，并要求返回 JSON。
+
+```python
+import base64
+from io import BytesIO
+
+from PIL import Image, ImageDraw
+from openai import OpenAI
+
+image = Image.new("RGB", (320, 120), "white")
+draw = ImageDraw.Draw(image)
+draw.text((40, 45), "7 + 5 = ?", fill="black")
+buffer = BytesIO()
+image.save(buffer, format="PNG")
+data_url = "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode("ascii")
+
+client = OpenAI(api_key="unused", base_url="http://127.0.0.1:8686/v1")
+
+response = client.chat.completions.create(
+    model="researchharness",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": (
+                        "The image contains a simple arithmetic expression. "
+                        "Return JSON with exactly two keys: expression and answer."
+                    ),
+                },
+                {"type": "image_url", "image_url": {"url": data_url}},
+            ],
+        }
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+预期答案形状：
+
+```json
+{"expression":"7 + 5","answer":12}
+```
+
+## 8. API 请求与返回协议
+
+### `POST /v1/chat/completions`
+
+支持的请求字段：
+
+| 字段 | 是否必需 | 含义 |
+| --- | --- | --- |
+| `model` | 是 | 客户端看到的 model label；不会覆盖 `.env` 中的 `MODEL_NAME`。 |
+| `messages` | 是 | OpenAI-style chat messages。 |
+| `stream` | 否 | 必须不存在或为 `false`；当前不支持 streaming。 |
+| `n` | 否 | 必须不存在或为 `1`。 |
+| `max_tokens` | 否 | output wrapper 最大输出 token。 |
+| `max_completion_tokens` | 否 | output wrapper 最大输出 token 的兼容别名。 |
+| `response_format` | 否 | 作为输出格式提示传给 wrapper。 |
+
+支持的 message role：
+
+| Role | 是否支持 |
+| --- | --- |
+| `system` | 支持 |
+| `user` | 支持 |
+| `assistant` | 支持 |
+| `tool` | 不支持 |
+
+支持的 content 形式：
+
+```json
+{"role": "user", "content": "plain text"}
+```
+
+```json
+{
+  "role": "user",
+  "content": [
+    {"type": "text", "text": "question"},
+    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
+  ]
+}
+```
+
+返回结构：
+
+```json
+{
+  "id": "chatcmpl_...",
+  "object": "chat.completion",
+  "created": 1770000000,
+  "model": "researchharness",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "final answer"
+      },
+      "finish_reason": "stop"
+    }
+  ]
+}
+```
+
+调用方通常只需要读取：
+
+```python
+response.choices[0].message.content
+```
+
+### `GET /v1/health`
+
+返回：
+
+```json
+{
+  "status": "ok",
+  "api_runs_dir": "./api_runs",
+  "input_wrapper": true,
+  "output_wrapper": true
+}
+```
+
+## 9. 工具能力
+
+ResearchHarness 当前包含：
+
+| 工具 | 用途 |
+| --- | --- |
+| `Glob` | 按模式发现文件。 |
+| `Grep` | 在文件中搜索文本。 |
+| `Read` | 有边界地读取文本文件。 |
+| `ReadPDF` | 通过 MinerU/structai 解析 PDF。 |
+| `ReadImage` | 读取本地图片，并把图片内容传给支持 vision 的模型。 |
+| `Write` | 在 workspace 内写文件。 |
+| `Edit` | 在 workspace 内 patch 文件。 |
+| `Bash` | 在 workspace 内执行 shell 命令。 |
+| `WebSearch` | 通过 Serper 进行网页搜索。 |
+| `ScholarSearch` | 通过 Serper 进行学术搜索。 |
+| `WebFetch` | 通过 Jina 和配置模型抓取、总结网页。 |
+| `AskUser` | 交互式运行中向用户提问；某些 benchmark adapter 会禁用。 |
+| `TerminalStart` / `TerminalWrite` / `TerminalRead` / `TerminalInterrupt` / `TerminalKill` | 持久终端会话。 |
+
+## 10. Trace 与记录
+
+CLI 运行只有在传入 `--trace-dir` 时才会写 trace。如果不传
+`--trace-dir`，CLI 运行不会写 trace 文件。
+
+API 运行时，记录在：
+
+```text
+./api_runs/run_.../agent_trace/
+```
+
+重要文件：
+
+| 文件 | 含义 |
+| --- | --- |
+| `api_trace.jsonl` | input wrapper、agent result、output wrapper 记录。 |
+| `trace_*.jsonl` | agent runtime 的 flat trace。 |
+| `_session_state.json` | 当前 session state；启用 trace 时和 `trace_*.jsonl` 写在同一目录。 |
+
+trace 会记录工具调用、工具结果、LLM call capture payload、context compaction、错误和终止状态。
+
+## 11. Benchmark Adapter
+
+tracked benchmark contract 放在 `benchmarks/` 下。
+
+当前 tracked adapter：
+
+| Benchmark | 目录 | 说明 |
+| --- | --- | --- |
+| ResearchClawBench | `benchmarks/ResearchClawBench/` | CLI 方式接入，包含 role prompt 和 adapter。 |
+| QA / VQA | `benchmarks/QA/` | OpenAI-compatible API 方式接入，支持纯文本和多模态 QA。 |
+
+benchmark-specific 行为应放在 `benchmarks/`，不要塞进 `agent_base/`。
+
+## 12. 测试
+
+推荐检查：
+
+```bash
+python3 tests/test_tool_availability.py
+python3 tests/test_openai_api_checks.py
+python3 tests/test_agent_extension_checks.py
+python3 tests/test_edge_case_checks.py
+python3 tests/test_toolchain_validation.py
+```
+
+如果使用 conda：
+
+```bash
+conda run -n agent python3 tests/test_openai_api_checks.py
+```
+
+## 13. 排障
+
+常见问题：
+
+| 现象 | 可能原因 | 处理 |
+| --- | --- | --- |
+| 缺少 required env | `.env` 不完整 | 填写所有必需变量。 |
+| Web/PDF 工具失败 | VPN/proxy/TLS/服务问题 | 关闭 VPN/proxy 后重跑工具可用性测试。 |
+| 图片请求返回 400 | 图片不是 `data:image/...;base64,...` | 把图片转成 base64 data URL。 |
+| 后端模型拒绝图片 | 当前模型 endpoint 不支持 vision | 换用支持 vision 的模型，或改为纯文本任务。 |
+| API 报 streaming 错误 | 请求里传了 `stream=true` | 当前只支持同步请求。 |
+| 输出格式不符合预期 | output wrapper 关闭，或用户格式要求不明确 | 开启 `--output-wrapper`，并清楚说明输出格式。 |
+
+## 14. 当前边界
+
+第一版 API 暂不包括：
+
+- streaming，
+- async run status，
+- cancellation，
+- artifact download endpoint，
+- 远程图片 URL 下载，
+- 用户认证，
+- 多租户访问控制。
+
+这些能力以后可以作为外层服务继续扩展，不需要破坏核心 harness loop。
diff --git a/frontend/__init__.py b/frontend/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..93d8c148c8dca018e271146995be1180644e7cf9
--- /dev/null
+++ b/frontend/__init__.py
@@ -0,0 +1 @@
+"""Local browser UI for ResearchHarness."""
diff --git a/frontend/local_server.py b/frontend/local_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..39cf727dd559dca6721be143a3171faaa7d14eb1
--- /dev/null
+++ b/frontend/local_server.py
@@ -0,0 +1,578 @@
+from __future__ import annotations
+
+import asyncio
+import base64
+import datetime as _dt
+import os
+import re
+import shutil
+import threading
+import time
+import traceback
+from pathlib import Path
+from typing import Any
+from uuid import uuid4
+
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+
+from agent_base.react_agent import MultiTurnReactAgent, default_llm_config
+from agent_base.utils import (
+    MissingRequiredEnvError,
+    PROJECT_ROOT,
+    append_saved_image_paths_to_prompt,
+    image_input_content_parts,
+    load_dotenv,
+    require_required_env,
+    safe_jsonable,
+    stage_image_bytes_for_input,
+)
+
+
+STATIC_DIR = Path(__file__).resolve().parent / "static"  # "static" directory next to this module
+MAX_UPLOAD_IMAGES = 12  # per-call cap enforced by save_uploaded_images()
+MAX_IMAGE_BYTES = 12 * 1024 * 1024  # 12 MiB cap on each decoded image (see decode_image_data_url)
+MAX_DIRECTORY_ENTRIES = 800
+FRONTEND_ROLE_PROMPT = ""  # overwritten by configure_frontend()
+FRONTEND_TRACE_DIR: str | None = None  # overwritten by configure_frontend(); None when no trace dir is set
+FRONTEND_MANAGED_RUNS_DIR: str | None = None  # overwritten by configure_frontend(); None disables managed runs
+FRONTEND_CLEANUP_RETENTION_SECONDS = 6 * 60 * 60  # 6 hours; age limit used by cleanup_managed_runs_once()
+FRONTEND_CLEANUP_MAX_RUNS = 40  # number of newest run directories cleanup keeps
+FRONTEND_CLEANUP_INTERVAL_SECONDS = 15 * 60  # 15 minutes between background cleanup passes
+_CLEANUP_THREAD_STARTED = False  # set once by _start_managed_cleanup_thread()
+_ACTIVE_MANAGED_RUNS: set[str] = set()  # resolved run-root paths that cleanup must not delete
+_ACTIVE_MANAGED_RUNS_LOCK = threading.Lock()  # guards _ACTIVE_MANAGED_RUNS
+
+app = FastAPI(title="ResearchHarness Local UI")
+app.mount("/static", StaticFiles(directory=STATIC_DIR), name="frontend-static")  # serve STATIC_DIR at /static
+
+
+def configure_frontend(  # apply startup settings to the module-level FRONTEND_* globals
+    *,
+    role_prompt: str = "",
+    trace_dir: str | None = None,
+    managed_runs_dir: str | None = None,
+    cleanup_retention_seconds: int | None = None,
+    cleanup_max_runs: int | None = None,
+    cleanup_interval_seconds: int | None = None,
+) -> None:
+    global FRONTEND_ROLE_PROMPT, FRONTEND_TRACE_DIR, FRONTEND_MANAGED_RUNS_DIR
+    global FRONTEND_CLEANUP_RETENTION_SECONDS, FRONTEND_CLEANUP_MAX_RUNS, FRONTEND_CLEANUP_INTERVAL_SECONDS
+    FRONTEND_ROLE_PROMPT = str(role_prompt or "").strip()  # None/empty collapses to ""
+    if trace_dir:
+        path = Path(trace_dir).expanduser()
+        if path.exists() and not path.is_dir():  # reject an existing non-directory path
+            raise ValueError(f"trace-dir is not a directory: {path}")
+        path.mkdir(parents=True, exist_ok=True)  # create the directory if it is missing
+        FRONTEND_TRACE_DIR = str(path)
+    else:
+        FRONTEND_TRACE_DIR = None  # a falsy trace_dir clears any earlier setting
+
+    if managed_runs_dir:
+        path = Path(managed_runs_dir).expanduser()
+        if path.exists() and not path.is_dir():  # same validation as for trace_dir
+            raise ValueError(f"managed-runs-dir is not a directory: {path}")
+        path.mkdir(parents=True, exist_ok=True)
+        FRONTEND_MANAGED_RUNS_DIR = str(path)
+        if cleanup_retention_seconds is not None:
+            FRONTEND_CLEANUP_RETENTION_SECONDS = max(60, int(cleanup_retention_seconds))  # clamped to >= 60 s
+        if cleanup_max_runs is not None:
+            FRONTEND_CLEANUP_MAX_RUNS = max(1, int(cleanup_max_runs))  # always keep at least one run
+        if cleanup_interval_seconds is not None:
+            FRONTEND_CLEANUP_INTERVAL_SECONDS = max(60, int(cleanup_interval_seconds))  # clamped to >= 60 s
+        cleanup_managed_runs_once()  # prune immediately at configuration time
+        _start_managed_cleanup_thread()  # then keep pruning from a background thread
+    else:
+        FRONTEND_MANAGED_RUNS_DIR = None  # the cleanup_* arguments are ignored in this branch
+
+
+class FrontendRunBridge:  # relays payloads and AskUser answers between a blocking caller and the asyncio loop
+    def __init__(self, *, loop: asyncio.AbstractEventLoop):
+        self.loop = loop  # loop on which send() schedules queue writes
+        self.outbound: asyncio.Queue[dict[str, Any]] = asyncio.Queue()  # payloads queued by send()
+        self.cancelled = threading.Event()  # polled by ask_user() so it can stop waiting
+        self.conversation_messages: list[dict[str, Any]] | None = None
+        self.conversation_workspace_root: str = ""
+        self.managed_run_root: str = ""  # set by _create_managed_run(), cleared by _release_managed_run()
+        self.managed_workspace_root: str = ""
+        self.managed_trace_dir: str = ""
+        self._pending_answers: dict[str, str] = {}  # request_id -> answer stored by submit_answer()
+        self._pending_events: dict[str, threading.Event] = {}  # request_id -> event ask_user() waits on
+        self._lock = threading.Lock()  # guards both _pending_* dicts
+
+    def send(self, payload: dict[str, Any]) -> None:  # queue a payload on `outbound` via the loop
+        self.loop.call_soon_threadsafe(self.outbound.put_nowait, safe_jsonable(payload))  # safe from non-loop threads
+
+    def trace_event(self, row: dict[str, Any]) -> None:  # wrap a trace row as a {"type": "trace"} payload
+        self.send({"type": "trace", "row": row})
+
+    def submit_answer(self, request_id: str, answer: str) -> bool:  # returns False if request_id is not pending
+        with self._lock:
+            event = self._pending_events.get(request_id)
+            if event is None:
+                return False
+            self._pending_answers[request_id] = str(answer)  # store the answer before waking the waiter
+            event.set()
+            return True
+
+    def ask_user(self, *, question: str, context: str = "") -> str:  # blocks until answered or cancelled
+        request_id = uuid4().hex
+        event = threading.Event()
+        with self._lock:
+            self._pending_events[request_id] = event  # register before sending so an answer cannot be missed
+        self.send(
+            {
+                "type": "ask_user",
+                "request_id": request_id,
+                "question": question,
+                "context": context,
+            }
+        )
+        while not event.wait(0.2):  # wake every 0.2 s to check for cancellation
+            if self.cancelled.is_set():
+                return "[AskUser] Cancelled before user answer was received."  # NOTE(review): request_id stays in _pending_events on this path
+        with self._lock:
+            answer = self._pending_answers.pop(request_id, "")
+            self._pending_events.pop(request_id, None)
+        answer = str(answer).strip()
+        if not answer:  # whitespace-only answers count as empty
+            return "[AskUser] User answer was empty."
+        return f"[AskUser] User answer:\n{answer}"
+
+
+def _managed_runs_root() -> Path | None:  # resolved managed-runs directory, or None when not configured
+    if not FRONTEND_MANAGED_RUNS_DIR:
+        return None
+    return Path(FRONTEND_MANAGED_RUNS_DIR).expanduser().resolve()
+
+
+def _new_managed_run_root() -> Path:  # build a fresh run path; does not create the directory
+    root = _managed_runs_root()
+    if root is None:
+        raise ValueError("managed workspace mode is not configured")
+    timestamp = _dt.datetime.now().strftime("%Y%m%d_%H%M%S")  # naive local time, second resolution
+    return root / f"run_{timestamp}_{uuid4().hex[:8]}"  # 8 hex chars separate runs created in the same second
+
+
+def _mark_managed_run_active(run_root: Path) -> None:  # protect run_root from cleanup_managed_runs_once()
+    with _ACTIVE_MANAGED_RUNS_LOCK:
+        _ACTIVE_MANAGED_RUNS.add(str(run_root.resolve()))  # stored as a resolved path string
+
+
+def _release_managed_run(bridge: FrontendRunBridge) -> None:  # unprotect the bridge's run and clear its paths
+    if bridge.managed_run_root:
+        with _ACTIVE_MANAGED_RUNS_LOCK:
+            _ACTIVE_MANAGED_RUNS.discard(str(Path(bridge.managed_run_root).resolve()))  # same key form as _mark_managed_run_active
+    bridge.managed_run_root = ""  # the fields are reset even when no run was attached
+    bridge.managed_workspace_root = ""
+    bridge.managed_trace_dir = ""
+
+
+def _create_managed_run(bridge: FrontendRunBridge) -> tuple[Path, str]:  # returns (workspace path, trace dir string)
+    run_root = _new_managed_run_root()  # raises ValueError when managed mode is not configured
+    workspace_root = run_root / "agent_workspace"
+    trace_dir = run_root / "agent_trace"
+    workspace_root.mkdir(parents=True, exist_ok=True)  # also creates run_root
+    trace_dir.mkdir(parents=True, exist_ok=True)
+    bridge.managed_run_root = str(run_root)
+    bridge.managed_workspace_root = str(workspace_root)
+    bridge.managed_trace_dir = str(trace_dir)
+    _mark_managed_run_active(run_root)  # from here on cleanup skips this run
+    return workspace_root, str(trace_dir)
+
+
+def cleanup_managed_runs_once() -> None:  # one pruning pass: delete by age first, then by count
+    root = _managed_runs_root()
+    if root is None or not root.exists():
+        return
+    now = time.time()
+    with _ACTIVE_MANAGED_RUNS_LOCK:
+        active = set(_ACTIVE_MANAGED_RUNS)  # snapshot, so the lock is not held during deletion
+    runs = []
+    for child in root.iterdir():  # NOTE(review): iterdir() itself is outside any try/except here
+        if not child.is_dir() or not child.name.startswith("run_"):  # only "run_*" directories are candidates
+            continue
+        try:
+            resolved = str(child.resolve())
+            mtime = child.stat().st_mtime
+        except OSError:  # entries that cannot be resolved or stat'ed are skipped
+            continue
+        runs.append((mtime, child, resolved))
+
+    for mtime, child, resolved in runs:  # pass 1: age-based deletion
+        if resolved in active:
+            continue
+        if FRONTEND_CLEANUP_RETENTION_SECONDS and now - mtime > FRONTEND_CLEANUP_RETENTION_SECONDS:  # a falsy retention skips this pass
+            shutil.rmtree(child, ignore_errors=True)  # best effort; deletion errors are ignored
+
+    remaining = []
+    with _ACTIVE_MANAGED_RUNS_LOCK:
+        active = set(_ACTIVE_MANAGED_RUNS)  # re-snapshot before the count-based pass
+    for child in root.iterdir():  # re-scan after pass 1
+        if not child.is_dir() or not child.name.startswith("run_"):
+            continue
+        try:
+            remaining.append((child.stat().st_mtime, child, str(child.resolve())))
+        except OSError:
+            continue
+    remaining.sort(reverse=True, key=lambda item: item[0])  # newest first by mtime
+    for _, child, resolved in remaining[FRONTEND_CLEANUP_MAX_RUNS:]:  # pass 2: entries beyond the newest MAX_RUNS
+        if resolved not in active:  # active runs are kept even beyond the limit
+            shutil.rmtree(child, ignore_errors=True)
+
+
+def _managed_cleanup_loop() -> None:  # thread target: sleep, prune, repeat forever
+    while True:
+        time.sleep(FRONTEND_CLEANUP_INTERVAL_SECONDS)  # the global is re-read on every iteration
+        cleanup_managed_runs_once()  # NOTE(review): an exception raised here propagates and ends the loop
+
+
+def _start_managed_cleanup_thread() -> None:  # start the cleanup thread unless the started flag is already set
+    global _CLEANUP_THREAD_STARTED
+    if _CLEANUP_THREAD_STARTED:
+        return
+    thread = threading.Thread(target=_managed_cleanup_loop, daemon=True)  # daemon: does not block interpreter exit
+    thread.start()
+    _CLEANUP_THREAD_STARTED = True  # flag check and set are not guarded by a lock
+
+
+class FrontendInteractiveAgent(MultiTurnReactAgent):  # agent that routes AskUser through a FrontendRunBridge
+    def __init__(self, *, bridge: FrontendRunBridge, **kwargs: Any):
+        super().__init__(**kwargs)  # all other keyword arguments go to the base agent unchanged
+        self.bridge = bridge
+
+    def custom_call_tool(self, tool_name: str, tool_args: Any, **kwargs: Any):  # only "AskUser" is intercepted
+        if tool_name != "AskUser":
+            return super().custom_call_tool(tool_name, tool_args, **kwargs)
+        tool = self.tool_map.get("AskUser")
+        if tool is None:
+            return "[AskUser] Tool is not available in this run."
+        try:
+            parsed = tool.parse_json_args(tool_args)
+        except ValueError as exc:  # parse failures are returned as text instead of raised
+            return f"[AskUser] {exc}"
+        question = str(parsed.get("question", "")).strip()
+        context = str(parsed.get("context", "") or "").strip()  # a None context becomes ""
+        if not question:
+            return "[AskUser] question must be a non-empty string."
+        return self.bridge.ask_user(question=question, context=context)  # blocks until answered or cancelled
+
+
+def _safe_image_suffix(mime: str, filename: str = "") -> str:  # choose a file extension from an allow-list
+    suffix = Path(filename).suffix.lower()
+    if suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}:  # a known filename suffix wins over MIME
+        return suffix
+    mapping = {
+        "image/png": ".png",
+        "image/jpeg": ".jpg",
+        "image/gif": ".gif",
+        "image/webp": ".webp",
+        "image/bmp": ".bmp",
+    }
+    return mapping.get(mime.lower(), ".png")  # an unrecognised MIME type falls back to ".png"
+
+
+def decode_image_data_url(data_url: str, *, filename: str = "") -> tuple[str, bytes]:  # returns (suffix, raw bytes)
+    match = re.fullmatch(r"data:(image/[A-Za-z0-9.+-]+);base64,(.*)", str(data_url), flags=re.DOTALL)  # whole string must match
+    if not match:
+        raise ValueError("image must be a data:image/...;base64,... URL")
+    mime = match.group(1)
+    try:
+        raw = base64.b64decode(match.group(2), validate=True)  # validate=True rejects non-alphabet characters
+    except ValueError as exc:  # binascii.Error is a ValueError subclass
+        raise ValueError(f"invalid base64 image data: {exc}") from exc
+    if not raw:
+        raise ValueError("image upload is empty")
+    if len(raw) > MAX_IMAGE_BYTES:  # the limit applies to decoded bytes, checked after decoding
+        raise ValueError(f"image upload exceeds {MAX_IMAGE_BYTES} bytes")
+    return _safe_image_suffix(mime, filename), raw
+
+
+def save_uploaded_images(workspace_root: Path, images: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[str]]:
+    """Stage uploaded data-URL images in the workspace; return ``(content_parts, saved_paths)``.
+
+    Client-supplied names are cut to their last path component. Raises ValueError on bad input.
+    """
+    if len(images) > MAX_UPLOAD_IMAGES:
+        raise ValueError(f"at most {MAX_UPLOAD_IMAGES} images are supported per run")
+    if not images:
+        return [], []
+    timestamp = _dt.datetime.now().strftime("%Y%m%d_%H%M%S")
+    content_parts: list[dict[str, Any]] = []
+    saved_paths: list[str] = []
+    for idx, item in enumerate(images, start=1):
+        if not isinstance(item, dict):
+            raise ValueError("each image item must be an object")
+        data_url = str(item.get("data_url", "")).strip()
+        filename = re.split(r"[\\/]", str(item.get("name", "") or ""))[-1] or f"image_{idx}"
+        suffix, raw = decode_image_data_url(data_url, filename=filename)
+        saved_path = stage_image_bytes_for_input(
+            raw, workspace_root=workspace_root, filename=f"{timestamp}_{filename}", image_index=idx - 1, suffix=suffix
+        )
+        saved_paths.append(saved_path)
+        content_parts.extend(image_input_content_parts(data_url, saved_path))
+    return content_parts, saved_paths
+
+
+def _prompt_with_uploaded_image_paths(prompt: str, saved_paths: list[str]) -> str:  # thin wrapper used by websocket_endpoint
+    return append_saved_image_paths_to_prompt(prompt, saved_paths)
+
+
+def _run_agent_thread(
+    *,
+    bridge: FrontendRunBridge,
+    prompt: str,
+    workspace_root: Path,
+    initial_content_parts: list[dict[str, Any]],
+    trace_dir: str | None = None,
+    prior_messages: list[dict[str, Any]] | None = None,
+) -> None:  # thread target: runs one agent session and reports the outcome through bridge.send
+    try:
+        load_dotenv(PROJECT_ROOT / ".env")  # .env is loaded at the start of every run
+        require_required_env("ResearchHarness frontend")
+        effective_trace_dir = trace_dir if trace_dir is not None else FRONTEND_TRACE_DIR
+        agent = FrontendInteractiveAgent(
+            bridge=bridge,
+            llm=default_llm_config(),
+            trace_dir=effective_trace_dir,
+            role_prompt=FRONTEND_ROLE_PROMPT or None,  # an empty role prompt is passed as None
+        )
+        bridge.send(
+            {
+                "type": "run_started",
+                "model": agent.model,
+                "workspace_root": str(workspace_root),
+                "trace_dir": effective_trace_dir or "",
+            }
+        )
+        result = agent._run_session(
+            prompt,
+            workspace_root=str(workspace_root),
+            event_callback=bridge.trace_event,
+            initial_content_parts=initial_content_parts or None,  # an empty list is passed as None
+            prior_messages=prior_messages,
+            interrupt_event=bridge.cancelled,
+        )
+        bridge.conversation_messages = result.get("messages", [])  # kept so a later "start" can continue this conversation
+        bridge.conversation_workspace_root = str(workspace_root)
+        bridge.send(
+            {
+                "type": "run_finished",
+                "result_text": result.get("result_text", ""),
+                "termination": result.get("termination", ""),
+            }
+        )
+    except (MissingRequiredEnvError, ValueError) as exc:  # these are reported with the message only
+        bridge.send({"type": "run_error", "error": str(exc)})
+    except Exception as exc:  # any other failure is reported together with its traceback
+        bridge.send({"type": "run_error", "error": str(exc), "traceback": traceback.format_exc()})
+
+
+def _resolve_existing_workspace(raw_path: str) -> Path:
+    """Resolve a whitespace-trimmed ``raw_path`` to an existing absolute directory, else raise ValueError."""
+    if not (text := str(raw_path or "").strip()):
+        raise ValueError("workspace path is required")
+    try:  # expanduser() raises RuntimeError for an unknown "~user"; resolve() can raise OSError
+        path = Path(text).expanduser().resolve()  # resolve() also anchors relative paths at the cwd
+    except (OSError, RuntimeError) as exc:
+        raise ValueError(f"cannot resolve workspace path {text!r}: {exc}") from exc
+    if not path.is_dir():  # is_dir() is False for a missing path as well
+        raise ValueError(f"workspace must be an existing directory: {path}")
+    return path
+
+
+def _resolve_directory_browser_path(raw_path: str = "") -> Path:
+    """Resolve the directory to browse (blank: home, else PROJECT_ROOT); raise ValueError if unusable."""
+    requested = str(raw_path or "").strip()
+    try:  # expanduser()/home() raise RuntimeError when no home is known; resolve() can raise OSError
+        if requested:
+            resolved = Path(requested).expanduser().resolve()
+        else:
+            resolved = (Path.home() if Path.home().exists() else PROJECT_ROOT).resolve()
+    except (OSError, RuntimeError) as exc:
+        raise ValueError(f"cannot resolve directory {requested!r}: {exc}") from exc
+    if not resolved.is_dir():  # is_dir() is False for a missing path as well
+        raise ValueError(f"directory does not exist: {resolved}")
+    return resolved
+
+
+def _directory_root_choices() -> list[dict[str, str]]:
+    """Return de-duplicated ``{"label", "path"}`` shortcuts for candidate directories that exist."""
+    candidates = [Path.home(), PROJECT_ROOT, PROJECT_ROOT / "workspace", Path.cwd(), Path("/mnt"), Path("/")]
+    if os.name == "nt":
+        # Windows: also offer every drive letter; drives that do not exist are filtered out below.
+        candidates.extend(Path(f"{letter}:\\") for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+
+    home = Path.home().resolve()  # loop-invariant: resolved once instead of once per candidate
+    seen: set[str] = set()
+    roots: list[dict[str, str]] = []
+    for candidate in candidates:
+        try:
+            resolved = candidate.expanduser().resolve()
+        except (OSError, RuntimeError):
+            continue
+        key = str(resolved)
+        if key in seen or not resolved.is_dir():
+            continue
+        seen.add(key)
+        # A path with an empty ``name`` (e.g. the filesystem root) is labelled with its full path.
+        roots.append({"label": "Home" if resolved == home else (resolved.name or key), "path": key})
+    return roots
+
+
+def _workspace_directory_payload(raw_path: str = "") -> dict[str, Any]:  # builds the JSON body for the directory browser
+    directory = _resolve_directory_browser_path(raw_path)
+    entries: list[dict[str, str]] = []
+    truncated = False
+    try:
+        children = sorted(directory.iterdir(), key=lambda item: item.name.casefold())  # case-insensitive name order
+    except PermissionError as exc:
+        raise ValueError(f"permission denied: {directory}") from exc
+    except OSError as exc:
+        raise ValueError(f"cannot read directory {directory}: {exc}") from exc
+
+    for child in children:
+        if len(entries) >= MAX_DIRECTORY_ENTRIES:  # cap reached: stop and flag the listing as truncated
+            truncated = True
+            break
+        try:
+            if not child.is_dir():  # only sub-directories are listed
+                continue
+        except OSError:  # entries that cannot be inspected are skipped
+            continue
+        entries.append({"name": child.name or str(child), "path": str(child)})
+
+    parent = directory.parent if directory.parent != directory else None  # a path that is its own parent has no parent entry
+    return {
+        "path": str(directory),
+        "parent": str(parent) if parent else "",
+        "entries": entries,
+        "truncated": truncated,
+        "roots": _directory_root_choices(),
+    }
+
+
+@app.get("/api/workspace-directories")
+def workspace_directories(path: str = "") -> JSONResponse:  # lists sub-directories of ``path`` for the workspace picker
+    try:
+        return JSONResponse(_workspace_directory_payload(path))
+    except ValueError as exc:  # ValueError from the payload builder becomes an HTTP 400 with the message
+        return JSONResponse({"error": str(exc)}, status_code=400)
+
+
+@app.get("/")
+def index() -> FileResponse:  # serves the static index.html page
+    return FileResponse(STATIC_DIR / "index.html")
+
+
+@app.get("/favicon.ico")
+def favicon() -> FileResponse:  # the .ico route serves an SVG file, so the media type is set explicitly
+    return FileResponse(STATIC_DIR / "favicon.svg", media_type="image/svg+xml")
+
+
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket) -> None:
+    """Serve one frontend session: handle "start", "ask_user_answer", "interrupt" and "new" messages."""
+    await websocket.accept()
+    bridge = FrontendRunBridge(loop=asyncio.get_running_loop())
+    run_thread: threading.Thread | None = None
+
+    async def sender() -> None:  # forwards everything queued on bridge.outbound to the socket
+        while True:
+            payload = await bridge.outbound.get()
+            await websocket.send_json(payload)
+
+    sender_task = asyncio.create_task(sender())
+    try:
+        await websocket.send_json({"type": "ready", "managed_workspace": bool(FRONTEND_MANAGED_RUNS_DIR)})
+        while True:
+            message = await websocket.receive_json()
+            message_type = str(message.get("type", "")).strip() if isinstance(message, dict) else ""  # non-object JSON -> unknown type
+            if message_type == "start":
+                if run_thread is not None and run_thread.is_alive():
+                    bridge.send({"type": "run_error", "error": "A run is already active. Wait for it to finish before starting a new conversation."})
+                    continue
+                prompt = str(message.get("prompt", "")).strip()
+                if not prompt:
+                    bridge.send({"type": "run_error", "error": "Prompt is required."})
+                    continue
+                try:
+                    continue_conversation = bool(message.get("continue_conversation"))
+                    prior_messages = None
+                    effective_trace_dir = FRONTEND_TRACE_DIR
+                    if FRONTEND_MANAGED_RUNS_DIR:
+                        if continue_conversation:
+                            if not bridge.conversation_messages or not bridge.managed_workspace_root:
+                                bridge.send({"type": "run_error", "error": "No active conversation is available on the server. Click New chat and start again."})
+                                continue
+                            workspace_root = Path(bridge.managed_workspace_root)
+                            effective_trace_dir = bridge.managed_trace_dir or FRONTEND_TRACE_DIR
+                            prior_messages = bridge.conversation_messages
+                        else:
+                            _release_managed_run(bridge)  # a new conversation releases the previous managed run first
+                            workspace_root, effective_trace_dir = _create_managed_run(bridge)
+                    else:
+                        workspace_root = _resolve_existing_workspace(str(message.get("workspace_root", "")))
+                        if continue_conversation:
+                            if not bridge.conversation_messages:
+                                bridge.send({"type": "run_error", "error": "No active conversation is available on the server. Click New chat and start again."})
+                                continue
+                            elif bridge.conversation_workspace_root and bridge.conversation_workspace_root != str(workspace_root):
+                                bridge.send({"type": "run_error", "error": "Workspace changed. Start a new chat before using a different workspace."})
+                                continue
+                            prior_messages = bridge.conversation_messages  # both guards above `continue`, so no else is needed
+                    image_parts, saved_paths = save_uploaded_images(
+                        workspace_root,
+                        message.get("images", []) if isinstance(message.get("images", []), list) else [],
+                    )
+                    run_prompt = _prompt_with_uploaded_image_paths(prompt, saved_paths)
+                except ValueError as exc:
+                    bridge.send({"type": "run_error", "error": str(exc)})
+                    continue
+                bridge.cancelled.clear()  # reset the interrupt flag before the new run starts
+                if not continue_conversation:
+                    bridge.conversation_messages = None
+                    bridge.conversation_workspace_root = str(workspace_root)
+                    bridge.send({"type": "conversation_reset"})
+                if saved_paths:
+                    bridge.send({"type": "uploaded_images", "paths": saved_paths})
+                run_thread = threading.Thread(
+                    target=_run_agent_thread,
+                    kwargs={
+                        "bridge": bridge,
+                        "prompt": run_prompt,
+                        "workspace_root": workspace_root,
+                        "initial_content_parts": image_parts,
+                        "trace_dir": effective_trace_dir,
+                        "prior_messages": prior_messages,
+                    },
+                    daemon=True,
+                )
+                run_thread.start()
+            elif message_type == "ask_user_answer":
+                ok = bridge.submit_answer(str(message.get("request_id", "")), str(message.get("answer", "")))
+                if not ok:
+                    bridge.send({"type": "run_error", "error": "No pending AskUser request matched that answer."})
+            elif message_type == "interrupt":
+                if run_thread is not None and run_thread.is_alive():
+                    bridge.cancelled.set()
+                    bridge.send({"type": "interrupt_requested"})
+                else:
+                    bridge.send({"type": "run_error", "error": "No active run is available to interrupt."})
+            elif message_type == "new":
+                if run_thread is not None and run_thread.is_alive():
+                    bridge.send({"type": "run_error", "error": "The current run is still active. Start a new conversation after it finishes."})
+                else:
+                    _release_managed_run(bridge)
+                    bridge.conversation_messages = None
+                    bridge.conversation_workspace_root = ""
+                    bridge.send({"type": "conversation_reset"})
+            else:
+                bridge.send({"type": "run_error", "error": f"Unknown websocket message type: {message_type}"})
+    except WebSocketDisconnect:
+        bridge.cancelled.set()
+    finally:  # runs on disconnect and on any other exit: signal cancellation, release the managed run, stop the sender
+        bridge.cancelled.set()
+        _release_managed_run(bridge)
+        sender_task.cancel()
diff --git a/frontend/static/app.css b/frontend/static/app.css
new file mode 100644
index 0000000000000000000000000000000000000000..2d81b1e450321c8e3d11a3bd960d7299486dcfbc
--- /dev/null
+++ b/frontend/static/app.css
@@ -0,0 +1,955 @@
+:root {
+  --bg: #ffffff;
+  --bar: #f5f5f5;
+  --border: #e8e8e8;
+  --panel: rgba(255, 255, 255, 0.82);
+  --panel-strong: rgba(255, 255, 255, 0.96);
+  --hover: #f7f7f7;
+  --text: #171717;
+  --muted: #747474;
+  --accent-start: #1a1a1a;
+  --accent-end: #333333;
+  --accent-text: #ffffff;
+  --glow-rgb: 0, 0, 0;
+  --danger: #b42318;
+  --ok: #1f7a42;
+  --warn: #9a6700;
+  --shadow: 0 18px 70px rgba(0, 0, 0, 0.08);
+}
+
+[data-theme="yellow"] {
+  --bg: #faf8f4;
+  --bar: #f0ebe1;
+  --border: #e5ddd0;
+  --panel: rgba(255, 252, 246, 0.84);
+  --panel-strong: rgba(255, 252, 246, 0.96);
+  --hover: #f0ece4;
+  --text: #2f2113;
+  --muted: #8a7055;
+  --accent-start: #1c1208;
+  --accent-end: #3a2410;
+  --accent-text: #ffffff;
+  --glow-rgb: 180, 128, 40;
+}
+
+[data-theme="blue"] {
+  --bg: #f3f5f8;
+  --bar: #e3e8ef;
+  --border: #d3dae3;
+  --panel: rgba(248, 251, 255, 0.84);
+  --panel-strong: rgba(248, 251, 255, 0.96);
+  --hover: #e8eef5;
+  --text: #172f4a;
+  --muted: #6a8aaa;
+  --accent-start: #1a3654;
+  --accent-end: #1e4a7a;
+  --accent-text: #ffffff;
+  --glow-rgb: 38, 88, 155;
+}
+
+[data-theme="dark"] {
+  --bg: #111110;
+  --bar: #1c1c1a;
+  --border: #2e2e2b;
+  --panel: rgba(28, 28, 26, 0.86);
+  --panel-strong: rgba(28, 28, 26, 0.98);
+  --hover: #242420;
+  --text: #e8e6df;
+  --muted: #8a8a80;
+  --accent-start: #e8e6df;
+  --accent-end: #d0cec7;
+  --accent-text: #111110;
+  --glow-rgb: 220, 210, 180;
+  --danger: #ffb4a9;
+  --ok: #9de8b5;
+  --warn: #f7d36f;
+  --shadow: 0 18px 70px rgba(0, 0, 0, 0.34);
+}
+
+* {
+  box-sizing: border-box;
+}
+
+html,
+body {
+  height: 100%;
+}
+
+body {
+  margin: 0;
+  overflow: hidden;
+  background: var(--bg);
+  color: var(--text);
+  font-family: "IBM Plex Sans", "Aptos", "Segoe UI Variable", "Noto Sans CJK SC", "Microsoft YaHei", "PingFang SC", sans-serif;
+  transition: background 0.3s ease, color 0.3s ease;
+}
+
+button,
+input,
+textarea {
+  font: inherit;
+}
+
+button {
+  cursor: pointer;
+}
+
+.chat-shell {
+  position: relative;
+  z-index: 1;
+  display: grid;
+  grid-template-rows: auto auto minmax(0, 1fr) auto;
+  width: min(980px, 100%);
+  height: 100vh; /* fallback for browsers that do not support the dvh unit */
+  height: 100dvh; /* overrides the line above where dvh is supported */
+  min-height: 0;
+  overflow: hidden;
+  margin: 0 auto;
+  padding: 14px 16px 18px;
+}
+
+.chat-shell > * {
+  min-height: 0;
+}
+
+.topbar,
+.workspace-strip,
+.composer {
+  border: 1px solid var(--border);
+  background: var(--panel);
+  backdrop-filter: blur(18px);
+  box-shadow: var(--shadow);
+}
+
+.topbar {
+  position: sticky;
+  top: 0;
+  z-index: 4;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  border-radius: 22px;
+  padding: 10px 12px;
+  background: var(--panel-strong);
+  box-shadow: 0 14px 38px rgba(var(--glow-rgb), 0.15), 0 3px 10px rgba(0, 0, 0, 0.08);
+}
+
+.brand {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  min-width: 0;
+}
+
+.brand strong {
+  display: block;
+  letter-spacing: -0.02em;
+}
+
+.logo {
+  display: grid;
+  place-items: center;
+  width: 38px;
+  height: 38px;
+  border-radius: 12px;
+  background: linear-gradient(135deg, var(--accent-start), var(--accent-end));
+  color: var(--accent-text);
+  font-size: 0.82rem;
+  font-weight: 900;
+}
+
+.status {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  margin-top: 2px;
+  color: var(--muted);
+  font-size: 0.78rem;
+  font-weight: 800;
+}
+
+.status.running::before {
+  content: "";
+  width: 10px;
+  height: 10px;
+  border: 2px solid currentColor;
+  border-top-color: transparent;
+  border-radius: 50%;
+  animation: spin 0.82s linear infinite;
+}
+
+.status.running {
+  color: var(--warn);
+}
+
+.status.done {
+  color: var(--ok);
+}
+
+.status.error {
+  color: var(--danger);
+}
+
+.top-actions {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  flex-wrap: wrap;
+  justify-content: flex-end;
+}
+
+.plain,
+.send-button,
+.icon-button {
+  border: 1px solid var(--border);
+  border-radius: 999px;
+  background: var(--panel-strong);
+  color: var(--text);
+  font-weight: 850;
+  transition: transform 0.18s ease, border-color 0.18s ease, background 0.18s ease;
+}
+
+.plain {
+  padding: 8px 12px;
+}
+
+.plain:hover,
+.icon-button:hover {
+  border-color: rgba(var(--glow-rgb), 0.38);
+  transform: translateY(-1px);
+}
+
+.workspace-strip {
+  position: sticky;
+  top: 66px;
+  z-index: 4;
+  display: grid;
+  grid-template-columns: minmax(0, 1fr);
+  align-items: center;
+  margin-top: 10px;
+  border-radius: 18px;
+  padding: 9px 12px;
+  background: var(--panel-strong);
+  box-shadow: 0 14px 38px rgba(var(--glow-rgb), 0.15), 0 3px 10px rgba(0, 0, 0, 0.08);
+}
+
+.workspace-strip input {
+  display: none;
+}
+
+.workspace-strip span {
+  display: block;
+  min-width: 0;
+  max-width: 100%;
+  color: var(--muted);
+  font-size: 0.82rem;
+  overflow-wrap: anywhere;
+  word-break: break-word;
+  white-space: normal;
+}
+
+.messages {
+  display: flex;
+  flex-direction: column;
+  flex: 1 1 auto;
+  gap: 14px;
+  height: 100%;
+  min-height: 0;
+  min-width: 0;
+  max-height: 100%;
+  overflow-x: hidden;
+  overflow-y: scroll;
+  overscroll-behavior: contain;
+  padding: 24px 4px 18px;
+  scrollbar-gutter: stable;
+  -webkit-overflow-scrolling: touch;
+}
+
+.messages::-webkit-scrollbar,
+.workspace-list::-webkit-scrollbar {
+  width: 10px;
+}
+
+.messages::-webkit-scrollbar-thumb,
+.workspace-list::-webkit-scrollbar-thumb {
+  border: 3px solid transparent;
+  border-radius: 999px;
+  background: rgba(var(--glow-rgb), 0.24);
+  background-clip: padding-box;
+}
+
+.welcome {
+  margin: auto;
+  max-width: 650px;
+  text-align: center;
+}
+
+.welcome h1 {
+  margin: 0;
+  font-size: clamp(2.2rem, 6vw, 4.7rem);
+  line-height: 0.94;
+  letter-spacing: -0.055em;
+}
+
+.welcome p {
+  margin: 18px auto 0;
+  max-width: 520px;
+  color: var(--muted);
+  line-height: 1.6;
+}
+
+.message,
+.event {
+  flex: 0 0 auto;
+  border: 1px solid var(--border);
+  border-radius: 22px;
+  background: var(--panel);
+  backdrop-filter: blur(18px);
+  box-shadow: 0 10px 34px rgba(0, 0, 0, 0.05);
+  overflow: hidden;
+}
+
+.message {
+  max-width: min(760px, 92%);
+}
+
+.event {
+  width: min(760px, 92%);
+}
+
+.message.user {
+  align-self: flex-end;
+  background: linear-gradient(135deg, var(--accent-start), var(--accent-end));
+  color: var(--accent-text);
+}
+
+.message.assistant,
+.event {
+  align-self: flex-start;
+}
+
+.message-body {
+  padding: 14px 16px;
+}
+
+.event-body {
+  max-height: none;
+  overflow: hidden;
+  transition: max-height 0.24s ease;
+}
+
+.event-body-inner {
+  padding: 14px 16px;
+}
+
+.event.collapsed .event-body {
+  max-height: 220px;
+}
+
+.event.collapsed .event-body-inner {
+  position: relative;
+  overflow: hidden;
+}
+
+.event.collapsed .event-body-inner::after {
+  content: "";
+  position: absolute;
+  right: 0;
+  bottom: 0;
+  left: 0;
+  height: 60px;
+  background: linear-gradient(to bottom, transparent, var(--panel-strong));
+  pointer-events: none;
+}
+
+.event.can-collapse {
+  cursor: pointer;
+}
+
+.event.latest {
+  cursor: default;
+}
+
+.event:not(.can-collapse) .event-toggle {
+  display: none;
+}
+
+.message-body pre,
+.event-body pre {
+  margin: 0;
+  white-space: pre-wrap;
+  word-break: break-word;
+  font-family: "IBM Plex Mono", "SFMono-Regular", Consolas, monospace;
+  font-size: 0.86rem;
+  line-height: 1.5;
+}
+
+.markdown-body {
+  line-height: 1.6;
+  word-break: break-word;
+}
+
+.markdown-body > *:first-child {
+  margin-top: 0;
+}
+
+.markdown-body > *:last-child {
+  margin-bottom: 0;
+}
+
+.markdown-body p,
+.markdown-body ul,
+.markdown-body ol,
+.markdown-body blockquote,
+.markdown-body table,
+.markdown-body pre {
+  margin: 0 0 0.8rem;
+}
+
+.markdown-body h1,
+.markdown-body h2,
+.markdown-body h3,
+.markdown-body h4,
+.markdown-body h5,
+.markdown-body h6 {
+  margin: 0 0 0.65rem;
+  line-height: 1.2;
+}
+
+.markdown-body ul,
+.markdown-body ol {
+  padding-left: 1.35rem;
+}
+
+.markdown-body code {
+  padding: 0.1rem 0.28rem;
+  border-radius: 6px;
+  background: rgba(0, 0, 0, 0.08);
+  font-family: "IBM Plex Mono", "SFMono-Regular", Consolas, monospace;
+  font-size: 0.88em;
+}
+
+.markdown-body pre {
+  padding: 12px;
+  border-radius: 14px;
+  background: rgba(0, 0, 0, 0.08);
+  overflow-x: auto;
+}
+
+.markdown-body pre code {
+  padding: 0;
+  background: transparent;
+}
+
+.markdown-body blockquote {
+  padding-left: 0.85rem;
+  border-left: 3px solid rgba(var(--glow-rgb), 0.35);
+  color: var(--muted);
+}
+
+.markdown-body a {
+  color: inherit;
+  text-decoration: underline;
+  text-underline-offset: 3px;
+}
+
+.markdown-body table {
+  width: 100%;
+  border-collapse: collapse;
+  overflow: hidden;
+  border-radius: 14px;
+}
+
+.markdown-body th,
+.markdown-body td {
+  padding: 8px 10px;
+  border: 1px solid rgba(0, 0, 0, 0.12);
+  text-align: left;
+  vertical-align: top;
+}
+
+.message-images {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  margin-bottom: 10px;
+}
+
+.message-image {
+  max-width: 180px;
+  max-height: 180px;
+  border-radius: 16px;
+  object-fit: cover;
+  border: 1px solid rgba(255, 255, 255, 0.24);
+}
+
+.event-head {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 10px;
+  padding: 10px 14px;
+  border-bottom: 1px solid var(--border);
+}
+
+.event-title {
+  display: flex;
+  flex-wrap: wrap;
+  align-items: center;
+  gap: 8px;
+  font-weight: 900;
+}
+
+.event-toggle {
+  flex: 0 0 auto;
+  border: 1px solid var(--border);
+  border-radius: 999px;
+  background: var(--panel-strong);
+  color: var(--muted);
+  font-size: 0.76rem;
+  font-weight: 850;
+  padding: 5px 9px;
+}
+
+.event.latest .event-toggle {
+  display: none;
+}
+
+.event:not(.collapsed) .event-toggle::after {
+  content: "collapse";
+}
+
+.event.collapsed .event-toggle::after {
+  content: "expand";
+}
+
+.badge {
+  border-radius: 999px;
+  background: rgba(var(--glow-rgb), 0.11);
+  color: var(--text);
+  font-size: 0.72rem;
+  font-weight: 850;
+  padding: 4px 8px;
+}
+
+.tool-grid {
+  display: grid;
+  gap: 10px;
+  margin-top: 10px;
+}
+
+.tool-call {
+  border: 1px solid var(--border);
+  border-radius: 16px;
+  padding: 11px;
+  background: color-mix(in srgb, var(--hover), transparent 28%);
+}
+
+.tool-name {
+  margin-bottom: 8px;
+  font-weight: 900;
+}
+
+.error-text {
+  color: var(--danger);
+}
+
+.muted-text {
+  color: var(--muted);
+}
+
+.composer textarea {
+  border: 0;
+  outline: 0;
+  background: transparent;
+  color: var(--text);
+}
+
+.composer-wrap {
+  position: sticky;
+  bottom: 0;
+  z-index: 4;
+  display: grid;
+  gap: 8px;
+}
+
+.composer {
+  display: flex;
+  align-items: flex-end;
+  gap: 10px;
+  border-radius: 26px;
+  padding: 11px;
+  background: var(--panel-strong);
+  box-shadow: 0 14px 38px rgba(var(--glow-rgb), 0.15), 0 3px 10px rgba(0, 0, 0, 0.08);
+}
+
+.composer.dragover {
+  border-color: rgba(var(--glow-rgb), 0.44);
+  box-shadow: 0 0 0 5px rgba(var(--glow-rgb), 0.09), var(--shadow);
+}
+
+.composer textarea {
+  flex: 1;
+  max-height: 180px;
+  min-height: 30px;
+  resize: none;
+  line-height: 1.5;
+  padding: 7px 0;
+}
+
+.icon-button,
+.send-button {
+  display: grid;
+  place-items: center;
+  flex: 0 0 auto;
+  height: 38px;
+  min-width: 38px;
+}
+
+.icon-button {
+  font-size: 1.35rem;
+  line-height: 1;
+}
+
+.send-button {
+  padding: 0 16px;
+  background: linear-gradient(135deg, var(--accent-start), var(--accent-end));
+  color: var(--accent-text);
+}
+
+.send-button.is-running {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+}
+
+.send-button.is-running::before {
+  content: "";
+  width: 12px;
+  height: 12px;
+  margin-right: 8px;
+  border: 2px solid currentColor;
+  border-top-color: transparent;
+  border-radius: 50%;
+  animation: spin 0.82s linear infinite;
+}
+
+button:disabled {
+  cursor: not-allowed;
+  opacity: 0.58;
+  transform: none;
+}
+
+#imageInput {
+  display: none;
+}
+
+.image-preview {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  padding: 0 8px;
+}
+
+.image-chip {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  max-width: 240px;
+  border: 1px solid var(--border);
+  border-radius: 999px;
+  padding: 5px 10px 5px 5px;
+  background: var(--panel);
+  color: var(--text);
+}
+
+.image-chip img {
+  width: 30px;
+  height: 30px;
+  border-radius: 50%;
+  object-fit: cover;
+}
+
+.image-chip span {
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  font-size: 0.82rem;
+}
+
+.composer-hint {
+  margin: 0;
+  color: var(--muted);
+  font-size: 0.78rem;
+  text-align: center;
+}
+
+.modal {
+  position: fixed;
+  inset: 0;
+  z-index: 30;
+  display: grid;
+  place-items: center;
+  padding: 18px;
+  background: rgba(0, 0, 0, 0.24);
+  backdrop-filter: blur(14px);
+}
+
+.modal.hidden {
+  display: none;
+}
+
+.modal-card {
+  display: grid;
+  grid-template-rows: auto auto auto minmax(0, 1fr) auto;
+  gap: 12px;
+  width: min(780px, 100%);
+  max-height: min(760px, 82vh);
+  border: 1px solid var(--border);
+  border-radius: 28px;
+  background: var(--panel-strong);
+  box-shadow: 0 24px 88px rgba(0, 0, 0, 0.22);
+  padding: 18px;
+}
+
+.modal-head,
+.modal-path-row,
+.modal-actions {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+}
+
+.modal-head {
+  justify-content: space-between;
+}
+
+.modal-head h2,
+.modal-head p {
+  margin: 0;
+}
+
+.modal-head h2 {
+  font-size: 1.18rem;
+  letter-spacing: -0.025em;
+}
+
+.modal-head p,
+.modal-actions span {
+  color: var(--muted);
+  font-size: 0.86rem;
+}
+
+.modal-path-row {
+  border: 1px solid var(--border);
+  border-radius: 18px;
+  background: var(--hover);
+  padding: 8px;
+}
+
+.modal-path-row input {
+  min-width: 0;
+  flex: 1;
+  border: 0;
+  outline: 0;
+  background: transparent;
+  color: var(--text);
+}
+
+.workspace-roots {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+}
+
+.root-chip {
+  max-width: 190px;
+  overflow: hidden;
+  border: 1px solid var(--border);
+  border-radius: 999px;
+  background: var(--panel);
+  color: var(--text);
+  font-weight: 800;
+  padding: 7px 11px;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.workspace-list {
+  display: grid;
+  align-content: start;
+  gap: 7px;
+  min-height: 0;
+  overflow: auto;
+  padding-right: 4px;
+}
+
+.dir-row {
+  display: grid;
+  grid-template-columns: auto minmax(0, 1fr) auto;
+  align-items: center;
+  gap: 10px;
+  width: 100%;
+  border: 1px solid var(--border);
+  border-radius: 18px;
+  background: var(--panel);
+  color: var(--text);
+  padding: 10px 12px;
+  text-align: left;
+}
+
+.dir-row:hover,
+.root-chip:hover {
+  border-color: rgba(var(--glow-rgb), 0.38);
+  background: var(--hover);
+}
+
+.dir-icon {
+  display: grid;
+  place-items: center;
+  width: 24px;
+  height: 24px;
+  border-radius: 50%;
+  background: rgba(var(--glow-rgb), 0.1);
+  font-weight: 900;
+}
+
+.dir-main {
+  min-width: 0;
+}
+
+.dir-main strong,
+.dir-main small {
+  display: block;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.dir-main small {
+  margin-top: 2px;
+  color: var(--muted);
+  font-size: 0.78rem;
+}
+
+.dir-action {
+  color: var(--muted);
+  font-size: 0.76rem;
+  font-weight: 850;
+}
+
+.dir-empty {
+  border: 1px dashed var(--border);
+  border-radius: 18px;
+  padding: 18px;
+  color: var(--muted);
+  text-align: center;
+}
+
+.modal-actions {
+  justify-content: space-between;
+}
+
+#theme-switcher {
+  position: fixed;
+  right: 22px;
+  bottom: 22px;
+  z-index: 20;
+  display: flex;
+  gap: 9px;
+  padding: 9px;
+  border: 1px solid var(--border);
+  border-radius: 999px;
+  background: var(--bar);
+  box-shadow: 0 12px 34px rgba(0, 0, 0, 0.12);
+}
+
+.theme-dot {
+  width: 21px;
+  height: 21px;
+  border: 1.5px solid transparent;
+  border-radius: 50%;
+  padding: 0;
+  transition: transform 0.18s ease, box-shadow 0.18s ease;
+}
+
+.theme-dot[data-theme="white"] {
+  background: #ffffff;
+  border-color: #c8c8c8;
+}
+
+.theme-dot[data-theme="yellow"] {
+  background: #e8d5a0;
+  border-color: #c4a060;
+}
+
+.theme-dot[data-theme="blue"] {
+  background: #a8c4e8;
+  border-color: #5e8ec8;
+}
+
+.theme-dot[data-theme="dark"] {
+  background: #2a2a26;
+  border-color: #585852;
+}
+
+.theme-dot.active {
+  box-shadow: 0 0 0 2px var(--bg), 0 0 0 4px var(--border);
+  transform: scale(1.08);
+}
+
+@keyframes spin {
+  to {
+    transform: rotate(360deg);
+  }
+}
+
+@media (max-width: 720px) {
+  .chat-shell {
+    padding: 10px;
+  }
+
+  .topbar,
+  .workspace-strip {
+    grid-template-columns: 1fr;
+  }
+
+  .topbar {
+    align-items: flex-start;
+  }
+
+  .workspace-strip {
+    display: grid;
+  }
+
+  .workspace-strip span {
+    max-width: none;
+  }
+
+  .modal-card {
+    max-height: 88vh;
+    padding: 14px;
+  }
+
+  .modal-head,
+  .modal-actions {
+    align-items: stretch;
+    flex-direction: column;
+  }
+
+  .modal-path-row {
+    align-items: stretch;
+    flex-direction: column;
+  }
+
+  .message,
+  .event {
+    max-width: 96%;
+  }
+
+  #theme-switcher {
+    right: 12px;
+    bottom: 12px;
+  }
+}
diff --git a/frontend/static/app.js b/frontend/static/app.js
new file mode 100644
index 0000000000000000000000000000000000000000..540038184267a2f3611cbc1817a4f2af57849098
--- /dev/null
+++ b/frontend/static/app.js
@@ -0,0 +1,743 @@
+(function () {
+  var canvas, ctx, t = 0;
+  var TILE = 26, GAP = 1;
+  var rafId = null;
+  var cachedRgb = "10,10,10";
+  var lastDraw = 0;
+  var FRAME_MS = 1000 / 24;
+
+  function rgb() {
+    var th = document.documentElement.getAttribute("data-theme") || "white";
+    if (th === "dark") return "220,210,175";
+    if (th === "yellow") return "120,85,20";
+    if (th === "blue") return "38,88,155";
+    return "10,10,10";
+  }
+
+  function frame(ts) { // rAF callback: redraws the tile grid, at most once per FRAME_MS
+    rafId = requestAnimationFrame(frame); // re-arm first so throttled frames keep the loop alive
+    if (ts - lastDraw < FRAME_MS) return;
+    lastDraw = ts;
+
+    var w = canvas.width, h = canvas.height;
+    var cols = Math.ceil(w / TILE) + 1;
+    var rows = Math.ceil(h / TILE) + 1;
+    var pre = "rgba(" + cachedRgb + ","; // colour prefix; only the alpha varies per tile
+
+    ctx.clearRect(0, 0, w, h);
+    for (var r = 0; r < rows; r++) {
+      for (var c = 0; c < cols; c++) {
+        var wave = 0.6 * Math.sin(c * 0.21 + t * 0.36) * Math.sin(r * 0.17 + t * 0.28)
+          + 0.4 * Math.sin(c * 0.11 - r * 0.13 + t * 0.19); // weights sum to 1, so wave stays in [-1, 1]
+        var norm = (wave + 1) * 0.5; // rescale to [0, 1]
+        var v = norm * norm * norm; // cubing pushes most tiles toward low alpha
+        var a = Math.round((0.004 + v * 0.186) * 100) / 100; // alpha rounded to 2 decimals, at most 0.19
+        if (a < 0.02) continue; // near-transparent tiles are not drawn
+        ctx.fillStyle = pre + a + ")";
+        ctx.fillRect(c * TILE + GAP, r * TILE + GAP, TILE - GAP, TILE - GAP);
+      }
+    }
+    t += 0.007; // advance the animation phase once per drawn frame
+  }
+
+  var resizeTimer;
+  function resize() {
+    clearTimeout(resizeTimer);
+    resizeTimer = setTimeout(function () {
+      var newW = window.innerWidth;
+      var newH = window.innerHeight;
+      if (newW === canvas.width && Math.abs(newH - canvas.height) <= 90) return;
+      canvas.width = newW;
+      canvas.height = newH;
+    }, 120);
+  }
+
+  function onVisibilityChange() {
+    if (document.hidden) {
+      if (rafId) {
+        cancelAnimationFrame(rafId);
+        rafId = null;
+      }
+    } else if (!rafId) {
+      rafId = requestAnimationFrame(frame);
+    }
+  }
+
+  document.addEventListener("DOMContentLoaded", function () { // create the background canvas and start animating
+    canvas = document.createElement("canvas");
+    canvas.style.cssText = "position:fixed;top:0;left:0;width:100%;height:100%;"
+      + "z-index:0;pointer-events:none;will-change:transform;"
+      + "-webkit-backface-visibility:hidden;backface-visibility:hidden;";
+    document.body.insertBefore(canvas, document.body.firstChild); // inserted as the first child of <body>
+    ctx = canvas.getContext("2d");
+    canvas.width = window.innerWidth;
+    canvas.height = window.innerHeight;
+    cachedRgb = rgb(); // colour for the theme active at load time
+    window.addEventListener("resize", resize);
+    document.addEventListener("visibilitychange", onVisibilityChange);
+    rafId = requestAnimationFrame(frame);
+  });
+
+  new MutationObserver(function () { cachedRgb = rgb(); }) // refresh the cached colour whenever data-theme changes
+    .observe(document.documentElement, { attributes: true, attributeFilter: ["data-theme"] });
+})();
+
+(function () {
+  var THEMES = ["white", "yellow", "blue", "dark"];
+  var LABELS = { white: "Pure White", yellow: "Warm Yellow", blue: "Cool Blue", dark: "Dark" };
+
+  function applyTheme(theme) {
+    if (theme === "white") {
+      document.documentElement.removeAttribute("data-theme");
+    } else {
+      document.documentElement.setAttribute("data-theme", theme);
+    }
+    try { localStorage.setItem("rh-ui-theme", theme); } catch (e) {}
+    document.querySelectorAll(".theme-dot").forEach(function (dot) {
+      dot.classList.toggle("active", dot.dataset.theme === theme);
+    });
+  }
+
+  var saved = "white";
+  try { saved = localStorage.getItem("rh-ui-theme") || "white"; } catch (e) {} // keep "white" if storage cannot be read
+  applyTheme(saved); // set data-theme right away; the dots are only created in the handler below
+
+  document.addEventListener("DOMContentLoaded", function () { // build the floating theme switcher
+    var switcher = document.createElement("div");
+    switcher.id = "theme-switcher";
+    switcher.setAttribute("aria-label", "Choose colour theme");
+    THEMES.forEach(function (theme) { // one round button per theme
+      var btn = document.createElement("button");
+      btn.className = "theme-dot";
+      btn.dataset.theme = theme;
+      btn.title = LABELS[theme];
+      btn.setAttribute("aria-label", LABELS[theme]);
+      btn.addEventListener("click", function () { applyTheme(theme); });
+      switcher.appendChild(btn);
+    });
+    document.body.appendChild(switcher);
+    applyTheme(saved); // second pass so the dot matching the saved theme gets the "active" class
+  });
+})();
+
+(function () {
+  var ws;
+  var running = false;
+  var interrupting = false;
+  var pendingAskId = "";
+  var keepSubmittedMessageOnReset = false;
+  var autoFollowTimeline = true;
+  var conversationStarted = false;
+  var images = [];
+  var COLLAPSED_STEP_HEIGHT = 220;
+
+  var workspaceInput = document.getElementById("workspaceInput");
+  var workspaceStrip = document.getElementById("workspaceStrip");
+  var promptInput = document.getElementById("promptInput");
+  var runBtn = document.getElementById("runBtn");
+  var newBtn = document.getElementById("newBtn");
+  var pickWorkspaceBtn = document.getElementById("pickWorkspaceBtn");
+  var attachBtn = document.getElementById("attachBtn");
+  var imageInput = document.getElementById("imageInput");
+  var imagePreview = document.getElementById("imagePreview");
+  var dropZone = document.getElementById("dropZone");
+  var timeline = document.getElementById("timeline");
+  var statusPill = document.getElementById("statusPill");
+  var workspaceMeta = document.getElementById("workspaceMeta");
+  var workspaceModal = document.getElementById("workspaceModal");
+  var workspaceCloseBtn = document.getElementById("workspaceCloseBtn");
+  var workspacePathInput = document.getElementById("workspacePathInput");
+  var workspaceGoBtn = document.getElementById("workspaceGoBtn");
+  var workspaceRoots = document.getElementById("workspaceRoots");
+  var workspaceList = document.getElementById("workspaceList");
+  var workspaceUseBtn = document.getElementById("workspaceUseBtn");
+  var workspacePickerHint = document.getElementById("workspacePickerHint");
+  var currentWorkspacePath = "";
+  var defaultPromptPlaceholder = promptInput.getAttribute("placeholder") || "Message ResearchHarness";
+
+  function escapeHtml(value) {
+    return String(value || "")
+      .replaceAll("&", "&amp;")
+      .replaceAll("<", "&lt;")
+      .replaceAll(">", "&gt;")
+      .replaceAll('"', "&quot;")
+      .replaceAll("'", "&#039;");
+  }
+
+  function renderMarkdown(text) { // returns sanitized Markdown HTML, or escaped <pre> text as a fallback
+    if (!window.marked || !window.DOMPurify) { // both globals must be present before rendering
+      console.warn("Markdown renderer unavailable; falling back to plain text.");
+      return "<pre>" + escapeHtml(text) + "</pre>";
+    }
+    try {
+      var rawHtml = window.marked.parse(String(text || ""), { gfm: true, breaks: false, async: false });
+      var safeHtml = window.DOMPurify.sanitize(rawHtml, { USE_PROFILES: { html: true } }); // only the sanitized HTML is returned
+      return '<div class="markdown-body">' + safeHtml + "</div>";
+    } catch (e) {
+      console.warn("Markdown rendering failed; falling back to plain text.", e);
+      return "<pre>" + escapeHtml(text) + "</pre>";
+    }
+  }
+
+  function setStatus(text, kind) { // show `text` in the status pill, styled by `kind`
+    statusPill.textContent = text;
+    statusPill.className = "status " + (kind || "idle"); // a missing kind falls back to "idle"
+  }
+
+  function setWorkspaceSelected(path) { // remember the chosen workspace and show it in the strip
+    workspaceInput.value = path; // sendStart reads this back as workspace_root
+    workspaceMeta.textContent = "Workspace selected: " + path;
+  }
+
+  function updateComposerMode() {
+    if (pendingAskId) {
+      runBtn.disabled = false;
+      runBtn.classList.remove("is-running");
+      runBtn.textContent = "Reply";
+      promptInput.placeholder = defaultPromptPlaceholder;
+      return;
+    }
+    runBtn.disabled = running && interrupting;
+    runBtn.classList.toggle("is-running", running);
+    runBtn.textContent = running ? (interrupting ? "Stopping" : "Stop") : "Run";
+    promptInput.placeholder = defaultPromptPlaceholder;
+  }
+
+  function setRunning(active, statusText) { // flip the running flag and refresh the button and status pill
+    running = active;
+    if (!active) interrupting = false; // the interrupting flag is cleared whenever a run ends
+    updateComposerMode();
+    setStatus(statusText || (active ? "Running" : "Idle"), active ? "running" : "idle");
+  }
+
+  function clearTimeline() { // reset the timeline to the welcome card
+    autoFollowTimeline = true; // an emptied timeline follows new output again
+    timeline.innerHTML = ''
+      + '<div class="welcome">'
+      + '<h1>What should the agent do?</h1>'
+      + '<p>Ask a question, attach images, choose a local workspace, and watch tool calls stream here.</p>'
+      + '</div>';
+  }
+
+  function ensureTimelineReady() { // drop the welcome card, if present, before real content is added
+    var welcome = timeline.querySelector(".welcome");
+    if (welcome) welcome.remove();
+  }
+
+  function isNearBottom() { // true when less than 80px of content remains below the viewport
+    return timeline.scrollHeight - timeline.scrollTop - timeline.clientHeight < 80;
+  }
+
+  function scrollTimeline(force) { // scroll to the newest entry; `force` overrides the follow flag
+    if (!force && !autoFollowTimeline) return;
+    requestAnimationFrame(function () {
+      timeline.scrollTop = timeline.scrollHeight;
+      requestAnimationFrame(function () { // second pass one frame later, presumably for content that is still growing
+        timeline.scrollTop = timeline.scrollHeight;
+        autoFollowTimeline = isNearBottom(); // re-derive the flag from the final scroll position
+      });
+    });
+  }
+
+  function syncTimelineFollowMode() { // scroll listener: follow new output only while near the bottom
+    autoFollowTimeline = isNearBottom();
+  }
+
+  function updateEventToggle(node) { // mirror the node's collapsed state into the toggle's aria-expanded
+    var toggle = node.querySelector(".event-toggle");
+    if (!toggle) return;
+    toggle.setAttribute("aria-expanded", node.classList.contains("collapsed") ? "false" : "true");
+  }
+
+  function eventBody(node) { // the event's .event-body element, or null when it has none
+    return node.querySelector(".event-body");
+  }
+
+  function eventCanCollapse(node) { // true once refreshEventCollapseCapability has marked the node collapsible
+    return node.classList.contains("can-collapse");
+  }
+
+  function refreshEventCollapseCapability(node) { // decide from the body's height whether the event may collapse
+    var body = eventBody(node);
+    var toggle = node.querySelector(".event-toggle");
+    if (!body) return;
+    var shouldCollapse = body.scrollHeight > COLLAPSED_STEP_HEIGHT + 8; // 8px of slack over the collapsed height
+    node.classList.toggle("can-collapse", shouldCollapse);
+    if (toggle) toggle.hidden = !shouldCollapse;
+    if (!shouldCollapse) { // short bodies are always shown in full
+      node.classList.remove("collapsed");
+      body.style.maxHeight = "none";
+    }
+    updateEventToggle(node);
+  }
+
+  function setEventExpanded(node, expanded, animate) { // expand or collapse an event body via its max-height
+    var body = eventBody(node);
+    if (!body) { // no body to size: only the class and aria state change
+      node.classList.toggle("collapsed", !expanded);
+      updateEventToggle(node);
+      return;
+    }
+    refreshEventCollapseCapability(node); // re-measure, since the content may have changed
+    if (!eventCanCollapse(node)) return; // short bodies were just forced open by the refresh
+
+    if (expanded) {
+      node.classList.remove("collapsed");
+      body.style.maxHeight = body.scrollHeight + "px"; // explicit pixel height as the expand target
+      if (!animate) {
+        body.style.maxHeight = "none";
+      } else {
+        body.addEventListener("transitionend", function onEnd(event) {
+          if (event.propertyName !== "max-height") return; // ignore transitions of other properties
+          body.removeEventListener("transitionend", onEnd);
+          if (!node.classList.contains("collapsed")) { // skip if it was collapsed again in the meantime
+            body.style.maxHeight = "none"; // lift the cap so later content growth is not clipped
+          }
+        });
+      }
+    } else {
+      if (body.style.maxHeight === "none" || !body.style.maxHeight) {
+        body.style.maxHeight = body.scrollHeight + "px"; // give the collapse a pixel start value
+      }
+      body.offsetHeight; // layout read, presumably to flush styles before the value changes below
+      node.classList.add("collapsed");
+      body.style.maxHeight = COLLAPSED_STEP_HEIGHT + "px";
+    }
+    updateEventToggle(node);
+  }
+
+  function toggleEvent(node) { // flip an event between collapsed and expanded, with animation
+    if (node.classList.contains("latest") || !eventCanCollapse(node)) return; // the latest event cannot be toggled
+    setEventExpanded(node, node.classList.contains("collapsed"), true);
+  }
+
+  function addEvent(kind, title, bodyHtml, badges) { // append a timeline event; bodyHtml must already be safe HTML
+    var shouldFollow = autoFollowTimeline || isNearBottom(); // captured before the DOM grows
+    ensureTimelineReady();
+    timeline.querySelectorAll(".event.latest").forEach(function (eventNode) { // demote and collapse the previous latest event
+      eventNode.classList.remove("latest");
+      setEventExpanded(eventNode, false, true);
+      updateEventToggle(eventNode);
+    });
+    var badgeHtml = (badges || []).map(function (badge) {
+      return '<span class="badge">' + escapeHtml(badge) + "</span>";
+    }).join("");
+    var node = document.createElement("article");
+    node.className = "event event-" + kind + " latest";
+    node.innerHTML = ''
+      + '<div class="event-head">'
+      + '<div class="event-title">' + escapeHtml(title) + badgeHtml + '</div>' // title is escaped here; bodyHtml is inserted as-is
+      + '<button class="event-toggle" type="button" aria-label="Toggle step details"></button>'
+      + '</div>'
+      + '<div class="event-body"><div class="event-body-inner">' + bodyHtml + '</div></div>';
+    node.querySelector(".event-toggle").addEventListener("click", function (event) {
+      event.stopPropagation(); // keep the article's own click handler from toggling a second time
+      toggleEvent(node);
+    });
+    node.addEventListener("click", function () { // clicking anywhere on the event toggles it too
+      toggleEvent(node);
+    });
+    timeline.appendChild(node);
+    setEventExpanded(node, true, false); // the newest event starts fully expanded, without animation
+    scrollTimeline(shouldFollow);
+  }
+
+  function addMessage(kind, text, attachedImages) {
+    autoFollowTimeline = true;
+    ensureTimelineReady();
+    var node = document.createElement("article");
+    node.className = "message " + kind;
+    var imageHtml = "";
+    (attachedImages || []).forEach(function (image) {
+      imageHtml += '<img class="message-image" alt="" src="' + image.data_url + '">';
+    });
+    node.innerHTML = '<div class="message-body">'
+      + (imageHtml ? '<div class="message-images">' + imageHtml + '</div>' : '')
+      + '<pre>' + escapeHtml(text) + '</pre>'
+      + '</div>';
+    timeline.appendChild(node);
+    scrollTimeline(true);
+  }
+
+  function formatJson(value) { // pretty-print `value` as 2-space-indented JSON
+    try {
+      return JSON.stringify(value, null, 2);
+    } catch (e) { // JSON.stringify throws on e.g. circular structures or BigInt values
+      return String(value);
+    }
+  }
+
+  function renderTrace(row) {
+    if (!row || row.capture_type === "llm_call" || row.capture_type === "compaction") return;
+    var role = row.role || "";
+    var turn = row.turn_index || 0;
+    var text = row.text || "";
+    if (role === "system") return;
+    if (role === "user" && turn === 0) return;
+
+    if (role === "assistant") {
+      var tools = Array.isArray(row.tool_names) ? row.tool_names : [];
+      var args = Array.isArray(row.tool_arguments) ? row.tool_arguments : [];
+      var body = "";
+      if (text.trim()) {
+        body += (!tools.length && row.termination === "result")
+          ? renderMarkdown(text)
+          : "<pre>" + escapeHtml(text) + "</pre>";
+      }
+      if (tools.length) {
+        body += '<div class="tool-grid">';
+        tools.forEach(function (name, idx) {
+          body += '<div class="tool-call"><div class="tool-name">' + escapeHtml(name)
+            + '</div><pre>' + escapeHtml(formatJson(args[idx] || {})) + '</pre></div>';
+        });
+        body += "</div>";
+      }
+      if (!body) body = '<pre>(empty assistant output)</pre>';
+      if (row.error) body += '<pre class="error-text">' + escapeHtml(row.error) + "</pre>";
+      addEvent("assistant", "Assistant", body, ["round " + turn]);
+      return;
+    }
+
+    if (role === "tool") {
+      var toolName = Array.isArray(row.tool_names) && row.tool_names.length ? row.tool_names[0] : "Tool";
+      var toolBody = "<pre>" + escapeHtml(text) + "</pre>";
+      if (row.error) toolBody += '<pre class="error-text">' + escapeHtml(row.error) + "</pre>";
+      addEvent("tool", toolName + " result", toolBody, ["round " + turn]);
+      return;
+    }
+
+    if (role === "runtime") {
+      if (!text.trim() && !row.error && !row.termination) return;
+      var runtimeBody = "<pre>" + escapeHtml(text || row.termination || "") + "</pre>";
+      if (row.error) runtimeBody += '<pre class="error-text">' + escapeHtml(row.error) + "</pre>";
+      addEvent("runtime", "Runtime", runtimeBody, turn ? ["round " + turn] : []);
+      return;
+    }
+
+    if (role === "user") {
+      addEvent("runtime", "Runtime message", "<pre>" + escapeHtml(text) + "</pre>", ["round " + turn]);
+    }
+  }
+
+  function connect() { // open the /ws socket and route each server message to the UI
+    var protocol = window.location.protocol === "https:" ? "wss:" : "ws:"; // match the page's security level
+    ws = new WebSocket(protocol + "//" + window.location.host + "/ws");
+    ws.onopen = function () {
+      setStatus("Connected", "idle");
+    };
+    ws.onclose = function () { // no reconnect is attempted here; the UI just reports the disconnect
+      clearAskRequest();
+      setRunning(false, "Disconnected");
+      setStatus("Disconnected", "error"); // overrides the "idle" styling applied by setRunning
+    };
+    ws.onmessage = function (event) {
+      var message = JSON.parse(event.data); // every frame is parsed as a JSON object with a `type`
+      if (message.type === "ready") {
+        setStatus("Connected", "idle");
+      } else if (message.type === "conversation_reset") {
+        if (keepSubmittedMessageOnReset) { // sendStart already showed the user's bubble: keep it
+          keepSubmittedMessageOnReset = false;
+          ensureTimelineReady();
+        } else {
+          clearTimeline();
+        }
+        conversationStarted = false;
+        clearAskRequest();
+      } else if (message.type === "uploaded_images") {
+        addEvent("runtime", "Uploaded images saved", "<pre>" + escapeHtml((message.paths || []).join("\n")) + "</pre>", []);
+      } else if (message.type === "run_started") {
+        setRunning(true, "Running");
+      } else if (message.type === "interrupt_requested") {
+        interrupting = true;
+        updateComposerMode();
+        setStatus("Interrupting", "running");
+      } else if (message.type === "trace") {
+        renderTrace(message.row);
+      } else if (message.type === "ask_user") {
+        showAskRequest(message);
+      } else if (message.type === "run_finished") {
+        conversationStarted = true; // the next prompt is sent with continue_conversation set
+        setRunning(false, "Done");
+        clearAskRequest();
+        setStatus("Done", "done"); // overrides the "idle" styling applied by setRunning
+      } else if (message.type === "run_error") {
+        keepSubmittedMessageOnReset = false;
+        clearAskRequest();
+        setRunning(false, "Error");
+        setStatus("Error", "error");
+        addEvent("runtime", "Error", '<pre class="error-text">' + escapeHtml(message.error || "unknown error") + "</pre>", []);
+      }
+    };
+  }
+
+  function showAskRequest(message) {
+    pendingAskId = message.request_id || "";
+    var question = message.question || "Question";
+    var context = message.context || "";
+    var body = "<pre>" + escapeHtml(question) + "</pre>";
+    if (context) body += '<pre class="muted-text">' + escapeHtml(context) + "</pre>";
+    addEvent("runtime", "Agent question", body, ["AskUser"]);
+    setStatus("Waiting for input", "running");
+    updateComposerMode();
+    promptInput.focus();
+  }
+
+  function clearAskRequest() { // forget any pending agent question and restore the button
+    pendingAskId = "";
+    updateComposerMode();
+  }
+
+  function sendStart() { // run-button / Enter action: reply, interrupt or start a run, depending on state
+    if (pendingAskId) { // an agent question is waiting: treat the input as its answer
+      sendAskUserAnswer();
+      return;
+    }
+    if (!ws || ws.readyState !== WebSocket.OPEN) {
+      setStatus("Disconnected", "error");
+      return;
+    }
+    if (running) { // while a run is active the same control acts as Stop
+      sendInterrupt();
+      return;
+    }
+    var prompt = promptInput.value.trim();
+    if (!prompt) return;
+    var sentImages = images.slice(); // copy, because `images` is emptied below
+    var continueConversation = conversationStarted;
+    if (!continueConversation) clearTimeline();
+    addMessage("user", prompt, sentImages);
+    keepSubmittedMessageOnReset = !continueConversation; // keeps this bubble when conversation_reset arrives
+    setRunning(true, "Starting");
+    ws.send(JSON.stringify({
+      type: "start",
+      prompt: prompt,
+      workspace_root: workspaceInput.value,
+      images: sentImages,
+      continue_conversation: continueConversation
+    }));
+    promptInput.value = "";
+    promptInput.style.height = "auto"; // undo the auto-grow applied by the input listener
+    images = [];
+    renderImages();
+  }
+
+  function sendInterrupt() { // ask the server to stop the active run (sent at most once per run)
+    if (!running || interrupting || !ws || ws.readyState !== WebSocket.OPEN) return;
+    interrupting = true; // set before the server confirms, so the button locks immediately
+    updateComposerMode();
+    setStatus("Interrupting", "running");
+    ws.send(JSON.stringify({ type: "interrupt" }));
+  }
+
+  function sendAskUserAnswer() { // send the composer text as the answer to the pending agent question
+    if (!pendingAskId || !ws || ws.readyState !== WebSocket.OPEN) return;
+    var answer = promptInput.value.trim();
+    if (!answer) return; // empty answers are not sent
+    var requestId = pendingAskId;
+    addMessage("user", answer, []);
+    ws.send(JSON.stringify({ type: "ask_user_answer", request_id: requestId, answer: answer }));
+    pendingAskId = ""; // the question is answered; leave reply mode
+    promptInput.value = "";
+    promptInput.style.height = "auto";
+    updateComposerMode();
+    setStatus("Running", "running");
+  }
+
+  function addImageFiles(fileList) { // read each image file as a data URL and queue it as an attachment
+    Array.from(fileList || []).forEach(function (file) {
+      if (!file.type || !file.type.startsWith("image/")) return; // non-image files are ignored
+      var reader = new FileReader();
+      reader.onload = function () { // runs when this file finishes loading, so order follows completion
+        images.push({ name: file.name, data_url: String(reader.result || "") });
+        renderImages();
+      };
+      reader.readAsDataURL(file);
+    });
+  }
+
+  function renderImages() {
+    imagePreview.innerHTML = "";
+    images.forEach(function (image, idx) {
+      var chip = document.createElement("button");
+      chip.type = "button";
+      chip.className = "image-chip";
+      chip.title = "Remove image";
+      chip.innerHTML = '<img alt="" src="' + image.data_url + '"><span>' + escapeHtml(image.name || "image") + "</span>";
+      chip.addEventListener("click", function () {
+        images.splice(idx, 1);
+        renderImages();
+      });
+      imagePreview.appendChild(chip);
+    });
+  }
+
+  function openWorkspaceModal() { // show the picker, starting at the currently selected workspace
+    workspaceModal.classList.remove("hidden");
+    loadWorkspaceDirectory(workspaceInput.value.trim()); // an empty path is sent without a ?path= query
+  }
+
+  function closeWorkspaceModal() { // hide the workspace picker without changing the selection
+    workspaceModal.classList.add("hidden");
+  }
+
+  function setWorkspacePickerBusy(text) { // replace the folder list and hint with a progress message
+    workspaceList.innerHTML = '<div class="dir-empty">' + escapeHtml(text || "Loading...") + "</div>";
+    workspacePickerHint.textContent = text || "Loading...";
+  }
+
+  function renderWorkspaceError(message) { // show `message` in place of the folder list
+    workspaceList.innerHTML = '<div class="dir-empty error-text">' + escapeHtml(message) + "</div>";
+    workspacePickerHint.textContent = "Paste a valid existing folder path, then press Go.";
+  }
+
+  function directoryRow(label, path, actionLabel, onClick) { // build one clickable folder row for the picker list
+    var row = document.createElement("button");
+    row.type = "button";
+    row.className = "dir-row";
+    row.innerHTML = ''
+      + '<span class="dir-icon">&rsaquo;</span>'
+      + '<span class="dir-main"><strong>' + escapeHtml(label) + '</strong><small>' + escapeHtml(path) + '</small></span>'
+      + '<span class="dir-action">' + escapeHtml(actionLabel || "Open") + '</span>'; // action text defaults to "Open"
+    row.addEventListener("click", onClick);
+    return row;
+  }
+
+  function renderWorkspacePicker(payload) { // draw a directory listing: root chips, parent row, child folders, hint
+    currentWorkspacePath = payload.path || ""; // what "Use this folder" will select
+    workspacePathInput.value = currentWorkspacePath;
+    workspaceRoots.innerHTML = "";
+    (payload.roots || []).forEach(function (root) { // one shortcut chip per root
+      var chip = document.createElement("button");
+      chip.type = "button";
+      chip.className = "root-chip";
+      chip.textContent = root.label || root.path;
+      chip.title = root.path || "";
+      chip.addEventListener("click", function () {
+        loadWorkspaceDirectory(root.path || "");
+      });
+      workspaceRoots.appendChild(chip);
+    });
+
+    workspaceList.innerHTML = "";
+    if (payload.parent) { // ".." row, only when the payload names a parent
+      workspaceList.appendChild(directoryRow("..", payload.parent, "Parent", function () {
+        loadWorkspaceDirectory(payload.parent);
+      }));
+    }
+    (payload.entries || []).forEach(function (entry) {
+      workspaceList.appendChild(directoryRow(entry.name, entry.path, "Open", function () {
+        loadWorkspaceDirectory(entry.path);
+      }));
+    });
+    if (!payload.parent && !(payload.entries || []).length) { // nothing was appended above
+      workspaceList.innerHTML = '<div class="dir-empty">No readable child folders.</div>';
+    }
+    workspacePickerHint.textContent = payload.truncated
+      ? "Directory list was truncated. Paste a deeper path if needed."
+      : "Current folder will be used when you click Use this folder.";
+  }
+
+  async function loadWorkspaceDirectory(path) {
+    setWorkspacePickerBusy("Loading folders...");
+    try {
+      var url = "/api/workspace-directories";
+      if (path) url += "?path=" + encodeURIComponent(path);
+      var response = await fetch(url);
+      var payload = await response.json();
+      if (!response.ok || payload.error) {
+        renderWorkspaceError(payload.error || "Cannot open this folder.");
+        return;
+      }
+      renderWorkspacePicker(payload);
+    } catch (error) {
+      renderWorkspaceError(String(error));
+    }
+  }
+
+  runBtn.addEventListener("click", sendStart);
+  timeline.addEventListener("scroll", syncTimelineFollowMode);
+  timeline.addEventListener("wheel", function (event) {
+    if (event.deltaY < 0) autoFollowTimeline = false;
+  }, { passive: true });
+  timeline.addEventListener("touchmove", function () {
+    autoFollowTimeline = false;
+  }, { passive: true });
+  promptInput.addEventListener("keydown", function (event) {
+    if (event.isComposing) return;
+    if (event.key === "Enter" && !event.shiftKey && !event.ctrlKey && !event.metaKey) {
+      event.preventDefault();
+      sendStart();
+    }
+  });
+  promptInput.addEventListener("input", function () {
+    promptInput.style.height = "auto";
+    promptInput.style.height = Math.min(promptInput.scrollHeight, 180) + "px";
+  });
+  newBtn.addEventListener("click", function () { // "New chat": notify the server and reset the local UI
+    if (ws && ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify({ type: "new" }));
+    if (!running) { // during a run the local reset is skipped; only the "new" message is sent
+      promptInput.value = "";
+      images = [];
+      renderImages();
+      clearTimeline();
+      clearAskRequest();
+      conversationStarted = false;
+      setRunning(false, "Idle");
+    }
+  });
+  attachBtn.addEventListener("click", function () {
+    imageInput.click();
+  });
+  imageInput.addEventListener("change", function (event) { addImageFiles(event.target.files); });
+
+  pickWorkspaceBtn.addEventListener("click", function () {
+    openWorkspaceModal();
+  });
+
+  workspaceCloseBtn.addEventListener("click", closeWorkspaceModal);
+  workspaceModal.addEventListener("click", function (event) {
+    if (event.target === workspaceModal) closeWorkspaceModal();
+  });
+  workspaceGoBtn.addEventListener("click", function () {
+    loadWorkspaceDirectory(workspacePathInput.value.trim());
+  });
+  workspacePathInput.addEventListener("keydown", function (event) {
+    if (event.key === "Enter") {
+      event.preventDefault();
+      loadWorkspaceDirectory(workspacePathInput.value.trim());
+    }
+  });
+  workspaceUseBtn.addEventListener("click", function () {
+    if (!currentWorkspacePath) return;
+    setWorkspaceSelected(currentWorkspacePath);
+    closeWorkspaceModal();
+  });
+
+  ["dragenter", "dragover"].forEach(function (name) {
+    dropZone.addEventListener(name, function (event) {
+      event.preventDefault();
+      dropZone.classList.add("dragover");
+    });
+  });
+  ["dragleave", "drop"].forEach(function (name) {
+    dropZone.addEventListener(name, function (event) {
+      event.preventDefault();
+      dropZone.classList.remove("dragover");
+    });
+  });
+  dropZone.addEventListener("drop", function (event) {
+    addImageFiles(event.dataTransfer.files);
+  });
+  document.addEventListener("paste", function (event) {
+    var files = [];
+    Array.from(event.clipboardData ? event.clipboardData.items : []).forEach(function (item) {
+      if (item.kind === "file") {
+        var file = item.getAsFile();
+        if (file) files.push(file);
+      }
+    });
+    if (files.length) addImageFiles(files);
+  });
+
+  connect();
+})();
diff --git a/frontend/static/favicon.svg b/frontend/static/favicon.svg
new file mode 100644
index 0000000000000000000000000000000000000000..2bccaa0217f1e098ecbff18b123fe7c88f8be1b8
--- /dev/null
+++ b/frontend/static/favicon.svg
@@ -0,0 +1,10 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64">
+  <rect width="64" height="64" rx="16" fill="#ffffff"/>
+  <text
+    x="32"
+    y="48"
+    text-anchor="middle"
+    font-size="46"
+    font-family="Apple Color Emoji, Segoe UI Emoji, Noto Color Emoji, sans-serif"
+  >🚀</text>
+</svg>
diff --git a/frontend/static/index.html b/frontend/static/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..19844a718e225b465c2fceed974627e9c20c4731
--- /dev/null
+++ b/frontend/static/index.html
@@ -0,0 +1,75 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>ResearchHarness Chat</title>
+    <link rel="icon" type="image/svg+xml" href="/static/favicon.svg?v=rocket-1" />
+    <link rel="stylesheet" href="/static/app.css" />
+  </head>
+  <body>
+    <main class="chat-shell">
+      <header class="topbar">
+        <div class="brand">
+          <div class="logo">RH</div>
+          <div>
+            <strong>ResearchHarness</strong>
+            <span id="statusPill" class="status idle">Idle</span>
+          </div>
+        </div>
+        <div class="top-actions">
+          <button id="pickWorkspaceBtn" class="plain" type="button" hidden>Open workspace</button>
+          <button id="newBtn" class="plain" type="button">New chat</button>
+        </div>
+      </header>
+
+      <section id="workspaceStrip" class="workspace-strip">
+        <input id="workspaceInput" type="hidden" value="" />
+        <span id="workspaceMeta">Managed temporary workspace. Each chat uses an isolated runtime directory.</span>
+      </section>
+
+      <section id="timeline" class="messages">
+        <div class="welcome">
+          <h1>What should the agent do?</h1>
+          <p>Ask a question, attach images, and watch tool calls stream from an isolated temporary workspace.</p>
+        </div>
+      </section>
+
+      <footer class="composer-wrap">
+        <div id="imagePreview" class="image-preview"></div>
+        <div id="dropZone" class="composer">
+          <button id="attachBtn" class="icon-button" type="button" title="Click + to add one or more images">+</button>
+          <input id="imageInput" type="file" accept="image/*" multiple />
+          <textarea id="promptInput" rows="1" placeholder="Message ResearchHarness"></textarea>
+          <button id="runBtn" class="send-button" type="button">Run</button>
+        </div>
+        <p class="composer-hint">Enter sends. Ctrl+Enter or Shift+Enter inserts a newline. Click + to add one or more images; paste or drop images also works.</p>
+      </footer>
+    </main>
+
+    <section id="workspaceModal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="workspaceModalTitle">
+      <div class="modal-card">
+        <header class="modal-head">
+          <div>
+            <h2 id="workspaceModalTitle">Open workspace</h2>
+            <p>Choose an existing local folder. Unicode paths are supported.</p>
+          </div>
+          <button id="workspaceCloseBtn" class="plain" type="button" aria-label="Close workspace picker">Close</button>
+        </header>
+        <div class="modal-path-row">
+          <input id="workspacePathInput" type="text" autocomplete="off" placeholder="Paste a folder path..." />
+          <button id="workspaceGoBtn" class="plain" type="button">Go</button>
+        </div>
+        <div id="workspaceRoots" class="workspace-roots"></div>
+        <div id="workspaceList" class="workspace-list"></div>
+        <footer class="modal-actions">
+          <span id="workspacePickerHint">Select a folder to use as the agent workspace.</span>
+          <button id="workspaceUseBtn" class="send-button" type="button">Use this folder</button>
+        </footer>
+      </div>
+    </section>
+    <script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.6/dist/purify.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/marked@15.0.12/marked.min.js"></script>
+    <script src="/static/app.js"></script>
+  </body>
+</html>
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..35bb0171e234380ac41f180fee018d09c967c78d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+fastapi==0.115.6
+json5==0.14.0
+openai==2.3.0
+Pillow==11.3.0
+requests==2.32.5
+structai==0.1.22
+tiktoken==0.12.0
+uvicorn==0.34.0
diff --git a/run_agent.py b/run_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ba72187473890cbd6ec41a7b09606b13462b199
--- /dev/null
+++ b/run_agent.py
@@ -0,0 +1,7 @@
+"""Thin top-level CLI entrypoint for the ResearchHarness agent."""
+
+from agent_base.react_agent import main
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # exit the process with whatever main() returns
diff --git a/run_frontend.py b/run_frontend.py
new file mode 100644
index 0000000000000000000000000000000000000000..6528d113bf82d311b20928959375d3206056f54f
--- /dev/null
+++ b/run_frontend.py
@@ -0,0 +1,63 @@
+"""Launch the local ResearchHarness browser UI."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+import threading
+import webbrowser
+
+import uvicorn
+
+from agent_base.utils import read_role_prompt_files
+from frontend.local_server import app, configure_frontend
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse CLI options, configure the frontend, and serve it with uvicorn.
+
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.
+
+    Returns:
+        ``1`` when reading the role prompt files or configuring the frontend
+        raises ``OSError`` or ``ValueError``; ``0`` once ``uvicorn.run`` returns.
+    """
+    parser = argparse.ArgumentParser(description="Run the local ResearchHarness frontend.")
+    parser.add_argument("--host", default="127.0.0.1", help="Host to bind. Default: 127.0.0.1")
+    parser.add_argument("--port", type=int, default=8765, help="Port to bind. Default: 8765")
+    parser.add_argument("--no-browser", action="store_true", help="Do not open the browser automatically.")
+    parser.add_argument("--trace-dir", help="Optional directory where frontend agent traces are written.")
+    parser.add_argument(
+        "--role-prompt-file",
+        action="append",
+        default=[],
+        dest="role_prompt_files",
+        metavar="PATH",
+        help="Append one role-specific prompt file to the frontend agent. May be passed multiple times.",
+    )
+    args = parser.parse_args(argv)
+
+    try:
+        role_prompt = read_role_prompt_files(args.role_prompt_files)
+        configure_frontend(role_prompt=role_prompt, trace_dir=args.trace_dir)
+    except (OSError, ValueError) as exc:
+        print(str(exc), file=sys.stderr)
+        return 1
+
+    url = f"http://{args.host}:{args.port}"
+    if not args.no_browser:
+        # uvicorn.run() blocks this thread, so the browser is opened from a timer shortly
+        # after startup begins. The timer is a daemon thread: if the server exits before
+        # the delay elapses (e.g. startup fails), the process terminates right away
+        # instead of staying alive just to open a tab on a dead URL.
+        opener = threading.Timer(0.8, webbrowser.open, args=(url,))
+        opener.daemon = True
+        opener.start()
+    print(f"ResearchHarness frontend: {url}")
+    uvicorn.run(app, host=args.host, port=args.port, reload=False)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/run_server.py b/run_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8e6eaf311854559c7b023629bce45a56ae976df
--- /dev/null
+++ b/run_server.py
@@ -0,0 +1,74 @@
+"""Run ResearchHarness as a minimal OpenAI-compatible API server."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+from agent_base.utils import PROJECT_ROOT, MissingRequiredEnvError, load_dotenv, require_required_env
+from api.openai_server import serve
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse CLI options, load the project ``.env`` file, and run the API server.
+
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.
+
+    Returns:
+        ``1`` when the required-environment check or ``serve`` raises
+        ``MissingRequiredEnvError``, ``OSError``, or ``ValueError``; ``0`` once
+        ``serve`` returns.
+    """
+    parser = argparse.ArgumentParser(description="Serve ResearchHarness through /v1/chat/completions.")
+    parser.add_argument(
+        "--api-runs-dir",
+        required=True,
+        dest="api_runs_dir",
+        help="Directory where the server creates one isolated subdirectory per request.",
+    )
+    parser.add_argument("--host", default="127.0.0.1", help="Host to bind. Defaults to 127.0.0.1.")
+    parser.add_argument("--port", type=int, default=8686, help="Port to bind. Defaults to 8686.")
+    parser.add_argument(
+        "--role-prompt-file",
+        action="append",
+        default=[],
+        dest="role_prompt_files",
+        metavar="PATH",
+        help="Optional role prompt file appended to the base ResearchHarness prompt.",
+    )
+    parser.add_argument(
+        "--input-wrapper",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help="Enable or disable the input LLM wrapper. Enabled by default.",
+    )
+    parser.add_argument(
+        "--output-wrapper",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help="Enable or disable the output LLM wrapper. Enabled by default.",
+    )
+    args = parser.parse_args(argv)
+
+    load_dotenv(PROJECT_ROOT / ".env")
+    try:
+        require_required_env("ResearchHarness API server")
+        serve(
+            api_runs_dir=args.api_runs_dir,
+            host=args.host,
+            port=args.port,
+            role_prompt_files=list(args.role_prompt_files),
+            input_wrapper=args.input_wrapper,
+            output_wrapper=args.output_wrapper,
+        )
+    except (MissingRequiredEnvError, OSError, ValueError) as exc:
+        # OSError is caught as well (run_frontend.py does the same around its role prompt
+        # loading), so filesystem or socket errors print one line instead of a traceback.
+        print(str(exc), file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/traces/.gitkeep b/traces/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/traces/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/workspace/.gitkeep b/workspace/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/workspace/.gitkeep
@@ -0,0 +1 @@
+