# pjpjq's picture
# Fix web search fallback and integrate vercel agent-browser tool
# 9c0e837
import shlex
import shutil
import subprocess
from langchain.tools import tool
# Allow-list of agent-browser sub-commands. The first token of every command
# string is checked against this set before anything is executed.
_SUPPORTED_COMMANDS = {
    # Navigation
    "open", "goto", "navigate", "back", "forward", "reload",
    "close", "quit", "exit",
    # Snapshots / extraction
    "snapshot", "get", "find", "is",
    # Interactions
    "click", "dblclick", "focus", "type", "fill", "press",
    "key", "keydown", "keyup", "hover", "select", "check", "uncheck",
    "scroll", "scrollintoview", "scrollinto", "drag", "upload",
    # Utilities
    "wait", "screenshot", "pdf", "eval", "session", "set", "connect",
    "help", "--help", "-h", "install",
}
def _resolve_agent_browser_command() -> list[str] | None:
"""Prefer installed CLI; fallback to npx for zero-setup usage."""
if shutil.which("agent-browser"):
return ["agent-browser"]
if shutil.which("npx"):
return ["npx", "-y", "agent-browser@latest"]
return None
def _build_cmd(command: str, session: str | None, json_output: bool) -> list[str]:
parts = shlex.split(command)
if not parts:
raise ValueError("command is empty")
action = parts[0]
if action not in _SUPPORTED_COMMANDS:
supported = ", ".join(sorted(_SUPPORTED_COMMANDS))
raise ValueError(f"unsupported agent-browser command: {action}. supported: {supported}")
cmd_prefix = _resolve_agent_browser_command()
if cmd_prefix is None:
raise ValueError("agent-browser CLI not found. Install Node.js and run: npm i -g agent-browser && agent-browser install")
cmd = cmd_prefix.copy()
if session:
cmd.extend(["--session", session])
cmd.extend(parts)
if json_output and "--json" not in parts:
cmd.append("--json")
return cmd
# NOTE: parse_docstring=True means the docstring below is parsed for the tool
# description and argument descriptions; keep the summary and the Google-style
# "Args:" section intact when editing it.
@tool("agent_browser", parse_docstring=True)
def agent_browser_tool(
    command: str,
    session: str | None = None,
    json_output: bool = True,
    timeout_seconds: int = 60,
) -> str:
    """Run a single agent-browser CLI command (vercel-labs/agent-browser).

    Best-practice workflow:
    1. open URL
    2. snapshot -i -c
    3. interact with refs like @e1
    4. re-snapshot after page changes

    Args:
        command: Raw agent-browser command arguments, e.g. "open https://example.com" or "snapshot -i -c".
        session: Optional isolated browser session name.
        json_output: Add --json automatically for machine-readable output.
        timeout_seconds: Max command runtime in seconds.

    Returns:
        The stripped standard output of the command on success. Every
        failure (invalid command, missing CLI, timeout, launch error,
        non-zero exit status) is returned as a string starting with
        "Error:" instead of being raised.
    """
    # Failures are returned as text, not raised, so the caller always gets a
    # string result.
    try:
        cmd = _build_cmd(command=command, session=session, json_output=json_output)
    except ValueError as e:
        return f"Error: {e}"

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Decode explicitly instead of using the locale default: page
            # content is frequently non-ASCII, and a strict locale decode
            # would raise and be misreported below as a launch failure.
            encoding="utf-8",
            errors="replace",
            check=False,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        return f"Error: agent-browser command timed out after {timeout_seconds}s"
    except Exception as e:
        # Deliberately broad: this is the tool boundary, and any launch
        # problem (missing binary, permissions, bad argument) is reported
        # back as text.
        return f"Error: failed to execute agent-browser command: {e}"

    stdout = (result.stdout or "").strip()
    if result.returncode != 0:
        stderr = (result.stderr or "").strip()
        # Report both streams: stderr can be non-empty for reasons unrelated
        # to the failure, in which case the useful detail is on stdout.
        detail = "\n".join(part for part in (stderr, stdout) if part)
        return f"Error: agent-browser exited with code {result.returncode}. {detail}"
    return stdout