import shlex
import shutil
import subprocess

from langchain.tools import tool

# Allow-list of first tokens accepted by `_build_cmd`. Anything else is
# rejected before a subprocess is spawned.
_SUPPORTED_COMMANDS = {
    # Navigation
    "open",
    "goto",
    "navigate",
    "back",
    "forward",
    "reload",
    "close",
    "quit",
    "exit",
    # Snapshots / extraction
    "snapshot",
    "get",
    "find",
    "is",
    # Interactions
    "click",
    "dblclick",
    "focus",
    "type",
    "fill",
    "press",
    "key",
    "keydown",
    "keyup",
    "hover",
    "select",
    "check",
    "uncheck",
    "scroll",
    "scrollintoview",
    "scrollinto",
    "drag",
    "upload",
    # Utilities
    "wait",
    "screenshot",
    "pdf",
    "eval",
    "session",
    "set",
    "connect",
    "help",
    "--help",
    "-h",
    "install",
}


def _resolve_agent_browser_command() -> list[str] | None:
    """Prefer installed CLI; fallback to npx for zero-setup usage.

    Returns:
        The argv prefix used to launch agent-browser, or None when neither
        ``agent-browser`` nor ``npx`` is found on PATH.
    """
    # Use the path that `shutil.which` resolved instead of the bare name:
    # subprocess (without a shell) does not resolve extension-less names to
    # `.cmd`/`.bat` shims on Windows, which is how npm exposes these tools.
    cli_path = shutil.which("agent-browser")
    if cli_path:
        return [cli_path]

    npx_path = shutil.which("npx")
    if npx_path:
        return [npx_path, "-y", "agent-browser@latest"]

    return None


def _build_cmd(command: str, session: str | None, json_output: bool) -> list[str]:
    """Turn a raw command string into the argv list passed to subprocess.

    Args:
        command: Raw agent-browser arguments; tokenised with ``shlex.split``.
        session: When truthy, ``--session <session>`` is inserted before the
            command tokens.
        json_output: When true, ``--json`` is appended unless the command
            tokens already contain it.

    Returns:
        The full argv list: launcher prefix, optional session flag, command
        tokens, optional ``--json``.

    Raises:
        ValueError: If the command is empty, its first token is not in
            ``_SUPPORTED_COMMANDS``, no launcher is available, or
            ``shlex.split`` rejects the string (e.g. unbalanced quotes).
    """
    parts = shlex.split(command)
    if not parts:
        raise ValueError("command is empty")

    action = parts[0]
    if action not in _SUPPORTED_COMMANDS:
        supported = ", ".join(sorted(_SUPPORTED_COMMANDS))
        raise ValueError(f"unsupported agent-browser command: {action}. supported: {supported}")

    cmd_prefix = _resolve_agent_browser_command()
    if cmd_prefix is None:
        raise ValueError("agent-browser CLI not found. Install Node.js and run: npm i -g agent-browser && agent-browser install")

    cmd = [*cmd_prefix]
    if session:
        cmd.extend(["--session", session])
    cmd.extend(parts)

    if json_output and "--json" not in parts:
        cmd.append("--json")
    return cmd


@tool("agent_browser", parse_docstring=True)
def agent_browser_tool(
    command: str,
    session: str | None = None,
    json_output: bool = True,
    timeout_seconds: int = 60,
) -> str:
    """Run a single agent-browser CLI command (vercel-labs/agent-browser).

    Best-practice workflow:
    1. open URL
    2. snapshot -i -c
    3. interact with refs like @e1
    4. re-snapshot after page changes

    Args:
        command: Raw agent-browser command arguments, e.g. "open https://example.com" or "snapshot -i -c".
        session: Optional isolated browser session name.
        json_output: Add --json automatically for machine-readable output.
        timeout_seconds: Max command runtime in seconds.
    """
    # NOTE(review): the `command` example is deliberately kept on the same
    # line as its `command:` label. The URL contains a colon, and LangChain's
    # Google-style docstring parser appears to treat any Args line containing
    # ":" as the start of a new argument - confirm against the installed
    # langchain version before re-wrapping that line.
    #
    # Return contract: this function never raises to the caller. It returns
    # the stripped stdout on success, or a string starting with "Error: " for
    # validation failures, timeouts, launch failures and non-zero exit codes.
    try:
        cmd = _build_cmd(command=command, session=session, json_output=json_output)
    except ValueError as e:
        return f"Error: {e}"

    try:
        result = subprocess.run(
            cmd,
            # No input is ever supplied, so give the child an empty stdin
            # rather than letting it inherit (and possibly block on or
            # consume) the host process's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            check=False,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        return f"Error: agent-browser command timed out after {timeout_seconds}s"
    except Exception as e:
        # Tool boundary: report launch failures (e.g. OSError) as text instead
        # of propagating them to the caller.
        return f"Error: failed to execute agent-browser command: {e}"

    if result.returncode != 0:
        stderr = (result.stderr or "").strip()
        stdout = (result.stdout or "").strip()
        # Prefer stderr; fall back to stdout when stderr is empty.
        detail = stderr if stderr else stdout
        return f"Error: agent-browser exited with code {result.returncode}. {detail}"

    return (result.stdout or "").strip()