| import shlex |
| import shutil |
| import subprocess |
|
|
| from langchain.tools import tool |
|
|
| _SUPPORTED_COMMANDS = { |
| |
| "open", |
| "goto", |
| "navigate", |
| "back", |
| "forward", |
| "reload", |
| "close", |
| "quit", |
| "exit", |
| |
| "snapshot", |
| "get", |
| "find", |
| "is", |
| |
| "click", |
| "dblclick", |
| "focus", |
| "type", |
| "fill", |
| "press", |
| "key", |
| "keydown", |
| "keyup", |
| "hover", |
| "select", |
| "check", |
| "uncheck", |
| "scroll", |
| "scrollintoview", |
| "scrollinto", |
| "drag", |
| "upload", |
| |
| "wait", |
| "screenshot", |
| "pdf", |
| "eval", |
| "session", |
| "set", |
| "connect", |
| "help", |
| "--help", |
| "-h", |
| "install", |
| } |
|
|
|
|
| def _resolve_agent_browser_command() -> list[str] | None: |
| """Prefer installed CLI; fallback to npx for zero-setup usage.""" |
| if shutil.which("agent-browser"): |
| return ["agent-browser"] |
| if shutil.which("npx"): |
| return ["npx", "-y", "agent-browser@latest"] |
| return None |
|
|
|
|
| def _build_cmd(command: str, session: str | None, json_output: bool) -> list[str]: |
| parts = shlex.split(command) |
| if not parts: |
| raise ValueError("command is empty") |
|
|
| action = parts[0] |
| if action not in _SUPPORTED_COMMANDS: |
| supported = ", ".join(sorted(_SUPPORTED_COMMANDS)) |
| raise ValueError(f"unsupported agent-browser command: {action}. supported: {supported}") |
|
|
| cmd_prefix = _resolve_agent_browser_command() |
| if cmd_prefix is None: |
| raise ValueError("agent-browser CLI not found. Install Node.js and run: npm i -g agent-browser && agent-browser install") |
|
|
| cmd = cmd_prefix.copy() |
| if session: |
| cmd.extend(["--session", session]) |
|
|
| cmd.extend(parts) |
| if json_output and "--json" not in parts: |
| cmd.append("--json") |
| return cmd |
|
|
|
|
@tool("agent_browser", parse_docstring=True)
def agent_browser_tool(
    command: str,
    session: str | None = None,
    json_output: bool = True,
    timeout_seconds: int = 60,
) -> str:
    """Run a single agent-browser CLI command (vercel-labs/agent-browser).

    Best-practice workflow:
    1. open URL
    2. snapshot -i -c
    3. interact with refs like @e1
    4. re-snapshot after page changes

    Args:
        command: Raw agent-browser command arguments, e.g. "open https://example.com" or "snapshot -i -c".
        session: Optional isolated browser session name.
        json_output: Add --json automatically for machine-readable output.
        timeout_seconds: Max command runtime in seconds.
    """
    # NOTE: the docstring above is intentionally left verbatim. Because the
    # decorator is called with parse_docstring=True, that text is consumed at
    # runtime (tool description / argument descriptions), so further
    # maintainer documentation lives in comments instead.
    #
    # Contract: this function never raises for command problems. Every
    # failure is reported as a returned string starting with "Error: ", and
    # success returns the stripped stdout of the CLI.
    try:
        cmd = _build_cmd(command=command, session=session, json_output=json_output)
    except ValueError as e:
        return f"Error: {e}"

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Decode explicitly instead of relying on the locale encoding:
            # page content is frequently non-ASCII, and on a non-UTF-8 locale
            # (e.g. Windows code pages) the default decoding can fail or
            # garble the output. The CLI is a Node.js tool and is assumed to
            # emit UTF-8; undecodable bytes are replaced rather than fatal.
            encoding="utf-8",
            errors="replace",
            check=False,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        return f"Error: agent-browser command timed out after {timeout_seconds}s"
    except Exception as e:
        # Tool boundary: any launch failure (missing executable, permission
        # error, ...) is turned into an error string for the caller.
        return f"Error: failed to execute agent-browser command: {e}"

    stdout = (result.stdout or "").strip()
    if result.returncode != 0:
        stderr = (result.stderr or "").strip()
        # Report both streams. Showing stderr alone would hide whatever the
        # CLI printed on stdout whenever stderr carries only launcher noise
        # (for instance npx install warnings).
        detail = "\n".join(part for part in (stderr, stdout) if part)
        return f"Error: agent-browser exited with code {result.returncode}. {detail}"

    return stdout
|
|