| """Agent-facing tool: respond to a native JS dialog captured by the CDP supervisor. |
| |
| This tool is response-only — the agent first reads ``pending_dialogs`` from |
| ``browser_snapshot`` output, then calls ``browser_dialog(action=...)`` to |
| accept or dismiss. |
| |
| Gated on the same ``_browser_cdp_check`` as ``browser_cdp`` so it only |
| appears when a CDP endpoint is reachable (Browserbase with a |
| ``connectUrl``, local Chrome via ``/browser connect``, or |
| ``browser.cdp_url`` set in config). |
| |
| See ``website/docs/developer-guide/browser-supervisor.md`` for the full |
| design. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| import logging |
| from typing import Any, Dict, Optional |
|
|
| from tools.browser_supervisor import SUPERVISOR_REGISTRY |
| from tools.registry import registry |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
# Paragraphs of the tool description shown to the agent. They are joined with
# a blank line ("\n\n") when the schema is assembled below.
_DIALOG_TOOL_PARAGRAPHS = (
    (
        "Respond to a native JavaScript dialog (alert / confirm / prompt / "
        "beforeunload) that is currently blocking the page."
    ),
    (
        "**Workflow:** call ``browser_snapshot`` first — if a dialog is open, "
        "it appears in the ``pending_dialogs`` field with ``id``, ``type``, "
        "and ``message``. Then call this tool with ``action='accept'`` or "
        "``action='dismiss'``."
    ),
    (
        "**Prompt dialogs:** pass ``prompt_text`` to supply the response "
        "string. Ignored for alert/confirm/beforeunload."
    ),
    (
        "**Multiple dialogs:** if more than one dialog is queued (rare — "
        "happens when a second dialog fires while the first is still open), "
        "pass ``dialog_id`` from the snapshot to disambiguate."
    ),
    (
        "**Availability:** only present when a CDP-capable backend is "
        "attached — Browserbase sessions, local Chrome via "
        "``/browser connect``, or ``browser.cdp_url`` in config.yaml. "
        "Not available on Camofox (REST-only) or the default Playwright "
        "local browser (CDP port is hidden)."
    ),
)

# JSON-schema entries for each argument the tool accepts. Insertion order is
# kept as action, prompt_text, dialog_id.
_DIALOG_TOOL_PROPERTIES: Dict[str, Any] = {
    "action": {
        "type": "string",
        "enum": ["accept", "dismiss"],
        "description": (
            "'accept' clicks OK / returns the prompt text. "
            "'dismiss' clicks Cancel / returns null from prompt(). "
            "For ``beforeunload`` dialogs: 'accept' allows the "
            "navigation, 'dismiss' keeps the page."
        ),
    },
    "prompt_text": {
        "type": "string",
        "description": (
            "Response string for a ``prompt()`` dialog. Ignored for "
            "other dialog types. Defaults to empty string."
        ),
    },
    "dialog_id": {
        "type": "string",
        "description": (
            "Specific dialog to respond to, from "
            "``browser_snapshot.pending_dialogs[].id``. Required "
            "only when multiple dialogs are queued."
        ),
    },
}

# Tool schema handed to the registry: the tool name, the agent-facing
# description, and the parameter spec. Only ``action`` is mandatory.
BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
    "name": "browser_dialog",
    "description": "\n\n".join(_DIALOG_TOOL_PARAGRAPHS),
    "parameters": {
        "type": "object",
        "properties": _DIALOG_TOOL_PROPERTIES,
        "required": ["action"],
    },
}
|
|
|
|
def browser_dialog(
    action: str,
    prompt_text: Optional[str] = None,
    dialog_id: Optional[str] = None,
    task_id: Optional[str] = None,
) -> str:
    """Answer a pending native JS dialog via the task's CDP supervisor.

    Args:
        action: Passed through unchanged to the supervisor. The tool schema
            offers ``"accept"`` and ``"dismiss"``.
        prompt_text: Passed through unchanged to the supervisor.
        dialog_id: Passed through unchanged to the supervisor.
        task_id: Key used to look up the supervisor in
            ``SUPERVISOR_REGISTRY``. A falsy value selects ``"default"``.

    Returns:
        A JSON-encoded object. On success it holds ``success: true``, the
        ``action`` that was requested, and the supervisor's ``dialog`` entry
        (``{}`` when absent). On failure it holds ``success: false`` and an
        ``error`` string, either the "no supervisor" explanation or the
        supervisor's own error (``"unknown error"`` when it gave none).
    """
    registry_key = task_id if task_id else "default"
    cdp_supervisor = SUPERVISOR_REGISTRY.get(registry_key)

    if cdp_supervisor is not None:
        outcome = cdp_supervisor.respond_to_dialog(
            action=action,
            prompt_text=prompt_text,
            dialog_id=dialog_id,
        )
        if not outcome.get("ok"):
            # Surface the supervisor's failure reason to the agent.
            payload = {
                "success": False,
                "error": outcome.get("error", "unknown error"),
            }
        else:
            payload = {
                "success": True,
                "action": action,
                "dialog": outcome.get("dialog", {}),
            }
        return json.dumps(payload)

    # Nothing registered under this task: explain the likely causes and the
    # calls that would start a session.
    no_supervisor_message = (
        "No CDP supervisor is attached to this task. Either the "
        "browser backend doesn't expose CDP (Camofox, default "
        "Playwright) or no browser session has been started yet. "
        "Call browser_navigate or /browser connect first."
    )
    return json.dumps({"success": False, "error": no_supervisor_message})
|
|
|
|
def _browser_dialog_check() -> bool:
    """Availability gate that mirrors the ``browser_cdp`` tool's gate.

    The decision is delegated to ``_browser_cdp_check`` so both tools are
    offered and withdrawn together. The supervisor is started lazily
    (``browser_navigate`` / ``/browser connect`` / Browserbase session
    creation), so a reachable CDP URL is sufficient to show this tool.

    Returns:
        ``False`` when ``tools.browser_cdp_tool`` cannot be imported (the
        failure is logged at debug level); otherwise whatever
        ``_browser_cdp_check()`` returns.
    """
    try:
        # Imported lazily so an import failure is handled here, at check
        # time, and simply hides the tool.
        from tools.browser_cdp_tool import _browser_cdp_check as cdp_reachable
    except Exception as import_error:
        logger.debug(
            "browser_dialog check: browser_cdp_tool import failed: %s",
            import_error,
        )
        return False
    else:
        return cdp_reachable()
|
|
|
|
def _browser_dialog_handler(args, **kw):
    """Unpack a registry call into ``browser_dialog`` keyword arguments.

    ``args`` is the mapping of tool arguments. A missing ``action`` becomes
    the empty string, and the optional fields default to ``None``. Only
    ``task_id`` is read from the extra keyword arguments.
    """
    return browser_dialog(
        action=args.get("action", ""),
        prompt_text=args.get("prompt_text"),
        dialog_id=args.get("dialog_id"),
        task_id=kw.get("task_id"),
    )


# Register the tool in the "browser-cdp" toolset, gated by the CDP check.
registry.register(
    name="browser_dialog",
    toolset="browser-cdp",
    schema=BROWSER_DIALOG_SCHEMA,
    handler=_browser_dialog_handler,
    check_fn=_browser_dialog_check,
    emoji="💬",
)
|
|