| """Integration tests for tools.browser_supervisor. |
| |
| Exercises the supervisor end-to-end against a real local Chrome |
| (``--remote-debugging-port``). Skipped when Chrome is not installed |
| — these are the tests that actually verify the CDP wire protocol |
| works, since mock-CDP unit tests can only prove the happy paths we |
| thought to model. |
| |
| Run manually: |
| scripts/run_tests.sh tests/tools/test_browser_supervisor.py |
| |
| Automated: skipped in CI unless ``HERMES_E2E_BROWSER=1`` is set. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import asyncio |
| import base64 |
| import json |
| import os |
| import shutil |
| import subprocess |
| import tempfile |
| import time |
|
|
| import pytest |
|
|
|
|
# Skip every test in this module when no Chrome/Chromium binary is on PATH.
# The candidate names match the ones _find_chrome() probes, so the module
# guard and the launcher agree on what counts as "installed".
pytestmark = pytest.mark.skipif(
    not any(
        shutil.which(name)
        for name in ("google-chrome", "chromium", "chromium-browser")
    ),
    reason="Chrome/Chromium not installed",
)
|
|
|
|
def _find_chrome() -> str:
    """Return the path of the first Chrome/Chromium binary found on PATH.

    Candidates are probed in order: ``google-chrome``, ``chromium``,
    ``chromium-browser``. When none of them resolves, the calling test is
    skipped through ``pytest.skip``.
    """
    lookups = (
        shutil.which(name)
        for name in ("google-chrome", "chromium", "chromium-browser")
    )
    # The generator is lazy, so probing stops at the first hit.
    binary = next((found for found in lookups if found), None)
    if binary is None:
        pytest.skip("no Chrome binary found")
    return binary
|
|
|
|
def _terminate_chrome(proc) -> None:
    """Stop the Chrome process *proc*, escalating from terminate() to kill()."""
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # terminate() was not honoured in time: force-kill, then reap the
        # process so it is really gone before the profile dir is removed.
        proc.kill()
        proc.wait()


@pytest.fixture
def chrome_cdp(worker_id):
    """Start a headless Chrome with --remote-debugging-port, yield its WS URL.

    Uses a unique port per xdist worker to avoid cross-worker collisions.
    Always launches with ``--site-per-process`` so cross-origin iframes
    become real OOPIFs (needed by the iframe interaction tests).

    Yields:
        ``(ws_url, port)`` — the ``webSocketDebuggerUrl`` reported by
        Chrome's ``/json/version`` endpoint and the debugging port used.

    The test is skipped when no Chrome binary is found or when Chrome does
    not answer on the debugging port within 15 seconds. Chrome is stopped
    and its temporary profile directory removed on every exit path.
    """
    import urllib.request

    # Resolve the binary before creating anything: _find_chrome() may skip
    # the test, and at this point there is nothing to clean up yet.
    chrome_binary = _find_chrome()

    # Worker ids look like "gw0", "gw1", ...; "master" gets offset 0.
    if worker_id == "master":
        port_offset = 0
    else:
        port_offset = int(worker_id.lstrip("gw"))
    port = 9225 + port_offset
    version_url = f"http://127.0.0.1:{port}/json/version"

    profile = tempfile.mkdtemp(prefix="hermes-supervisor-test-")
    proc = None
    try:
        proc = subprocess.Popen(
            [
                chrome_binary,
                f"--remote-debugging-port={port}",
                f"--user-data-dir={profile}",
                "--no-first-run",
                "--no-default-browser-check",
                "--headless=new",
                "--disable-gpu",
                "--site-per-process",
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Poll /json/version until Chrome reports its browser-level WS URL.
        ws_url = None
        deadline = time.monotonic() + 15
        while time.monotonic() < deadline:
            try:
                with urllib.request.urlopen(version_url, timeout=1) as r:
                    info = json.loads(r.read().decode())
                ws_url = info["webSocketDebuggerUrl"]
                break
            except Exception:
                # Deliberately broad: any failure here just means "not
                # ready yet", so retry until the deadline.
                time.sleep(0.25)
        if ws_url is None:
            pytest.skip("Chrome didn't expose CDP in time")

        yield ws_url, port
    finally:
        if proc is not None:
            _terminate_chrome(proc)
        shutil.rmtree(profile, ignore_errors=True)
|
|
|
|
def _test_page_url() -> str:
    """Return a base64 ``data:`` URL for the page the dialog tests load.

    The page consists of an ``<h1>`` heading and a ``srcdoc`` iframe with
    ``id="inner"`` whose document contains the text ``frame-marker``.
    """
    markup = """<!doctype html>
<html><head><title>Supervisor pytest</title></head><body>
<h1>Supervisor pytest</h1>
<iframe id="inner" srcdoc="<body><h2>frame-marker</h2></body>" width="400" height="100"></iframe>
</body></html>"""
    encoded = base64.b64encode(markup.encode()).decode()
    return f"data:text/html;base64,{encoded}"
|
|
|
|
def _fire_on_page(cdp_url: str, expression: str) -> None:
    """Navigate the first page target to a data URL and fire `expression`.

    Opens a dedicated WebSocket to ``cdp_url``, attaches to the first
    target of type ``page`` (flattened session), navigates it to
    ``_test_page_url()``, sleeps 1.5 s, then sends ``expression`` through
    ``Runtime.evaluate``. The replies to ``Page.navigate`` and
    ``Runtime.evaluate`` are awaited but not inspected.

    Raises:
        RuntimeError: if the socket closes before a reply arrives, if
            ``Target.getTargets`` / ``Target.attachToTarget`` come back
            without a ``result``, or if no ``page`` target exists.
        asyncio.TimeoutError: if a single reply takes longer than 20 s.
    """
    import websockets as _ws_mod

    reply_timeout = 20  # seconds allowed for any single CDP reply

    async def run():
        async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws:
            next_id = [1]

            async def call(method, params=None, session_id=None):
                cid = next_id[0]
                next_id[0] += 1
                p = {"id": cid, "method": method}
                if params:
                    p["params"] = params
                if session_id:
                    p["sessionId"] = session_id
                await ws.send(json.dumps(p))

                async def read_reply():
                    # Events and unrelated replies are skipped; only the
                    # message carrying our id is returned.
                    async for raw in ws:
                        m = json.loads(raw)
                        if m.get("id") == cid:
                            return m
                    # The iterator ends quietly on a clean close; fail
                    # loudly instead of handing None back to the caller.
                    raise RuntimeError(
                        f"CDP socket closed before a reply to {method} arrived"
                    )

                return await asyncio.wait_for(read_reply(), timeout=reply_timeout)

            def result_of(reply, method):
                # CDP failures carry "error" instead of "result".
                if "result" not in reply:
                    raise RuntimeError(
                        f"CDP {method} failed: {reply.get('error')!r}"
                    )
                return reply["result"]

            targets = result_of(
                await call("Target.getTargets"), "Target.getTargets"
            )["targetInfos"]
            page = next((t for t in targets if t.get("type") == "page"), None)
            if page is None:
                raise RuntimeError("no page target available to attach to")
            attach = await call(
                "Target.attachToTarget", {"targetId": page["targetId"], "flatten": True}
            )
            sid = result_of(attach, "Target.attachToTarget")["sessionId"]
            await call("Page.navigate", {"url": _test_page_url()}, session_id=sid)
            # Fixed pause between navigating and evaluating.
            await asyncio.sleep(1.5)
            await call(
                "Runtime.evaluate",
                {"expression": expression, "returnByValue": True},
                session_id=sid,
            )

    asyncio.run(run())
|
|
|
|
@pytest.fixture
def supervisor_registry():
    """Hand the global supervisor registry to a test.

    Once the test body has finished, ``stop_all()`` is called on the
    registry so no supervisor started by the test is left running.
    """
    import tools.browser_supervisor as supervisor_module

    registry = supervisor_module.SUPERVISOR_REGISTRY
    yield registry
    registry.stop_all()
|
|
|
|
def _wait_for_dialog(supervisor, timeout: float = 5.0):
    """Poll ``supervisor.snapshot()`` until it reports pending dialogs.

    Returns the snapshot's ``pending_dialogs`` as soon as it is truthy, or
    an empty tuple once ``timeout`` seconds have elapsed. The snapshot is
    re-read every 0.1 s.
    """
    stop_at = time.monotonic() + timeout
    while True:
        if time.monotonic() >= stop_at:
            return ()
        pending = supervisor.snapshot().pending_dialogs
        if pending:
            return pending
        time.sleep(0.1)
|
|
|
|
def test_supervisor_start_and_snapshot(chrome_cdp, supervisor_registry):
    """A started supervisor reports an active, dialog-free snapshot with a top frame."""
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-1", cdp_url=endpoint)

    # Load the test page with a no-op expression: a page is navigated but
    # no dialog is raised.
    _fire_on_page(endpoint, "/* no dialog */ void 0")

    # Fixed pause before reading the snapshot.
    time.sleep(1.0)
    state = sv.snapshot()
    assert state.active is True
    assert state.task_id == "pytest-1"
    assert state.pending_dialogs == ()

    top_frame = state.frame_tree.get("top")
    assert top_frame is not None
|
|
|
|
def test_main_frame_alert_detection_and_dismiss(chrome_cdp, supervisor_registry):
    """A main-frame alert() is captured and can be dismissed via respond_to_dialog."""
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-2", cdp_url=endpoint)

    _fire_on_page(endpoint, "setTimeout(() => alert('PYTEST-MAIN-ALERT'), 50)")
    pending = _wait_for_dialog(sv)
    assert pending, "no dialog detected"
    first = pending[0]
    assert first.type == "alert"
    assert "PYTEST-MAIN-ALERT" in first.message

    outcome = sv.respond_to_dialog("dismiss")
    assert outcome["ok"] is True

    # Short fixed pause, then the dialog must no longer be pending.
    time.sleep(0.3)
    remaining = sv.snapshot().pending_dialogs
    assert remaining == ()
|
|
|
|
def test_iframe_contentwindow_alert(chrome_cdp, supervisor_registry):
    """An alert() raised through the ``#inner`` iframe's contentWindow is captured too."""
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-3", cdp_url=endpoint)

    script = "setTimeout(() => document.querySelector('#inner').contentWindow.alert('PYTEST-IFRAME'), 50)"
    _fire_on_page(endpoint, script)

    pending = _wait_for_dialog(sv)
    assert pending, "no iframe dialog detected"
    messages = (dialog.message for dialog in pending)
    assert any("PYTEST-IFRAME" in text for text in messages)

    outcome = sv.respond_to_dialog("accept")
    assert outcome["ok"] is True
|
|
|
|
def test_prompt_dialog_with_response_text(chrome_cdp, supervisor_registry):
    """A prompt() dialog is captured with its default text and accepts a reply.

    Only the supervisor side is checked here: the dialog's type and
    default value, and that ``respond_to_dialog`` reports ``ok``.
    """
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-4", cdp_url=endpoint)

    # The page stores prompt()'s return value in window.__promptResult.
    script = "setTimeout(() => { window.__promptResult = prompt('give me a token', 'default-x'); }, 50)"
    _fire_on_page(endpoint, script)

    pending = _wait_for_dialog(sv)
    assert pending
    first = pending[0]
    assert first.type == "prompt"
    assert first.default_prompt == "default-x"

    outcome = sv.respond_to_dialog("accept", prompt_text="PYTEST-PROMPT-REPLY")
    assert outcome["ok"] is True
|
|
|
|
def test_respond_with_no_pending_dialog_errors_cleanly(chrome_cdp, supervisor_registry):
    """respond_to_dialog with nothing pending returns an error dict instead of raising."""
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-5", cdp_url=endpoint)

    outcome = sv.respond_to_dialog("accept")
    assert outcome["ok"] is False

    error_text = outcome["error"].lower()
    assert "no dialog" in error_text
|
|
|
|
def test_auto_dismiss_policy(chrome_cdp, supervisor_registry):
    """Under the auto-dismiss policy no dialog stays pending without a response."""
    from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS as auto_dismiss

    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(
        task_id="pytest-6",
        cdp_url=endpoint,
        dialog_policy=auto_dismiss,
    )

    _fire_on_page(endpoint, "setTimeout(() => alert('PYTEST-AUTO-DISMISS'), 50)")

    # Fixed pause; the test never responds to the dialog itself.
    time.sleep(2.0)
    still_pending = sv.snapshot().pending_dialogs
    assert still_pending == ()
|
|
|
|
def test_registry_idempotent_get_or_start(chrome_cdp, supervisor_registry):
    """Two get_or_start calls with the same task id and URL yield one instance."""
    endpoint, _ = chrome_cdp
    request = {"task_id": "pytest-idem", "cdp_url": endpoint}

    first = supervisor_registry.get_or_start(**request)
    second = supervisor_registry.get_or_start(**request)
    assert first is second
|
|
|
|
def test_registry_stop(chrome_cdp, supervisor_registry):
    """After registry.stop(task_id) the supervisor's snapshot reports inactive."""
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-stop", cdp_url=endpoint)

    before = sv.snapshot()
    assert before.active is True

    supervisor_registry.stop("pytest-stop")

    # The same supervisor object is queried again after the stop.
    after = sv.snapshot()
    assert after.active is False
|
|
|
|
def test_browser_dialog_tool_no_supervisor():
    """browser_dialog reports a clear error for a task with no supervisor attached."""
    from tools.browser_dialog_tool import browser_dialog

    raw = browser_dialog(action="accept", task_id="nonexistent-task")
    payload = json.loads(raw)

    assert payload["success"] is False
    assert "No CDP supervisor" in payload["error"]
|
|
|
|
def test_browser_dialog_invalid_action(chrome_cdp, supervisor_registry):
    """browser_dialog refuses an action other than accept/dismiss and names both."""
    from tools.browser_dialog_tool import browser_dialog

    endpoint, _ = chrome_cdp
    supervisor_registry.get_or_start(task_id="pytest-bad-action", cdp_url=endpoint)

    raw = browser_dialog(action="eat", task_id="pytest-bad-action")
    payload = json.loads(raw)

    assert payload["success"] is False
    # The error text must mention both valid actions.
    assert "accept" in payload["error"] and "dismiss" in payload["error"]
|
|
|
|
def test_recent_dialogs_ring_buffer(chrome_cdp, supervisor_registry):
    """Closed dialogs show up in recent_dialogs with a closed_by tag.

    Runs the supervisor under the auto-dismiss policy, fires an alert, and
    expects a matching ``recent_dialogs`` entry with ``type == "alert"``,
    ``closed_by == "auto_policy"`` and ``closed_at >= opened_at``.
    """
    from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS

    cdp_url, _port = chrome_cdp
    sv = supervisor_registry.get_or_start(
        task_id="pytest-recent",
        cdp_url=cdp_url,
        dialog_policy=DIALOG_POLICY_AUTO_DISMISS,
    )

    _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-RECENT'), 50)")

    # Poll for up to 5 s. The monotonic clock is used, like the other
    # polling loops in this module, so wall-clock adjustments cannot
    # stretch or shorten the wait.
    deadline = time.monotonic() + 5
    while time.monotonic() < deadline:
        recent = sv.snapshot().recent_dialogs
        if recent and any("PYTEST-RECENT" in r.message for r in recent):
            break
        time.sleep(0.1)

    # Re-read once more so the assertions run on a fresh snapshot whether
    # the loop broke early or timed out.
    recent = sv.snapshot().recent_dialogs
    assert recent, "recent_dialogs should contain the auto-dismissed dialog"
    match = next((r for r in recent if "PYTEST-RECENT" in r.message), None)
    assert match is not None
    assert match.type == "alert"
    assert match.closed_by == "auto_policy"
    assert match.closed_at >= match.opened_at
|
|
|
|
def test_browser_dialog_tool_end_to_end(chrome_cdp, supervisor_registry):
    """Fire an alert, then dismiss it by calling the browser_dialog handler directly."""
    from tools.browser_dialog_tool import browser_dialog

    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-tool", cdp_url=endpoint)

    _fire_on_page(endpoint, "setTimeout(() => alert('PYTEST-TOOL-END2END'), 50)")
    captured = _wait_for_dialog(sv)
    assert captured, "no dialog detected via wait_for_dialog"

    raw = browser_dialog(action="dismiss", task_id="pytest-tool")
    payload = json.loads(raw)
    assert payload["success"] is True
    assert payload["action"] == "dismiss"
    # The tool's reply echoes the dialog it acted on.
    assert "PYTEST-TOOL-END2END" in payload["dialog"]["message"]
|
|
|
|
def test_browser_cdp_frame_id_routes_via_supervisor(chrome_cdp, supervisor_registry, monkeypatch):
    """browser_cdp(frame_id=...) resolves the frame through the supervisor.

    A fake ``FrameInfo`` flagged ``is_oopif`` and bound to the page's CDP
    session id is injected into the supervisor's frame table. The test
    then calls ``browser_cdp`` with that frame id and expects the reply to
    report success, echo the frame id and session id, and evaluate
    ``1 + 1`` to ``2``. Per the test's name this is meant to cover routing
    through the supervisor rather than through a separate connection;
    that internal path is not observable from this test alone.
    """
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="frame-id-test", cdp_url=endpoint)
    assert sv.snapshot().active

    from tools.browser_supervisor import FrameInfo

    fake_frame_id = "FAKE-FRAME-001"
    # Mutate the supervisor's private frame table under its own lock.
    with sv._state_lock:
        injected = FrameInfo(
            frame_id=fake_frame_id,
            url="fake://",
            origin="",
            parent_frame_id=None,
            is_oopif=True,
            cdp_session_id=sv._page_session_id,
        )
        sv._frames[fake_frame_id] = injected

    from tools.browser_cdp_tool import browser_cdp

    raw = browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1 + 1", "returnByValue": True},
        frame_id=fake_frame_id,
        task_id="frame-id-test",
    )
    r = json.loads(raw)
    assert r.get("success") is True, f"expected success, got: {r}"
    assert r.get("frame_id") == fake_frame_id
    assert r.get("session_id") == sv._page_session_id

    # Runtime.evaluate nests the value as result -> result -> value.
    evaluation = r.get("result", {}).get("result", {})
    value = evaluation.get("value")
    assert value == 2, f"expected 2, got {value!r}"
|
|
|
|
def test_browser_cdp_frame_id_real_oopif_smoke_documented():
    """Record that real-OOPIF E2E was verified by hand — see PR #14540.

    This test always skips. Per the author's notes, a pytest version of
    the check hits an asyncio version quirk in the venv (3.11) that does
    not appear in standalone scripts (3.13 + system websockets). The
    mechanism was instead verified end-to-end by two smoke scripts in
    /tmp/dialog-iframe-test/:

    * smoke_local_oopif.py — local Chrome + 2 http servers on different
      hostnames + --site-per-process. Outer page on localhost:18905,
      iframe src=http://127.0.0.1:18906. Calls
      browser_cdp(method='Runtime.evaluate', frame_id=<OOPIF>) and
      verifies the inner page's title comes back from the OOPIF session.
      PASSED on 2026-04-23: iframe document.title = 'INNER-FRAME-XYZ'

    * smoke_bb_iframe_agent_path.py — Browserbase + real cross-origin
      iframe (src=https://example.com/). Same browser_cdp(frame_id=)
      path. PASSED on 2026-04-23: iframe document.title =
      'Example Domain'

    The supervisor-routing plumbing itself is covered, with a fake
    injected OOPIF, by test_browser_cdp_frame_id_routes_via_supervisor.
    """
    skip_reason = (
        "Real-OOPIF E2E verified manually with smoke_local_oopif.py and "
        "smoke_bb_iframe_agent_path.py — pytest version hits an asyncio "
        "version quirk between venv (3.11) and standalone (3.13). "
        "Smoke logs preserved in /tmp/dialog-iframe-test/."
    )
    pytest.skip(skip_reason)
|
|
|
|
def test_browser_cdp_frame_id_missing_supervisor():
    """browser_cdp(frame_id=...) fails cleanly for a task that has no supervisor."""
    from tools.browser_cdp_tool import browser_cdp

    raw = browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1"},
        frame_id="any-frame-id",
        task_id="no-such-task",
    )
    payload = json.loads(raw)
    assert payload.get("success") is not True

    # A missing or None error is treated as an empty string.
    error_text = (payload.get("error") or "").lower()
    assert "supervisor" in error_text
|
|
|
|
def test_browser_cdp_frame_id_not_in_frame_tree(chrome_cdp, supervisor_registry):
    """browser_cdp(frame_id=...) reports "not found" for an unknown frame id."""
    endpoint, _ = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="bad-frame-test", cdp_url=endpoint)
    assert sv.snapshot().active

    from tools.browser_cdp_tool import browser_cdp

    raw = browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1"},
        frame_id="nonexistent-frame",
        task_id="bad-frame-test",
    )
    payload = json.loads(raw)
    assert payload.get("success") is not True

    # A missing or None error is treated as an empty string.
    error_text = (payload.get("error") or "").lower()
    assert "not found" in error_text
|
|
|
|
def test_bridge_captures_prompt_and_returns_reply_text(chrome_cdp, supervisor_registry):
    """End-to-end: agent's prompt_text round-trips INTO the page's JS.

    Proves the bridge isn't just catching dialogs — it's properly round-
    tripping our reply back into the page via Fetch.fulfillRequest, so
    ``prompt()`` actually returns the agent-supplied string to the page.

    Flow: navigate to a page whose script stores ``prompt()``'s return
    value in ``window.__ret``; wait for the supervisor to report the
    dialog (which must carry a ``bridge_request_id``); answer it with
    ``prompt_text``; read ``window.__ret`` back and compare.
    """
    import websockets as _ws_mod

    cdp_url, _port = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-bridge-prompt", cdp_url=cdp_url)

    # Page under test: calls prompt() 50 ms after load and keeps the answer.
    html = """<!doctype html><html><body><script>
window.__ret = null;
setTimeout(() => { window.__ret = prompt('PROMPT-MSG', 'default'); }, 50);
</script></body></html>"""
    url = "data:text/html;base64," + base64.b64encode(html.encode()).decode()

    async def nav_and_read():
        async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws:
            loop = asyncio.get_running_loop()
            next_id = 0
            pending: dict = {}

            async def reader_fn():
                # Resolve the future registered for each reply id; messages
                # without an "id" (events) are ignored.
                try:
                    async for raw in ws:
                        m = json.loads(raw)
                        if "id" in m:
                            fut = pending.pop(m["id"], None)
                            if fut and not fut.done():
                                fut.set_result(m)
                except Exception:
                    # Best effort: if the socket breaks, waiters time out
                    # in call() rather than this task failing.
                    pass

            rd = asyncio.create_task(reader_fn())

            async def send(method, params=None, sid=None):
                """Send one CDP request and return the future for its reply."""
                nonlocal next_id
                next_id += 1
                p = {"id": next_id, "method": method}
                if params:
                    p["params"] = params
                if sid:
                    p["sessionId"] = sid
                fut = loop.create_future()
                # Register before sending so the reader can never see the
                # reply ahead of the future.
                pending[next_id] = fut
                await ws.send(json.dumps(p))
                return fut

            async def call(method, params=None, sid=None):
                """Send one CDP request and wait up to 20 s for its reply."""
                return await asyncio.wait_for(await send(method, params, sid), timeout=20)

            try:
                t = (await call("Target.getTargets"))["result"]["targetInfos"]
                pg = next(x for x in t if x.get("type") == "page")
                a = await call("Target.attachToTarget", {"targetId": pg["targetId"], "flatten": True})
                sid = a["result"]["sessionId"]

                # Send Page.navigate without awaiting its reply, so the
                # dialog can be polled for while navigation is in flight.
                nav_fut = await send("Page.navigate", {"url": url}, sid)

                # Wait up to 10 s for the supervisor to report the dialog.
                deadline = time.monotonic() + 10
                dialog = None
                while time.monotonic() < deadline:
                    snap = sv.snapshot()
                    if snap.pending_dialogs:
                        dialog = snap.pending_dialogs[0]
                        break
                    await asyncio.sleep(0.05)
                assert dialog is not None, "no dialog captured"
                assert dialog.bridge_request_id is not None, "expected bridge path"
                assert dialog.type == "prompt"

                # Answer through the supervisor's synchronous API.
                resp = sv.respond_to_dialog("accept", prompt_text="AGENT-SUPPLIED-REPLY")
                assert resp["ok"] is True

                # Give the navigate reply a chance to arrive; a timeout is
                # tolerated because only window.__ret matters below.
                try:
                    await asyncio.wait_for(nav_fut, timeout=10)
                except asyncio.TimeoutError:
                    pass
                await asyncio.sleep(0.5)
                r = await call(
                    "Runtime.evaluate",
                    {"expression": "window.__ret", "returnByValue": True},
                    sid=sid,
                )
                return r.get("result", {}).get("result", {}).get("value")
            finally:
                # Stop the reader and absorb the cancellation it raises.
                rd.cancel()
                try:
                    await rd
                except asyncio.CancelledError:
                    pass

    value = asyncio.run(nav_and_read())
    assert value == "AGENT-SUPPLIED-REPLY", f"expected AGENT-SUPPLIED-REPLY, got {value!r}"
|
|