Spaces:
Sleeping
Sleeping
File size: 4,137 Bytes
c39ecaf | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | """
Accessibility tree extraction from Playwright pages using Chrome DevTools Protocol.
Converts the browser DOM into a numbered text representation that LLM agents
can reason about and reference by element ID.
Example output:
[1] heading "My Todo List"
[2] textbox "Add a new todo..." value=""
[3] button "Add"
[4] checkbox "Buy groceries" checked=false
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
from playwright.async_api import Page
# Roles that AccessibilityTree.extract drops unconditionally: nodes with one
# of these roles never receive an element ID and never appear in the output.
SKIP_ROLES = frozenset(
    (
        "none",
        "generic",
        "RootWebArea",
        "LineBreak",
        "InlineTextBox",
        "StaticText",
        "paragraph",
        "MenuListPopup",
        "group",
    )
)

# Roles that AccessibilityTree.extract keeps even when the node has no
# accessible name; nameless nodes with any other role are dropped.
INTERACTIVE_ROLES = frozenset(
    (
        "button",
        "link",
        "textbox",
        "checkbox",
        "radio",
        "combobox",
        "searchbox",
        "option",
        "tab",
        "menuitem",
        "switch",
        "slider",
        "spinbutton",
    )
)
@dataclass
class AccessibilityNode:
    """One element kept from the accessibility tree.

    Only ``id``, ``role`` and ``name`` are required. Every other field
    defaults to ``None``, meaning the attribute was not reported for the node.
    """

    # Identity: the numeric element ID plus the role and accessible name.
    id: int
    role: str
    name: str

    # Current value of the element, when one was reported.
    value: Optional[str] = None
    # Stored as a string (not a bool) and rendered verbatim as ``checked=...``.
    checked: Optional[str] = None

    # Boolean states; ``None`` is distinct from ``False``.
    selected: Optional[bool] = None
    expanded: Optional[bool] = None
    disabled: Optional[bool] = None
    focused: Optional[bool] = None

    # Numeric level, rendered as ``level=N`` when present.
    level: Optional[int] = None
class AccessibilityTree:
    """Extract a page's accessibility tree and map element IDs to nodes.

    Each call to :meth:`extract` rebuilds the ID -> node mapping from scratch,
    so IDs returned by an earlier extraction are no longer valid afterwards.
    """

    def __init__(self):
        self._nodes: dict[int, AccessibilityNode] = {}
        self._counter = 0

    def clear(self):
        """Forget all known nodes and restart ID numbering."""
        self._nodes.clear()
        self._counter = 0

    def get_node(self, element_id: int) -> Optional[AccessibilityNode]:
        """Return the node with the given ID from the last extraction, or None."""
        return self._nodes.get(element_id)

    async def extract(self, page: Page) -> str:
        """Extract accessibility tree from page via CDP and return text representation.

        Opens a CDP session on ``page``, requests the full accessibility tree,
        and always detaches the session again. Nodes whose role is in
        ``SKIP_ROLES`` are dropped, as are nameless nodes whose role is not in
        ``INTERACTIVE_ROLES``. Every remaining node gets the next sequential
        ID (starting at 1) and one rendered line.

        Returns:
            The rendered lines joined by newlines, or ``"[empty page]"`` when
            no node was kept.
        """
        self.clear()
        cdp = await page.context.new_cdp_session(page)
        try:
            result = await cdp.send("Accessibility.getFullAXTree")
        finally:
            await cdp.detach()

        lines = []
        for raw in result.get("nodes", []):
            role = raw.get("role", {}).get("value", "")
            if role in SKIP_ROLES:
                continue
            name = raw.get("name", {}).get("value", "")
            # Skip nodes with no name and non-interactive roles
            if not name and role not in INTERACTIVE_ROLES:
                continue

            props = self._collect_props(raw)
            self._counter += 1
            node = AccessibilityNode(
                id=self._counter,
                role=role,
                name=name,
                value=props.get("value"),
                checked=props.get("checked"),
                selected=props.get("selected"),
                expanded=props.get("expanded"),
                disabled=props.get("disabled"),
                focused=props.get("focused"),
                level=props.get("level"),
            )
            self._nodes[node.id] = node
            lines.append(self._render_node(node))
        return "\n".join(lines) if lines else "[empty page]"

    @staticmethod
    def _collect_props(raw: dict) -> dict:
        """Flatten a raw CDP AXNode's scalar attributes into ``{name: value}``.

        Entries of ``raw["properties"]`` that carry no plain ``value`` (for
        example related-node lists) are left out. The node's own value lives
        in the top-level ``value`` field of the AXNode rather than in
        ``properties``, so it is merged in under the ``"value"`` key; without
        this the element's value would never be picked up.
        """
        props = {}
        for prop in raw.get("properties", []):
            payload = prop.get("value", {})
            if "value" in payload:
                props[prop["name"]] = payload["value"]

        own_value = raw.get("value", {})
        if "value" in own_value:
            # setdefault: an explicit "value" property, if any, keeps priority.
            props.setdefault("value", own_value["value"])
        return props

    def _render_node(self, node: AccessibilityNode) -> str:
        """Render a single node as text.

        Format: ``[id] role "name" value="..." checked=... selected=...
        expanded=... disabled focused level=N``, where every part after the
        role is emitted only when the corresponding attribute is set.
        """
        parts = [f"[{node.id}] {node.role}"]
        if node.name:
            parts.append(f'"{node.name}"')
        if node.value is not None:
            parts.append(f'value="{node.value}"')
        if node.checked is not None:
            parts.append(f"checked={node.checked}")
        if node.selected is not None:
            parts.append(f"selected={str(node.selected).lower()}")
        if node.expanded is not None:
            parts.append(f"expanded={str(node.expanded).lower()}")
        # disabled/focused are bare flags: shown only when true.
        if node.disabled:
            parts.append("disabled")
        if node.focused:
            parts.append("focused")
        if node.level is not None:
            parts.append(f"level={node.level}")
        return " ".join(parts)
|