File size: 4,137 Bytes
c39ecaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
Accessibility tree extraction from Playwright pages using Chrome DevTools Protocol.

Converts the browser DOM into a numbered text representation that LLM agents
can reason about and reference by element ID.

Example output:
    [1] heading "My Todo List"
    [2] textbox "Add a new todo..." value=""
    [3] button "Add"
    [4] checkbox "Buy groceries" checked=false
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Optional

from playwright.async_api import Page


# Roles that never appear in the extracted tree: AccessibilityTree.extract
# skips every node whose role is listed here, whether or not it has a name.
SKIP_ROLES = frozenset({
    "none", "generic", "RootWebArea", "LineBreak",
    "InlineTextBox", "StaticText", "paragraph",
    "MenuListPopup", "group",
})

# Roles that AccessibilityTree.extract keeps even when the node has no
# accessible name; unnamed nodes with any other role are discarded.
INTERACTIVE_ROLES = frozenset({
    "button", "link", "textbox", "checkbox", "radio",
    "combobox", "searchbox", "option", "tab", "menuitem",
    "switch", "slider", "spinbutton",
})


@dataclass
class AccessibilityNode:
    """A node in the parsed accessibility tree.

    Only ``id``, ``role`` and ``name`` are required. Every state field
    defaults to ``None``, meaning the state was not reported for this node;
    the text renderer leaves such fields out of its output.
    """
    # Numeric element ID, shown as ``[id]`` in the rendered tree and used as
    # the lookup key in AccessibilityTree.get_node.
    id: int
    # Accessibility role string, e.g. "button" or "heading".
    role: str
    # Accessible name; may be an empty string for unnamed interactive nodes.
    name: str
    # Current value of the element, rendered as value="...".
    value: Optional[str] = None
    # Checked state, rendered verbatim as checked=<value>.
    # NOTE(review): presumably "true"/"false"/"mixed" as sent by the
    # browser -- confirm against the CDP payload.
    checked: Optional[str] = None
    # Rendered lower-cased as selected=true/false when not None.
    selected: Optional[bool] = None
    # Rendered lower-cased as expanded=true/false when not None.
    expanded: Optional[bool] = None
    # Rendered as the bare word "disabled" only when truthy.
    disabled: Optional[bool] = None
    # Rendered as the bare word "focused" only when truthy.
    focused: Optional[bool] = None
    # Rendered as level=<n>; presumably the heading/nesting level -- TODO confirm.
    level: Optional[int] = None


class AccessibilityTree:
    """Manages accessibility tree extraction and element ID mapping.

    Each call to :meth:`extract` discards the previous mapping and numbers the
    retained nodes from 1 in the order the browser reports them. Those numbers
    are the ``[n]`` prefixes in the rendered text and can be resolved back to
    the parsed node with :meth:`get_node`.
    """

    def __init__(self):
        # Element ID -> parsed node; valid until the next extract()/clear().
        self._nodes: dict[int, AccessibilityNode] = {}
        # Last ID handed out, so the first retained node gets ID 1.
        self._counter = 0

    def clear(self):
        """Forget every known node and restart ID numbering."""
        self._nodes.clear()
        self._counter = 0

    def get_node(self, element_id: int) -> Optional[AccessibilityNode]:
        """Return the node registered under ``element_id``, or ``None`` if unknown."""
        return self._nodes.get(element_id)

    @staticmethod
    def _ax_value(raw: dict, key: str, default=None):
        """Return the inner ``value`` of the AXValue stored at ``raw[key]``.

        ``default`` is returned when the field is missing, null, or carries no
        ``value`` entry.
        """
        return (raw.get(key) or {}).get("value", default)

    async def extract(self, page: Page) -> str:
        """Extract accessibility tree from page via CDP and return text representation.

        Args:
            page: Playwright page to inspect.

        Returns:
            One line per retained node, joined with newlines, or the literal
            ``"[empty page]"`` when no node is retained.

        Errors raised while talking to the browser propagate to the caller;
        the CDP session is detached whether or not the request succeeds.
        """
        self.clear()

        cdp = await page.context.new_cdp_session(page)
        try:
            result = await cdp.send("Accessibility.getFullAXTree")
        finally:
            await cdp.detach()

        lines = []
        for raw in result.get("nodes", []):
            # Nodes the browser marks as ignored are not part of the exposed
            # accessibility tree, so they must not receive an element ID.
            if raw.get("ignored"):
                continue

            role = self._ax_value(raw, "role", "")
            if role in SKIP_ROLES:
                continue

            # Skip nodes with no name and non-interactive roles
            name = self._ax_value(raw, "name", "")
            if not name and role not in INTERACTIVE_ROLES:
                continue

            props = {}
            for prop in raw.get("properties", []):
                prop_value = prop.get("value", {})
                if "value" in prop_value:
                    props[prop["name"]] = prop_value["value"]

            # CDP reports an element's value as a top-level AXNode field
            # (like role and name), not as an entry in ``properties``. The
            # property lookup is kept only as a fallback.
            value = self._ax_value(raw, "value")
            if value is None:
                value = props.get("value")

            self._counter += 1
            node = AccessibilityNode(
                id=self._counter,
                role=role,
                name=name,
                value=value,
                checked=props.get("checked"),
                selected=props.get("selected"),
                expanded=props.get("expanded"),
                disabled=props.get("disabled"),
                focused=props.get("focused"),
                level=props.get("level"),
            )
            self._nodes[node.id] = node
            lines.append(self._render_node(node))

        return "\n".join(lines) if lines else "[empty page]"

    def _render_node(self, node: AccessibilityNode) -> str:
        """Render a single node as text.

        The line has the form ``[id] role "name" key=value ... flag ...``.
        Fields that are ``None`` are omitted; ``disabled`` and ``focused``
        appear as bare words and only when truthy.
        """
        parts = [f"[{node.id}] {node.role}"]

        if node.name:
            parts.append(f'"{node.name}"')
        if node.value is not None:
            # An empty string is still shown so empty inputs stay visible.
            parts.append(f'value="{node.value}"')
        if node.checked is not None:
            parts.append(f"checked={node.checked}")
        if node.selected is not None:
            parts.append(f"selected={str(node.selected).lower()}")
        if node.expanded is not None:
            parts.append(f"expanded={str(node.expanded).lower()}")
        if node.disabled:
            parts.append("disabled")
        if node.focused:
            parts.append("focused")
        if node.level is not None:
            parts.append(f"level={node.level}")

        return " ".join(parts)