Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
examples/tools/browsers/util/dom.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Optional, Dict, List
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Coordinates(BaseModel):
    """A single 2-D point with integer components."""

    # Horizontal component of the point.
    x: int
    # Vertical component of the point.
    y: int
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CoordinateSet(BaseModel):
    """The four corners, the center and the size of a rectangular region.

    NOTE(review): nothing in this file validates that the corner points,
    ``center``, ``width`` and ``height`` are mutually consistent; they are
    stored exactly as supplied.
    """

    # Corner points of the rectangle.
    top_left: Coordinates
    top_right: Coordinates
    bottom_left: Coordinates
    bottom_right: Coordinates
    # Center point of the rectangle.
    center: Coordinates
    # Extent of the rectangle.
    width: int
    height: int
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class ViewportInfo(BaseModel):
    """Integer width and height describing a viewport."""

    # Viewport extent; presumably pixels as reported by the page -- TODO confirm.
    width: int
    height: int
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class HashedDomElement:
    """
    Triple of hashes that together act as a unique identifier for a DOM element.
    """

    # Hash derived from the element's branch path.
    branch_path_hash: str
    # Hash derived from the element's attributes.
    attributes_hash: str
    # Hash derived from the element's xpath.
    xpath_hash: str
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@dataclass(frozen=False)
class DOMBaseNode:
    """Fields shared by every node of the DOM tree."""

    # Whether the node is visible.
    is_visible: bool
    # Enclosing element node. Typically created as None and assigned once the
    # parent node exists, which avoids circular-reference issues at build time.
    parent: Optional['DOMElementNode']
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass(frozen=False)
class DOMTextNode(DOMBaseNode):
    """A text node of the DOM tree; carries the text and helpers that inspect its ancestors."""

    # Text content of the node.
    text: str
    # Node-type discriminator.
    type: str = 'TEXT_NODE'

    def has_parent_with_highlight_index(self) -> bool:
        """Return True if any ancestor element has a non-None ``highlight_index``."""
        ancestor = self.parent
        while ancestor is not None:
            # A highlighted ancestor means this text is handled together with that element.
            if ancestor.highlight_index is not None:
                return True
            ancestor = ancestor.parent
        return False

    def is_parent_in_viewport(self) -> bool:
        """Return the parent's ``is_in_viewport`` flag, or False when there is no parent."""
        parent = self.parent
        return False if parent is None else parent.is_in_viewport

    def is_parent_top_element(self) -> bool:
        """Return the parent's ``is_top_element`` flag, or False when there is no parent."""
        parent = self.parent
        return False if parent is None else parent.is_top_element
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@dataclass(frozen=False)
class DOMElementNode(DOMBaseNode):
    """
    An element node of the DOM tree.

    xpath: the xpath of the element from the last root node (shadow root or iframe OR document if no shadow root or iframe).
    To properly reference the element we need to recursively switch the root node until we find the element (work your way up the tree with `.parent`)
    """

    tag_name: str
    xpath: str
    attributes: Dict[str, str]
    # Child nodes (elements and text nodes) in document order as appended by the builder.
    children: List[DOMBaseNode]
    is_interactive: bool = False
    is_top_element: bool = False
    is_in_viewport: bool = False
    shadow_root: bool = False
    # Index assigned to highlighted (clickable) elements; None when not highlighted.
    highlight_index: Optional[int] = None
    viewport_coordinates: Optional[CoordinateSet] = None
    page_coordinates: Optional[CoordinateSet] = None
    viewport_info: Optional[ViewportInfo] = None

    def __repr__(self) -> str:
        """Return a compact tag-like representation followed by the node's flags.

        Defined explicitly so the dataclass-generated ``__repr__`` (which
        would walk ``parent`` and ``children``) is not used.
        """
        attrs = ''.join(f' {key}="{value}"' for key, value in self.attributes.items())
        tag_str = f'<{self.tag_name}{attrs}>'

        # Add extra info
        extras = []
        if self.is_interactive:
            extras.append('interactive')
        if self.is_top_element:
            extras.append('top')
        if self.shadow_root:
            extras.append('shadow-root')
        if self.highlight_index is not None:
            extras.append(f'highlight:{self.highlight_index}')
        if self.is_in_viewport:
            extras.append('in-viewport')

        if extras:
            tag_str += f' [{", ".join(extras)}]'

        return tag_str

    def get_all_text_till_next_clickable_element(self, max_depth: int = -1) -> str:
        """Collect the text below this element, stopping at nested highlighted elements.

        Args:
            max_depth: Maximum depth (relative to this element, which is depth 0)
                to descend; ``-1`` means unlimited.

        Returns:
            The collected text node contents joined with newlines and stripped.
        """
        text_parts: List[str] = []

        def collect_text(node: DOMBaseNode, current_depth: int) -> None:
            if max_depth != -1 and current_depth > max_depth:
                return

            if isinstance(node, DOMTextNode):
                text_parts.append(node.text)
                return

            if not isinstance(node, DOMElementNode):
                return

            # Skip this branch if we hit a highlighted element (except for the current node).
            # Identity is used on purpose: the dataclass-generated __eq__ compares every
            # field (including parent/children) and is not what "the current node" means.
            if node is not self and node.highlight_index is not None:
                return

            for child in node.children:
                collect_text(child, current_depth + 1)

        collect_text(self, 0)
        return '\n'.join(text_parts).strip()

    def clickable_elements_to_string(self, include_attributes: list[str] | None = None) -> str:
        """Render the highlighted elements and free-standing visible text as lines of text.

        Each highlighted element becomes ``[index]<tag attrs>text/>``; visible text
        nodes without a highlighted ancestor are emitted verbatim.

        Args:
            include_attributes: Attribute names whose values should be included for
                highlighted elements; ``None`` or empty includes no attributes.

        Returns:
            The rendered lines joined with newlines.
        """
        formatted_text: List[str] = []

        def process_node(node: DOMBaseNode) -> None:
            if isinstance(node, DOMElementNode):
                # Add element with highlight_index
                if node.highlight_index is not None:
                    attributes_str = ''
                    text = node.get_all_text_till_next_clickable_element()
                    if include_attributes:
                        # De-duplicate while preserving attribute order so the output
                        # is deterministic (a set would vary with hash randomisation).
                        attributes = list(
                            dict.fromkeys(
                                str(value)
                                for key, value in node.attributes.items()
                                if key in include_attributes and value != node.tag_name
                            )
                        )
                        # Do not repeat the element text as an attribute value.
                        if text in attributes:
                            attributes.remove(text)
                        attributes_str = ';'.join(attributes)

                    line = f'[{node.highlight_index}]<{node.tag_name} '
                    if attributes_str:
                        line += f'{attributes_str}'
                    if text:
                        if attributes_str:
                            line += f'>{text}'
                        else:
                            line += f'{text}'
                    line += '/>'
                    formatted_text.append(line)

                # Process children regardless
                for child in node.children:
                    process_node(child)

            elif isinstance(node, DOMTextNode):
                # Add text only if it doesn't have a highlighted parent
                if not node.has_parent_with_highlight_index() and node.is_visible:
                    formatted_text.append(f'{node.text}')

        process_node(self)
        return '\n'.join(formatted_text)

    def get_file_upload_element(self, check_siblings: bool = True) -> Optional['DOMElementNode']:
        """Find an ``<input type="file">`` at this element, below it, or among its siblings.

        Args:
            check_siblings: When True, the sibling subtrees are searched too if nothing
                is found at or below this element. Recursive calls pass False.

        Returns:
            The first matching element found, or ``None``.
        """
        # Check if current element is a file input
        if self.tag_name == 'input' and self.attributes.get('type') == 'file':
            return self

        # Check children
        for child in self.children:
            if isinstance(child, DOMElementNode):
                found = child.get_file_upload_element(check_siblings=False)
                if found is not None:
                    return found

        # Check siblings only for the initial call
        if check_siblings and self.parent is not None:
            for sibling in self.parent.children:
                if sibling is not self and isinstance(sibling, DOMElementNode):
                    found = sibling.get_file_upload_element(check_siblings=False)
                    if found is not None:
                        return found

        return None
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
class DomTree(BaseModel):
    """Pairs a DOM element tree with an integer-keyed lookup of its elements."""

    # Root element of the tree.
    element_tree: DOMElementNode
    # Elements addressable by integer key; presumably the highlight index -- TODO confirm.
    element_map: Dict[int, DOMElementNode]
|
examples/tools/browsers/util/dom_build.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
|
| 3 |
+
# Derived from browser_use's DomService; used here as a utility that supports both sync and async execution.
|
| 4 |
+
|
| 5 |
+
import gc
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
from typing import Dict, Any, Tuple, Optional
|
| 9 |
+
|
| 10 |
+
from aworld.utils.async_func import async_func
|
| 11 |
+
from examples.tools.browsers.util.dom import DOMElementNode, DOMBaseNode, DOMTextNode, ViewportInfo
|
| 12 |
+
from aworld.logs.util import logger
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
async def async_build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """Evaluate ``js_code`` on an async ``page`` and build the DOM tree from its result.

    Args:
        page: Object exposing an awaitable ``evaluate`` method.
        js_code: JavaScript source passed to ``page.evaluate``.
        args: Argument mapping forwarded to the script; ``debugMode`` enables
            logging of the returned ``perfMetrics``.

    Returns:
        Whatever ``_construct_dom_tree`` returns for the evaluation result.

    Raises:
        ValueError: If the page does not evaluate ``1+1`` to ``2``.
        Exception: Any error raised while evaluating ``js_code`` is logged and re-raised.
    """
    # Sanity check that the page can run JavaScript at all.
    sanity = await page.evaluate('1+1')
    if sanity != 2:
        raise ValueError('The page cannot evaluate javascript code properly')

    # NOTE: The JS code runs in the browser and returns a hash map describing
    # the DOM tree and the relationship between the DOM elements.
    try:
        eval_page = await page.evaluate(js_code, args)
    except Exception as e:
        logger.error('Error evaluating JavaScript: %s', e)
        raise

    # Performance metrics are only logged in debug mode.
    debug_enabled = args.get("debugMode")
    if debug_enabled and 'perfMetrics' in eval_page:
        metrics_json = json.dumps(eval_page['perfMetrics'], indent=2)
        logger.debug('DOM Tree Building Performance Metrics:\n%s', metrics_json)

    # async_func presumably wraps the sync builder into an awaitable -- TODO confirm.
    construct = async_func(_construct_dom_tree)
    return await construct(eval_page)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """Evaluate ``js_code`` on a synchronous ``page`` and build the DOM tree from its result.

    Args:
        page: Object exposing a blocking ``evaluate`` method.
        js_code: JavaScript source passed to ``page.evaluate``.
        args: Argument mapping forwarded to the script; ``debugMode`` enables
            logging of the returned ``perfMetrics``.

    Returns:
        Whatever ``_construct_dom_tree`` returns for the evaluation result.

    Raises:
        ValueError: If the page does not evaluate ``1+1`` to ``2``.
        Exception: Any error raised while evaluating ``js_code`` is logged and re-raised.
    """
    # Sanity check that the page can run JavaScript at all.
    sanity = page.evaluate('1+1')
    if sanity != 2:
        raise ValueError('The page cannot evaluate javascript code properly')

    # NOTE: The JS code runs in the browser and returns a hash map describing
    # the DOM tree and the relationship between the DOM elements.
    try:
        eval_page = page.evaluate(js_code, args)
    except Exception as e:
        logger.error('Error evaluating JavaScript: %s', e)
        raise

    # Performance metrics are only logged in debug mode.
    debug_enabled = args.get("debugMode")
    if debug_enabled and 'perfMetrics' in eval_page:
        metrics_json = json.dumps(eval_page['perfMetrics'], indent=2)
        logger.debug('DOM Tree Building Performance Metrics:\n%s', metrics_json)

    return _construct_dom_tree(eval_page)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _construct_dom_tree(eval_page: dict) -> tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """Turn the JS evaluation result into a linked tree of DOM nodes.

    Args:
        eval_page: Mapping with a ``'map'`` entry (node id -> node data) and a
            ``'rootId'`` entry naming the root node.

    Returns:
        A ``(root, selector_map)`` tuple where ``selector_map`` maps each
        non-None ``highlight_index`` to its element node.

    Raises:
        KeyError: If ``eval_page`` lacks ``'map'`` or ``'rootId'``.
        ValueError: If the root node is missing from the parsed nodes or is
            not an element node.
    """
    js_node_map = eval_page['map']
    js_root_id = eval_page['rootId']

    selector_map = {}
    node_map = {}

    for node_id, node_data in js_node_map.items():
        node, children_ids = _parse_node(node_data)
        if node is None:
            continue

        node_map[node_id] = node

        # Only element nodes carry a highlight index or children.
        if not isinstance(node, DOMElementNode):
            continue

        if node.highlight_index is not None:
            selector_map[node.highlight_index] = node

        # NOTE: We know that we are building the tree bottom up
        #       and all children are already processed.
        #       Children that are not (yet) in node_map are skipped.
        for child_id in children_ids:
            child_node = node_map.get(child_id)
            if child_node is None:
                continue

            child_node.parent = node
            node.children.append(child_node)

    # .get() so a missing root falls through to the ValueError below instead of
    # surfacing as a bare KeyError.
    html_to_dict = node_map.get(str(js_root_id))

    # Drop the intermediate structures before returning.
    del node_map
    del js_node_map
    del js_root_id

    gc.collect()

    if html_to_dict is None or not isinstance(html_to_dict, DOMElementNode):
        raise ValueError('Failed to parse HTML to dictionary')

    return html_to_dict, selector_map
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _parse_node(node_data: dict, ) -> Tuple[Optional[DOMBaseNode], list[int]]:
    """Convert one raw node record into a DOM node plus the ids of its children.

    Args:
        node_data: Raw node mapping produced by the in-page script.

    Returns:
        ``(None, [])`` for empty input, ``(DOMTextNode, [])`` for records whose
        ``'type'`` is ``'TEXT_NODE'``, otherwise ``(DOMElementNode, children_ids)``
        where ``children_ids`` is the record's ``'children'`` entry (default ``[]``).
        The returned node has no parent and no children attached yet.

    Raises:
        KeyError: If a text record lacks ``'text'``/``'isVisible'`` or an element
            record lacks ``'tagName'``/``'xpath'``.
    """
    if not node_data:
        return None, []

    # Text nodes are leaves: build and return them right away.
    if node_data.get('type') == 'TEXT_NODE':
        leaf = DOMTextNode(
            text=node_data['text'],
            is_visible=node_data['isVisible'],
            parent=None,
        )
        return leaf, []

    # Viewport information is optional on element records.
    viewport_info = None
    if 'viewport' in node_data:
        viewport = node_data['viewport']
        viewport_info = ViewportInfo(
            width=viewport['width'],
            height=viewport['height'],
        )

    read = node_data.get
    element = DOMElementNode(
        tag_name=node_data['tagName'],
        xpath=node_data['xpath'],
        attributes=read('attributes', {}),
        children=[],
        is_visible=read('isVisible', False),
        is_interactive=read('isInteractive', False),
        is_top_element=read('isTopElement', False),
        is_in_viewport=read('isInViewport', False),
        highlight_index=read('highlightIndex'),
        shadow_root=read('shadowRoot', False),
        parent=None,
        viewport_info=viewport_info,
    )

    return element, read('children', [])
|