Spaces:
Sleeping
Sleeping
# @file purpose: Ultra-compact DOM serializer optimized for code-use agents.
# Focuses on minimal token usage while preserving essential interactive context.
| from browser_use.dom.utils import cap_text_length | |
| from browser_use.dom.views import ( | |
| EnhancedDOMTreeNode, | |
| NodeType, | |
| SimplifiedNode, | |
| ) | |
# Attribute whitelist for code agents: small on purpose, but enough to build
# selectors and understand form state. Order matters - attributes are emitted
# in the order listed here.
#   id / name / type   -> element selection and form-input identification
#   placeholder        -> hint for empty inputs
#   aria-label         -> label for buttons without text
#   value              -> current values
#   alt                -> image description
#   class              -> only the top 2 classes are kept for common selectors
CODE_USE_KEY_ATTRIBUTES = [
    'id', 'name', 'type',
    'placeholder',
    'aria-label',
    'value',
    'alt',
    'class',
]
# Tags the agent can act on directly (links, controls, and the forms that
# contain them).
INTERACTIVE_ELEMENTS = {
    'a', 'button',
    'input', 'textarea', 'select',
    'form',
}
# Semantic structure tags - expanded beyond pure landmarks to include generic
# content containers:
#   p / span -> often contain prices, labels and product info
#   div      -> shown when it carries useful attributes (id/class)
SEMANTIC_STRUCTURE = (
    {f'h{level}' for level in range(1, 7)}  # h1 .. h6
    | {'nav', 'main', 'header', 'footer', 'article', 'section'}
    | {'p', 'span', 'div'}
    | {'ul', 'ol', 'li'}
    | {'label', 'img'}
)
class DOMCodeAgentSerializer:
    """Optimized DOM serializer for code-use agents - balances token efficiency with context.

    Every method is a ``@staticmethod``: the class only namespaces the
    functions and holds no state, so they can be called either on the class or
    on an instance.

    Output format: one line per emitted element, ``<tag attr="...">text``,
    indented by ``_INDENT`` per nesting level. Closing tags are never emitted.
    """

    # Indentation unit per nesting level (two spaces instead of a tab, for compactness).
    _INDENT = '  '
    # Tags rendered as a bare marker followed by their embedded document.
    _FRAME_TAGS = ('iframe', 'frame')
    # Length limits handed to cap_text_length.
    _ATTR_VALUE_CAP = 25  # every attribute value
    _INLINE_TEXT_CAP = 40  # direct text of an element in the main tree
    _FRAME_TEXT_CAP = 25  # direct text of an element inside iframe content

    @staticmethod
    def serialize_tree(node: SimplifiedNode | None, include_attributes: list[str], depth: int = 0) -> str:
        """
        Serialize DOM tree with smart token optimization.

        Strategy:
        - Keep top 2 CSS classes for querySelector compatibility
        - Show div/span/p elements with useful attributes or text
        - Show all interactive + semantic elements
        - Inline direct text, capped at 40 chars

        Args:
            node: Root of the simplified subtree to render; a falsy value yields ''.
            include_attributes: Forwarded unchanged through the recursion. It is
                not consulted by this serializer - emitted attributes always
                come from CODE_USE_KEY_ATTRIBUTES.
            depth: Nesting level used to indent this node's line.

        Returns:
            Newline-joined lines for this node and its descendants, or '' when
            nothing is worth showing.
        """
        if not node:
            return ''

        serialize_children = DOMCodeAgentSerializer._serialize_children

        # Excluded / non-displayed nodes emit nothing themselves; their
        # descendants are rendered at this node's depth instead.
        if getattr(node, 'excluded_by_parent', False) or not node.should_display:
            return serialize_children(node, include_attributes, depth)

        dom_node = node.original_node
        indent = DOMCodeAgentSerializer._INDENT * depth
        lines: list[str] = []

        if dom_node.node_type == NodeType.ELEMENT_NODE:
            tag = dom_node.tag_name.lower()

            # Frames are always rendered (even when invisible) via the iframe path.
            if tag in DOMCodeAgentSerializer._FRAME_TAGS:
                return DOMCodeAgentSerializer._serialize_iframe(node, include_attributes, depth)

            # Any other invisible element is transparent: only its children may show.
            if not (dom_node.snapshot_node and dom_node.is_visible):
                return serialize_children(node, include_attributes, depth)

            attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(dom_node)
            has_useful_attrs = bool(attributes_str)
            has_text = DOMCodeAgentSerializer._has_direct_text(node)
            is_known_tag = tag in INTERACTIVE_ELEMENTS or tag in SEMANTIC_STRUCTURE

            # Skip non-semantic, non-interactive containers without attributes or text.
            if not (is_known_tag or has_useful_attrs or has_text):
                return serialize_children(node, include_attributes, depth)

            # Collapse pointless single-child div/span wrappers.
            if tag in {'div', 'span'} and not has_useful_attrs and not has_text and len(node.children) == 1:
                return serialize_children(node, include_attributes, depth)

            line = f'{indent}<{tag}'
            if attributes_str:
                line += f' {attributes_str}'
            # inline_text is '' when the element has no direct text.
            inline_text = DOMCodeAgentSerializer._get_inline_text(node)
            line += f'>{inline_text}'
            lines.append(line)

            # Always descend, even when inline text was emitted: text-node
            # children render as '' (their text is already on the parent line),
            # while element children - e.g. a <button> inside a text-bearing
            # <li> - must not be lost.
            children_text = serialize_children(node, include_attributes, depth + 1)
            if children_text:
                lines.append(children_text)

        elif dom_node.node_type == NodeType.TEXT_NODE:
            # Text is inlined on the parent element's line; nothing to emit here.
            pass

        elif dom_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
            # Shadow DOM - minimal marker, only when there is something below it.
            if node.children:
                lines.append(f'{indent}#shadow')
                children_text = serialize_children(node, include_attributes, depth + 1)
                if children_text:
                    lines.append(children_text)

        return '\n'.join(lines)

    @staticmethod
    def _serialize_children(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
        """Serialize every child of *node* at *depth* and join the non-empty results with newlines."""
        rendered = (
            DOMCodeAgentSerializer.serialize_tree(child, include_attributes, depth) for child in node.children
        )
        return '\n'.join(text for text in rendered if text)

    @staticmethod
    def _build_minimal_attributes(node: EnhancedDOMTreeNode) -> str:
        """Build minimal but useful attributes - keep top 2 classes for selectors.

        Only attributes listed in CODE_USE_KEY_ATTRIBUTES are considered, in
        that order. Values are stripped, empty values are dropped, ``class`` is
        reduced to its first two class names, and every value is passed through
        cap_text_length with a limit of 25.

        Returns:
            Space-separated ``name="value"`` pairs, or '' when none apply.
        """
        if not node.attributes:
            return ''

        pairs = []
        for attr in CODE_USE_KEY_ATTRIBUTES:
            if attr not in node.attributes:
                continue
            value = str(node.attributes[attr]).strip()
            if not value:
                continue
            if attr == 'class':
                # Keep only the first 2 classes.
                value = ' '.join(value.split()[:2])
            value = cap_text_length(value, DOMCodeAgentSerializer._ATTR_VALUE_CAP)
            pairs.append(f'{attr}="{value}"')
        return ' '.join(pairs)

    @staticmethod
    def _direct_text_parts(node: SimplifiedNode) -> list[str]:
        """Collect the stripped text of *node*'s direct text-node children.

        Text of length 0 or 1 after stripping is ignored.
        """
        parts = []
        for child in node.children:
            dom_child = child.original_node
            if dom_child.node_type != NodeType.TEXT_NODE:
                continue
            text = dom_child.node_value.strip() if dom_child.node_value else ''
            if len(text) > 1:
                parts.append(text)
        return parts

    @staticmethod
    def _has_direct_text(node: SimplifiedNode) -> bool:
        """Check if node has at least one direct text child longer than one character."""
        return bool(DOMCodeAgentSerializer._direct_text_parts(node))

    @staticmethod
    def _get_inline_text(node: SimplifiedNode) -> str:
        """Get inline text: direct text children joined by spaces, capped at 40 chars.

        Returns '' when the node has no qualifying direct text.
        """
        parts = DOMCodeAgentSerializer._direct_text_parts(node)
        if not parts:
            return ''
        return cap_text_length(' '.join(parts), DOMCodeAgentSerializer._INLINE_TEXT_CAP)

    @staticmethod
    def _serialize_iframe(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
        """Handle iframe minimally.

        Emits the frame's own opening line, then - when the frame has a content
        document - an ``#iframe-content`` marker one level deeper and the
        children of the document's ``<body>`` two levels deeper.
        """
        unit = DOMCodeAgentSerializer._INDENT
        indent = unit * depth
        dom_node = node.original_node
        tag = dom_node.tag_name.lower()

        # Minimal iframe marker.
        line = f'{indent}<{tag}'
        attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(dom_node)
        if attributes_str:
            line += f' {attributes_str}'
        line += '>'
        lines = [line]

        # Iframe content.
        if dom_node.content_document:
            lines.append(f'{indent}{unit}#iframe-content')
            # Find and serialize body content only.
            for doc_child in dom_node.content_document.children_nodes or []:
                if doc_child.tag_name.lower() != 'html':
                    continue
                for html_child in doc_child.children:
                    if html_child.tag_name.lower() != 'body':
                        continue
                    for body_child in html_child.children:
                        DOMCodeAgentSerializer._serialize_document_node(
                            body_child, lines, include_attributes, depth + 2
                        )
                    # Stop scanning this <html> once its <body> has been handled.
                    break

        return '\n'.join(lines)

    @staticmethod
    def _serialize_document_node(
        dom_node: EnhancedDOMTreeNode, output: list[str], include_attributes: list[str], depth: int
    ) -> None:
        """Serialize document node without SimplifiedNode wrapper.

        Lines are appended to *output* in place; nothing is returned. Only
        element nodes produce output. An invisible element is skipped together
        with its whole subtree. Direct text is capped at 25 chars.
        *include_attributes* is forwarded through the recursion but not consulted.
        """
        if dom_node.node_type != NodeType.ELEMENT_NODE:
            return

        # Skip invisible elements (and everything below them).
        if not (dom_node.snapshot_node and dom_node.is_visible):
            return

        recurse = DOMCodeAgentSerializer._serialize_document_node
        tag = dom_node.tag_name.lower()
        attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(dom_node)

        # Check if worth showing; otherwise skip the element but process its
        # children at the same depth.
        if tag not in INTERACTIVE_ELEMENTS and tag not in SEMANTIC_STRUCTURE and not attributes_str:
            for child in dom_node.children:
                recurse(child, output, include_attributes, depth)
            return

        # Build element.
        line = f'{DOMCodeAgentSerializer._INDENT * depth}<{tag}'
        if attributes_str:
            line += f' {attributes_str}'

        # Gather direct text (pieces of length <= 1 after stripping are ignored).
        text_parts = []
        for child in dom_node.children:
            if child.node_type == NodeType.TEXT_NODE and child.node_value:
                text = child.node_value.strip()
                if len(text) > 1:
                    text_parts.append(text)

        if text_parts:
            line += f'>{cap_text_length(" ".join(text_parts), DOMCodeAgentSerializer._FRAME_TEXT_CAP)}'
        else:
            line += '>'
        output.append(line)

        # Process non-text children one level deeper.
        for child in dom_node.children:
            if child.node_type != NodeType.TEXT_NODE:
                recurse(child, output, include_attributes, depth + 1)