Spaces:

Speedofmastery
/

HMM

Sleeping

App Files Files Community

HMM / browser-use-main /browser_use /dom /serializer /code_use_serializer.py

Speedofmastery

Merge Landrun + Browser-Use + Chromium with AI agent support (without binary files)

d7b3d84 3 months ago

raw

history blame contribute delete

9.06 kB

	# @file purpose: Ultra-compact serializer optimized for code-use agents
	# Focuses on minimal token usage while preserving essential interactive context

	from browser_use.dom.utils import cap_text_length
	from browser_use.dom.views import (
	EnhancedDOMTreeNode,
	NodeType,
	SimplifiedNode,
	)

	# Minimal but sufficient attribute list for code agents
	CODE_USE_KEY_ATTRIBUTES = [
	'id', # Essential for element selection
	'name', # For form inputs
	'type', # For input types
	'placeholder', # For empty inputs
	'aria-label', # For buttons without text
	'value', # Current values
	'alt', # For images
	'class', # Keep top 2 classes for common selectors
	]

	# Interactive elements agent can use
	INTERACTIVE_ELEMENTS = {
	'a',
	'button',
	'input',
	'textarea',
	'select',
	'form',
	}

	# Semantic structure elements - expanded to include more content containers
	SEMANTIC_STRUCTURE = {
	'h1',
	'h2',
	'h3',
	'h4',
	'h5',
	'h6',
	'nav',
	'main',
	'header',
	'footer',
	'article',
	'section',
	'p', # Paragraphs often contain prices and product info
	'span', # Spans often contain prices and labels
	'div', # Divs with useful attributes (id/class) should be shown
	'ul',
	'ol',
	'li',
	'label',
	'img',
	}


	class DOMCodeAgentSerializer:
	"""Optimized DOM serializer for code-use agents - balances token efficiency with context."""

	@staticmethod
	def serialize_tree(node: SimplifiedNode \| None, include_attributes: list[str], depth: int = 0) -> str:
	"""
	Serialize DOM tree with smart token optimization.

	Strategy:
	- Keep top 2 CSS classes for querySelector compatibility
	- Show div/span/p elements with useful attributes or text
	- Show all interactive + semantic elements
	- Inline text up to 80 chars for better context
	"""
	if not node:
	return ''

	# Skip excluded/hidden nodes
	if hasattr(node, 'excluded_by_parent') and node.excluded_by_parent:
	return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)

	if not node.should_display:
	return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)

	formatted_text = []
	depth_str = ' ' * depth # Use 2 spaces instead of tabs for compactness

	if node.original_node.node_type == NodeType.ELEMENT_NODE:
	tag = node.original_node.tag_name.lower()
	is_visible = node.original_node.snapshot_node and node.original_node.is_visible

	# Skip invisible (except iframes)
	if not is_visible and tag not in ['iframe', 'frame']:
	return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)

	# Special handling for iframes
	if tag in ['iframe', 'frame']:
	return DOMCodeAgentSerializer._serialize_iframe(node, include_attributes, depth)

	# Build minimal attributes
	attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(node.original_node)

	# Decide if element should be shown
	is_interactive = tag in INTERACTIVE_ELEMENTS
	is_semantic = tag in SEMANTIC_STRUCTURE
	has_useful_attrs = bool(attributes_str)
	has_text = DOMCodeAgentSerializer._has_direct_text(node)

	# Skip non-semantic, non-interactive containers without attributes
	if not is_interactive and not is_semantic and not has_useful_attrs and not has_text:
	return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)

	# Collapse pointless wrappers
	if tag in {'div', 'span'} and not has_useful_attrs and not has_text and len(node.children) == 1:
	return DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth)

	# Build element
	line = f'{depth_str}<{tag}'

	if attributes_str:
	line += f' {attributes_str}'

	# Inline text
	inline_text = DOMCodeAgentSerializer._get_inline_text(node)
	if inline_text:
	line += f'>{inline_text}'
	else:
	line += '>'

	formatted_text.append(line)

	# Children (only if no inline text)
	if node.children and not inline_text:
	children_text = DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth + 1)
	if children_text:
	formatted_text.append(children_text)

	elif node.original_node.node_type == NodeType.TEXT_NODE:
	# Handled inline with parent
	pass

	elif node.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE:
	# Shadow DOM - minimal marker
	if node.children:
	formatted_text.append(f'{depth_str}#shadow')
	children_text = DOMCodeAgentSerializer._serialize_children(node, include_attributes, depth + 1)
	if children_text:
	formatted_text.append(children_text)

	return '\n'.join(formatted_text)

	@staticmethod
	def _serialize_children(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
	"""Serialize children."""
	children_output = []
	for child in node.children:
	child_text = DOMCodeAgentSerializer.serialize_tree(child, include_attributes, depth)
	if child_text:
	children_output.append(child_text)
	return '\n'.join(children_output)

	@staticmethod
	def _build_minimal_attributes(node: EnhancedDOMTreeNode) -> str:
	"""Build minimal but useful attributes - keep top 2 classes for selectors."""
	attrs = []

	if node.attributes:
	for attr in CODE_USE_KEY_ATTRIBUTES:
	if attr in node.attributes:
	value = str(node.attributes[attr]).strip()
	if value:
	# Special handling for class - keep only first 2 classes
	if attr == 'class':
	classes = value.split()[:2]
	value = ' '.join(classes)
	# Cap at 25 chars
	value = cap_text_length(value, 25)
	attrs.append(f'{attr}="{value}"')

	return ' '.join(attrs)

	@staticmethod
	def _has_direct_text(node: SimplifiedNode) -> bool:
	"""Check if node has direct text children."""
	for child in node.children:
	if child.original_node.node_type == NodeType.TEXT_NODE:
	text = child.original_node.node_value.strip() if child.original_node.node_value else ''
	if len(text) > 1:
	return True
	return False

	@staticmethod
	def _get_inline_text(node: SimplifiedNode) -> str:
	"""Get inline text (max 80 chars for better context)."""
	text_parts = []
	for child in node.children:
	if child.original_node.node_type == NodeType.TEXT_NODE:
	text = child.original_node.node_value.strip() if child.original_node.node_value else ''
	if text and len(text) > 1:
	text_parts.append(text)

	if not text_parts:
	return ''

	combined = ' '.join(text_parts)
	return cap_text_length(combined, 40)

	@staticmethod
	def _serialize_iframe(node: SimplifiedNode, include_attributes: list[str], depth: int) -> str:
	"""Handle iframe minimally."""
	formatted_text = []
	depth_str = ' ' * depth
	tag = node.original_node.tag_name.lower()

	# Minimal iframe marker
	attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(node.original_node)
	line = f'{depth_str}<{tag}'
	if attributes_str:
	line += f' {attributes_str}'
	line += '>'
	formatted_text.append(line)

	# Iframe content
	if node.original_node.content_document:
	formatted_text.append(f'{depth_str} #iframe-content')

	# Find and serialize body content only
	for child_node in node.original_node.content_document.children_nodes or []:
	if child_node.tag_name.lower() == 'html':
	for html_child in child_node.children:
	if html_child.tag_name.lower() == 'body':
	for body_child in html_child.children:
	DOMCodeAgentSerializer._serialize_document_node(
	body_child, formatted_text, include_attributes, depth + 2
	)
	break

	return '\n'.join(formatted_text)

	@staticmethod
	def _serialize_document_node(
	dom_node: EnhancedDOMTreeNode, output: list[str], include_attributes: list[str], depth: int
	) -> None:
	"""Serialize document node without SimplifiedNode wrapper."""
	depth_str = ' ' * depth

	if dom_node.node_type == NodeType.ELEMENT_NODE:
	tag = dom_node.tag_name.lower()

	# Skip invisible
	is_visible = dom_node.snapshot_node and dom_node.is_visible
	if not is_visible:
	return

	# Check if worth showing
	is_interactive = tag in INTERACTIVE_ELEMENTS
	is_semantic = tag in SEMANTIC_STRUCTURE
	attributes_str = DOMCodeAgentSerializer._build_minimal_attributes(dom_node)

	if not is_interactive and not is_semantic and not attributes_str:
	# Skip but process children
	for child in dom_node.children:
	DOMCodeAgentSerializer._serialize_document_node(child, output, include_attributes, depth)
	return

	# Build element
	line = f'{depth_str}<{tag}'
	if attributes_str:
	line += f' {attributes_str}'

	# Get text
	text_parts = []
	for child in dom_node.children:
	if child.node_type == NodeType.TEXT_NODE and child.node_value:
	text = child.node_value.strip()
	if text and len(text) > 1:
	text_parts.append(text)

	if text_parts:
	combined = ' '.join(text_parts)
	line += f'>{cap_text_length(combined, 25)}'
	else:
	line += '>'

	output.append(line)

	# Process non-text children
	for child in dom_node.children:
	if child.node_type != NodeType.TEXT_NODE:
	DOMCodeAgentSerializer._serialize_document_node(child, output, include_attributes, depth + 1)