Speedofmastery's picture
Merge Landrun + Browser-Use + Chromium with AI agent support (without binary files)
d7b3d84
from dataclasses import dataclass, field
from typing import Any
from bubus import BaseEvent
from cdp_use.cdp.target import TargetID
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_serializer
from browser_use.dom.views import DOMInteractedElement, SerializedDOMState
# Known placeholder image data for about:blank pages - a 4x4 white PNG.
# Stored as a base64-encoded string so it can be compared directly against
# base64 screenshot payloads (presumably to recognise placeholder captures - confirm with callers).
PLACEHOLDER_4PX_SCREENSHOT = (
'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
)
# Pydantic
class TabInfo(BaseModel):
    """Describe a single browser tab: its URL, title and CDP target ids.

    On input the id fields are accepted under either their ``tab_id`` /
    ``parent_tab_id`` aliases or their attribute names; on output they are
    emitted under the ``*_tab_id`` serialization aliases and shortened to
    their last four characters.
    """

    model_config = ConfigDict(
        extra='forbid',
        validate_by_name=True,
        validate_by_alias=True,
        populate_by_name=True,
    )

    # Original fields
    url: str
    title: str
    target_id: TargetID = Field(
        serialization_alias='tab_id',
        validation_alias=AliasChoices('tab_id', 'target_id'),
    )
    # Parent page that contains this popup or cross-origin iframe.
    parent_target_id: TargetID | None = Field(
        default=None,
        serialization_alias='parent_tab_id',
        validation_alias=AliasChoices('parent_tab_id', 'parent_target_id'),
    )

    @field_serializer('target_id')
    def serialize_target_id(self, value: TargetID, _info: Any) -> str:
        """Emit only the trailing four characters of the target id."""
        short_id = value[-4:]
        return short_id

    @field_serializer('parent_target_id')
    def serialize_parent_target_id(self, value: TargetID | None, _info: Any) -> str | None:
        """Emit the trailing four characters of the parent id, or None when unset/empty."""
        if not value:
            return None
        return value[-4:]
class PageInfo(BaseModel):
    """Page geometry snapshot: viewport size, full page size and scroll offsets."""

    # --- viewport (currently visible area) ---
    viewport_width: int
    viewport_height: int

    # --- whole page ---
    page_width: int
    page_height: int

    # --- current scroll offset ---
    scroll_x: int
    scroll_y: int

    # --- scroll distances in each direction (calculated by the producer of this model) ---
    pixels_above: int
    pixels_below: int
    pixels_left: int
    pixels_right: int

    # Page statistics are now computed dynamically instead of stored
@dataclass
class NetworkRequest:
    """Describe one network request that is still pending."""

    url: str
    method: str = 'GET'
    # How long this request has been loading (ms since request started, max 10s).
    loading_duration_ms: float = 0.0
    # Resource category, e.g. 'Document', 'Stylesheet', 'Image', 'Script', 'XHR', 'Fetch'.
    resource_type: str | None = None
@dataclass
class PaginationButton:
    """Describe a pagination control that was detected on the page."""

    # One of 'next', 'prev', 'first', 'last', 'page_number'.
    button_type: str
    # Backend node ID for clicking.
    backend_node_id: int
    # Button text/label.
    text: str
    # XPath or other selector to locate the element.
    selector: str
    # Whether the button appears disabled.
    is_disabled: bool = False
@dataclass
class BrowserStateSummary:
    """Snapshot of the browser's current state, shaped for consumption by an LLM."""

    # provided by SerializedDOMState:
    dom_state: SerializedDOMState

    url: str
    title: str
    tabs: list[TabInfo]
    # Excluded from repr() so the generated repr does not include the image payload.
    screenshot: str | None = field(default=None, repr=False)
    # Enhanced page information.
    page_info: PageInfo | None = None

    # Keep legacy fields for backward compatibility
    pixels_above: int = 0
    pixels_below: int = 0

    browser_errors: list[str] = field(default_factory=list)
    # Whether the current page is a PDF viewer.
    is_pdf_viewer: bool = False
    # Text summary of recent browser events.
    recent_events: str | None = None
    # Currently loading network requests.
    pending_network_requests: list[NetworkRequest] = field(default_factory=list)
    # Detected pagination buttons.
    pagination_buttons: list[PaginationButton] = field(default_factory=list)
    # Messages from auto-closed JavaScript dialogs.
    closed_popup_messages: list[str] = field(default_factory=list)
@dataclass
class BrowserStateHistory:
    """The summary of the browser's state at a past point in time to use in LLM message history"""

    url: str
    title: str
    tabs: list[TabInfo]
    interacted_element: list[DOMInteractedElement | None] | list[None]
    screenshot_path: str | None = None

    def get_screenshot(self) -> str | None:
        """Load the screenshot from disk and return it as a base64 string.

        Returns:
            The base64-encoded file contents, or None when no path is set or
            the file cannot be read (missing, a directory, permission denied,
            invalid path, ...).
        """
        if not self.screenshot_path:
            return None

        import base64
        from pathlib import Path

        # Read directly instead of checking exists() first: the file may vanish
        # between the check and the read, and the read failure is handled anyway.
        try:
            raw_bytes = Path(self.screenshot_path).read_bytes()
        except (OSError, ValueError):
            # OSError: missing/unreadable file; ValueError: e.g. embedded NUL in the path.
            return None
        return base64.b64encode(raw_bytes).decode('utf-8')

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation of this history entry.

        Tabs are dumped via their ``model_dump()`` and interacted elements via
        their ``to_dict()``; ``None`` entries are preserved as ``None``.
        """
        return {
            'tabs': [tab.model_dump() for tab in self.tabs],
            'screenshot_path': self.screenshot_path,
            'interacted_element': [el.to_dict() if el else None for el in self.interacted_element],
            'url': self.url,
            'title': self.title,
        }
class BrowserError(Exception):
    """Browser error with structured memory for LLM context management.

    This exception class provides separate memory contexts for browser actions:
    - short_term_memory: Immediate context shown once to the LLM for the next action
    - long_term_memory: Persistent error information stored across steps
    """

    message: str
    short_term_memory: str | None = None
    long_term_memory: str | None = None
    details: dict[str, Any] | None = None
    while_handling_event: BaseEvent[Any] | None = None

    def __init__(
        self,
        message: str,
        short_term_memory: str | None = None,
        long_term_memory: str | None = None,
        details: dict[str, Any] | None = None,
        event: BaseEvent[Any] | None = None,
    ):
        """Initialize a BrowserError with structured memory contexts.

        Args:
            message: Technical error message for logging and debugging
            short_term_memory: Context shown once to LLM (e.g., available actions, options)
            long_term_memory: Persistent error info stored in agent memory
            details: Additional metadata for debugging
            event: The browser event that triggered this error
        """
        self.message = message
        self.short_term_memory = short_term_memory
        self.long_term_memory = long_term_memory
        self.details = details
        self.while_handling_event = event
        super().__init__(message)

    def __str__(self) -> str:
        """Render the message, followed by details and/or the event when present.

        Formats:
            details and event -> ``'<message> (<details>) during: <event>'``
            details only      -> ``'<message> (<details>)'``
            event only        -> ``'<message> (while handling: <event>)'``
            neither           -> ``'<message>'``
        """
        if self.details:
            rendered = f'{self.message} ({self.details})'
            # Only mention the event when one was supplied, so the text never
            # ends in a misleading "during: None".
            if self.while_handling_event is not None:
                rendered = f'{rendered} during: {self.while_handling_event}'
            return rendered
        if self.while_handling_event:
            return f'{self.message} (while handling: {self.while_handling_event})'
        return self.message
class URLNotAllowedError(BrowserError):
    """BrowserError subclass raised when a URL is not allowed."""