Spaces:
Sleeping
Sleeping
File size: 6,317 Bytes
d7b3d84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | from dataclasses import dataclass, field
from typing import Any
from bubus import BaseEvent
from cdp_use.cdp.target import TargetID
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_serializer
from browser_use.dom.views import DOMInteractedElement, SerializedDOMState
# Base64 text of the known placeholder screenshot for about:blank pages (a 4x4 white PNG).
PLACEHOLDER_4PX_SCREENSHOT = 'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
# Pydantic
class TabInfo(BaseModel):
    """Describes one browser tab: its URL, title and target identifiers."""

    model_config = ConfigDict(
        extra='forbid',
        validate_by_name=True,
        validate_by_alias=True,
        populate_by_name=True,
    )

    url: str
    title: str

    # Validates from either 'tab_id' or 'target_id'; the serialization alias is 'tab_id'.
    target_id: TargetID = Field(
        serialization_alias='tab_id',
        validation_alias=AliasChoices('tab_id', 'target_id'),
    )

    # Parent page that contains this popup or cross-origin iframe (None when absent).
    parent_target_id: TargetID | None = Field(
        default=None,
        serialization_alias='parent_tab_id',
        validation_alias=AliasChoices('parent_tab_id', 'parent_target_id'),
    )

    @field_serializer('target_id')
    def serialize_target_id(self, target_id: TargetID, _info: Any) -> str:
        """Serialize the target id as its last four characters."""
        short_id = target_id[-4:]
        return short_id

    @field_serializer('parent_target_id')
    def serialize_parent_target_id(self, parent_target_id: TargetID | None, _info: Any) -> str | None:
        """Serialize the parent target id as its last four characters, or None when unset/empty."""
        if not parent_target_id:
            return None
        return parent_target_id[-4:]
class PageInfo(BaseModel):
    """Page size and scroll information for the current page."""

    # --- Viewport: current dimensions ---
    viewport_width: int
    viewport_height: int

    # --- Page: total dimensions ---
    page_width: int
    page_height: int

    # --- Scroll: current position ---
    scroll_x: int
    scroll_y: int

    # --- Scroll: calculated pixel counts in each direction ---
    pixels_above: int
    pixels_below: int
    pixels_left: int
    pixels_right: int

    # NOTE: page statistics are not stored on this model; they are computed dynamically.
@dataclass
class NetworkRequest:
"""Information about a pending network request"""
url: str
method: str = 'GET'
loading_duration_ms: float = 0.0 # How long this request has been loading (ms since request started, max 10s)
resource_type: str | None = None # e.g., 'Document', 'Stylesheet', 'Image', 'Script', 'XHR', 'Fetch'
@dataclass
class PaginationButton:
    """Describes a pagination button detected on the page."""

    # One of 'next', 'prev', 'first', 'last', 'page_number'.
    button_type: str
    # Backend node ID used for clicking.
    backend_node_id: int
    # Button text/label.
    text: str
    # XPath or other selector that locates the element.
    selector: str
    # Whether the button appears disabled.
    is_disabled: bool = False
@dataclass
class BrowserStateSummary:
    """Summary of the browser's current state, designed for an LLM to process."""

    # Provided by SerializedDOMState.
    dom_state: SerializedDOMState

    url: str
    title: str
    tabs: list[TabInfo]

    # Kept out of repr().
    screenshot: str | None = field(default=None, repr=False)

    # Enhanced page information.
    page_info: PageInfo | None = None

    # Legacy fields kept for backward compatibility.
    pixels_above: int = 0
    pixels_below: int = 0

    browser_errors: list[str] = field(default_factory=list)

    # Whether the current page is a PDF viewer.
    is_pdf_viewer: bool = False

    # Text summary of recent browser events.
    recent_events: str | None = None

    # Currently loading network requests.
    pending_network_requests: list[NetworkRequest] = field(default_factory=list)

    # Detected pagination buttons.
    pagination_buttons: list[PaginationButton] = field(default_factory=list)

    # Messages from auto-closed JavaScript dialogs.
    closed_popup_messages: list[str] = field(default_factory=list)
@dataclass
class BrowserStateHistory:
    """Snapshot of the browser's state at a past point in time, for use in LLM message history."""

    url: str
    title: str
    tabs: list[TabInfo]
    interacted_element: list[DOMInteractedElement | None] | list[None]
    screenshot_path: str | None = None

    def get_screenshot(self) -> str | None:
        """Read the screenshot file from disk and return its contents base64-encoded.

        Returns:
            The base64 text, or None when no path is set, the file does not
            exist, or reading/encoding raises.
        """
        if not self.screenshot_path:
            return None

        import base64
        from pathlib import Path

        screenshot_file = Path(self.screenshot_path)
        if not screenshot_file.exists():
            return None

        # Best effort: any failure while reading or encoding yields None.
        try:
            raw_bytes = screenshot_file.read_bytes()
            return base64.b64encode(raw_bytes).decode('utf-8')
        except Exception:
            return None

    def to_dict(self) -> dict[str, Any]:
        """Return this snapshot as a plain dict with tabs and interacted elements converted."""
        dumped_tabs = [tab.model_dump() for tab in self.tabs]
        dumped_elements = [element.to_dict() if element else None for element in self.interacted_element]
        return {
            'tabs': dumped_tabs,
            'screenshot_path': self.screenshot_path,
            'interacted_element': dumped_elements,
            'url': self.url,
            'title': self.title,
        }
class BrowserError(Exception):
    """Browser error with structured memory for LLM context management.

    This exception class provides separate memory contexts for browser actions:
    - short_term_memory: Immediate context shown once to the LLM for the next action
    - long_term_memory: Persistent error information stored across steps
    """

    message: str
    short_term_memory: str | None = None
    long_term_memory: str | None = None
    details: dict[str, Any] | None = None
    while_handling_event: BaseEvent[Any] | None = None

    def __init__(
        self,
        message: str,
        short_term_memory: str | None = None,
        long_term_memory: str | None = None,
        details: dict[str, Any] | None = None,
        event: BaseEvent[Any] | None = None,
    ):
        """Initialize a BrowserError with structured memory contexts.

        Args:
            message: Technical error message for logging and debugging
            short_term_memory: Context shown once to LLM (e.g., available actions, options)
            long_term_memory: Persistent error info stored in agent memory
            details: Additional metadata for debugging
            event: The browser event that triggered this error
        """
        self.message = message
        self.short_term_memory = short_term_memory
        self.long_term_memory = long_term_memory
        self.details = details
        self.while_handling_event = event
        super().__init__(message)

    def __str__(self) -> str:
        """Render the message, followed by details and the handled event when present.

        Formats:
            details and event: '<message> (<details>) during: <event>'
            details only:      '<message> (<details>)'
            event only:        '<message> (while handling: <event>)'
            neither:           '<message>'
        """
        if self.details:
            rendered = f'{self.message} ({self.details})'
            # Only mention the event when one was attached; otherwise the text
            # would end in a meaningless "during: None".
            if self.while_handling_event is not None:
                rendered = f'{rendered} during: {self.while_handling_event}'
            return rendered
        if self.while_handling_event:
            return f'{self.message} (while handling: {self.while_handling_event})'
        return self.message
class URLNotAllowedError(BrowserError):
    """Signals that a URL is not allowed."""
|