File size: 7,840 Bytes
db4810d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
import pytest
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
from browser_use.agent.views import ActionResult
from browser_use.browser.views import BrowserState, TabInfo
from browser_use.dom.views import DOMElementNode, DOMTextNode
@pytest.fixture(
    params=[
        ChatOpenAI(model='gpt-4o-mini'),
        AzureChatOpenAI(model='gpt-4o', api_version='2024-02-15-preview'),
        ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=100, temperature=0.0, stop=None),
    ],
    ids=['gpt-4o-mini', 'gpt-4o', 'claude-3-5-sonnet'],
)
def message_manager(request: pytest.FixtureRequest):
    """Return a MessageManager for the task 'Test task' with a 1000-token input limit.

    The fixture is parametrized over three chat-model instances, so every test
    that uses it runs once per id. ``request`` is accepted but not read here.
    """
    # System prompt carries the (dummy) action descriptions.
    system_prompt = SystemMessage(content='Test actions')
    limits = MessageManagerSettings(
        max_input_tokens=1000,
        estimated_characters_per_token=3,
        image_tokens=800,
    )
    return MessageManager(task='Test task', system_message=system_prompt, settings=limits)
def test_initial_messages(message_manager: MessageManager):
    """A fresh manager exposes exactly two messages: the system prompt, then the task."""
    history = message_manager.get_messages()
    assert len(history) == 2

    system_msg, task_msg = history[0], history[1]
    assert isinstance(system_msg, SystemMessage)
    assert isinstance(task_msg, HumanMessage)
    # The task text given to the fixture must appear in the task message.
    assert 'Test task' in task_msg.content
def test_add_state_message(message_manager: MessageManager):
    """Adding a browser state appends one human message that contains the page URL."""
    page_url = 'https://test.com'
    page_title = 'Test Page'

    # Minimal page: a single empty, visible <div> and one open tab.
    root_node = DOMElementNode(
        tag_name='div',
        attributes={},
        children=[],
        is_visible=True,
        parent=None,
        xpath='//div',
    )
    open_tabs = [TabInfo(page_id=1, url=page_url, title=page_title)]
    browser_state = BrowserState(
        url=page_url,
        title=page_title,
        element_tree=root_node,
        selector_map={},
        tabs=open_tabs,
    )

    message_manager.add_state_message(browser_state)

    history = message_manager.get_messages()
    assert len(history) == 3  # system, task, state
    state_msg = history[2]
    assert isinstance(state_msg, HumanMessage)
    assert 'https://test.com' in state_msg.content
def test_add_state_with_memory_result(message_manager: MessageManager):
    """A result flagged ``include_in_memory=True`` gets its own message ahead of the state message."""
    page_url = 'https://test.com'
    page_title = 'Test Page'

    # Minimal page: a single empty, visible <div> and one open tab.
    root_node = DOMElementNode(
        tag_name='div',
        attributes={},
        children=[],
        is_visible=True,
        parent=None,
        xpath='//div',
    )
    browser_state = BrowserState(
        url=page_url,
        title=page_title,
        element_tree=root_node,
        selector_map={},
        tabs=[TabInfo(page_id=1, url=page_url, title=page_title)],
    )
    remembered = ActionResult(extracted_content='Important content', include_in_memory=True)

    message_manager.add_state_message(browser_state, [remembered])

    history = message_manager.get_messages()
    # Expected order: system, task, extracted content, state.
    assert len(history) == 4
    memory_msg, state_msg = history[2], history[3]
    assert 'Important content' in memory_msg.content
    assert isinstance(memory_msg, HumanMessage)
    assert isinstance(state_msg, HumanMessage)
    # The extracted content must not be repeated inside the state message.
    assert 'Important content' not in state_msg.content
def test_add_state_with_non_memory_result(message_manager: MessageManager):
    """A result flagged ``include_in_memory=False`` is folded into the state message itself."""
    page_url = 'https://test.com'
    page_title = 'Test Page'

    # Minimal page: a single empty, visible <div> and one open tab.
    root_node = DOMElementNode(
        tag_name='div',
        attributes={},
        children=[],
        is_visible=True,
        parent=None,
        xpath='//div',
    )
    browser_state = BrowserState(
        url=page_url,
        title=page_title,
        element_tree=root_node,
        selector_map={},
        tabs=[TabInfo(page_id=1, url=page_url, title=page_title)],
    )
    transient = ActionResult(extracted_content='Temporary content', include_in_memory=False)

    message_manager.add_state_message(browser_state, [transient])

    history = message_manager.get_messages()
    # Expected order: system, task, combined state+result message.
    assert len(history) == 3
    combined_msg = history[2]
    assert 'Temporary content' in combined_msg.content
    assert isinstance(combined_msg, HumanMessage)
@pytest.mark.skip('not sure how to fix this')
@pytest.mark.parametrize('max_tokens', [100000, 10000, 5000])
def test_token_overflow_handling_with_real_flow(message_manager: MessageManager, max_tokens):
    """Run a long agent-style message flow and check history invariants after every step.

    Each of the 200 steps adds a browser state (with an action result on even
    steps), reads the messages back, calls ``_remove_last_state_message()`` and
    then adds a model output. After each step the test checks the token budget,
    the message ordering, and that the stored per-message token counts match a
    fresh recount.

    The test returns early, without failing, as soon as ``get_messages`` raises
    a ``ValueError`` containing 'Max token limit reached - history is too long'.
    Any other ``ValueError`` propagates.

    Args:
        message_manager: Manager under test, supplied by the fixture.
        max_tokens: Value assigned to ``settings.max_input_tokens`` for this run.
    """
    # Imported once up front rather than on every loop iteration.
    from browser_use.agent.views import AgentBrain, AgentOutput
    from browser_use.controller.registry.views import ActionModel

    # Set more realistic token limit
    message_manager.settings.max_input_tokens = max_tokens

    # Create a long sequence of interactions
    for i in range(200):  # Simulate 200 steps of interaction
        # Create state with varying content length
        state = BrowserState(
            url=f'https://test{i}.com',
            title=f'Test Page {i}',
            element_tree=DOMElementNode(
                tag_name='div',
                attributes={},
                children=[
                    DOMTextNode(
                        text=f'Content {j} ' * (10 + i),  # Increasing content length
                        is_visible=True,
                        parent=None,
                    )
                    for j in range(5)  # Multiple DOM items
                ],
                is_visible=True,
                parent=None,
                xpath='//div',
            ),
            selector_map={j: f'//div[{j}]' for j in range(5)},
            tabs=[TabInfo(page_id=1, url=f'https://test{i}.com', title=f'Test Page {i}')],
        )

        # Even steps carry a result; every 4th step's result is kept in memory.
        result = None
        if i % 2 == 0:
            result = ActionResult(
                extracted_content=f'Important content from step {i}' * 5,
                include_in_memory=i % 4 == 0,
            )

        # Add state message
        if result is not None:
            message_manager.add_state_message(state, [result])
        else:
            message_manager.add_state_message(state)

        try:
            messages = message_manager.get_messages()
        except ValueError as e:
            if 'Max token limit reached - history is too long' in str(e):
                return  # If error occurs, end the test
            raise

        # Verify token limit is respected (a slack of 100 tokens is tolerated)
        assert message_manager.state.history.current_tokens <= message_manager.settings.max_input_tokens + 100

        last_msg = messages[-1]
        assert isinstance(last_msg, HumanMessage)

        if i % 4 == 0:
            # In-memory result step: the entry before the last one must also be a human message.
            assert isinstance(message_manager.state.history.messages[-2].message, HumanMessage)
        if i % 2 == 0 and i % 4 != 0:
            # Non-memory result step: the last message must still carry the state's URL line.
            if isinstance(last_msg.content, list):
                assert 'Current url: https://test' in last_msg.content[0]['text']
            else:
                assert 'Current url: https://test' in last_msg.content

        # Add model output every time
        output = AgentOutput(
            current_state=AgentBrain(
                evaluation_previous_goal=f'Success in step {i}',
                memory=f'Memory from step {i}',
                next_goal=f'Goal for step {i + 1}',
            ),
            action=[ActionModel()],
        )
        message_manager._remove_last_state_message()
        message_manager.add_model_output(output)

        # Get messages and verify after each addition
        history_entries = message_manager.state.history.messages
        messages = [entry.message for entry in history_entries]

        # Verify essential messages are preserved
        assert isinstance(messages[0], SystemMessage)  # System prompt always first
        assert isinstance(messages[1], HumanMessage)  # Task always second
        assert 'Test task' in messages[1].content

        # Verify structure of latest messages
        assert isinstance(messages[-1], AIMessage)  # Last message should be model output
        assert f'step {i}' in messages[-1].content  # Should contain current step info

        # Go through all messages and verify that the per-message and total token counts are correct
        stored_tokens = [entry.metadata.tokens for entry in history_entries]
        real_tokens = [message_manager._count_tokens(entry.message) for entry in history_entries]
        total_tokens = sum(stored_tokens)
        assert total_tokens == sum(real_tokens)
        assert stored_tokens == real_tokens
        assert message_manager.state.history.current_tokens == total_tokens
# Run with: pytest -s browser_use/agent/message_manager/tests.py
|