# Integration tests for the browser-use Agent: live-site tasks, error recovery, and captcha demos.
import asyncio
import os
import pytest
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel, SecretStr
from browser_use.agent.service import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.views import BrowserState
@pytest.fixture
def llm():
    """Provide the Azure OpenAI chat model driving the agent in these tests."""
    # Alternative backends, kept for quick switching during local runs:
    # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
    # return ChatOpenAI(model='gpt-4o-mini')
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    api_key = os.getenv('AZURE_OPENAI_KEY', '')
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=SecretStr(api_key),
    )
@pytest.fixture(scope='session')
def event_loop():
    """Create one event loop for the whole test session.

    Session scope lets the session-scoped async ``browser`` fixture share a
    loop with the tests. The loop is closed in a ``finally`` so it is not
    leaked even when teardown is interrupted (the original skipped ``close()``
    if the generator was closed early).
    """
    loop = asyncio.get_event_loop_policy().new_event_loop()
    try:
        yield loop
    finally:
        loop.close()
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Launch a single headless Browser shared by the whole test session."""
    config = BrowserConfig(headless=True)
    instance = Browser(config=config)
    yield instance
    # Session teardown: shut the browser down once all tests have run.
    await instance.close()
@pytest.fixture
async def context(browser):
    """Yield a fresh browser context per test; __aexit__ performs the cleanup."""
    ctx = await browser.new_context()
    async with ctx:
        yield ctx
# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
# @pytest.mark.asyncio
@pytest.mark.skip(reason='Kinda expensive to run')
async def test_ecommerce_interaction(llm, context):
    """Test complex ecommerce interaction sequence"""
    agent = Agent(
        task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
        llm=llm,
        browser_context=context,
        save_conversation_path='tmp/test_ecommerce_interaction/conversation',
    )
    history: AgentHistoryList = await agent.run(max_steps=20)

    # Translate the raw model actions into a coarse, order-preserving
    # event sequence so the assertions below stay readable.
    observed = []
    for action in history.model_actions():
        action_name = next(iter(action.keys()))
        if action_name in ('go_to_url', 'open_tab'):
            observed.append('navigate')
        elif action_name == 'input_text':
            observed.append('input')
            # Grade how close the typed text is to the expected query.
            typed = action['input_text']['text'].lower()  # type: ignore
            if typed == 'laptop':
                observed.append('input_exact_correct')
            elif 'laptop' in typed:
                observed.append('correct_in_input')
            else:
                observed.append('incorrect_input')
        elif action_name == 'click_element':
            observed.append('click')

    # Verify essential steps were performed
    assert 'navigate' in observed  # Navigated to Amazon
    assert 'input' in observed  # Entered search term
    assert 'click' in observed  # Clicked search/filter
    assert 'input_exact_correct' in observed or 'correct_in_input' in observed
# @pytest.mark.asyncio
async def test_error_recovery(llm, context):
    """Test agent's ability to recover from errors.

    The task first visits a dead site and then recovers to google.com, so we
    must verify that *some* navigation ends at google.com — not the first one,
    which is expected to target the broken site. (The original asserted on the
    first ``go_to_url`` only, which a correctly-recovering agent would fail.)
    """
    agent = Agent(
        task='Navigate to nonexistent-site.com and then recover by going to google.com ',
        llm=llm,
        browser_context=context,
    )
    history: AgentHistoryList = await agent.run(max_steps=10)

    actions_names = history.action_names()
    actions = history.model_actions()
    assert (
        'go_to_url' in actions_names or 'open_tab' in actions_names
    ), f'{actions_names} does not contain go_to_url or open_tab'

    # Collect every go_to_url target; recovery succeeded if any of them is
    # google.com. rstrip('/') tolerates a normalized trailing slash.
    urls = []
    for action in actions:
        if 'go_to_url' in action:
            assert 'url' in action['go_to_url'], 'url is not in go_to_url'
            urls.append(action['go_to_url']['url'])
    if urls:
        assert any(
            url.rstrip('/').endswith('google.com') for url in urls
        ), f'{urls} does not contain a navigation to google.com'
# @pytest.mark.asyncio
async def test_find_contact_email(llm, context):
    """Test agent's ability to find contact email on a website"""
    agent = Agent(
        task='Go to https://browser-use.com/ and find out the contact email',
        llm=llm,
        browser_context=context,
    )
    history: AgentHistoryList = await agent.run(max_steps=10)

    # The expected address must appear in at least one extracted snippet.
    extracted_content = history.extracted_content()
    email = 'info@browser-use.com'
    if not any(email in content for content in extracted_content):
        pytest.fail(f'{extracted_content} does not contain {email}')
# @pytest.mark.asyncio
async def test_agent_finds_installation_command(llm, context):
    """Test agent's ability to find the pip installation command for browser-use on the web"""
    agent = Agent(
        task='Find the pip installation command for the browser-use repo',
        llm=llm,
        browser_context=context,
    )
    history: AgentHistoryList = await agent.run(max_steps=10)

    # The canonical install command must appear in some extracted snippet.
    extracted_content = history.extracted_content()
    install_command = 'pip install browser-use'
    if not any(install_command in content for content in extracted_content):
        pytest.fail(f'{extracted_content} does not contain {install_command}')
class CaptchaTest(BaseModel):
    """Parameters describing one captcha demo page for the solver test."""

    # Human-readable captcha flavour; used in the failure message.
    name: str
    # Demo page URL the agent must visit.
    url: str
    # Text that appears on the page once the captcha is solved.
    success_text: str
    # Optional extra hint appended to the agent task prompt.
    additional_text: str | None = None
# run 3 test: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
@pytest.mark.asyncio
@pytest.mark.parametrize(
    'captcha',
    [
        CaptchaTest(
            name='Text Captcha',
            url='https://2captcha.com/demo/text',
            success_text='Captcha is passed successfully!',
        ),
        CaptchaTest(
            name='Basic Captcha',
            url='https://captcha.com/demos/features/captcha-demo.aspx',
            success_text='Correct!',
        ),
        CaptchaTest(
            name='Rotate Captcha',
            url='https://2captcha.com/demo/rotatecaptcha',
            success_text='Captcha is passed successfully',
            additional_text='Use multiple clicks at once. click done when image is exact correct position.',
        ),
        CaptchaTest(
            name='MT Captcha',
            url='https://2captcha.com/demo/mtcaptcha',
            success_text='Verified Successfully',
            additional_text='Stop when you solved it successfully.',
        ),
    ],
)
async def test_captcha_solver(llm, context, captcha: CaptchaTest):
    """Test agent's ability to solve different types of captchas.

    After the run, the current page's visible text is scanned for the
    captcha-specific success message.
    """
    # Append the optional hint only when one is provided: interpolating
    # additional_text unconditionally injected the literal string 'None'
    # into the prompt for cases without a hint.
    task = f'Go to {captcha.url} and solve the captcha.'
    if captcha.additional_text:
        task += f' {captcha.additional_text}'
    agent = Agent(
        task=task,
        llm=llm,
        browser_context=context,
    )
    # AgentHistoryList is already imported at module level; the run result
    # itself is not inspected here — only the final page state matters.
    await agent.run(max_steps=7)

    state: BrowserState = await context.get_state()
    all_text = state.element_tree.get_all_text_till_next_clickable_element()
    # Normalize to a plain string so the containment check below is safe.
    if not all_text:
        all_text = ''
    elif not isinstance(all_text, str):
        all_text = str(all_text)

    solved = captcha.success_text in all_text
    assert solved, f'Failed to solve {captcha.name}'
# python -m pytest tests/test_agent_actions.py -v --capture=no
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO