Spaces:

Boobs00
/

use

Configuration error

App Files Files Community

use / tests /test_agent_actions.py

Boobs00

Upload folder using huggingface_hub

db4810d verified 10 months ago

raw

history blame contribute delete

6.84 kB

	import asyncio
	import os

	import pytest
	from langchain_openai import AzureChatOpenAI
	from pydantic import BaseModel, SecretStr

	from browser_use.agent.service import Agent
	from browser_use.agent.views import AgentHistoryList
	from browser_use.browser.browser import Browser, BrowserConfig
	from browser_use.browser.views import BrowserState


	@pytest.fixture
	def llm():
	"""Initialize language model for testing"""

	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
	return AzureChatOpenAI(
	model='gpt-4o',
	api_version='2024-10-21',
	azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
	api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
	)
	# return ChatOpenAI(model='gpt-4o-mini')


	@pytest.fixture(scope='session')
	def event_loop():
	"""Create an instance of the default event loop for each test case."""
	loop = asyncio.get_event_loop_policy().new_event_loop()
	yield loop
	loop.close()


	@pytest.fixture(scope='session')
	async def browser(event_loop):
	browser_instance = Browser(
	config=BrowserConfig(
	headless=True,
	)
	)
	yield browser_instance
	await browser_instance.close()


	@pytest.fixture
	async def context(browser):
	async with await browser.new_context() as context:
	yield context
	# Clean up automatically happens with __aexit__


	# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
	# @pytest.mark.asyncio
	@pytest.mark.skip(reason='Kinda expensive to run')
	async def test_ecommerce_interaction(llm, context):
	"""Test complex ecommerce interaction sequence"""
	agent = Agent(
	task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
	llm=llm,
	browser_context=context,
	save_conversation_path='tmp/test_ecommerce_interaction/conversation',
	)

	history: AgentHistoryList = await agent.run(max_steps=20)

	# Verify sequence of actions
	action_sequence = []
	for action in history.model_actions():
	action_name = list(action.keys())[0]
	if action_name in ['go_to_url', 'open_tab']:
	action_sequence.append('navigate')
	elif action_name == 'input_text':
	action_sequence.append('input')
	# Check that the input is 'laptop'
	inp = action['input_text']['text'].lower() # type: ignore
	if inp == 'laptop':
	action_sequence.append('input_exact_correct')
	elif 'laptop' in inp:
	action_sequence.append('correct_in_input')
	else:
	action_sequence.append('incorrect_input')
	elif action_name == 'click_element':
	action_sequence.append('click')

	# Verify essential steps were performed
	assert 'navigate' in action_sequence # Navigated to Amazon
	assert 'input' in action_sequence # Entered search term
	assert 'click' in action_sequence # Clicked search/filter
	assert 'input_exact_correct' in action_sequence or 'correct_in_input' in action_sequence


	# @pytest.mark.asyncio
	async def test_error_recovery(llm, context):
	"""Test agent's ability to recover from errors"""
	agent = Agent(
	task='Navigate to nonexistent-site.com and then recover by going to google.com ',
	llm=llm,
	browser_context=context,
	)

	history: AgentHistoryList = await agent.run(max_steps=10)

	actions_names = history.action_names()
	actions = history.model_actions()
	assert (
	'go_to_url' in actions_names or 'open_tab' in actions_names
	), f'{actions_names} does not contain go_to_url or open_tab'
	for action in actions:
	if 'go_to_url' in action:
	assert 'url' in action['go_to_url'], 'url is not in go_to_url'
	assert action['go_to_url']['url'].endswith(
	'google.com'
	), 'url does not end with google.com'
	break


	# @pytest.mark.asyncio
	async def test_find_contact_email(llm, context):
	"""Test agent's ability to find contact email on a website"""
	agent = Agent(
	task='Go to https://browser-use.com/ and find out the contact email',
	llm=llm,
	browser_context=context,
	)

	history: AgentHistoryList = await agent.run(max_steps=10)

	# Verify the agent found the contact email
	extracted_content = history.extracted_content()
	email = 'info@browser-use.com'
	for content in extracted_content:
	if email in content:
	break
	else:
	pytest.fail(f'{extracted_content} does not contain {email}')


	# @pytest.mark.asyncio
	async def test_agent_finds_installation_command(llm, context):
	"""Test agent's ability to find the pip installation command for browser-use on the web"""
	agent = Agent(
	task='Find the pip installation command for the browser-use repo',
	llm=llm,
	browser_context=context,
	)

	history: AgentHistoryList = await agent.run(max_steps=10)

	# Verify the agent found the correct installation command
	extracted_content = history.extracted_content()
	install_command = 'pip install browser-use'
	for content in extracted_content:
	if install_command in content:
	break
	else:
	pytest.fail(f'{extracted_content} does not contain {install_command}')


	class CaptchaTest(BaseModel):
	name: str
	url: str
	success_text: str
	additional_text: str \| None = None


	# run 3 test: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
	# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
	@pytest.mark.asyncio
	@pytest.mark.parametrize(
	'captcha',
	[
	CaptchaTest(
	name='Text Captcha',
	url='https://2captcha.com/demo/text',
	success_text='Captcha is passed successfully!',
	),
	CaptchaTest(
	name='Basic Captcha',
	url='https://captcha.com/demos/features/captcha-demo.aspx',
	success_text='Correct!',
	),
	CaptchaTest(
	name='Rotate Captcha',
	url='https://2captcha.com/demo/rotatecaptcha',
	success_text='Captcha is passed successfully',
	additional_text='Use multiple clicks at once. click done when image is exact correct position.',
	),
	CaptchaTest(
	name='MT Captcha',
	url='https://2captcha.com/demo/mtcaptcha',
	success_text='Verified Successfully',
	additional_text='Stop when you solved it successfully.',
	),
	],
	)
	async def test_captcha_solver(llm, context, captcha: CaptchaTest):
	"""Test agent's ability to solve different types of captchas"""
	agent = Agent(
	task=f'Go to {captcha.url} and solve the captcha. {captcha.additional_text}',
	llm=llm,
	browser_context=context,
	)
	from browser_use.agent.views import AgentHistoryList

	history: AgentHistoryList = await agent.run(max_steps=7)

	state: BrowserState = await context.get_state()

	all_text = state.element_tree.get_all_text_till_next_clickable_element()

	if not all_text:
	all_text = ''

	if not isinstance(all_text, str):
	all_text = str(all_text)

	solved = captcha.success_text in all_text
	assert solved, f'Failed to solve {captcha.name}'

	# python -m pytest tests/test_agent_actions.py -v --capture=no

	# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO