File size: 5,481 Bytes
db4810d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
import asyncio
import os
import pytest
from langchain_openai import AzureChatOpenAI
from pydantic import SecretStr
from browser_use.agent.service import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
@pytest.fixture(scope='function')
def event_loop():
    """Provide a fresh asyncio event loop to each test and close it afterwards."""
    policy = asyncio.get_event_loop_policy()
    fresh_loop = policy.new_event_loop()
    yield fresh_loop
    # Teardown: release the loop's resources once the test has finished.
    fresh_loop.close()
@pytest.fixture(scope='function')
async def browser(event_loop):
    """Yield a headless ``Browser`` and close it when the test is finished.

    ``event_loop`` is requested so that the loop fixture is set up before
    the browser is created.
    """
    headless_config = BrowserConfig(headless=True)
    instance = Browser(config=headless_config)
    yield instance
    # Teardown: shut the browser down after the test body has run.
    await instance.close()
@pytest.fixture
async def context(browser):
    """Yield a browser context opened on the ``browser`` fixture.

    The context is entered as an async context manager, so it is exited
    once the test completes.
    """
    new_context = await browser.new_context()
    async with new_context as active_context:
        yield active_context
@pytest.fixture
def llm():
    """Build the Azure OpenAI chat model used by the agent under test.

    The endpoint and key are read from ``AZURE_OPENAI_ENDPOINT`` and
    ``AZURE_OPENAI_KEY``; each falls back to an empty string when unset.
    """
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
# run with: pytest -s -k test_search_google
@pytest.mark.asyncio
async def test_search_google(llm, context):
    """Check that a Google-search task makes the agent use 'search_google'."""
    search_agent = Agent(
        task="Search Google for 'OpenAI'.",
        llm=llm,
        browser_context=context,
    )
    run_history: AgentHistoryList = await search_agent.run(max_steps=2)
    executed = run_history.action_names()

    assert 'search_google' in executed
@pytest.mark.asyncio
async def test_go_to_url(llm, context):
    """Check that a navigation task makes the agent use 'go_to_url'."""
    nav_agent = Agent(
        task="Navigate to 'https://www.python.org'.",
        llm=llm,
        browser_context=context,
    )
    run_history = await nav_agent.run(max_steps=2)
    executed = run_history.action_names()

    assert 'go_to_url' in executed
@pytest.mark.asyncio
async def test_go_back(llm, context):
    """Check that visiting a page and returning uses 'go_to_url' and 'go_back'."""
    back_agent = Agent(
        task="Go to 'https://www.example.com', then go back.",
        llm=llm,
        browser_context=context,
    )
    run_history = await back_agent.run(max_steps=3)
    executed = run_history.action_names()

    # Both the navigation and the return step must have been recorded.
    assert 'go_to_url' in executed
    assert 'go_back' in executed
@pytest.mark.asyncio
async def test_click_element(llm, context):
    """Check that a click task reaches the page and uses 'click_element'."""
    click_agent = Agent(
        task="Go to 'https://www.python.org' and click on the first link.",
        llm=llm,
        browser_context=context,
    )
    run_history = await click_agent.run(max_steps=4)
    executed = run_history.action_names()

    # Either navigation action is accepted as the way to reach the page.
    assert any(name in executed for name in ('go_to_url', 'open_tab'))
    assert 'click_element' in executed
@pytest.mark.asyncio
async def test_input_text(llm, context):
    """Check that typing into a search box uses 'go_to_url' and 'input_text'."""
    typing_agent = Agent(
        task="Go to 'https://www.google.com' and input 'OpenAI' into the search box.",
        llm=llm,
        browser_context=context,
    )
    run_history = await typing_agent.run(max_steps=4)
    executed = run_history.action_names()

    assert 'go_to_url' in executed
    assert 'input_text' in executed
@pytest.mark.asyncio
async def test_switch_tab(llm, context):
    """Check that opening two tabs and returning to the first uses 'switch_tab'."""
    tab_agent = Agent(
        task="Open new tabs with 'https://www.google.com' and 'https://www.wikipedia.org', then switch to the first tab.",
        llm=llm,
        browser_context=context,
    )
    run_history = await tab_agent.run(max_steps=6)
    executed = run_history.action_names()

    # The task names two sites, so at least two 'open_tab' actions are expected.
    assert executed.count('open_tab') >= 2
    assert 'switch_tab' in executed
@pytest.mark.asyncio
async def test_open_new_tab(llm, context):
    """Check that a new-tab task makes the agent use 'open_tab'."""
    new_tab_agent = Agent(
        task="Open a new tab and go to 'https://www.example.com'.",
        llm=llm,
        browser_context=context,
    )
    run_history = await new_tab_agent.run(max_steps=3)
    executed = run_history.action_names()

    assert 'open_tab' in executed
@pytest.mark.asyncio
async def test_extract_page_content(llm, context):
    """Check that an extraction task uses 'go_to_url' and 'extract_content'."""
    extract_agent = Agent(
        task="Go to 'https://www.example.com' and extract the page content.",
        llm=llm,
        browser_context=context,
    )
    run_history = await extract_agent.run(max_steps=3)
    executed = run_history.action_names()

    assert 'go_to_url' in executed
    assert 'extract_content' in executed
# run with: pytest -k test_done_action
@pytest.mark.asyncio
async def test_done_action(llm, context):
    """Check that the agent navigates and then reports completion via 'done'."""
    done_agent = Agent(
        task="Navigate to 'https://www.example.com' and signal that the task is done.",
        llm=llm,
        browser_context=context,
    )
    run_history = await done_agent.run(max_steps=3)
    executed = run_history.action_names()

    assert 'go_to_url' in executed
    assert 'done' in executed
# run with: pytest -k test_scroll_down
@pytest.mark.asyncio
async def test_scroll_down(llm, context):
    """Check that a scroll task uses 'scroll_down' and really moves the viewport.

    The agent is run twice: once for a single step, after which the vertical
    scroll offset is sampled, and once more for up to two steps, after which
    it is sampled again. The second sample must be larger than the first.
    """
    scroll_agent = Agent(
        task="Go to 'https://en.wikipedia.org/wiki/Internet' and scroll down the page.",
        llm=llm,
        browser_context=context,
    )

    # Page handle used to read the scroll offset; taken before the agent runs.
    current_page = await context.get_current_page()

    # First run: one step, then record the starting offset.
    await scroll_agent.run(max_steps=1)
    offset_before = await current_page.evaluate('window.scrollY;')

    # Second run: up to two more steps, then record the resulting offset.
    await scroll_agent.run(max_steps=2)
    offset_after = await current_page.evaluate('window.scrollY;')

    # The viewport must have moved downwards between the two samples.
    assert offset_after > offset_before, 'Page did not scroll down'

    # The recorded history must also list the scroll action itself.
    executed = scroll_agent.history.action_names()
    assert 'scroll_down' in executed
|