import asyncio
import os

import pytest
import requests
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from pydantic import SecretStr

from browser_use.agent.service import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig


@pytest.fixture(scope='function')
def event_loop():
    """Create an instance of the default event loop for each test case."""
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()


@pytest.fixture(scope='function')
async def browser(event_loop):
    """Yield a headless ``Browser`` and close it once the test has finished."""
    browser_instance = Browser(
        config=BrowserConfig(
            headless=True,
        )
    )
    yield browser_instance
    await browser_instance.close()


@pytest.fixture
async def context(browser):
    """Yield a browser context opened on the ``browser`` fixture."""
    async with await browser.new_context() as context:
        yield context


# Missing environment variables fall back to an empty secret so the module
# can still be imported/collected without every provider key being set.
api_key_gemini = SecretStr(os.getenv('GEMINI_API_KEY') or '')
api_key_deepseek = SecretStr(os.getenv('DEEPSEEK_API_KEY') or '')
api_key_anthropic = SecretStr(os.getenv('ANTHROPIC_API_KEY') or '')

# Models that are run without screenshots (``use_vision=False``).
_MODELS_WITHOUT_VISION = ('deepseek-chat', 'deepseek-reasoner')

# Models served by a local Ollama daemon, which must be up before the test.
_LOCAL_MODELS = ('qwen2.5:latest',)

# Address probed to decide whether the local Ollama daemon is reachable.
_OLLAMA_URL = 'http://127.0.0.1:11434/'

# Upper bound (seconds) for the Ollama probe so a dead port cannot hang the run.
_OLLAMA_PROBE_TIMEOUT = 5

# Any one of these actions in the history counts as a successful search attempt.
_ACCEPTED_ACTIONS = ('search_google', 'go_to_url', 'open_tab')


# pytest -s -v tests/test_models.py
@pytest.fixture(
    params=[
        ChatOpenAI(model='gpt-4o'),
        ChatOpenAI(model='gpt-4o-mini'),
        AzureChatOpenAI(
            model='gpt-4o',
            api_version='2024-10-21',
            azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
            api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
        ),
        # ChatOpenAI(
        #     base_url='https://api.deepseek.com/v1',
        #     model='deepseek-reasoner',
        #     api_key=api_key_deepseek,
        # ),
        # run: ollama start
        ChatOllama(
            model='qwen2.5:latest',
            num_ctx=128000,
        ),
        AzureChatOpenAI(
            model='gpt-4o-mini',
            api_version='2024-10-21',
            azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
            api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
        ),
        ChatAnthropic(
            model_name='claude-3-5-sonnet-20240620',
            timeout=100,
            temperature=0.0,
            stop=None,
            api_key=api_key_anthropic,
        ),
        ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=api_key_gemini),
        ChatGoogleGenerativeAI(model='gemini-1.5-pro', api_key=api_key_gemini),
        ChatGoogleGenerativeAI(model='gemini-1.5-flash-latest', api_key=api_key_gemini),
        ChatOpenAI(
            base_url='https://api.deepseek.com/v1',
            model='deepseek-chat',
            api_key=api_key_deepseek,
        ),
    ],
    # One id per entry in ``params``, in the same order.
    ids=[
        'gpt-4o',
        'gpt-4o-mini',
        'azure-gpt-4o',
        #'deepseek-reasoner',
        'qwen2.5:latest',
        'azure-gpt-4o-mini',
        'claude-3-5-sonnet',
        'gemini-2.0-flash-exp',
        'gemini-1.5-pro',
        'gemini-1.5-flash-latest',
        'deepseek-chat',
    ],
)
def llm(request):
    """Return the chat model instance selected by the current parametrization."""
    # Nothing is awaited here, so a plain function is enough and does not
    # depend on the async-fixture handling of the pytest-asyncio mode in use.
    return request.param


def _model_identifiers(llm):
    """Return the values of ``llm.model`` and ``llm.model_name`` that exist, in that order."""
    return [getattr(llm, attr) for attr in ('model', 'model_name') if hasattr(llm, attr)]


def _ensure_ollama_running():
    """Raise ``Exception`` unless the local Ollama endpoint answers with HTTP 200."""
    message = 'Ollama is not running - start with `ollama start`'
    try:
        response = requests.get(_OLLAMA_URL, timeout=_OLLAMA_PROBE_TIMEOUT)
    except requests.RequestException as err:
        # Keep the original connection error attached as the cause.
        raise Exception(message) from err
    if response.status_code != 200:
        raise Exception(message)


@pytest.mark.asyncio
async def test_model_search(llm, context):
    """Test 'Search Google' action"""
    # Prefer ``model`` and fall back to ``model_name`` when it is absent.
    model_name = llm.model if hasattr(llm, 'model') else llm.model_name
    print(f'\nTesting model: {model_name}')

    # Vision is disabled when either identifier names a model without vision.
    use_vision = not any(name in _MODELS_WITHOUT_VISION for name in _model_identifiers(llm))

    # Local models need a running Ollama daemon; fail early with a clear hint.
    if model_name in _LOCAL_MODELS:
        _ensure_ollama_running()

    agent = Agent(
        task="Search Google for 'elon musk' then click on the first result and scroll down.",
        llm=llm,
        browser_context=context,
        max_failures=2,
        use_vision=use_vision,
    )
    history: AgentHistoryList = await agent.run(max_steps=2)

    done = history.is_done()
    successful = history.is_successful()
    action_names = history.action_names()
    print(f'Actions performed: {action_names}')

    # Drop empty slots; ``str`` keeps the join safe if an entry is not a string.
    errors = '\n'.join(str(e) for e in history.errors() if e is not None)

    passed = any(action in action_names for action in _ACCEPTED_ACTIONS)

    print(f'Model {model_name}: {"✅ PASSED - " if passed else "❌ FAILED - "} Done: {done} Successful: {successful}')

    assert passed, f'Model {model_name} not working\nActions performed: {action_names}\nErrors: {errors}'