"""Tests for src/tools/web_search.py"""

import email.message
import json
import urllib.error
from unittest.mock import MagicMock, patch

import pytest

from tools.web_search import (
    DuckDuckGoProvider,
    SearchProvider,
    SerperProvider,
    SimpleHTMLParser,
    TavilyProvider,
    URLFetcher,
    WebSearchTool,
    _create_web_search_tool,
)


# ═══════════════════════════════════════════════════════════════
# Helpers
# ═══════════════════════════════════════════════════════════════


class MockProvider(SearchProvider):
    """Mock search provider that returns canned results."""

    def __init__(self, results: list[dict[str, str]] | None = None):
        # Falls back to two fixed results when none (or an empty list) is given.
        self._results = results or [
            {"title": "Result 1", "url": "https://example.com/1", "snippet": "Snippet 1"},
            {"title": "Result 2", "url": "https://example.com/2", "snippet": "Snippet 2"},
        ]

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        return self._results[:max_results]


class EmptyProvider(SearchProvider):
    """Search provider that never returns any result."""

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        return []


def _mock_http_response(
    body: bytes | None = None, content_type: str | None = None
) -> MagicMock:
    """Build a MagicMock usable as the context manager returned by ``urlopen``.

    Args:
        body: Bytes returned by ``response.read()``. When ``None`` the default
            MagicMock behaviour of ``read`` is left untouched.
        content_type: Value returned by ``response.headers.get`` for any key
            containing ``"Content-Type"``; other keys yield the caller's
            default. When ``None`` ``headers.get`` is left untouched.

    Returns:
        The configured mock; entering it as a context manager yields itself.
    """
    response = MagicMock()
    response.__enter__ = lambda s: s
    response.__exit__ = MagicMock(return_value=False)
    if content_type is not None:
        response.headers.get = lambda key, default="": (
            content_type if "Content-Type" in key else default
        )
    if body is not None:
        response.read.return_value = body
    return response


def _mock_json_response(payload: dict) -> MagicMock:
    """Return a mocked ``urlopen`` response whose body is *payload* as UTF-8 JSON."""
    return _mock_http_response(json.dumps(payload).encode("utf-8"))


# ═══════════════════════════════════════════════════════════════
# SimpleHTMLParser
# ═══════════════════════════════════════════════════════════════

# NOTE(review): the HTML markup inside the fixtures of this file was lost in
# the version under review (only the text nodes survived, leaving string
# literals broken across lines). The tags below are reconstructed from each
# test's name and assertions — confirm them against version control before
# relying on the exact fixture contents.


class TestSimpleHTMLParser:
    """Behavioural tests for ``SimpleHTMLParser.html_to_text``."""

    def test_empty_input(self):
        result = SimpleHTMLParser.html_to_text("")
        assert result == ""

    def test_plain_text(self):
        result = SimpleHTMLParser.html_to_text("Hello World")
        assert "Hello World" in result

    def test_strips_tags(self):
        html = "<div><p>Hello <b>World</b></p></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Hello" in result
        assert "World" in result
        assert "<" not in result
        assert ">" not in result

    def test_removes_script(self):
        html = "<div><script>alert('xss')</script><p>Safe text</p></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "alert" not in result
        assert "Safe text" in result

    def test_removes_style(self):
        html = "<div><style>.class { color: red; }</style><p>Visible</p></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert ".class" not in result
        assert "Visible" in result

    def test_removes_comments(self):
        html = "<!-- this is a comment -->Hello"
        result = SimpleHTMLParser.html_to_text(html)
        assert "comment" not in result
        assert "Hello" in result

    def test_handles_html_entities(self):
        # The input must carry the escaped entity, otherwise the two
        # assertions below contradict each other.
        html = "Fish &amp; Chips"
        result = SimpleHTMLParser.html_to_text(html)
        assert "&amp;" not in result
        assert "Fish & Chips" in result

    def test_max_length_truncation(self):
        html = "x" * 10000
        result = SimpleHTMLParser.html_to_text(html, max_length=100)
        assert "(content truncated)" in result

    def test_block_tags_add_newlines(self):
        html = "<p>Para 1</p><p>Para 2</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Para 1" in result
        assert "Para 2" in result

    def test_heading_tags(self):
        html = "<h1>Title</h1><h2>Subtitle</h2><p>Content</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Title" in result
        assert "Subtitle" in result
        assert "Content" in result

    def test_nested_tags(self):
        html = "<div><section><article><p>Article text</p></article></section></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Article text" in result

    def test_no_remaining_tags(self):
        html = "<div><span>text</span></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "<" not in result
        assert ">" not in result

    def test_removes_nav_header_footer(self):
        html = "<nav>Navigation</nav><div>Content</div><footer>Footer</footer>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Navigation" not in result
        assert "Content" in result
        assert "Footer" not in result

    def test_list_items(self):
        html = "<ul><li>Item 1</li><li>Item 2</li></ul>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Item 1" in result
        assert "Item 2" in result

    def test_case_insensitive_removes_script(self):
        html = "<div><SCRIPT>bad code</SCRIPT><p>Good text</p></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "bad code" not in result
        assert "Good text" in result

    def test_br_tags(self):
        html = "Line 1<br>Line 2<br/>Line 3"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Line 1" in result
        assert "Line 2" in result
        assert "Line 3" in result

    def test_multiple_spaces_collapsed(self):
        # NOTE(review): the exact number of spaces in the original fixture is
        # unknown; the negative assertion uses the same run as the input.
        html = "<div><p>word1    word2     word3</p></div>"
        result = SimpleHTMLParser.html_to_text(html)
        # Collapsed spaces
        assert "word1" in result
        assert "word2" in result
        assert "word3" in result
        assert "word1    word2" not in result


# ═══════════════════════════════════════════════════════════════
# URLFetcher
# ═══════════════════════════════════════════════════════════════


class TestURLFetcher:
    """Tests for ``URLFetcher`` construction and ``fetch`` result handling."""

    def test_init_defaults(self):
        fetcher = URLFetcher()
        assert fetcher._timeout == 15
        assert fetcher._max_content_length == 500_000

    def test_init_custom(self):
        fetcher = URLFetcher(timeout=30, max_content_length=100_000)
        assert fetcher._timeout == 30
        assert fetcher._max_content_length == 100_000

    def test_fetch_fails_for_nonexistent_host(self):
        fetcher = URLFetcher(timeout=2)
        result = fetcher.fetch("http://this-should-not-exist-xyz.invalid/")
        assert isinstance(result, dict)
        assert result["success"] is False
        assert result["url"] == "http://this-should-not-exist-xyz.invalid/"

    def test_fetch_result_keys(self):
        fetcher = URLFetcher(timeout=1)
        result = fetcher.fetch("http://nope.invalid")
        for key in ("success", "url", "title", "content", "error"):
            assert key in result

    def test_fetch_success_mock(self):
        fetcher = URLFetcher()
        # NOTE(review): markup reconstructed (title + body text) — confirm.
        mock_html = (
            b"<html><head><title>Test Page</title></head>"
            b"<body><p>Hello World</p></body></html>"
        )
        mock_response = _mock_http_response(mock_html, "text/html; charset=utf-8")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com")
        assert result["success"] is True
        assert "Hello World" in result["content"]
        assert result["title"] == "Test Page"

    def test_fetch_unsupported_content_type(self):
        fetcher = URLFetcher()
        mock_response = _mock_http_response(content_type="application/pdf")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com/file.pdf")
        assert result["success"] is False
        assert "Unsupported content type" in result["error"]

    def test_fetch_http_error(self):
        fetcher = URLFetcher()
        not_found = urllib.error.HTTPError(
            url="http://example.com",
            code=404,
            msg="Not Found",
            hdrs=email.message.Message(),
            fp=None,
        )
        with patch("urllib.request.urlopen", side_effect=not_found):
            result = fetcher.fetch("http://example.com")
        assert result["success"] is False
        assert "HTTP Error 404" in result["error"]

    def test_fetch_url_error(self):
        fetcher = URLFetcher()
        with patch(
            "urllib.request.urlopen",
            side_effect=urllib.error.URLError("Name resolution failed"),
        ):
            result = fetcher.fetch("http://doesnotexist.invalid")
        assert result["success"] is False
        assert "URL Error" in result["error"]

    def test_fetch_timeout(self):
        fetcher = URLFetcher(timeout=1)
        with patch("urllib.request.urlopen", side_effect=TimeoutError("timed out")):
            result = fetcher.fetch("http://example.com")
        assert result["success"] is False
        assert "timed out" in result["error"].lower()

    def test_fetch_text_plain_content_type(self):
        fetcher = URLFetcher()
        mock_html = b"Hello plain text"
        mock_response = _mock_http_response(mock_html, "text/plain")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com/text.txt")
        assert result["success"] is True
        assert "Hello plain text" in result["content"]


# ═══════════════════════════════════════════════════════════════
# DuckDuckGoProvider
# ═══════════════════════════════════════════════════════════════


class TestDuckDuckGoProvider:
    """Tests for ``DuckDuckGoProvider`` against mocked API responses."""

    def test_init_defaults(self):
        provider = DuckDuckGoProvider()
        assert provider._timeout == 10

    def test_init_custom(self):
        provider = DuckDuckGoProvider(timeout=30)
        assert provider._timeout == 30

    def test_search_with_abstract(self):
        provider = DuckDuckGoProvider()
        ddg_response = {
            "Heading": "Python",
            "Abstract": "A programming language",
            "AbstractURL": "https://python.org",
            "RelatedTopics": [],
            "Results": [],
        }
        mock_response = _mock_json_response(ddg_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("python", max_results=5)
        assert len(results) == 1
        assert results[0]["title"] == "Python"
        assert results[0]["snippet"] == "A programming language"

    def test_search_with_related_topics(self):
        provider = DuckDuckGoProvider()
        ddg_response = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": [
                {"Text": "Result 1", "FirstURL": "https://example.com/1"},
                {"Text": "Result 2", "FirstURL": "https://example.com/2"},
            ],
            "Results": [],
        }
        mock_response = _mock_json_response(ddg_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("test", max_results=5)
        assert len(results) == 2

    def test_search_network_error_returns_empty(self):
        provider = DuckDuckGoProvider()
        with patch(
            "urllib.request.urlopen", side_effect=urllib.error.URLError("failed")
        ):
            results = provider.search("test")
        assert results == []

    def test_search_excludes_non_dict_topics(self):
        provider = DuckDuckGoProvider()
        ddg_response = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": [
                "not a dict",
                {"Text": "", "FirstURL": ""},
                {"Text": "Valid topic", "FirstURL": "https://valid.com"},
            ],
            "Results": [],
        }
        mock_response = _mock_json_response(ddg_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("test", max_results=10)
        assert any(r["snippet"] == "Valid topic" for r in results)

    def test_search_respects_max_results(self):
        provider = DuckDuckGoProvider()
        ddg_response = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": [
                {"Text": f"Topic {i}", "FirstURL": f"https://ex.com/{i}"}
                for i in range(10)
            ],
            "Results": [],
        }
        mock_response = _mock_json_response(ddg_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("test", max_results=3)
        assert len(results) <= 3


# ═══════════════════════════════════════════════════════════════
# SerperProvider
# ═══════════════════════════════════════════════════════════════


class TestSerperProvider:
    """Tests for ``SerperProvider`` against mocked API responses."""

    def test_init(self):
        provider = SerperProvider(api_key="test-key")
        assert provider._api_key == "test-key"
        assert provider._timeout == 10

    def test_search_organic(self):
        provider = SerperProvider(api_key="test-key")
        serper_response = {
            "organic": [
                {"title": "Result 1", "link": "https://example.com/1", "snippet": "Snippet 1"},
                {"title": "Result 2", "link": "https://example.com/2", "snippet": "Snippet 2"},
            ]
        }
        mock_response = _mock_json_response(serper_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("test query", max_results=5)
        assert len(results) == 2
        assert results[0]["title"] == "Result 1"

    def test_search_with_answer_box(self):
        provider = SerperProvider(api_key="test-key")
        serper_response = {
            "organic": [
                {"title": "Result", "link": "https://example.com", "snippet": "info"}
            ],
            "answerBox": {
                "title": "Direct Answer",
                "link": "https://answer.com",
                "answer": "The answer is 42",
            },
        }
        mock_response = _mock_json_response(serper_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("query", max_results=5)
        # Answer box should be inserted at position 0
        assert results[0]["title"] == "Direct Answer"

    def test_search_network_error_returns_empty(self):
        provider = SerperProvider(api_key="key")
        with patch(
            "urllib.request.urlopen", side_effect=urllib.error.URLError("failed")
        ):
            results = provider.search("test")
        assert results == []


# ═══════════════════════════════════════════════════════════════
# TavilyProvider
# ═══════════════════════════════════════════════════════════════


class TestTavilyProvider:
    """Tests for ``TavilyProvider`` against mocked API responses."""

    def test_init(self):
        provider = TavilyProvider(api_key="tavily-key")
        assert provider._api_key == "tavily-key"
        assert provider._include_answer is True

    def test_search_with_answer(self):
        provider = TavilyProvider(api_key="key")
        tavily_response = {
            "answer": "The answer is 42",
            "results": [
                {"title": "Page 1", "url": "https://example.com/1", "content": "Content 1"},
                {"title": "Page 2", "url": "https://example.com/2", "content": "Content 2"},
            ],
        }
        mock_response = _mock_json_response(tavily_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("test", max_results=5)
        assert results[0]["title"] == "Tavily AI Answer"
        assert len(results) == 3

    def test_search_no_answer(self):
        provider = TavilyProvider(api_key="key")
        tavily_response = {
            "results": [
                {"title": "Page", "url": "https://example.com", "content": "Content"}
            ]
        }
        mock_response = _mock_json_response(tavily_response)
        with patch("urllib.request.urlopen", return_value=mock_response):
            results = provider.search("test", max_results=5)
        assert len(results) == 1
        assert results[0]["title"] == "Page"

    def test_search_network_error_returns_empty(self):
        provider = TavilyProvider(api_key="key")
        with patch(
            "urllib.request.urlopen", side_effect=urllib.error.URLError("failed")
        ):
            results = provider.search("test")
        assert results == []


# ═══════════════════════════════════════════════════════════════
# WebSearchTool
# ═══════════════════════════════════════════════════════════════


class TestWebSearchToolInit:
    """Tests for ``WebSearchTool`` construction and static properties."""

    def test_init_default_provider(self):
        tool = WebSearchTool()
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_init_custom_provider(self):
        provider = MockProvider()
        tool = WebSearchTool(provider=provider)
        assert tool._provider is provider

    def test_name_property(self):
        tool = WebSearchTool()
        assert tool.name == "web_search"

    def test_description_property(self):
        tool = WebSearchTool()
        desc = tool.description
        assert isinstance(desc, str)
        assert len(desc) > 0

    def test_parameters_schema(self):
        tool = WebSearchTool()
        schema = tool.parameters_schema
        assert schema["type"] == "object"
        assert "query" in schema["properties"]
        assert "url" in schema["properties"]
        assert "action" in schema["properties"]

    def test_parameters_schema_with_selenium(self):
        tool = WebSearchTool()
        tool._use_selenium = True
        schema = tool.parameters_schema
        assert "selector" in schema["properties"]
        assert "js_code" in schema["properties"]


class TestWebSearchToolExecute:
    """Tests for ``WebSearchTool.execute`` without a Selenium fetcher."""

    def test_execute_search_with_query(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(query="python programming")
        assert result.success is True
        assert "Result 1" in result.output

    def test_execute_no_action_no_query_no_url(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute()
        assert result.success is False
        assert result.error

    def test_execute_search_empty_results(self):
        tool = WebSearchTool(provider=EmptyProvider())
        result = tool.execute(query="something obscure")
        assert result.success is True
        assert "No results found" in result.output

    def test_execute_fetch_with_url(self):
        tool = WebSearchTool(provider=MockProvider())
        mock_fetch_result = {
            "success": True,
            "url": "https://example.com",
            "title": "Test",
            "content": "Test content here",
        }
        with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result):
            result = tool.execute(url="https://example.com")
        assert result.success is True
        assert "Test content here" in result.output

    def test_execute_fetch_action_no_url(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="fetch")
        assert result.success is False
        assert result.error

    def test_execute_fetch_action_with_url(self):
        tool = WebSearchTool(provider=MockProvider())
        mock_fetch_result = {
            "success": True,
            "url": "https://example.com",
            "title": "Page",
            "content": "Page content",
        }
        with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result):
            result = tool.execute(action="fetch", url="https://example.com")
        assert result.success is True

    def test_execute_fetch_failure(self):
        tool = WebSearchTool(provider=MockProvider())
        mock_fetch_result = {
            "success": False,
            "url": "https://example.com",
            "title": "",
            "content": "",
            "error": "Connection refused",
        }
        with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result):
            result = tool.execute(action="fetch", url="https://example.com")
        assert result.success is False

    def test_execute_click_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="click", selector=".button")
        assert result.success is False
        assert result.error is not None
        assert "Selenium" in result.error

    def test_execute_fill_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="fill", selector="input", value="test")
        assert result.success is False
        assert result.error is not None
        assert "Selenium" in result.error

    def test_execute_extract_links_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="extract_links")
        assert result.success is False
        assert result.error is not None
        assert "Selenium" in result.error

    def test_execute_execute_js_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="execute_js", js_code="return 1")
        assert result.success is False
        assert result.error is not None
        assert "Selenium" in result.error

    def test_execute_execute_js_no_code(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="execute_js")
        assert result.success is False

    def test_execute_crawl_without_url(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="crawl")
        assert result.success is False

    def test_execute_crawl_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="crawl", url="https://example.com")
        assert result.success is False
        assert result.error is not None
        assert "Selenium" in result.error

    def test_execute_get_content_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="get_content")
        assert result.success is False

    def test_execute_search_action_explicit(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="search", query="test")
        assert result.success is True

    def test_execute_search_action_no_query(self):
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="search")
        assert result.success is False

    def test_execute_with_fetch_content(self):
        provider = MockProvider()
        tool = WebSearchTool(provider=provider, fetch_content=True)
        mock_fetch_result = {
            "success": True,
            "url": "https://example.com/1",
            "title": "Example",
            "content": "Page content here for fetching",
        }
        with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result):
            result = tool.execute(query="test query")
        assert result.success is True

    def test_execute_max_results_clipped(self):
        """max_results is capped at 10 in the implementation."""
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(query="test", max_results=100)
        assert result.success is True


class TestWebSearchToolFormatting:
    """Tests for ``WebSearchTool._format_search_results``."""

    def test_format_search_results_empty(self):
        tool = WebSearchTool(provider=MockProvider())
        formatted = tool._format_search_results([])
        assert "No results found" in formatted

    def test_format_search_results_with_results(self):
        tool = WebSearchTool(provider=MockProvider())
        results = [
            {"title": "Test Title", "url": "https://example.com", "snippet": "A snippet"},
        ]
        formatted = tool._format_search_results(results)
        assert "Test Title" in formatted
        assert "https://example.com" in formatted
        assert "A snippet" in formatted

    def test_format_search_results_with_content(self):
        tool = WebSearchTool(provider=MockProvider(), max_content_length=200)
        results = [
            {
                "title": "Title",
                "url": "https://example.com",
                "snippet": "snap",
                "content": "Page content",
            },
        ]
        formatted = tool._format_search_results(results, with_content=True)
        assert "Page content" in formatted

    def test_format_search_results_content_truncated(self):
        tool = WebSearchTool(provider=MockProvider(), max_content_length=5)
        results = [
            {"title": "T", "url": "https://x.com", "snippet": "", "content": "A" * 100},
        ]
        formatted = tool._format_search_results(results, with_content=True)
        assert "truncated" in formatted.lower()


class TestWebSearchToolSeleniumCheck:
    """Tests for Selenium guards and lifecycle on a tool without Selenium."""

    def test_require_selenium_raises_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        assert tool._selenium_fetcher is None
        with pytest.raises(RuntimeError, match="Selenium"):
            tool._require_selenium("click")

    def test_context_manager(self):
        tool = WebSearchTool(provider=MockProvider())
        with tool as t:
            assert t is tool

    def test_close_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        tool.close()  # Should not raise


class TestCreateWebSearchToolFactory:
    """Tests for provider selection in ``_create_web_search_tool``."""

    def test_default_provider(self):
        tool = _create_web_search_tool()
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_serper_provider_with_key(self):
        tool = _create_web_search_tool(provider="serper", api_key="my-key")
        assert isinstance(tool._provider, SerperProvider)

    def test_serper_provider_no_key_falls_back_to_ddg(self):
        tool = _create_web_search_tool(provider="serper")
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_tavily_provider_with_key(self):
        tool = _create_web_search_tool(provider="tavily", api_key="my-key")
        assert isinstance(tool._provider, TavilyProvider)

    def test_tavily_provider_no_key_falls_back_to_ddg(self):
        tool = _create_web_search_tool(provider="tavily")
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_duckduckgo_provider_explicit(self):
        tool = _create_web_search_tool(provider="duckduckgo")
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_ddg_alias(self):
        tool = _create_web_search_tool(provider="ddg")
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_unknown_provider_falls_back_to_ddg(self):
        tool = _create_web_search_tool(provider="unknown_xyz")
        assert isinstance(tool._provider, DuckDuckGoProvider)

    def test_serper_provider_serper_api_key_param(self):
        tool = _create_web_search_tool(provider="serper", serper_api_key="key")
        assert isinstance(tool._provider, SerperProvider)

    def test_tavily_provider_tavily_api_key_param(self):
        tool = _create_web_search_tool(provider="tavily", tavily_api_key="key")
        assert isinstance(tool._provider, TavilyProvider)


# ═══════════════════════════════════════════════════════════════
# SeleniumFetcher — initialization only (no real browser)
# ═══════════════════════════════════════════════════════════════


class TestSeleniumFetcherInit:
    """Tests for ``SeleniumFetcher`` state handling; no browser is started."""

    def test_init_defaults(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        assert fetcher._headless is True
        assert fetcher._browser == "chrome"
        assert fetcher._wait_timeout == 15
        assert fetcher._page_load_timeout == 30
        assert fetcher._scroll_to_bottom is False
        assert fetcher._driver is None

    def test_init_custom(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher(
            headless=False,
            browser="firefox",
            wait_timeout=30,
            scroll_to_bottom=True,
        )
        assert fetcher._headless is False
        assert fetcher._browser == "firefox"
        assert fetcher._wait_timeout == 30
        assert fetcher._scroll_to_bottom is True

    def test_ensure_dependencies_import_error(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        with patch(
            "builtins.__import__", side_effect=ImportError("No selenium")
        ), pytest.raises(ImportError):
            fetcher._ensure_dependencies()

    def test_ensure_dependencies_with_selenium(self):
        """If selenium is available, _ensure_dependencies should not raise."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        try:
            import selenium  # noqa: F401

            fetcher._ensure_dependencies()  # Should not raise
        except ImportError:
            pytest.skip("selenium not installed")

    def test_close_no_driver(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        fetcher.close()  # Should not raise
        assert fetcher._driver is None

    def test_close_with_mock_driver(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        mock_driver = MagicMock()
        fetcher._driver = mock_driver
        fetcher.close()
        mock_driver.quit.assert_called_once()
        assert fetcher._driver is None

    def test_context_manager(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        with fetcher as f:
            assert f is fetcher

    def test_create_driver_invalid_browser(self):
        from tools.web_search import SeleniumFetcher

        try:
            import selenium  # noqa: F401
        except ImportError:
            pytest.skip("selenium not installed")
        fetcher = SeleniumFetcher(browser="ie")
        with pytest.raises(ValueError, match="Unsupported browser"):
            fetcher._create_driver()

    def test_get_driver_creates_if_none(self):
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        with patch.object(fetcher, "_create_driver", return_value=MagicMock()):
            driver = fetcher._get_driver()
        assert driver is not None


# ═══════════════════════════════════════════════════════════════
# URLFetcher — charset detection
# ═══════════════════════════════════════════════════════════════


class TestURLFetcherCharset:
    """Tests for charset handling and content extraction in ``URLFetcher.fetch``."""

    def test_custom_charset_in_content_type(self):
        """Test charset extraction from content-type header."""
        fetcher = URLFetcher()
        mock_html = "Hello World".encode("latin-1")
        mock_response = _mock_http_response(mock_html, "text/html; charset=latin-1")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com")
        assert result["success"] is True

    def test_unicode_decode_error_fallback(self):
        """Test fallback when charset decoding fails."""
        fetcher = URLFetcher()
        mock_html = b"\xff\xfe Hello World"
        mock_response = _mock_http_response(mock_html, "text/html; charset=utf-16")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com")
        # Should succeed even if charset is tricky
        assert "success" in result

    def test_value_error_in_fetch(self):
        """Test handling of ValueError in fetch."""
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", side_effect=ValueError("bad url")):
            result = fetcher.fetch("not-a-url")
        assert result["success"] is False

    def test_main_content_extraction(self):
        """Test that main/article content is extracted."""
        fetcher = URLFetcher()
        # NOTE(review): markup reconstructed (title + <main> wrapper) — confirm.
        html_content = (
            b"<html><head><title>Test</title></head><body><main>"
            + (b"Main content " * 50)
            + b"</main></body></html>"
        )
        mock_response = _mock_http_response(html_content, "text/html; charset=utf-8")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com")
        assert result["success"] is True
        assert "Main content" in result["content"]


# ═══════════════════════════════════════════════════════════════
# WebSearchTool — with mock selenium fetcher
# ═══════════════════════════════════════════════════════════════


class TestWebSearchToolWithSelenium:
    """Tests for WebSearchTool when given a mocked selenium fetcher."""

    def _make_mock_selenium_fetcher(self):
        """Return a ``MagicMock`` constrained to the ``SeleniumFetcher`` API."""
        from tools.web_search import SeleniumFetcher

        return MagicMock(spec=SeleniumFetcher)

    def test_init_with_selenium_fetcher(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        assert tool._use_selenium is True
        assert tool._selenium_fetcher is mock_fetcher

    def test_execute_fetch_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fetch.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "Content via Selenium",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(url="https://example.com")
        assert result.success is True
        assert "Content via Selenium" in result.output

    def test_execute_click_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.click_element.return_value = {
            "success": True,
            "url": "https://example.com/next",
            "title": "Next Page",
            "clicked_text": "Submit",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="click", selector=".submit-btn")
        assert result.success is True

    def test_execute_click_failed(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.click_element.return_value = {
            "success": False,
            "error": "Element not found",
            "url": "",
            "title": "",
            "clicked_text": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="click", selector=".nonexistent")
        assert result.success is False

    def test_execute_fill_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fill_input.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Page",
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="fill", selector="input[name=q]", value="test")
        assert result.success is True

    def test_execute_fill_no_selector_fails(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="fill", value="test")
        assert result.success is False

    def test_execute_extract_links_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fetch.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "",
        }
        mock_fetcher.extract_links.return_value = {
            "success": True,
            "url": "https://example.com",
            "links": [
                {"url": "https://example.com/1", "text": "Link 1", "title": ""},
            ],
            "count": 1,
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="extract_links", url="https://example.com")
        assert result.success is True

    def test_execute_execute_js_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.execute_js.return_value = {
            "success": True,
            "url": "https://example.com",
            "return_value": "document.title",
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="execute_js", js_code="return document.title")
        assert result.success is True

    def test_execute_get_content_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.get_page_content.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "Page content here",
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="get_content")
        assert result.success is True

    def test_execute_crawl_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.crawl.return_value = {
            "success": True,
            "pages": [{"url": "https://example.com", "title": "Home", "depth": 0}],
            "total_pages": 1,
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(
            action="crawl", url="https://example.com", max_depth=1, max_pages=5
        )
        assert result.success is True

    def test_execute_search_with_fetch_content_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fetch.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "Content here",
        }
        tool = WebSearchTool(
            provider=MockProvider(),
            selenium_fetcher=mock_fetcher,
            fetch_content=True,
        )
        result = tool.execute(query="test")
        assert result.success is True

    def test_description_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        desc = tool.description
        assert "Selenium" in desc

    def test_close_with_selenium(self):
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        tool.close()
        mock_fetcher.close.assert_called_once()


# ═══════════════════════════════════════════════════════════════
# WebSearchTool — callback integration
# ═══════════════════════════════════════════════════════════════


class TestWebSearchToolCallbacks:
    """Tests for callback-manager integration in ``WebSearchTool``."""

    def test_emit_tool_start_with_callback_manager(self):
        from callbacks.base import BaseCallbackHandler
        from callbacks.manager import CallbackManager

        class RecordingCB(BaseCallbackHandler):
            def __init__(self):
                self.calls = []

            def on_tool_start(self, *, run_id, tool_name, **kwargs):
                self.calls.append(("start", tool_name))

            def on_tool_end(self, *, run_id, tool_name, **kwargs):
                self.calls.append(("end", tool_name))

        cb = RecordingCB()
        manager = CallbackManager(handlers=[cb])
        tool = WebSearchTool(provider=MockProvider(), callback_manager=manager)
        result = tool.execute(query="test")
        assert result.success is True
        assert any(c[0] == "start" for c in cb.calls)
        assert any(c[0] == "end" for c in cb.calls)

    def test_get_callback_manager_from_context(self):
        tool = WebSearchTool(provider=MockProvider())
        # Without a callback manager, _get_callback_manager should return None or context manager
        cb = tool._get_callback_manager()
        # May be None if not in a callback context
        assert cb is None or hasattr(cb, "on_tool_start")

    def test_get_callback_manager_exception_returns_none(self, monkeypatch):
        """_get_callback_manager should return None on exception."""
        tool = WebSearchTool(provider=MockProvider())
        tool._callback_manager = None
        # Mock get_callback_manager to raise
        monkeypatch.setattr(
            "callbacks.context.get_callback_manager",
            lambda: (_ for _ in ()).throw(RuntimeError("error")),
        )
        cb = tool._get_callback_manager()
        assert cb is None

    def test_emit_tool_error_with_callback_manager(self):
        """Test _emit_tool_error is called when callback manager is set."""
        from callbacks.base import BaseCallbackHandler
        from callbacks.manager import CallbackManager

        class RecordingCB(BaseCallbackHandler):
            def __init__(self):
                self.errors = []

            def on_tool_error(self, *, run_id, tool_name, **kwargs):
                self.errors.append(tool_name)

        cb_handler = RecordingCB()
        manager = CallbackManager(handlers=[cb_handler])
        tool = WebSearchTool(provider=MockProvider(), callback_manager=manager)
        # Trigger an error by mocking provider.search to raise
        mock_error = TimeoutError("timed out")
        tool._provider = MagicMock()
        tool._provider.search.side_effect = mock_error
        result = tool.execute(query="test")
        assert result.success is False
        assert len(cb_handler.errors) > 0


# ═══════════════════════════════════════════════════════════════
# WebSearchTool.execute — action routing edge cases
# ═══════════════════════════════════════════════════════════════


class TestWebSearchToolExecuteActionRouting:
    """Test execute() auto-detection and edge cases."""

    def test_auto_detect_click_from_selector(self):
        """Auto-detect action 'click' when selector is provided."""
        tool = WebSearchTool(provider=MockProvider())
        mock_selenium = MagicMock()
        mock_selenium.click_element.return_value = {
            "success": True,
            "url": "http://example.com",
            "title": "T",
            "content": "",
            "clicked_text": "Button",
        }
        tool._selenium_fetcher = mock_selenium
        tool._use_selenium = True
        result = tool.execute(selector="#btn")
        assert result.success is True

    def test_auto_detect_execute_js_from_js_code(self):
        """Auto-detect action 'execute_js' when js_code is provided."""
        tool = WebSearchTool(provider=MockProvider())
        mock_selenium = MagicMock()
        mock_selenium.execute_js.return_value = {
            "success": True,
            "url": "http://example.com",
            "return_value": 42,
        }
        tool._selenium_fetcher = mock_selenium
        tool._use_selenium = True
        result = tool.execute(js_code="return 42;")
        assert result.success is True

    def test_click_action_without_selector_returns_error(self):
        """Action 'click' without selector returns error."""
        tool = WebSearchTool(provider=MockProvider())
        result = tool.execute(action="click")
        assert result.success is False
        assert result.error is not None
        assert "selector" in result.error

    # NOTE(review): a further test method of this class begins at this point
    # (its `def` keyword is the last token of this section) and continues
    # beyond it; it is deliberately not reproduced here.
test_fill_action_without_selector_returns_error(self): """Action 'fill' without selector returns error.""" tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="fill") assert result.success is False assert result.error is not None assert "selector" in result.error def test_execute_js_action_without_js_code_returns_error(self): """Action 'execute_js' without js_code returns error.""" tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="execute_js") assert result.success is False assert result.error is not None assert "js_code" in result.error def test_crawl_action_without_url_returns_error(self): """Action 'crawl' without url returns error.""" tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="crawl") assert result.success is False assert result.error is not None assert "url" in result.error def test_fetch_action_without_url_returns_error(self): """Action 'fetch' without url returns error.""" tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="fetch") assert result.success is False assert result.error is not None assert "url" in result.error def test_no_action_no_query_url_selector_js_code_returns_error(self): """No action, query, url, selector, or js_code returns error.""" tool = WebSearchTool(provider=MockProvider()) result = tool.execute() assert result.success is False assert result.error is not None assert "No action" in result.error def test_fetch_url_with_wait_for_selector(self): """_fetch_url with wait_for_selector uses fetch_with_wait.""" from tools.web_search import SeleniumFetcher tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock(spec=SeleniumFetcher) mock_selenium.fetch_with_wait.return_value = { "success": True, "title": "Test", "content": "content" } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(url="https://example.com", wait_for_selector="#main") assert result.success is True 
mock_selenium.fetch_with_wait.assert_called_once() def test_fetch_url_exception_returns_error(self): """_fetch_url exception returns error.""" from unittest.mock import patch tool = WebSearchTool(provider=MockProvider()) with patch.object(tool, "_get_active_fetcher") as mock_fetcher: mock_fetcher.return_value.fetch.side_effect = RuntimeError("connection failed") result = tool.execute(url="https://example.com") assert result.success is False def test_execute_fill_fail(self): """Fill action when fill fails.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.fill_input.return_value = { "success": False, "error": "element not found" } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="fill", selector="#input", value="test") assert result.success is False assert result.error is not None assert "element not found" in result.error def test_execute_extract_links_fetch_fail(self): """Extract links when URL fetch fails.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.fetch.return_value = { "success": False, "error": "page not found" } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="extract_links", url="https://example.com") assert result.success is False assert result.error is not None assert "page not found" in result.error def test_execute_extract_links_extract_fail(self): """Extract links when link extraction fails.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.fetch.return_value = {"success": True, "content": ""} mock_selenium.extract_links.return_value = { "success": False, "error": "extraction failed" } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="extract_links") assert result.success is False def test_execute_extract_links_with_title(self): """Extract links with link that has title.""" tool = 
WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.extract_links.return_value = { "success": True, "url": "http://example.com", "count": 1, "links": [{"url": "http://example.com/page", "text": "link", "title": "Page Title"}] } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="extract_links") assert result.success is True assert "Page Title" in result.output def test_execute_js_fail(self): """Execute JS when JS fails.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.execute_js.return_value = { "success": False, "error": "js error" } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="execute_js", js_code="throw Error()") assert result.success is False def test_execute_js_no_return_value(self): """Execute JS with no return value.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.execute_js.return_value = { "success": True, "url": "http://example.com", "return_value": None } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="execute_js", js_code="document.title = 'test'") assert result.success is True assert "no return value" in result.output def test_execute_crawl_fail(self): """Crawl action when crawl fails.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.crawl.return_value = { "success": False, "error": "crawl failed", "pages": [] } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="crawl", url="http://example.com") assert result.success is False def test_execute_crawl_with_content_truncation(self): """Crawl result with content exceeding max_content_length.""" tool = WebSearchTool(provider=MockProvider(), max_content_length=10) mock_selenium = MagicMock() long_content = "x" * 100 mock_selenium.crawl.return_value = { "success": True, 
"total_pages": 1, "pages": [{"url": "http://example.com", "title": "T", "depth": 0, "content": long_content, "links_found": 0}], "error": None, } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="crawl", url="http://example.com") assert result.success is True assert "truncated" in result.output def test_execute_crawl_with_error_warning(self): """Crawl result with partial error.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.crawl.return_value = { "success": True, "total_pages": 1, "pages": [{"url": "http://example.com", "title": "T", "depth": 0, "content": "content", "links_found": 0}], "error": "some pages failed", } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="crawl", url="http://example.com") assert result.success is True assert "warning" in result.output.lower() or "some pages failed" in result.output def test_execute_get_content_fail(self): """Get content when it fails.""" tool = WebSearchTool(provider=MockProvider()) mock_selenium = MagicMock() mock_selenium.get_page_content.return_value = { "success": False, "error": "driver not ready" } tool._selenium_fetcher = mock_selenium tool._use_selenium = True result = tool.execute(action="get_content") assert result.success is False def test_execute_search_timeout_error(self): """Search with TimeoutError.""" tool = WebSearchTool(provider=MockProvider()) tool._provider = MagicMock() tool._provider.search.side_effect = TimeoutError("timed out") result = tool.execute(query="test") assert result.success is False assert result.error is not None assert "timed out" in result.error.lower() def test_execute_search_urlerror(self): """Search with URLError.""" import urllib.error tool = WebSearchTool(provider=MockProvider()) tool._provider = MagicMock() tool._provider.search.side_effect = urllib.error.URLError("network error") result = tool.execute(query="test") assert result.success is 
False def test_execute_search_with_fetched_title(self): """Search result gets title from fetched content.""" tool = WebSearchTool(provider=MockProvider(results=[ {"title": "", "url": "https://example.com/1", "snippet": "Snippet"}, ]), fetch_content=True) with patch.object(tool, "_fetch_page_content", return_value={ "title": "Fetched Title", "content": "Page content" }): result = tool.execute(query="test") assert result.success is True