Spaces:
Sleeping
Sleeping
| """Tests for src/tools/web_search.py""" | |
| import email.message | |
| import json | |
| import urllib.error | |
| from unittest.mock import MagicMock, patch | |
| import pytest | |
| from tools.web_search import ( | |
| DuckDuckGoProvider, | |
| SearchProvider, | |
| SerperProvider, | |
| SimpleHTMLParser, | |
| TavilyProvider, | |
| URLFetcher, | |
| WebSearchTool, | |
| _create_web_search_tool, | |
| ) | |
| # ═══════════════════════════════════════════════════════════════ | |
| # Helpers | |
| # ═══════════════════════════════════════════════════════════════ | |
class MockProvider(SearchProvider):
    """Search provider stub that serves a fixed list of results."""

    def __init__(self, results: list[dict[str, str]] | None = None):
        # Any falsy argument (None or an empty list) selects the two defaults.
        if results:
            self._results = results
        else:
            self._results = [
                {
                    "title": f"Result {n}",
                    "url": f"https://example.com/{n}",
                    "snippet": f"Snippet {n}",
                }
                for n in (1, 2)
            ]

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        """Return at most ``max_results`` canned entries; ``query`` is ignored."""
        head = self._results[:max_results]
        return head
class EmptyProvider(SearchProvider):
    """Search provider stub that never finds anything."""

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        """Return a fresh empty list regardless of the arguments."""
        no_hits: list[dict[str, str]] = []
        return no_hits
| # ═══════════════════════════════════════════════════════════════ | |
| # SimpleHTMLParser | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestSimpleHTMLParser:
    """Checks for ``SimpleHTMLParser.html_to_text`` on small HTML snippets."""

    def test_empty_input(self):
        """An empty document converts to an empty string."""
        result = SimpleHTMLParser.html_to_text("")
        assert result == ""

    def test_plain_text(self):
        """Text without markup passes through."""
        result = SimpleHTMLParser.html_to_text("Hello World")
        assert "Hello World" in result

    def test_strips_tags(self):
        """Inline and block tags are removed while their text is kept."""
        html = "<p>Hello <b>World</b></p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Hello" in result
        assert "World" in result
        assert "<" not in result
        assert ">" not in result

    def test_removes_script(self):
        """The body of a <script> element is dropped."""
        html = "<html><script>alert('xss')</script><p>Safe text</p></html>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "alert" not in result
        assert "Safe text" in result

    def test_removes_style(self):
        """The body of a <style> element is dropped."""
        html = "<html><style>.class { color: red; }</style><p>Visible</p></html>"
        result = SimpleHTMLParser.html_to_text(html)
        assert ".class" not in result
        assert "Visible" in result

    def test_removes_comments(self):
        """HTML comments do not leak into the text."""
        html = "<!-- This is a comment -->Hello"
        result = SimpleHTMLParser.html_to_text(html)
        assert "comment" not in result
        assert "Hello" in result

    def test_handles_html_entities(self):
        """Named entities are decoded to their literal characters."""
        # The input must contain the *encoded* ampersand; a bare "&" would make
        # the two assertions below contradict each other.
        html = "Fish &amp; Chips"
        result = SimpleHTMLParser.html_to_text(html)
        assert "&amp;" not in result
        assert "Fish & Chips" in result

    def test_max_length_truncation(self):
        """Output longer than ``max_length`` carries a truncation marker."""
        html = "x" * 10000
        result = SimpleHTMLParser.html_to_text(html, max_length=100)
        assert "(content truncated)" in result

    def test_block_tags_add_newlines(self):
        """Text from consecutive paragraphs is preserved."""
        html = "<p>Para 1</p><p>Para 2</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Para 1" in result
        assert "Para 2" in result

    def test_heading_tags(self):
        """Heading text is kept alongside paragraph text."""
        html = "<h1>Title</h1><h2>Subtitle</h2><p>Content</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Title" in result
        assert "Subtitle" in result
        assert "Content" in result

    def test_nested_tags(self):
        """Text nested several elements deep is still extracted."""
        html = "<div><article><p>Article text</p></article></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Article text" in result

    def test_no_remaining_tags(self):
        """Tags with attributes leave no angle brackets behind."""
        html = "<div class='foo'><span id='bar'>text</span></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "<" not in result
        assert ">" not in result

    def test_removes_nav_header_footer(self):
        """Navigation and footer text is excluded; main content stays."""
        html = "<nav>Navigation</nav><main>Content</main><footer>Footer</footer>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Navigation" not in result
        assert "Content" in result
        assert "Footer" not in result

    def test_list_items(self):
        """Each list item's text appears in the output."""
        html = "<ul><li>Item 1</li><li>Item 2</li></ul>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Item 1" in result
        assert "Item 2" in result

    def test_case_insensitive_removes_script(self):
        """Upper-case tag names are handled like lower-case ones."""
        html = "<SCRIPT>bad code</SCRIPT><P>Good text</P>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "bad code" not in result
        assert "Good text" in result

    def test_br_tags(self):
        """Text on both sides of <br> / <br/> is preserved."""
        html = "Line 1<br>Line 2<br/>Line 3"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Line 1" in result
        assert "Line 2" in result
        assert "Line 3" in result

    def test_multiple_spaces_collapsed(self):
        """A run of several spaces must not survive verbatim in the output."""
        # The input needs real multi-space runs; with single spaces the final
        # assertion would reject the very output the parser should produce.
        html = "<p>word1    word2    word3</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "word1" in result
        assert "word2" in result
        assert "word3" in result
        assert "word1    word2" not in result
| # ═══════════════════════════════════════════════════════════════ | |
| # URLFetcher | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestURLFetcher:
    """Construction and ``fetch`` behaviour of ``URLFetcher``."""

    @staticmethod
    def _canned_response(content_type, body=None):
        """Build a context-manager mock whose Content-Type header is fixed.

        ``read()`` is only configured when ``body`` is supplied.
        """
        response = MagicMock()
        response.__enter__ = lambda s: s
        response.__exit__ = MagicMock(return_value=False)
        response.headers.get = lambda key, default="": (
            content_type if "Content-Type" in key else default
        )
        if body is not None:
            response.read.return_value = body
        return response

    def test_init_defaults(self):
        default_fetcher = URLFetcher()
        assert default_fetcher._timeout == 15
        assert default_fetcher._max_content_length == 500_000

    def test_init_custom(self):
        custom_fetcher = URLFetcher(timeout=30, max_content_length=100_000)
        assert custom_fetcher._timeout == 30
        assert custom_fetcher._max_content_length == 100_000

    def test_fetch_fails_for_nonexistent_host(self):
        # No mocking here: this goes through the real urlopen call.
        target = "http://this-should-not-exist-xyz.invalid/"
        outcome = URLFetcher(timeout=2).fetch(target)
        assert isinstance(outcome, dict)
        assert outcome["success"] is False
        assert outcome["url"] == "http://this-should-not-exist-xyz.invalid/"

    def test_fetch_result_keys(self):
        outcome = URLFetcher(timeout=1).fetch("http://nope.invalid")
        expected_keys = ("success", "url", "title", "content", "error")
        for name in expected_keys:
            assert name in outcome

    def test_fetch_success_mock(self):
        page = b"<html><title>Test Page</title><body><p>Hello World</p></body></html>"
        response = self._canned_response("text/html; charset=utf-8", page)
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", return_value=response):
            outcome = fetcher.fetch("https://example.com")
        assert outcome["success"] is True
        assert "Hello World" in outcome["content"]
        assert outcome["title"] == "Test Page"

    def test_fetch_unsupported_content_type(self):
        response = self._canned_response("application/pdf")
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", return_value=response):
            outcome = fetcher.fetch("https://example.com/file.pdf")
        assert outcome["success"] is False
        assert "Unsupported content type" in outcome["error"]

    def test_fetch_http_error(self):
        fetcher = URLFetcher()
        not_found = urllib.error.HTTPError(
            url="http://example.com",
            code=404,
            msg="Not Found",
            hdrs=email.message.Message(),
            fp=None,
        )
        with patch("urllib.request.urlopen", side_effect=not_found):
            outcome = fetcher.fetch("http://example.com")
        assert outcome["success"] is False
        assert "HTTP Error 404" in outcome["error"]

    def test_fetch_url_error(self):
        fetcher = URLFetcher()
        dns_failure = urllib.error.URLError("Name resolution failed")
        with patch("urllib.request.urlopen", side_effect=dns_failure):
            outcome = fetcher.fetch("http://doesnotexist.invalid")
        assert outcome["success"] is False
        assert "URL Error" in outcome["error"]

    def test_fetch_timeout(self):
        fetcher = URLFetcher(timeout=1)
        with patch("urllib.request.urlopen", side_effect=TimeoutError("timed out")):
            outcome = fetcher.fetch("http://example.com")
        assert outcome["success"] is False
        assert "timed out" in outcome["error"].lower()

    def test_fetch_text_plain_content_type(self):
        response = self._canned_response("text/plain", b"Hello plain text")
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", return_value=response):
            outcome = fetcher.fetch("https://example.com/text.txt")
        assert outcome["success"] is True
        assert "Hello plain text" in outcome["content"]
| # ═══════════════════════════════════════════════════════════════ | |
| # DuckDuckGoProvider | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestDuckDuckGoProvider:
    """``DuckDuckGoProvider`` against canned JSON payloads."""

    @staticmethod
    def _json_response(payload):
        """Wrap ``payload`` in a context-manager mock whose read() yields JSON bytes."""
        response = MagicMock()
        response.__enter__ = lambda s: s
        response.__exit__ = MagicMock(return_value=False)
        response.read.return_value = json.dumps(payload).encode("utf-8")
        return response

    def test_init_defaults(self):
        assert DuckDuckGoProvider()._timeout == 10

    def test_init_custom(self):
        assert DuckDuckGoProvider(timeout=30)._timeout == 30

    def test_search_with_abstract(self):
        provider = DuckDuckGoProvider()
        payload = {
            "Heading": "Python",
            "Abstract": "A programming language",
            "AbstractURL": "https://python.org",
            "RelatedTopics": [],
            "Results": [],
        }
        with patch("urllib.request.urlopen", return_value=self._json_response(payload)):
            hits = provider.search("python", max_results=5)
        assert len(hits) == 1
        first = hits[0]
        assert first["title"] == "Python"
        assert first["snippet"] == "A programming language"

    def test_search_with_related_topics(self):
        provider = DuckDuckGoProvider()
        topics = [
            {"Text": "Result 1", "FirstURL": "https://example.com/1"},
            {"Text": "Result 2", "FirstURL": "https://example.com/2"},
        ]
        payload = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": topics,
            "Results": [],
        }
        with patch("urllib.request.urlopen", return_value=self._json_response(payload)):
            hits = provider.search("test", max_results=5)
        assert len(hits) == 2

    def test_search_network_error_returns_empty(self):
        provider = DuckDuckGoProvider()
        failure = urllib.error.URLError("failed")
        with patch("urllib.request.urlopen", side_effect=failure):
            hits = provider.search("test")
        assert hits == []

    def test_search_excludes_non_dict_topics(self):
        provider = DuckDuckGoProvider()
        # A bare string and an all-empty topic sit next to one usable entry.
        topics = [
            "not a dict",
            {"Text": "", "FirstURL": ""},
            {"Text": "Valid topic", "FirstURL": "https://valid.com"},
        ]
        payload = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": topics,
            "Results": [],
        }
        with patch("urllib.request.urlopen", return_value=self._json_response(payload)):
            hits = provider.search("test", max_results=10)
        assert any(hit["snippet"] == "Valid topic" for hit in hits)

    def test_search_respects_max_results(self):
        provider = DuckDuckGoProvider()
        topics = [
            {"Text": f"Topic {i}", "FirstURL": f"https://ex.com/{i}"} for i in range(10)
        ]
        payload = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": topics,
            "Results": [],
        }
        with patch("urllib.request.urlopen", return_value=self._json_response(payload)):
            hits = provider.search("test", max_results=3)
        assert len(hits) <= 3
| # ═══════════════════════════════════════════════════════════════ | |
| # SerperProvider | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestSerperProvider:
    """``SerperProvider`` against canned JSON payloads."""

    @staticmethod
    def _json_response(payload):
        """Wrap ``payload`` in a context-manager mock whose read() yields JSON bytes."""
        response = MagicMock()
        response.__enter__ = lambda s: s
        response.__exit__ = MagicMock(return_value=False)
        response.read.return_value = json.dumps(payload).encode("utf-8")
        return response

    def test_init(self):
        provider = SerperProvider(api_key="test-key")
        assert provider._api_key == "test-key"
        assert provider._timeout == 10

    def test_search_organic(self):
        provider = SerperProvider(api_key="test-key")
        organic = [
            {"title": "Result 1", "link": "https://example.com/1", "snippet": "Snippet 1"},
            {"title": "Result 2", "link": "https://example.com/2", "snippet": "Snippet 2"},
        ]
        response = self._json_response({"organic": organic})
        with patch("urllib.request.urlopen", return_value=response):
            hits = provider.search("test query", max_results=5)
        assert len(hits) == 2
        assert hits[0]["title"] == "Result 1"

    def test_search_with_answer_box(self):
        provider = SerperProvider(api_key="test-key")
        payload = {
            "organic": [
                {"title": "Result", "link": "https://example.com", "snippet": "info"},
            ],
            "answerBox": {
                "title": "Direct Answer",
                "link": "https://answer.com",
                "answer": "The answer is 42",
            },
        }
        response = self._json_response(payload)
        with patch("urllib.request.urlopen", return_value=response):
            hits = provider.search("query", max_results=5)
        # The answer box is expected ahead of the organic results.
        assert hits[0]["title"] == "Direct Answer"

    def test_search_network_error_returns_empty(self):
        provider = SerperProvider(api_key="key")
        failure = urllib.error.URLError("failed")
        with patch("urllib.request.urlopen", side_effect=failure):
            hits = provider.search("test")
        assert hits == []
| # ═══════════════════════════════════════════════════════════════ | |
| # TavilyProvider | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestTavilyProvider:
    """``TavilyProvider`` against canned JSON payloads."""

    @staticmethod
    def _json_response(payload):
        """Wrap ``payload`` in a context-manager mock whose read() yields JSON bytes."""
        response = MagicMock()
        response.__enter__ = lambda s: s
        response.__exit__ = MagicMock(return_value=False)
        response.read.return_value = json.dumps(payload).encode("utf-8")
        return response

    def test_init(self):
        provider = TavilyProvider(api_key="tavily-key")
        assert provider._api_key == "tavily-key"
        assert provider._include_answer is True

    def test_search_with_answer(self):
        provider = TavilyProvider(api_key="key")
        pages = [
            {"title": "Page 1", "url": "https://example.com/1", "content": "Content 1"},
            {"title": "Page 2", "url": "https://example.com/2", "content": "Content 2"},
        ]
        response = self._json_response({"answer": "The answer is 42", "results": pages})
        with patch("urllib.request.urlopen", return_value=response):
            hits = provider.search("test", max_results=5)
        # One synthetic answer entry followed by the two pages.
        assert hits[0]["title"] == "Tavily AI Answer"
        assert len(hits) == 3

    def test_search_no_answer(self):
        provider = TavilyProvider(api_key="key")
        pages = [{"title": "Page", "url": "https://example.com", "content": "Content"}]
        response = self._json_response({"results": pages})
        with patch("urllib.request.urlopen", return_value=response):
            hits = provider.search("test", max_results=5)
        assert len(hits) == 1
        assert hits[0]["title"] == "Page"

    def test_search_network_error_returns_empty(self):
        provider = TavilyProvider(api_key="key")
        failure = urllib.error.URLError("failed")
        with patch("urllib.request.urlopen", side_effect=failure):
            hits = provider.search("test")
        assert hits == []
| # ═══════════════════════════════════════════════════════════════ | |
| # WebSearchTool | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestWebSearchToolInit:
    """Construction and static metadata of ``WebSearchTool``."""

    def test_init_default_provider(self):
        assert isinstance(WebSearchTool()._provider, DuckDuckGoProvider)

    def test_init_custom_provider(self):
        injected = MockProvider()
        assert WebSearchTool(provider=injected)._provider is injected

    def test_name_property(self):
        assert WebSearchTool().name == "web_search"

    def test_description_property(self):
        text = WebSearchTool().description
        assert isinstance(text, str)
        assert len(text) > 0

    def test_parameters_schema(self):
        schema = WebSearchTool().parameters_schema
        assert schema["type"] == "object"
        properties = schema["properties"]
        assert "query" in properties
        assert "url" in properties
        assert "action" in properties

    def test_parameters_schema_with_selenium(self):
        tool = WebSearchTool()
        # Flip the flag directly before reading the schema.
        tool._use_selenium = True
        properties = tool.parameters_schema["properties"]
        assert "selector" in properties
        assert "js_code" in properties
class TestWebSearchToolExecute:
    """``WebSearchTool.execute`` dispatch with a mock provider and no Selenium."""

    @staticmethod
    def _tool():
        """Return a tool backed by the canned ``MockProvider``."""
        return WebSearchTool(provider=MockProvider())

    @staticmethod
    def _assert_selenium_required(outcome):
        """Check that ``outcome`` failed with an error mentioning Selenium."""
        assert outcome.success is False
        assert outcome.error is not None
        assert "Selenium" in outcome.error

    def test_execute_search_with_query(self):
        outcome = self._tool().execute(query="python programming")
        assert outcome.success is True
        assert "Result 1" in outcome.output

    def test_execute_no_action_no_query_no_url(self):
        outcome = self._tool().execute()
        assert outcome.success is False
        assert outcome.error

    def test_execute_search_empty_results(self):
        tool = WebSearchTool(provider=EmptyProvider())
        outcome = tool.execute(query="something obscure")
        assert outcome.success is True
        assert "No results found" in outcome.output

    def test_execute_fetch_with_url(self):
        tool = self._tool()
        fetched = dict(
            success=True,
            url="https://example.com",
            title="Test",
            content="Test content here",
        )
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(url="https://example.com")
        assert outcome.success is True
        assert "Test content here" in outcome.output

    def test_execute_fetch_action_no_url(self):
        outcome = self._tool().execute(action="fetch")
        assert outcome.success is False
        assert outcome.error

    def test_execute_fetch_action_with_url(self):
        tool = self._tool()
        fetched = dict(
            success=True,
            url="https://example.com",
            title="Page",
            content="Page content",
        )
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(action="fetch", url="https://example.com")
        assert outcome.success is True

    def test_execute_fetch_failure(self):
        tool = self._tool()
        fetched = dict(
            success=False,
            url="https://example.com",
            title="",
            content="",
            error="Connection refused",
        )
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(action="fetch", url="https://example.com")
        assert outcome.success is False

    def test_execute_click_without_selenium(self):
        outcome = self._tool().execute(action="click", selector=".button")
        self._assert_selenium_required(outcome)

    def test_execute_fill_without_selenium(self):
        outcome = self._tool().execute(action="fill", selector="input", value="test")
        self._assert_selenium_required(outcome)

    def test_execute_extract_links_without_selenium(self):
        outcome = self._tool().execute(action="extract_links")
        self._assert_selenium_required(outcome)

    def test_execute_execute_js_without_selenium(self):
        outcome = self._tool().execute(action="execute_js", js_code="return 1")
        self._assert_selenium_required(outcome)

    def test_execute_execute_js_no_code(self):
        outcome = self._tool().execute(action="execute_js")
        assert outcome.success is False

    def test_execute_crawl_without_url(self):
        outcome = self._tool().execute(action="crawl")
        assert outcome.success is False

    def test_execute_crawl_without_selenium(self):
        outcome = self._tool().execute(action="crawl", url="https://example.com")
        self._assert_selenium_required(outcome)

    def test_execute_get_content_without_selenium(self):
        outcome = self._tool().execute(action="get_content")
        assert outcome.success is False

    def test_execute_search_action_explicit(self):
        outcome = self._tool().execute(action="search", query="test")
        assert outcome.success is True

    def test_execute_search_action_no_query(self):
        outcome = self._tool().execute(action="search")
        assert outcome.success is False

    def test_execute_with_fetch_content(self):
        tool = WebSearchTool(provider=MockProvider(), fetch_content=True)
        fetched = dict(
            success=True,
            url="https://example.com/1",
            title="Example",
            content="Page content here for fetching",
        )
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(query="test query")
        assert outcome.success is True

    def test_execute_max_results_clipped(self):
        """An oversized ``max_results`` still succeeds.

        The implementation is described as capping the value at 10; only the
        success flag is checked here.
        """
        outcome = self._tool().execute(query="test", max_results=100)
        assert outcome.success is True
class TestWebSearchToolFormatting:
    """Text rendering done by ``WebSearchTool._format_search_results``."""

    def test_format_search_results_empty(self):
        rendered = WebSearchTool(provider=MockProvider())._format_search_results([])
        assert "No results found" in rendered

    def test_format_search_results_with_results(self):
        tool = WebSearchTool(provider=MockProvider())
        entry = {"title": "Test Title", "url": "https://example.com", "snippet": "A snippet"}
        rendered = tool._format_search_results([entry])
        # Every field of the entry must show up somewhere in the rendering.
        for fragment in ("Test Title", "https://example.com", "A snippet"):
            assert fragment in rendered

    def test_format_search_results_with_content(self):
        tool = WebSearchTool(provider=MockProvider(), max_content_length=200)
        entry = {
            "title": "Title",
            "url": "https://example.com",
            "snippet": "snap",
            "content": "Page content",
        }
        rendered = tool._format_search_results([entry], with_content=True)
        assert "Page content" in rendered

    def test_format_search_results_content_truncated(self):
        # 100 characters of content against a 5-character limit.
        tool = WebSearchTool(provider=MockProvider(), max_content_length=5)
        entry = {"title": "T", "url": "https://x.com", "snippet": "", "content": "A" * 100}
        rendered = tool._format_search_results([entry], with_content=True)
        assert "truncated" in rendered.lower()
class TestWebSearchToolSeleniumCheck:
    """Selenium guard and lifecycle hooks on a tool built without Selenium."""

    def test_require_selenium_raises_without_selenium(self):
        tool = WebSearchTool(provider=MockProvider())
        assert tool._selenium_fetcher is None
        with pytest.raises(RuntimeError, match="Selenium"):
            tool._require_selenium("click")

    def test_context_manager(self):
        tool = WebSearchTool(provider=MockProvider())
        with tool as entered:
            assert entered is tool

    def test_close_without_selenium(self):
        # Passing means close() returned without raising.
        WebSearchTool(provider=MockProvider()).close()
class TestCreateWebSearchToolFactory:
    """Provider selection performed by ``_create_web_search_tool``."""

    def test_default_provider(self):
        built = _create_web_search_tool()
        assert isinstance(built._provider, DuckDuckGoProvider)

    def test_serper_provider_with_key(self):
        built = _create_web_search_tool(provider="serper", api_key="my-key")
        assert isinstance(built._provider, SerperProvider)

    def test_serper_provider_no_key_falls_back_to_ddg(self):
        built = _create_web_search_tool(provider="serper")
        assert isinstance(built._provider, DuckDuckGoProvider)

    def test_tavily_provider_with_key(self):
        built = _create_web_search_tool(provider="tavily", api_key="my-key")
        assert isinstance(built._provider, TavilyProvider)

    def test_tavily_provider_no_key_falls_back_to_ddg(self):
        built = _create_web_search_tool(provider="tavily")
        assert isinstance(built._provider, DuckDuckGoProvider)

    def test_duckduckgo_provider_explicit(self):
        built = _create_web_search_tool(provider="duckduckgo")
        assert isinstance(built._provider, DuckDuckGoProvider)

    def test_ddg_alias(self):
        built = _create_web_search_tool(provider="ddg")
        assert isinstance(built._provider, DuckDuckGoProvider)

    def test_unknown_provider_falls_back_to_ddg(self):
        built = _create_web_search_tool(provider="unknown_xyz")
        assert isinstance(built._provider, DuckDuckGoProvider)

    def test_serper_provider_serper_api_key_param(self):
        built = _create_web_search_tool(provider="serper", serper_api_key="key")
        assert isinstance(built._provider, SerperProvider)

    def test_tavily_provider_tavily_api_key_param(self):
        built = _create_web_search_tool(provider="tavily", tavily_api_key="key")
        assert isinstance(built._provider, TavilyProvider)
| # ═══════════════════════════════════════════════════════════════ | |
| # SeleniumFetcher — initialization only (no real browser) | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestSeleniumFetcherInit:
    """``SeleniumFetcher`` construction and teardown; no real browser is started."""

    @staticmethod
    def _fetcher_cls():
        """Import ``SeleniumFetcher`` at call time and return the class."""
        from tools.web_search import SeleniumFetcher

        return SeleniumFetcher

    def test_init_defaults(self):
        fetcher = self._fetcher_cls()()
        assert fetcher._headless is True
        assert fetcher._browser == "chrome"
        assert fetcher._wait_timeout == 15
        assert fetcher._page_load_timeout == 30
        assert fetcher._scroll_to_bottom is False
        assert fetcher._driver is None

    def test_init_custom(self):
        options = dict(
            headless=False,
            browser="firefox",
            wait_timeout=30,
            scroll_to_bottom=True,
        )
        fetcher = self._fetcher_cls()(**options)
        assert fetcher._headless is False
        assert fetcher._browser == "firefox"
        assert fetcher._wait_timeout == 30
        assert fetcher._scroll_to_bottom is True

    def test_ensure_dependencies_import_error(self):
        # Build the fetcher first: once __import__ is patched every import fails.
        fetcher = self._fetcher_cls()()
        with patch("builtins.__import__", side_effect=ImportError("No selenium")):
            with pytest.raises(ImportError):
                fetcher._ensure_dependencies()

    def test_ensure_dependencies_with_selenium(self):
        """When selenium imports cleanly, ``_ensure_dependencies`` must not raise."""
        fetcher = self._fetcher_cls()()
        try:
            import selenium  # noqa: F401

            fetcher._ensure_dependencies()
        except ImportError:
            pytest.skip("selenium not installed")

    def test_close_no_driver(self):
        fetcher = self._fetcher_cls()()
        fetcher.close()
        assert fetcher._driver is None

    def test_close_with_mock_driver(self):
        fetcher = self._fetcher_cls()()
        fake_driver = MagicMock()
        fetcher._driver = fake_driver
        fetcher.close()
        fake_driver.quit.assert_called_once()
        assert fetcher._driver is None

    def test_context_manager(self):
        fetcher = self._fetcher_cls()()
        with fetcher as entered:
            assert entered is fetcher

    def test_create_driver_invalid_browser(self):
        fetcher_cls = self._fetcher_cls()
        try:
            import selenium  # noqa: F401
        except ImportError:
            pytest.skip("selenium not installed")
        fetcher = fetcher_cls(browser="ie")
        with pytest.raises(ValueError, match="Unsupported browser"):
            fetcher._create_driver()

    def test_get_driver_creates_if_none(self):
        fetcher = self._fetcher_cls()()
        with patch.object(fetcher, "_create_driver", return_value=MagicMock()):
            obtained = fetcher._get_driver()
        assert obtained is not None
| # ═══════════════════════════════════════════════════════════════ | |
| # URLFetcher — charset detection | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestURLFetcherCharset:
    """``URLFetcher.fetch`` with varied charsets, bad input and page layouts."""

    @staticmethod
    def _html_response(content_type, body):
        """Build a context-manager mock serving ``body`` under ``content_type``."""
        response = MagicMock()
        response.__enter__ = lambda s: s
        response.__exit__ = MagicMock(return_value=False)
        response.headers.get = lambda key, default="": (
            content_type if "Content-Type" in key else default
        )
        response.read.return_value = body
        return response

    def test_custom_charset_in_content_type(self):
        """A latin-1 charset declared in Content-Type still yields success."""
        body = "Hello World".encode("latin-1")
        response = self._html_response("text/html; charset=latin-1", body)
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", return_value=response):
            outcome = fetcher.fetch("https://example.com")
        assert outcome["success"] is True

    def test_unicode_decode_error_fallback(self):
        """Bytes that clash with the declared charset still produce a result dict."""
        response = self._html_response("text/html; charset=utf-16", b"\xff\xfe Hello World")
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", return_value=response):
            outcome = fetcher.fetch("https://example.com")
        # Only the presence of the key is checked, not its value.
        assert "success" in outcome

    def test_value_error_in_fetch(self):
        """A ValueError raised by urlopen is reported as a failed fetch."""
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", side_effect=ValueError("bad url")):
            outcome = fetcher.fetch("not-a-url")
        assert outcome["success"] is False

    def test_main_content_extraction(self):
        """Text inside <main> appears in the fetched content."""
        page_head = (
            b"\n<html>\n<head><title>Test</title></head>\n<body>\n"
            b"<nav>Navigation</nav>\n<main>\n"
        )
        page_tail = b"\n</main>\n</body>\n</html>\n"
        body = page_head + (b"Main content " * 50) + page_tail
        response = self._html_response("text/html; charset=utf-8", body)
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", return_value=response):
            outcome = fetcher.fetch("https://example.com")
        assert outcome["success"] is True
        assert "Main content" in outcome["content"]
| # ═══════════════════════════════════════════════════════════════ | |
| # WebSearchTool — with mock selenium fetcher | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestWebSearchToolWithSelenium:
    """Tests for WebSearchTool when given a mocked selenium fetcher.

    Every test obtains its fetcher from ``_make_mock_selenium_fetcher`` so the
    mock is always constrained to the ``SeleniumFetcher`` interface.
    """

    def _make_mock_selenium_fetcher(self):
        """Return a ``MagicMock`` spec'd against ``SeleniumFetcher``."""
        # Imported at call time rather than at module level.
        from tools.web_search import SeleniumFetcher

        return MagicMock(spec=SeleniumFetcher)

    def test_init_with_selenium_fetcher(self):
        """Passing a fetcher enables selenium mode and stores the fetcher."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        assert tool._use_selenium is True
        assert tool._selenium_fetcher is mock_fetcher

    def test_execute_fetch_with_selenium(self):
        """Fetching a URL surfaces the content returned by the fetcher."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fetch.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "Content via Selenium",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(url="https://example.com")
        assert result.success is True
        assert "Content via Selenium" in result.output

    def test_execute_click_with_selenium(self):
        """A click succeeds when the fetcher reports success."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.click_element.return_value = {
            "success": True,
            "url": "https://example.com/next",
            "title": "Next Page",
            "clicked_text": "Submit",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="click", selector=".submit-btn")
        assert result.success is True

    def test_execute_click_failed(self):
        """A click fails when the fetcher reports failure."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.click_element.return_value = {
            "success": False,
            "error": "Element not found",
            "url": "",
            "title": "",
            "clicked_text": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="click", selector=".nonexistent")
        assert result.success is False

    def test_execute_fill_with_selenium(self):
        """A fill succeeds when the fetcher reports success."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fill_input.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Page",
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="fill", selector="input[name=q]", value="test")
        assert result.success is True

    def test_execute_fill_no_selector_fails(self):
        """A fill without a selector is rejected."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="fill", value="test")
        assert result.success is False

    def test_execute_extract_links_with_selenium(self):
        """Link extraction succeeds when fetch and extraction both succeed."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fetch.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "",
        }
        mock_fetcher.extract_links.return_value = {
            "success": True,
            "url": "https://example.com",
            "links": [
                {"url": "https://example.com/1", "text": "Link 1", "title": ""},
            ],
            "count": 1,
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="extract_links", url="https://example.com")
        assert result.success is True

    def test_execute_execute_js_with_selenium(self):
        """JavaScript execution succeeds when the fetcher reports success."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.execute_js.return_value = {
            "success": True,
            "url": "https://example.com",
            "return_value": "document.title",
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="execute_js", js_code="return document.title")
        assert result.success is True

    def test_execute_get_content_with_selenium(self):
        """``get_content`` succeeds when the fetcher reports success."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.get_page_content.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "Page content here",
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(action="get_content")
        assert result.success is True

    def test_execute_crawl_with_selenium(self):
        """A crawl succeeds when the fetcher reports success."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.crawl.return_value = {
            "success": True,
            "pages": [{"url": "https://example.com", "title": "Home", "depth": 0}],
            "total_pages": 1,
            "error": "",
        }
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        result = tool.execute(
            action="crawl", url="https://example.com", max_depth=1, max_pages=5
        )
        assert result.success is True

    def test_execute_search_with_fetch_content_selenium(self):
        """A search with ``fetch_content=True`` succeeds with a mocked fetcher."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        mock_fetcher.fetch.return_value = {
            "success": True,
            "url": "https://example.com",
            "title": "Example",
            "content": "Content here",
        }
        tool = WebSearchTool(
            provider=MockProvider(),
            selenium_fetcher=mock_fetcher,
            fetch_content=True,
        )
        result = tool.execute(query="test")
        assert result.success is True

    def test_description_with_selenium(self):
        """The tool description mentions Selenium when a fetcher is supplied."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        desc = tool.description
        assert "Selenium" in desc

    def test_close_with_selenium(self):
        """Closing the tool closes the fetcher exactly once."""
        mock_fetcher = self._make_mock_selenium_fetcher()
        tool = WebSearchTool(selenium_fetcher=mock_fetcher)
        tool.close()
        mock_fetcher.close.assert_called_once()
| # ═══════════════════════════════════════════════════════════════ | |
| # WebSearchTool — callback integration | |
| # ═══════════════════════════════════════════════════════════════ | |
class TestWebSearchToolCallbacks:
    """Check how ``WebSearchTool`` interacts with callback managers."""

    def test_emit_tool_start_with_callback_manager(self):
        """A successful search notifies the handler of tool start and end."""
        from callbacks.base import BaseCallbackHandler
        from callbacks.manager import CallbackManager

        class RecordingCB(BaseCallbackHandler):
            """Handler that records start/end notifications in order."""

            def __init__(self):
                self.calls = []

            def on_tool_start(self, *, run_id, tool_name, **kwargs):
                self.calls.append(("start", tool_name))

            def on_tool_end(self, *, run_id, tool_name, **kwargs):
                self.calls.append(("end", tool_name))

        cb = RecordingCB()
        manager = CallbackManager(handlers=[cb])
        tool = WebSearchTool(provider=MockProvider(), callback_manager=manager)
        result = tool.execute(query="test")
        assert result.success is True
        assert any(c[0] == "start" for c in cb.calls)
        assert any(c[0] == "end" for c in cb.calls)

    def test_get_callback_manager_from_context(self):
        """Without an explicit manager the lookup yields None or a manager."""
        tool = WebSearchTool(provider=MockProvider())
        cb = tool._get_callback_manager()
        # May be None if not in a callback context.
        assert cb is None or hasattr(cb, "on_tool_start")

    def test_get_callback_manager_exception_returns_none(self, monkeypatch):
        """_get_callback_manager should return None on exception."""
        tool = WebSearchTool(provider=MockProvider())
        tool._callback_manager = None
        # Replace the context lookup with a callable that always raises.
        monkeypatch.setattr(
            "callbacks.context.get_callback_manager",
            MagicMock(side_effect=RuntimeError("error")),
        )
        cb = tool._get_callback_manager()
        assert cb is None

    def test_emit_tool_error_with_callback_manager(self):
        """A failing search reports a tool error to the registered handler."""
        from callbacks.base import BaseCallbackHandler
        from callbacks.manager import CallbackManager

        class RecordingCB(BaseCallbackHandler):
            """Handler that records the tool name of every error event."""

            def __init__(self):
                self.errors = []

            def on_tool_error(self, *, run_id, tool_name, **kwargs):
                self.errors.append(tool_name)

        cb_handler = RecordingCB()
        manager = CallbackManager(handlers=[cb_handler])
        tool = WebSearchTool(provider=MockProvider(), callback_manager=manager)
        # Trigger an error by making the provider's search raise.
        tool._provider = MagicMock()
        tool._provider.search.side_effect = TimeoutError("timed out")
        result = tool.execute(query="test")
        assert result.success is False
        assert len(cb_handler.errors) > 0
| # ═══════════════════════════════════════════════════════════════ | |
| # WebSearchTool.execute — action routing edge cases | |
| # ═══════════════════════════════════════════════════════════════ | |
| class TestWebSearchToolExecuteActionRouting: | |
| """Test execute() auto-detection and edge cases.""" | |
def test_auto_detect_click_from_selector(self):
    """Supplying only ``selector`` succeeds when ``click_element`` succeeds."""
    click_outcome = {
        "success": True,
        "url": "http://example.com",
        "title": "T",
        "content": "",
        "clicked_text": "Button",
    }
    fake_fetcher = MagicMock(**{"click_element.return_value": click_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    # Enable selenium mode by hand instead of going through the constructor.
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(selector="#btn")
    assert outcome.success is True
def test_auto_detect_execute_js_from_js_code(self):
    """Supplying only ``js_code`` succeeds when ``execute_js`` succeeds."""
    js_outcome = {
        "success": True,
        "url": "http://example.com",
        "return_value": 42,
    }
    fake_fetcher = MagicMock(**{"execute_js.return_value": js_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(js_code="return 42;")
    assert outcome.success is True
def test_click_action_without_selector_returns_error(self):
    """``action="click"`` with no selector fails with an error naming it."""
    outcome = WebSearchTool(provider=MockProvider()).execute(action="click")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "selector" in message
def test_fill_action_without_selector_returns_error(self):
    """``action="fill"`` with no selector fails with an error naming it."""
    outcome = WebSearchTool(provider=MockProvider()).execute(action="fill")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "selector" in message
def test_execute_js_action_without_js_code_returns_error(self):
    """``action="execute_js"`` with no code fails, mentioning ``js_code``."""
    outcome = WebSearchTool(provider=MockProvider()).execute(action="execute_js")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "js_code" in message
def test_crawl_action_without_url_returns_error(self):
    """``action="crawl"`` with no URL fails with an error mentioning ``url``."""
    outcome = WebSearchTool(provider=MockProvider()).execute(action="crawl")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "url" in message
def test_fetch_action_without_url_returns_error(self):
    """``action="fetch"`` with no URL fails with an error mentioning ``url``."""
    outcome = WebSearchTool(provider=MockProvider()).execute(action="fetch")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "url" in message
def test_no_action_no_query_url_selector_js_code_returns_error(self):
    """Calling ``execute()`` with no arguments fails with a "No action" error."""
    outcome = WebSearchTool(provider=MockProvider()).execute()
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "No action" in message
def test_fetch_url_with_wait_for_selector(self):
    """A URL fetch with ``wait_for_selector`` calls ``fetch_with_wait`` once."""
    from tools.web_search import SeleniumFetcher

    fake_fetcher = MagicMock(spec=SeleniumFetcher)
    fake_fetcher.fetch_with_wait.return_value = {
        "success": True,
        "title": "Test",
        "content": "content",
    }
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(
        url="https://example.com", wait_for_selector="#main"
    )
    assert outcome.success is True
    fake_fetcher.fetch_with_wait.assert_called_once()
def test_fetch_url_exception_returns_error(self):
    """An exception raised by the active fetcher yields a failed result."""
    tool = WebSearchTool(provider=MockProvider())
    # ``patch`` comes from the module-level import; no local re-import needed.
    with patch.object(tool, "_get_active_fetcher") as mock_fetcher:
        mock_fetcher.return_value.fetch.side_effect = RuntimeError(
            "connection failed"
        )
        result = tool.execute(url="https://example.com")
    assert result.success is False
def test_execute_fill_fail(self):
    """A failed ``fill_input`` surfaces its error text in the result."""
    fill_outcome = {"success": False, "error": "element not found"}
    fake_fetcher = MagicMock(**{"fill_input.return_value": fill_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="fill", selector="#input", value="test")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "element not found" in message
def test_execute_extract_links_fetch_fail(self):
    """``extract_links`` fails with the fetch error when the fetch fails."""
    fetch_outcome = {"success": False, "error": "page not found"}
    fake_fetcher = MagicMock(**{"fetch.return_value": fetch_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(
        action="extract_links", url="https://example.com"
    )
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "page not found" in message
def test_execute_extract_links_extract_fail(self):
    """``extract_links`` fails when the fetcher's link extraction fails."""
    fake_fetcher = MagicMock(
        **{
            "fetch.return_value": {"success": True, "content": ""},
            "extract_links.return_value": {
                "success": False,
                "error": "extraction failed",
            },
        }
    )
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="extract_links")
    assert outcome.success is False
def test_execute_extract_links_with_title(self):
    """A link's ``title`` appears in the output of ``extract_links``."""
    titled_link = {
        "url": "http://example.com/page",
        "text": "link",
        "title": "Page Title",
    }
    extraction = {
        "success": True,
        "url": "http://example.com",
        "count": 1,
        "links": [titled_link],
    }
    fake_fetcher = MagicMock(**{"extract_links.return_value": extraction})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="extract_links")
    assert outcome.success is True
    assert "Page Title" in outcome.output
def test_execute_js_fail(self):
    """``execute_js`` fails when the fetcher reports a script failure."""
    js_outcome = {"success": False, "error": "js error"}
    fake_fetcher = MagicMock(**{"execute_js.return_value": js_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="execute_js", js_code="throw Error()")
    assert outcome.success is False
def test_execute_js_no_return_value(self):
    """A ``None`` script result is reported as "no return value"."""
    js_outcome = {
        "success": True,
        "url": "http://example.com",
        "return_value": None,
    }
    fake_fetcher = MagicMock(**{"execute_js.return_value": js_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(
        action="execute_js", js_code="document.title = 'test'"
    )
    assert outcome.success is True
    assert "no return value" in outcome.output
def test_execute_crawl_fail(self):
    """A crawl fails when the fetcher's ``crawl`` reports failure."""
    crawl_outcome = {"success": False, "error": "crawl failed", "pages": []}
    fake_fetcher = MagicMock(**{"crawl.return_value": crawl_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="crawl", url="http://example.com")
    assert outcome.success is False
def test_execute_crawl_with_content_truncation(self):
    """Page content longer than ``max_content_length`` is marked truncated."""
    # 100 characters against a limit of 10.
    oversized_page = {
        "url": "http://example.com",
        "title": "T",
        "depth": 0,
        "content": "x" * 100,
        "links_found": 0,
    }
    crawl_outcome = {
        "success": True,
        "total_pages": 1,
        "pages": [oversized_page],
        "error": None,
    }
    fake_fetcher = MagicMock(**{"crawl.return_value": crawl_outcome})
    search_tool = WebSearchTool(provider=MockProvider(), max_content_length=10)
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="crawl", url="http://example.com")
    assert outcome.success is True
    assert "truncated" in outcome.output
def test_execute_crawl_with_error_warning(self):
    """A successful crawl carrying an ``error`` note shows it in the output."""
    crawled_page = {
        "url": "http://example.com",
        "title": "T",
        "depth": 0,
        "content": "content",
        "links_found": 0,
    }
    crawl_outcome = {
        "success": True,
        "total_pages": 1,
        "pages": [crawled_page],
        "error": "some pages failed",
    }
    fake_fetcher = MagicMock(**{"crawl.return_value": crawl_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="crawl", url="http://example.com")
    assert outcome.success is True
    # Either a generic warning marker or the error text itself is accepted.
    assert (
        "warning" in outcome.output.lower()
        or "some pages failed" in outcome.output
    )
def test_execute_get_content_fail(self):
    """``get_content`` fails when ``get_page_content`` reports failure."""
    content_outcome = {"success": False, "error": "driver not ready"}
    fake_fetcher = MagicMock(**{"get_page_content.return_value": content_outcome})
    search_tool = WebSearchTool(provider=MockProvider())
    search_tool._selenium_fetcher = fake_fetcher
    search_tool._use_selenium = True
    outcome = search_tool.execute(action="get_content")
    assert outcome.success is False
def test_execute_search_timeout_error(self):
    """A provider ``TimeoutError`` becomes a failed result mentioning it."""
    search_tool = WebSearchTool(provider=MockProvider())
    # Swap in a provider whose search always times out.
    failing_provider = MagicMock()
    failing_provider.search.side_effect = TimeoutError("timed out")
    search_tool._provider = failing_provider
    outcome = search_tool.execute(query="test")
    assert outcome.success is False
    message = outcome.error
    assert message is not None
    assert "timed out" in message.lower()
def test_execute_search_urlerror(self):
    """A provider ``URLError`` is turned into a failed search result."""
    tool = WebSearchTool(provider=MockProvider())
    tool._provider = MagicMock()
    # ``urllib.error`` comes from the module-level import; no local re-import.
    tool._provider.search.side_effect = urllib.error.URLError("network error")
    result = tool.execute(query="test")
    assert result.success is False
| def test_execute_search_with_fetched_title(self): | |
| """Search result gets title from fetched content.""" | |
| tool = WebSearchTool(provider=MockProvider(results=[ | |
| {"title": "", "url": "https://example.com/1", "snippet": "Snippet"}, | |
| ]), fetch_content=True) | |
| with patch.object(tool, "_fetch_page_content", return_value={ | |
| "title": "Fetched Title", "content": "Page content" | |
| }): | |
| result = tool.execute(query="test") | |
| assert result.success is True | |