# gMAS/tests/test_web_search.py
# Артём Боярских
# chore: initial commit
# 3193174
"""Tests for src/tools/web_search.py"""
import email.message
import json
import urllib.error
from unittest.mock import MagicMock, patch
import pytest
from tools.web_search import (
DuckDuckGoProvider,
SearchProvider,
SerperProvider,
SimpleHTMLParser,
TavilyProvider,
URLFetcher,
WebSearchTool,
_create_web_search_tool,
)
# ═══════════════════════════════════════════════════════════════
# Helpers
# ═══════════════════════════════════════════════════════════════
class MockProvider(SearchProvider):
    """Search provider stub that serves a fixed list of results.

    Args:
        results: Result dicts to serve. When ``None`` (the default), two
            canned example results are used. An explicitly passed empty
            list is honoured and yields no results.
    """

    def __init__(self, results: list[dict[str, str]] | None = None):
        # Compare against None instead of relying on truthiness, so that
        # MockProvider(results=[]) really models an empty result set rather
        # than silently falling back to the canned results.
        if results is None:
            results = [
                {"title": "Result 1", "url": "https://example.com/1", "snippet": "Snippet 1"},
                {"title": "Result 2", "url": "https://example.com/2", "snippet": "Snippet 2"},
            ]
        self._results = results

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        """Return at most ``max_results`` stored results; ``query`` is ignored."""
        return self._results[:max_results]
class EmptyProvider(SearchProvider):
    """Search provider stub that never finds anything."""

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        """Return an empty result list regardless of the arguments."""
        no_hits: list[dict[str, str]] = []
        return no_hits
# ═══════════════════════════════════════════════════════════════
# SimpleHTMLParser
# ═══════════════════════════════════════════════════════════════
class TestSimpleHTMLParser:
    """Tests for ``SimpleHTMLParser.html_to_text``."""

    def test_empty_input(self):
        """An empty document converts to an empty string."""
        result = SimpleHTMLParser.html_to_text("")
        assert result == ""

    def test_plain_text(self):
        """Text without markup is kept."""
        result = SimpleHTMLParser.html_to_text("Hello World")
        assert "Hello World" in result

    def test_strips_tags(self):
        """Inline and block tags are removed while their text is kept."""
        html = "<p>Hello <b>World</b></p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Hello" in result
        assert "World" in result
        assert "<" not in result
        assert ">" not in result

    def test_removes_script(self):
        """The body of a <script> element does not reach the output."""
        html = "<html><script>alert('xss')</script><p>Safe text</p></html>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "alert" not in result
        assert "Safe text" in result

    def test_removes_style(self):
        """The body of a <style> element does not reach the output."""
        html = "<html><style>.class { color: red; }</style><p>Visible</p></html>"
        result = SimpleHTMLParser.html_to_text(html)
        assert ".class" not in result
        assert "Visible" in result

    def test_removes_comments(self):
        """HTML comments are dropped."""
        html = "<!-- This is a comment -->Hello"
        result = SimpleHTMLParser.html_to_text(html)
        assert "comment" not in result
        assert "Hello" in result

    def test_handles_html_entities(self):
        """Character entities are decoded."""
        html = "Fish &amp; Chips"
        result = SimpleHTMLParser.html_to_text(html)
        assert "&amp;" not in result
        assert "Fish & Chips" in result

    def test_max_length_truncation(self):
        """Output longer than ``max_length`` carries a truncation marker."""
        html = "x" * 10000
        result = SimpleHTMLParser.html_to_text(html, max_length=100)
        assert "(content truncated)" in result

    def test_block_tags_add_newlines(self):
        """Text of consecutive paragraphs is preserved."""
        html = "<p>Para 1</p><p>Para 2</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Para 1" in result
        assert "Para 2" in result

    def test_heading_tags(self):
        """Heading text is preserved alongside paragraph text."""
        html = "<h1>Title</h1><h2>Subtitle</h2><p>Content</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Title" in result
        assert "Subtitle" in result
        assert "Content" in result

    def test_nested_tags(self):
        """Text inside nested containers is preserved."""
        html = "<div><article><p>Article text</p></article></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Article text" in result

    def test_no_remaining_tags(self):
        """Tags carrying attributes are stripped completely."""
        html = "<div class='foo'><span id='bar'>text</span></div>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "<" not in result
        assert ">" not in result

    def test_removes_nav_header_footer(self):
        """Navigation and footer text is dropped, main content is kept."""
        html = "<nav>Navigation</nav><main>Content</main><footer>Footer</footer>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Navigation" not in result
        assert "Content" in result
        assert "Footer" not in result

    def test_list_items(self):
        """List item text is preserved."""
        html = "<ul><li>Item 1</li><li>Item 2</li></ul>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Item 1" in result
        assert "Item 2" in result

    def test_case_insensitive_removes_script(self):
        """Upper-case tag names are handled like lower-case ones."""
        html = "<SCRIPT>bad code</SCRIPT><P>Good text</P>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "bad code" not in result
        assert "Good text" in result

    def test_br_tags(self):
        """Text around <br> and <br/> is preserved."""
        html = "Line 1<br>Line 2<br/>Line 3"
        result = SimpleHTMLParser.html_to_text(html)
        assert "Line 1" in result
        assert "Line 2" in result
        assert "Line 3" in result

    def test_multiple_spaces_collapsed(self):
        """Runs of several spaces between words do not survive conversion."""
        # The input must contain real multi-space runs; with single spaces the
        # final assertion would contradict the purpose of the test.
        html = "<p>word1    word2   word3</p>"
        result = SimpleHTMLParser.html_to_text(html)
        assert "word1" in result
        assert "word2" in result
        assert "word3" in result
        # The original four-space run must be gone from the output.
        assert "word1    word2" not in result
# ═══════════════════════════════════════════════════════════════
# URLFetcher
# ═══════════════════════════════════════════════════════════════
class TestURLFetcher:
    """Tests for ``URLFetcher``; every network call is replaced by a mock."""

    @staticmethod
    def _mock_response(content_type: str, body: bytes | None = None) -> MagicMock:
        """Build a context-manager response mock for ``urllib.request.urlopen``.

        Args:
            content_type: Value returned by ``headers.get`` for any key
                containing ``"Content-Type"``; other keys yield the default.
            body: Bytes returned by ``read()``. When ``None``, ``read`` is
                left as an unconfigured mock attribute.
        """
        response = MagicMock()
        response.__enter__ = lambda s: s
        response.__exit__ = MagicMock(return_value=False)
        response.headers.get = lambda key, default="": (
            content_type if "Content-Type" in key else default
        )
        if body is not None:
            response.read.return_value = body
        return response

    def test_init_defaults(self):
        """Default timeout and content-length limit are stored on the instance."""
        fetcher = URLFetcher()
        assert fetcher._timeout == 15
        assert fetcher._max_content_length == 500_000

    def test_init_custom(self):
        """Constructor arguments override the defaults."""
        fetcher = URLFetcher(timeout=30, max_content_length=100_000)
        assert fetcher._timeout == 30
        assert fetcher._max_content_length == 100_000

    def test_fetch_fails_for_nonexistent_host(self):
        """A host that cannot be resolved yields a failure dict echoing the URL."""
        fetcher = URLFetcher(timeout=2)
        url = "http://this-should-not-exist-xyz.invalid/"
        # Simulate the resolver failure instead of hitting real DNS, so the
        # test stays fast and does not depend on the network environment.
        unresolved = urllib.error.URLError("Name or service not known")
        with patch("urllib.request.urlopen", side_effect=unresolved):
            result = fetcher.fetch(url)
        assert isinstance(result, dict)
        assert result["success"] is False
        assert result["url"] == url

    def test_fetch_result_keys(self):
        """A failed fetch still returns every documented result key."""
        fetcher = URLFetcher(timeout=1)
        # Same hermetic failure simulation as above; no real lookup is made.
        unresolved = urllib.error.URLError("Name or service not known")
        with patch("urllib.request.urlopen", side_effect=unresolved):
            result = fetcher.fetch("http://nope.invalid")
        for key in ("success", "url", "title", "content", "error"):
            assert key in result

    def test_fetch_success_mock(self):
        """An HTML response yields extracted text and the page title."""
        fetcher = URLFetcher()
        mock_html = b"<html><title>Test Page</title><body><p>Hello World</p></body></html>"
        mock_response = self._mock_response("text/html; charset=utf-8", mock_html)
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com")
        assert result["success"] is True
        assert "Hello World" in result["content"]
        assert result["title"] == "Test Page"

    def test_fetch_unsupported_content_type(self):
        """A PDF content type is rejected with an explanatory error."""
        fetcher = URLFetcher()
        mock_response = self._mock_response("application/pdf")
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com/file.pdf")
        assert result["success"] is False
        assert "Unsupported content type" in result["error"]

    def test_fetch_http_error(self):
        """An HTTPError from urlopen is reported with its status code."""
        fetcher = URLFetcher()
        not_found = urllib.error.HTTPError(
            url="http://example.com",
            code=404,
            msg="Not Found",
            hdrs=email.message.Message(),
            fp=None,
        )
        with patch("urllib.request.urlopen", side_effect=not_found):
            result = fetcher.fetch("http://example.com")
        assert result["success"] is False
        assert "HTTP Error 404" in result["error"]

    def test_fetch_url_error(self):
        """A URLError from urlopen is reported as a URL error."""
        fetcher = URLFetcher()
        with patch(
            "urllib.request.urlopen",
            side_effect=urllib.error.URLError("Name resolution failed"),
        ):
            result = fetcher.fetch("http://doesnotexist.invalid")
        assert result["success"] is False
        assert "URL Error" in result["error"]

    def test_fetch_timeout(self):
        """A TimeoutError from urlopen is reported as a timeout."""
        fetcher = URLFetcher(timeout=1)
        with patch("urllib.request.urlopen", side_effect=TimeoutError("timed out")):
            result = fetcher.fetch("http://example.com")
        assert result["success"] is False
        assert "timed out" in result["error"].lower()

    def test_fetch_text_plain_content_type(self):
        """A text/plain response is accepted and its body returned."""
        fetcher = URLFetcher()
        mock_html = b"Hello plain text"
        mock_response = self._mock_response("text/plain", mock_html)
        with patch("urllib.request.urlopen", return_value=mock_response):
            result = fetcher.fetch("https://example.com/text.txt")
        assert result["success"] is True
        assert "Hello plain text" in result["content"]
# ═══════════════════════════════════════════════════════════════
# DuckDuckGoProvider
# ═══════════════════════════════════════════════════════════════
class TestDuckDuckGoProvider:
    """Tests for ``DuckDuckGoProvider`` with ``urlopen`` mocked out."""

    @staticmethod
    def _json_response(payload: dict) -> MagicMock:
        """Return a context-manager mock whose ``read()`` yields ``payload`` as UTF-8 JSON."""
        reply = MagicMock()
        reply.__enter__ = lambda s: s
        reply.__exit__ = MagicMock(return_value=False)
        reply.read.return_value = json.dumps(payload).encode("utf-8")
        return reply

    def _search_with_payload(self, payload: dict, query: str, max_results: int):
        """Run a search on a fresh provider while urlopen returns ``payload``."""
        provider = DuckDuckGoProvider()
        reply = self._json_response(payload)
        with patch("urllib.request.urlopen", return_value=reply):
            return provider.search(query, max_results=max_results)

    def test_init_defaults(self):
        """The default timeout is 10."""
        assert DuckDuckGoProvider()._timeout == 10

    def test_init_custom(self):
        """A custom timeout is stored on the instance."""
        assert DuckDuckGoProvider(timeout=30)._timeout == 30

    def test_search_with_abstract(self):
        """An abstract-only payload produces one result built from it."""
        payload = {
            "Heading": "Python",
            "Abstract": "A programming language",
            "AbstractURL": "https://python.org",
            "RelatedTopics": [],
            "Results": [],
        }
        hits = self._search_with_payload(payload, "python", 5)
        assert len(hits) == 1
        first = hits[0]
        assert first["title"] == "Python"
        assert first["snippet"] == "A programming language"

    def test_search_with_related_topics(self):
        """Each related topic becomes one result."""
        payload = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": [
                {"Text": "Result 1", "FirstURL": "https://example.com/1"},
                {"Text": "Result 2", "FirstURL": "https://example.com/2"},
            ],
            "Results": [],
        }
        hits = self._search_with_payload(payload, "test", 5)
        assert len(hits) == 2

    def test_search_network_error_returns_empty(self):
        """A URLError during the request yields an empty result list."""
        provider = DuckDuckGoProvider()
        failure = urllib.error.URLError("failed")
        with patch("urllib.request.urlopen", side_effect=failure):
            hits = provider.search("test")
        assert hits == []

    def test_search_excludes_non_dict_topics(self):
        """A valid topic is still returned when mixed with malformed entries."""
        payload = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": [
                "not a dict",
                {"Text": "", "FirstURL": ""},
                {"Text": "Valid topic", "FirstURL": "https://valid.com"},
            ],
            "Results": [],
        }
        hits = self._search_with_payload(payload, "test", 10)
        assert any(hit["snippet"] == "Valid topic" for hit in hits)

    def test_search_respects_max_results(self):
        """No more than ``max_results`` entries are returned."""
        topics = [
            {"Text": f"Topic {i}", "FirstURL": f"https://ex.com/{i}"}
            for i in range(10)
        ]
        payload = {
            "Heading": "",
            "Abstract": "",
            "AbstractURL": "",
            "RelatedTopics": topics,
            "Results": [],
        }
        hits = self._search_with_payload(payload, "test", 3)
        assert len(hits) <= 3
# ═══════════════════════════════════════════════════════════════
# SerperProvider
# ═══════════════════════════════════════════════════════════════
class TestSerperProvider:
    """Tests for ``SerperProvider`` with ``urlopen`` mocked out."""

    @staticmethod
    def _json_response(payload: dict) -> MagicMock:
        """Return a context-manager mock whose ``read()`` yields ``payload`` as UTF-8 JSON."""
        reply = MagicMock()
        reply.__enter__ = lambda s: s
        reply.__exit__ = MagicMock(return_value=False)
        reply.read.return_value = json.dumps(payload).encode("utf-8")
        return reply

    def test_init(self):
        """The API key is stored and the default timeout is 10."""
        serper = SerperProvider(api_key="test-key")
        assert serper._api_key == "test-key"
        assert serper._timeout == 10

    def test_search_organic(self):
        """Organic entries are returned in order."""
        serper = SerperProvider(api_key="test-key")
        payload = {
            "organic": [
                {"title": "Result 1", "link": "https://example.com/1", "snippet": "Snippet 1"},
                {"title": "Result 2", "link": "https://example.com/2", "snippet": "Snippet 2"},
            ]
        }
        reply = self._json_response(payload)
        with patch("urllib.request.urlopen", return_value=reply):
            hits = serper.search("test query", max_results=5)
        assert len(hits) == 2
        assert hits[0]["title"] == "Result 1"

    def test_search_with_answer_box(self):
        """An answer box is placed ahead of the organic results."""
        serper = SerperProvider(api_key="test-key")
        payload = {
            "organic": [
                {"title": "Result", "link": "https://example.com", "snippet": "info"}
            ],
            "answerBox": {
                "title": "Direct Answer",
                "link": "https://answer.com",
                "answer": "The answer is 42",
            },
        }
        reply = self._json_response(payload)
        with patch("urllib.request.urlopen", return_value=reply):
            hits = serper.search("query", max_results=5)
        # Answer box should be inserted at position 0
        assert hits[0]["title"] == "Direct Answer"

    def test_search_network_error_returns_empty(self):
        """A URLError during the request yields an empty result list."""
        serper = SerperProvider(api_key="key")
        failure = urllib.error.URLError("failed")
        with patch("urllib.request.urlopen", side_effect=failure):
            hits = serper.search("test")
        assert hits == []
# ═══════════════════════════════════════════════════════════════
# TavilyProvider
# ═══════════════════════════════════════════════════════════════
class TestTavilyProvider:
    """Tests for ``TavilyProvider`` with ``urlopen`` mocked out."""

    @staticmethod
    def _json_response(payload: dict) -> MagicMock:
        """Return a context-manager mock whose ``read()`` yields ``payload`` as UTF-8 JSON."""
        reply = MagicMock()
        reply.__enter__ = lambda s: s
        reply.__exit__ = MagicMock(return_value=False)
        reply.read.return_value = json.dumps(payload).encode("utf-8")
        return reply

    def test_init(self):
        """The API key is stored and answers are included by default."""
        tavily = TavilyProvider(api_key="tavily-key")
        assert tavily._api_key == "tavily-key"
        assert tavily._include_answer is True

    def test_search_with_answer(self):
        """The AI answer is returned first, followed by the page results."""
        tavily = TavilyProvider(api_key="key")
        payload = {
            "answer": "The answer is 42",
            "results": [
                {"title": "Page 1", "url": "https://example.com/1", "content": "Content 1"},
                {"title": "Page 2", "url": "https://example.com/2", "content": "Content 2"},
            ],
        }
        reply = self._json_response(payload)
        with patch("urllib.request.urlopen", return_value=reply):
            hits = tavily.search("test", max_results=5)
        assert hits[0]["title"] == "Tavily AI Answer"
        assert len(hits) == 3

    def test_search_no_answer(self):
        """Without an answer field only the page results are returned."""
        tavily = TavilyProvider(api_key="key")
        payload = {
            "results": [
                {"title": "Page", "url": "https://example.com", "content": "Content"}
            ]
        }
        reply = self._json_response(payload)
        with patch("urllib.request.urlopen", return_value=reply):
            hits = tavily.search("test", max_results=5)
        assert len(hits) == 1
        assert hits[0]["title"] == "Page"

    def test_search_network_error_returns_empty(self):
        """A URLError during the request yields an empty result list."""
        tavily = TavilyProvider(api_key="key")
        failure = urllib.error.URLError("failed")
        with patch("urllib.request.urlopen", side_effect=failure):
            hits = tavily.search("test")
        assert hits == []
# ═══════════════════════════════════════════════════════════════
# WebSearchTool
# ═══════════════════════════════════════════════════════════════
class TestWebSearchToolInit:
    """Construction and static metadata of ``WebSearchTool``."""

    def test_init_default_provider(self):
        """With no provider argument the tool holds a DuckDuckGoProvider."""
        assert isinstance(WebSearchTool()._provider, DuckDuckGoProvider)

    def test_init_custom_provider(self):
        """An injected provider is stored as-is."""
        injected = MockProvider()
        assert WebSearchTool(provider=injected)._provider is injected

    def test_name_property(self):
        """The tool name is ``web_search``."""
        assert WebSearchTool().name == "web_search"

    def test_description_property(self):
        """The description is a non-empty string."""
        text = WebSearchTool().description
        assert isinstance(text, str)
        assert len(text) > 0

    def test_parameters_schema(self):
        """The schema is an object exposing query, url and action."""
        schema = WebSearchTool().parameters_schema
        assert schema["type"] == "object"
        for field in ("query", "url", "action"):
            assert field in schema["properties"]

    def test_parameters_schema_with_selenium(self):
        """With the selenium flag set the schema adds selector and js_code."""
        tool = WebSearchTool()
        tool._use_selenium = True
        properties = tool.parameters_schema["properties"]
        for field in ("selector", "js_code"):
            assert field in properties
class TestWebSearchToolExecute:
    """Tests for ``WebSearchTool.execute`` without a Selenium fetcher."""

    @staticmethod
    def _tool() -> WebSearchTool:
        """Return a tool backed by the canned ``MockProvider``."""
        return WebSearchTool(provider=MockProvider())

    @staticmethod
    def _assert_needs_selenium(outcome) -> None:
        """Check that ``outcome`` failed with an error mentioning Selenium."""
        assert outcome.success is False
        assert outcome.error is not None
        assert "Selenium" in outcome.error

    def test_execute_search_with_query(self):
        """A bare query runs a search and lists the provider's results."""
        outcome = self._tool().execute(query="python programming")
        assert outcome.success is True
        assert "Result 1" in outcome.output

    def test_execute_no_action_no_query_no_url(self):
        """Calling execute with no arguments fails with an error."""
        outcome = self._tool().execute()
        assert outcome.success is False
        assert outcome.error

    def test_execute_search_empty_results(self):
        """A search with no hits succeeds and reports that nothing was found."""
        tool = WebSearchTool(provider=EmptyProvider())
        outcome = tool.execute(query="something obscure")
        assert outcome.success is True
        assert "No results found" in outcome.output

    def test_execute_fetch_with_url(self):
        """A bare URL triggers a fetch whose content appears in the output."""
        tool = self._tool()
        fetched = {
            "success": True,
            "url": "https://example.com",
            "title": "Test",
            "content": "Test content here",
        }
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(url="https://example.com")
        assert outcome.success is True
        assert "Test content here" in outcome.output

    def test_execute_fetch_action_no_url(self):
        """The fetch action without a URL fails with an error."""
        outcome = self._tool().execute(action="fetch")
        assert outcome.success is False
        assert outcome.error

    def test_execute_fetch_action_with_url(self):
        """The explicit fetch action succeeds when the fetcher succeeds."""
        tool = self._tool()
        fetched = {
            "success": True,
            "url": "https://example.com",
            "title": "Page",
            "content": "Page content",
        }
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(action="fetch", url="https://example.com")
        assert outcome.success is True

    def test_execute_fetch_failure(self):
        """A failed fetch is propagated as a failed tool result."""
        tool = self._tool()
        fetched = {
            "success": False,
            "url": "https://example.com",
            "title": "",
            "content": "",
            "error": "Connection refused",
        }
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(action="fetch", url="https://example.com")
        assert outcome.success is False

    def test_execute_click_without_selenium(self):
        """The click action fails with a Selenium-related error."""
        outcome = self._tool().execute(action="click", selector=".button")
        self._assert_needs_selenium(outcome)

    def test_execute_fill_without_selenium(self):
        """The fill action fails with a Selenium-related error."""
        outcome = self._tool().execute(action="fill", selector="input", value="test")
        self._assert_needs_selenium(outcome)

    def test_execute_extract_links_without_selenium(self):
        """The extract_links action fails with a Selenium-related error."""
        outcome = self._tool().execute(action="extract_links")
        self._assert_needs_selenium(outcome)

    def test_execute_execute_js_without_selenium(self):
        """The execute_js action fails with a Selenium-related error."""
        outcome = self._tool().execute(action="execute_js", js_code="return 1")
        self._assert_needs_selenium(outcome)

    def test_execute_execute_js_no_code(self):
        """The execute_js action without code fails."""
        outcome = self._tool().execute(action="execute_js")
        assert outcome.success is False

    def test_execute_crawl_without_url(self):
        """The crawl action without a URL fails."""
        outcome = self._tool().execute(action="crawl")
        assert outcome.success is False

    def test_execute_crawl_without_selenium(self):
        """The crawl action with a URL fails with a Selenium-related error."""
        outcome = self._tool().execute(action="crawl", url="https://example.com")
        self._assert_needs_selenium(outcome)

    def test_execute_get_content_without_selenium(self):
        """The get_content action fails when no Selenium fetcher is set."""
        outcome = self._tool().execute(action="get_content")
        assert outcome.success is False

    def test_execute_search_action_explicit(self):
        """The explicit search action with a query succeeds."""
        outcome = self._tool().execute(action="search", query="test")
        assert outcome.success is True

    def test_execute_search_action_no_query(self):
        """The explicit search action without a query fails."""
        outcome = self._tool().execute(action="search")
        assert outcome.success is False

    def test_execute_with_fetch_content(self):
        """A search with ``fetch_content=True`` succeeds when fetches succeed."""
        tool = WebSearchTool(provider=MockProvider(), fetch_content=True)
        fetched = {
            "success": True,
            "url": "https://example.com/1",
            "title": "Example",
            "content": "Page content here for fetching",
        }
        with patch.object(tool._fetcher, "fetch", return_value=fetched):
            outcome = tool.execute(query="test query")
        assert outcome.success is True

    def test_execute_max_results_clipped(self):
        """max_results is capped at 10 in the implementation."""
        outcome = self._tool().execute(query="test", max_results=100)
        assert outcome.success is True
class TestWebSearchToolFormatting:
    """Tests for ``WebSearchTool._format_search_results``."""

    def test_format_search_results_empty(self):
        """An empty result list is rendered as a "no results" message."""
        rendered = WebSearchTool(provider=MockProvider())._format_search_results([])
        assert "No results found" in rendered

    def test_format_search_results_with_results(self):
        """Title, URL and snippet of a result all appear in the output."""
        tool = WebSearchTool(provider=MockProvider())
        entry = {"title": "Test Title", "url": "https://example.com", "snippet": "A snippet"}
        rendered = tool._format_search_results([entry])
        for expected in ("Test Title", "https://example.com", "A snippet"):
            assert expected in rendered

    def test_format_search_results_with_content(self):
        """Page content is included when ``with_content`` is requested."""
        tool = WebSearchTool(provider=MockProvider(), max_content_length=200)
        entry = {
            "title": "Title",
            "url": "https://example.com",
            "snippet": "snap",
            "content": "Page content",
        }
        rendered = tool._format_search_results([entry], with_content=True)
        assert "Page content" in rendered

    def test_format_search_results_content_truncated(self):
        """Content longer than ``max_content_length`` is marked as truncated."""
        tool = WebSearchTool(provider=MockProvider(), max_content_length=5)
        entry = {"title": "T", "url": "https://x.com", "snippet": "", "content": "A" * 100}
        rendered = tool._format_search_results([entry], with_content=True)
        assert "truncated" in rendered.lower()
class TestWebSearchToolSeleniumCheck:
    """Selenium guard and lifecycle behaviour of a Selenium-less tool."""

    @staticmethod
    def _tool() -> WebSearchTool:
        """Return a tool backed by the canned ``MockProvider``."""
        return WebSearchTool(provider=MockProvider())

    def test_require_selenium_raises_without_selenium(self):
        """``_require_selenium`` raises RuntimeError when no fetcher is set."""
        subject = self._tool()
        assert subject._selenium_fetcher is None
        with pytest.raises(RuntimeError, match="Selenium"):
            subject._require_selenium("click")

    def test_context_manager(self):
        """Entering the tool as a context manager yields the tool itself."""
        subject = self._tool()
        with subject as entered:
            assert entered is subject

    def test_close_without_selenium(self):
        """``close`` is safe to call when no Selenium fetcher exists."""
        subject = self._tool()
        subject.close()  # Should not raise
class TestCreateWebSearchToolFactory:
    """Provider selection performed by ``_create_web_search_tool``."""

    @staticmethod
    def _built_provider(**factory_kwargs):
        """Build a tool through the factory and return the provider it holds."""
        return _create_web_search_tool(**factory_kwargs)._provider

    def test_default_provider(self):
        """No arguments select DuckDuckGo."""
        assert isinstance(self._built_provider(), DuckDuckGoProvider)

    def test_serper_provider_with_key(self):
        """``serper`` with an API key selects Serper."""
        chosen = self._built_provider(provider="serper", api_key="my-key")
        assert isinstance(chosen, SerperProvider)

    def test_serper_provider_no_key_falls_back_to_ddg(self):
        """``serper`` without a key falls back to DuckDuckGo."""
        chosen = self._built_provider(provider="serper")
        assert isinstance(chosen, DuckDuckGoProvider)

    def test_tavily_provider_with_key(self):
        """``tavily`` with an API key selects Tavily."""
        chosen = self._built_provider(provider="tavily", api_key="my-key")
        assert isinstance(chosen, TavilyProvider)

    def test_tavily_provider_no_key_falls_back_to_ddg(self):
        """``tavily`` without a key falls back to DuckDuckGo."""
        chosen = self._built_provider(provider="tavily")
        assert isinstance(chosen, DuckDuckGoProvider)

    def test_duckduckgo_provider_explicit(self):
        """``duckduckgo`` selects DuckDuckGo."""
        chosen = self._built_provider(provider="duckduckgo")
        assert isinstance(chosen, DuckDuckGoProvider)

    def test_ddg_alias(self):
        """``ddg`` is accepted as an alias for DuckDuckGo."""
        chosen = self._built_provider(provider="ddg")
        assert isinstance(chosen, DuckDuckGoProvider)

    def test_unknown_provider_falls_back_to_ddg(self):
        """An unrecognised provider name falls back to DuckDuckGo."""
        chosen = self._built_provider(provider="unknown_xyz")
        assert isinstance(chosen, DuckDuckGoProvider)

    def test_serper_provider_serper_api_key_param(self):
        """``serper_api_key`` is accepted as the Serper key."""
        chosen = self._built_provider(provider="serper", serper_api_key="key")
        assert isinstance(chosen, SerperProvider)

    def test_tavily_provider_tavily_api_key_param(self):
        """``tavily_api_key`` is accepted as the Tavily key."""
        chosen = self._built_provider(provider="tavily", tavily_api_key="key")
        assert isinstance(chosen, TavilyProvider)
# ═══════════════════════════════════════════════════════════════
# SeleniumFetcher — initialization only (no real browser)
# ═══════════════════════════════════════════════════════════════
class TestSeleniumFetcherInit:
    """Tests for ``SeleniumFetcher`` that never start a real browser."""

    def test_init_defaults(self):
        """Default constructor values are stored and no driver exists yet."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        assert fetcher._headless is True
        assert fetcher._browser == "chrome"
        assert fetcher._wait_timeout == 15
        assert fetcher._page_load_timeout == 30
        assert fetcher._scroll_to_bottom is False
        assert fetcher._driver is None

    def test_init_custom(self):
        """Constructor arguments override the defaults."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher(
            headless=False,
            browser="firefox",
            wait_timeout=30,
            scroll_to_bottom=True,
        )
        assert fetcher._headless is False
        assert fetcher._browser == "firefox"
        assert fetcher._wait_timeout == 30
        assert fetcher._scroll_to_bottom is True

    def test_ensure_dependencies_import_error(self):
        """``_ensure_dependencies`` raises ImportError when imports fail."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        # Make every import statement fail while the check runs.
        with patch("builtins.__import__", side_effect=ImportError("No selenium")), pytest.raises(ImportError):
            fetcher._ensure_dependencies()

    def test_ensure_dependencies_with_selenium(self):
        """If selenium is available, _ensure_dependencies should not raise."""
        from tools.web_search import SeleniumFetcher

        # Skip only when selenium itself is missing. The call under test stays
        # outside any try/except so that an ImportError raised by
        # _ensure_dependencies() fails the test instead of being reported as
        # a skip.
        pytest.importorskip("selenium")
        fetcher = SeleniumFetcher()
        fetcher._ensure_dependencies()  # Should not raise

    def test_close_no_driver(self):
        """``close`` is a no-op when no driver was ever created."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        fetcher.close()  # Should not raise
        assert fetcher._driver is None

    def test_close_with_mock_driver(self):
        """``close`` quits the driver once and clears the reference."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        mock_driver = MagicMock()
        fetcher._driver = mock_driver
        fetcher.close()
        mock_driver.quit.assert_called_once()
        assert fetcher._driver is None

    def test_context_manager(self):
        """Entering the fetcher as a context manager yields the fetcher itself."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        with fetcher as f:
            assert f is fetcher

    def test_create_driver_invalid_browser(self):
        """An unsupported browser name makes ``_create_driver`` raise ValueError."""
        from tools.web_search import SeleniumFetcher

        pytest.importorskip("selenium")
        fetcher = SeleniumFetcher(browser="ie")
        with pytest.raises(ValueError, match="Unsupported browser"):
            fetcher._create_driver()

    def test_get_driver_creates_if_none(self):
        """``_get_driver`` returns a driver when none exists yet."""
        from tools.web_search import SeleniumFetcher

        fetcher = SeleniumFetcher()
        # Stub out driver creation so no browser is launched.
        with patch.object(fetcher, "_create_driver", return_value=MagicMock()):
            driver = fetcher._get_driver()
        assert driver is not None
# ═══════════════════════════════════════════════════════════════
# URLFetcher — charset detection
# ═══════════════════════════════════════════════════════════════
class TestURLFetcherCharset:
    """Further ``URLFetcher.fetch`` cases: charsets, bad URLs, main content."""

    @staticmethod
    def _html_response(content_type: str, payload: bytes) -> MagicMock:
        """Return a context-manager response mock with the given header and body."""
        reply = MagicMock()
        reply.__enter__ = lambda s: s
        reply.__exit__ = MagicMock(return_value=False)
        reply.headers.get = lambda key, default="": (
            content_type if "Content-Type" in key else default
        )
        reply.read.return_value = payload
        return reply

    def _fetch_mocked(self, content_type: str, payload: bytes) -> dict:
        """Fetch https://example.com while urlopen serves the given response."""
        reply = self._html_response(content_type, payload)
        with patch("urllib.request.urlopen", return_value=reply):
            return URLFetcher().fetch("https://example.com")

    def test_custom_charset_in_content_type(self):
        """Test charset extraction from content-type header."""
        outcome = self._fetch_mocked(
            "text/html; charset=latin-1",
            "Hello World".encode("latin-1"),
        )
        assert outcome["success"] is True

    def test_unicode_decode_error_fallback(self):
        """Test fallback when charset decoding fails."""
        outcome = self._fetch_mocked(
            "text/html; charset=utf-16",
            b"\xff\xfe Hello World",
        )
        # Should succeed even if charset is tricky
        assert "success" in outcome

    def test_value_error_in_fetch(self):
        """Test handling of ValueError in fetch."""
        fetcher = URLFetcher()
        with patch("urllib.request.urlopen", side_effect=ValueError("bad url")):
            outcome = fetcher.fetch("not-a-url")
        assert outcome["success"] is False

    def test_main_content_extraction(self):
        """Test that main/article content is extracted."""
        html_content = b"""
<html>
<head><title>Test</title></head>
<body>
<nav>Navigation</nav>
<main>
""" + (b"Main content " * 50) + b"""
</main>
</body>
</html>
"""
        outcome = self._fetch_mocked("text/html; charset=utf-8", html_content)
        assert outcome["success"] is True
        assert "Main content" in outcome["content"]
# ═══════════════════════════════════════════════════════════════
# WebSearchTool — with mock selenium fetcher
# ═══════════════════════════════════════════════════════════════
class TestWebSearchToolWithSelenium:
"""Tests for WebSearchTool when given a mocked selenium fetcher."""
def _make_mock_selenium_fetcher(self):
from tools.web_search import SeleniumFetcher
return MagicMock(spec=SeleniumFetcher)
def test_init_with_selenium_fetcher(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
assert tool._use_selenium is True
assert tool._selenium_fetcher is mock_fetcher
def test_execute_fetch_with_selenium(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
mock_fetcher.fetch.return_value = {
"success": True,
"url": "https://example.com",
"title": "Example",
"content": "Content via Selenium",
}
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
result = tool.execute(url="https://example.com")
assert result.success is True
assert "Content via Selenium" in result.output
def test_execute_click_with_selenium(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
mock_fetcher.click_element.return_value = {
"success": True,
"url": "https://example.com/next",
"title": "Next Page",
"clicked_text": "Submit",
}
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
result = tool.execute(action="click", selector=".submit-btn")
assert result.success is True
def test_execute_click_failed(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
mock_fetcher.click_element.return_value = {
"success": False,
"error": "Element not found",
"url": "",
"title": "",
"clicked_text": "",
}
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
result = tool.execute(action="click", selector=".nonexistent")
assert result.success is False
def test_execute_fill_with_selenium(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
mock_fetcher.fill_input.return_value = {
"success": True,
"url": "https://example.com",
"title": "Page",
"error": "",
}
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
result = tool.execute(action="fill", selector="input[name=q]", value="test")
assert result.success is True
def test_execute_fill_no_selector_fails(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
result = tool.execute(action="fill", value="test")
assert result.success is False
def test_execute_extract_links_with_selenium(self):
from tools.web_search import SeleniumFetcher
mock_fetcher = MagicMock(spec=SeleniumFetcher)
mock_fetcher.fetch.return_value = {
"success": True,
"url": "https://example.com",
"title": "Example",
"content": "",
}
mock_fetcher.extract_links.return_value = {
"success": True,
"url": "https://example.com",
"links": [
{"url": "https://example.com/1", "text": "Link 1", "title": ""},
],
"count": 1,
}
tool = WebSearchTool(selenium_fetcher=mock_fetcher)
result = tool.execute(action="extract_links", url="https://example.com")
assert result.success is True
def test_execute_execute_js_with_selenium(self):
    """The 'execute_js' action succeeds when the mocked execute_js reports success."""
    from tools.web_search import SeleniumFetcher

    js_outcome = {
        "success": True,
        "url": "https://example.com",
        "return_value": "document.title",
        "error": "",
    }
    fetcher = MagicMock(spec=SeleniumFetcher)
    fetcher.execute_js.return_value = js_outcome
    search_tool = WebSearchTool(selenium_fetcher=fetcher)

    outcome = search_tool.execute(action="execute_js", js_code="return document.title")

    assert outcome.success is True
def test_execute_get_content_with_selenium(self):
    """The 'get_content' action succeeds when the mocked get_page_content reports success."""
    from tools.web_search import SeleniumFetcher

    fetcher = MagicMock(spec=SeleniumFetcher)
    fetcher.get_page_content.return_value = dict(
        success=True,
        url="https://example.com",
        title="Example",
        content="Page content here",
        error="",
    )
    search_tool = WebSearchTool(selenium_fetcher=fetcher)

    outcome = search_tool.execute(action="get_content")

    assert outcome.success is True
def test_execute_crawl_with_selenium(self):
    """The 'crawl' action succeeds when the mocked crawl returns a single-page success."""
    from tools.web_search import SeleniumFetcher

    home_page = {"url": "https://example.com", "title": "Home", "depth": 0}
    crawl_report = {
        "success": True,
        "pages": [home_page],
        "total_pages": 1,
        "error": "",
    }
    fetcher = MagicMock(spec=SeleniumFetcher)
    fetcher.crawl.return_value = crawl_report
    search_tool = WebSearchTool(selenium_fetcher=fetcher)

    outcome = search_tool.execute(
        action="crawl",
        url="https://example.com",
        max_depth=1,
        max_pages=5,
    )

    assert outcome.success is True
def test_execute_search_with_fetch_content_selenium(self):
    """A query search with fetch_content=True and a Selenium fetcher completes successfully."""
    from tools.web_search import SeleniumFetcher

    fetcher = MagicMock(spec=SeleniumFetcher)
    fetcher.fetch.return_value = dict(
        success=True,
        url="https://example.com",
        title="Example",
        content="Content here",
    )
    tool_kwargs = {
        "provider": MockProvider(),
        "selenium_fetcher": fetcher,
        "fetch_content": True,
    }
    search_tool = WebSearchTool(**tool_kwargs)

    outcome = search_tool.execute(query="test")

    assert outcome.success is True
def test_description_with_selenium(self):
    """The tool description mentions "Selenium" when a Selenium fetcher is supplied."""
    from tools.web_search import SeleniumFetcher

    fetcher = MagicMock(spec=SeleniumFetcher)
    search_tool = WebSearchTool(selenium_fetcher=fetcher)

    description_text = search_tool.description

    assert "Selenium" in description_text
def test_close_with_selenium(self):
    """Closing the tool closes the injected Selenium fetcher exactly once."""
    from tools.web_search import SeleniumFetcher

    fetcher = MagicMock(spec=SeleniumFetcher)
    search_tool = WebSearchTool(selenium_fetcher=fetcher)

    search_tool.close()

    fetcher.close.assert_called_once()
# ═══════════════════════════════════════════════════════════════
# WebSearchTool — callback integration
# ═══════════════════════════════════════════════════════════════
class TestWebSearchToolCallbacks:
    """Callback-related behaviour of WebSearchTool: event emission and manager lookup."""

    def test_emit_tool_start_with_callback_manager(self):
        """A successful search reaches both on_tool_start and on_tool_end of the handler."""
        from callbacks.base import BaseCallbackHandler
        from callbacks.manager import CallbackManager

        class RecordingCB(BaseCallbackHandler):
            """Handler that records (event, tool_name) pairs in call order."""

            def __init__(self):
                self.calls = []

            def on_tool_start(self, *, run_id, tool_name, **kwargs):
                self.calls.append(("start", tool_name))

            def on_tool_end(self, *, run_id, tool_name, **kwargs):
                self.calls.append(("end", tool_name))

        cb = RecordingCB()
        manager = CallbackManager(handlers=[cb])
        tool = WebSearchTool(provider=MockProvider(), callback_manager=manager)

        result = tool.execute(query="test")

        assert result.success is True
        assert any(c[0] == "start" for c in cb.calls)
        assert any(c[0] == "end" for c in cb.calls)

    def test_get_callback_manager_from_context(self):
        """Without an explicit manager, the lookup yields None or a manager-like object."""
        tool = WebSearchTool(provider=MockProvider())
        # Without a callback manager, _get_callback_manager should return None or context manager
        cb = tool._get_callback_manager()
        # May be None if not in a callback context
        assert cb is None or hasattr(cb, "on_tool_start")

    def test_get_callback_manager_exception_returns_none(self, monkeypatch):
        """_get_callback_manager should return None on exception."""

        def _raise_runtime_error():
            # Explicit stand-in for the context lookup that always fails.
            raise RuntimeError("error")

        tool = WebSearchTool(provider=MockProvider())
        tool._callback_manager = None
        # Make the context-level lookup raise so the fallback path is exercised.
        monkeypatch.setattr(
            "callbacks.context.get_callback_manager",
            _raise_runtime_error,
        )

        cb = tool._get_callback_manager()

        assert cb is None

    def test_emit_tool_error_with_callback_manager(self):
        """A failing provider search is reported to the handler's on_tool_error hook."""
        from callbacks.base import BaseCallbackHandler
        from callbacks.manager import CallbackManager

        class RecordingCB(BaseCallbackHandler):
            """Handler that records the tool name of every reported error."""

            def __init__(self):
                self.errors = []

            def on_tool_error(self, *, run_id, tool_name, **kwargs):
                self.errors.append(tool_name)

        cb_handler = RecordingCB()
        manager = CallbackManager(handlers=[cb_handler])
        tool = WebSearchTool(provider=MockProvider(), callback_manager=manager)
        # Trigger an error by swapping in a provider whose search() raises.
        # MagicMock comes from the module-level import; no local import is needed.
        tool._provider = MagicMock()
        tool._provider.search.side_effect = TimeoutError("timed out")

        result = tool.execute(query="test")

        assert result.success is False
        assert cb_handler.errors
# ═══════════════════════════════════════════════════════════════
# WebSearchTool.execute — action routing edge cases
# ═══════════════════════════════════════════════════════════════
class TestWebSearchToolExecuteActionRouting:
"""Test execute() auto-detection and edge cases."""
def test_auto_detect_click_from_selector(self):
    """execute() given only a selector (no action) is expected to act as 'click' and succeed."""
    click_payload = {
        "success": True,
        "url": "http://example.com",
        "title": "T",
        "content": "",
        "clicked_text": "Button",
    }
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.click_element.return_value = click_payload
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(selector="#btn")

    assert outcome.success is True
def test_auto_detect_execute_js_from_js_code(self):
    """execute() given only js_code (no action) is expected to act as 'execute_js' and succeed."""
    js_payload = {
        "success": True,
        "url": "http://example.com",
        "return_value": 42,
    }
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.execute_js.return_value = js_payload
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(js_code="return 42;")

    assert outcome.success is True
def test_click_action_without_selector_returns_error(self):
    """An explicit 'click' with no selector fails with an error mentioning "selector"."""
    search_tool = WebSearchTool(provider=MockProvider())

    outcome = search_tool.execute(action="click")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "selector" in error_text
def test_fill_action_without_selector_returns_error(self):
    """An explicit 'fill' with no selector fails with an error mentioning "selector"."""
    search_tool = WebSearchTool(provider=MockProvider())

    outcome = search_tool.execute(action="fill")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "selector" in error_text
def test_execute_js_action_without_js_code_returns_error(self):
    """An explicit 'execute_js' with no js_code fails with an error mentioning "js_code"."""
    search_tool = WebSearchTool(provider=MockProvider())

    outcome = search_tool.execute(action="execute_js")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "js_code" in error_text
def test_crawl_action_without_url_returns_error(self):
    """An explicit 'crawl' with no url fails with an error mentioning "url"."""
    search_tool = WebSearchTool(provider=MockProvider())

    outcome = search_tool.execute(action="crawl")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "url" in error_text
def test_fetch_action_without_url_returns_error(self):
    """An explicit 'fetch' with no url fails with an error mentioning "url"."""
    search_tool = WebSearchTool(provider=MockProvider())

    outcome = search_tool.execute(action="fetch")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "url" in error_text
def test_no_action_no_query_url_selector_js_code_returns_error(self):
    """Calling execute() with no arguments at all fails with a "No action" error."""
    search_tool = WebSearchTool(provider=MockProvider())

    outcome = search_tool.execute()

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "No action" in error_text
def test_fetch_url_with_wait_for_selector(self):
    """Fetching a URL with wait_for_selector calls the fetcher's fetch_with_wait exactly once."""
    from tools.web_search import SeleniumFetcher

    waited_page = {"success": True, "title": "Test", "content": "content"}
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock(spec=SeleniumFetcher)
    selenium_stub.fetch_with_wait.return_value = waited_page
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(
        url="https://example.com",
        wait_for_selector="#main",
    )

    assert outcome.success is True
    selenium_stub.fetch_with_wait.assert_called_once()
def test_fetch_url_exception_returns_error(self):
    """An exception raised by the active fetcher's fetch() surfaces as a failed result."""
    tool = WebSearchTool(provider=MockProvider())
    # `patch` is imported at module level, so no function-local import is needed.
    with patch.object(tool, "_get_active_fetcher") as mock_fetcher:
        # Whatever fetcher the tool resolves will raise as soon as fetch() is called.
        mock_fetcher.return_value.fetch.side_effect = RuntimeError("connection failed")
        result = tool.execute(url="https://example.com")
    assert result.success is False
def test_execute_fill_fail(self):
    """A failed fill_input makes the 'fill' action fail and exposes the fetcher's error text."""
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.fill_input.return_value = dict(
        success=False,
        error="element not found",
    )
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="fill", selector="#input", value="test")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "element not found" in error_text
def test_execute_extract_links_fetch_fail(self):
    """'extract_links' with a URL fails when the page fetch fails, exposing the fetch error."""
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.fetch.return_value = dict(
        success=False,
        error="page not found",
    )
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="extract_links", url="https://example.com")

    assert outcome.success is False
    error_text = outcome.error
    assert error_text is not None
    assert "page not found" in error_text
def test_execute_extract_links_extract_fail(self):
    """'extract_links' fails when the fetcher's extract_links call reports failure."""
    fetch_ok = {"success": True, "content": ""}
    extract_failed = {"success": False, "error": "extraction failed"}

    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.fetch.return_value = fetch_ok
    selenium_stub.extract_links.return_value = extract_failed
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="extract_links")

    assert outcome.success is False
def test_execute_extract_links_with_title(self):
    """A link's non-empty title appears in the output of a successful 'extract_links'."""
    titled_link = {
        "url": "http://example.com/page",
        "text": "link",
        "title": "Page Title",
    }
    extracted = {
        "success": True,
        "url": "http://example.com",
        "count": 1,
        "links": [titled_link],
    }
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.extract_links.return_value = extracted
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="extract_links")

    assert outcome.success is True
    assert "Page Title" in outcome.output
def test_execute_js_fail(self):
    """The 'execute_js' action fails when the fetcher's execute_js reports failure."""
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.execute_js.return_value = dict(success=False, error="js error")
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="execute_js", js_code="throw Error()")

    assert outcome.success is False
def test_execute_js_no_return_value(self):
    """A successful script whose return_value is None is reported as "no return value"."""
    js_payload = {
        "success": True,
        "url": "http://example.com",
        "return_value": None,
    }
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.execute_js.return_value = js_payload
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(
        action="execute_js",
        js_code="document.title = 'test'",
    )

    assert outcome.success is True
    assert "no return value" in outcome.output
def test_execute_crawl_fail(self):
    """The 'crawl' action fails when the fetcher's crawl reports failure."""
    crawl_report = {"success": False, "error": "crawl failed", "pages": []}
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.crawl.return_value = crawl_report
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="crawl", url="http://example.com")

    assert outcome.success is False
def test_execute_crawl_with_content_truncation(self):
    """Crawled content longer than max_content_length is marked "truncated" in the output."""
    # 100 characters of content against a 10-character limit.
    oversized_page = {
        "url": "http://example.com",
        "title": "T",
        "depth": 0,
        "content": "x" * 100,
        "links_found": 0,
    }
    crawl_report = {
        "success": True,
        "total_pages": 1,
        "pages": [oversized_page],
        "error": None,
    }
    search_tool = WebSearchTool(provider=MockProvider(), max_content_length=10)
    selenium_stub = MagicMock()
    selenium_stub.crawl.return_value = crawl_report
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="crawl", url="http://example.com")

    assert outcome.success is True
    assert "truncated" in outcome.output
def test_execute_crawl_with_error_warning(self):
    """A successful crawl that also carries an error message surfaces it in the output."""
    crawled_page = {
        "url": "http://example.com",
        "title": "T",
        "depth": 0,
        "content": "content",
        "links_found": 0,
    }
    crawl_report = {
        "success": True,
        "total_pages": 1,
        "pages": [crawled_page],
        "error": "some pages failed",
    }
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.crawl.return_value = crawl_report
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="crawl", url="http://example.com")

    assert outcome.success is True
    # Either a generic warning marker or the verbatim error text is acceptable.
    assert "warning" in outcome.output.lower() or "some pages failed" in outcome.output
def test_execute_get_content_fail(self):
    """The 'get_content' action fails when the fetcher's get_page_content reports failure."""
    search_tool = WebSearchTool(provider=MockProvider())
    selenium_stub = MagicMock()
    selenium_stub.get_page_content.return_value = dict(
        success=False,
        error="driver not ready",
    )
    # Inject the stub directly and switch the tool into Selenium mode.
    search_tool._selenium_fetcher = selenium_stub
    search_tool._use_selenium = True

    outcome = search_tool.execute(action="get_content")

    assert outcome.success is False
def test_execute_search_timeout_error(self):
    """A TimeoutError from the provider yields a failed result whose error says "timed out"."""
    search_tool = WebSearchTool(provider=MockProvider())
    # Replace the provider with a stub whose search() always raises.
    failing_provider = MagicMock()
    failing_provider.search.side_effect = TimeoutError("timed out")
    search_tool._provider = failing_provider

    outcome = search_tool.execute(query="test")

    assert outcome.success is False
    assert outcome.error is not None
    assert "timed out" in outcome.error.lower()
def test_execute_search_urlerror(self):
    """A urllib URLError raised by the provider's search() yields a failed result."""
    tool = WebSearchTool(provider=MockProvider())
    # Replace the provider with a stub whose search() always raises.
    # urllib.error is imported at module level, so no function-local import is needed.
    tool._provider = MagicMock()
    tool._provider.search.side_effect = urllib.error.URLError("network error")

    result = tool.execute(query="test")

    assert result.success is False
def test_execute_search_with_fetched_title(self):
    """A search hit with an empty title still succeeds when page fetching supplies a title."""
    untitled_hit = {"title": "", "url": "https://example.com/1", "snippet": "Snippet"}
    fetched_page = {"title": "Fetched Title", "content": "Page content"}
    tool = WebSearchTool(
        provider=MockProvider(results=[untitled_hit]),
        fetch_content=True,
    )

    # Stub the page fetch so no real network access happens.
    with patch.object(tool, "_fetch_page_content", return_value=fetched_page):
        result = tool.execute(query="test")

    assert result.success is True