Article text
"""Tests for src/tools/web_search.py""" import email.message import json import urllib.error from unittest.mock import MagicMock, patch import pytest from tools.web_search import ( DuckDuckGoProvider, SearchProvider, SerperProvider, SimpleHTMLParser, TavilyProvider, URLFetcher, WebSearchTool, _create_web_search_tool, ) # ═══════════════════════════════════════════════════════════════ # Helpers # ═══════════════════════════════════════════════════════════════ class MockProvider(SearchProvider): """Mock search provider that returns canned results.""" def __init__(self, results: list[dict[str, str]] | None = None): self._results = results or [ {"title": "Result 1", "url": "https://example.com/1", "snippet": "Snippet 1"}, {"title": "Result 2", "url": "https://example.com/2", "snippet": "Snippet 2"}, ] def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]: return self._results[:max_results] class EmptyProvider(SearchProvider): def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]: return [] # ═══════════════════════════════════════════════════════════════ # SimpleHTMLParser # ═══════════════════════════════════════════════════════════════ class TestSimpleHTMLParser: def test_empty_input(self): result = SimpleHTMLParser.html_to_text("") assert result == "" def test_plain_text(self): result = SimpleHTMLParser.html_to_text("Hello World") assert "Hello World" in result def test_strips_tags(self): html = "
Hello World
" result = SimpleHTMLParser.html_to_text(html) assert "Hello" in result assert "World" in result assert "<" not in result assert ">" not in result def test_removes_script(self): html = "Safe text
" result = SimpleHTMLParser.html_to_text(html) assert "alert" not in result assert "Safe text" in result def test_removes_style(self): html = "Visible
" result = SimpleHTMLParser.html_to_text(html) assert ".class" not in result assert "Visible" in result def test_removes_comments(self): html = "Hello" result = SimpleHTMLParser.html_to_text(html) assert "comment" not in result assert "Hello" in result def test_handles_html_entities(self): html = "Fish & Chips" result = SimpleHTMLParser.html_to_text(html) assert "&" not in result assert "Fish & Chips" in result def test_max_length_truncation(self): html = "x" * 10000 result = SimpleHTMLParser.html_to_text(html, max_length=100) assert "(content truncated)" in result def test_block_tags_add_newlines(self): html = "Para 1
Para 2
" result = SimpleHTMLParser.html_to_text(html) assert "Para 1" in result assert "Para 2" in result def test_heading_tags(self): html = "Content
" result = SimpleHTMLParser.html_to_text(html) assert "Title" in result assert "Subtitle" in result assert "Content" in result def test_nested_tags(self): html = "Article text
Good text
" result = SimpleHTMLParser.html_to_text(html) assert "bad code" not in result assert "Good text" in result def test_br_tags(self): html = "Line 1word1 word2 word3
" result = SimpleHTMLParser.html_to_text(html) # Collapsed spaces assert "word1" in result assert "word2" in result assert "word3" in result assert "word1 word2" not in result # ═══════════════════════════════════════════════════════════════ # URLFetcher # ═══════════════════════════════════════════════════════════════ class TestURLFetcher: def test_init_defaults(self): fetcher = URLFetcher() assert fetcher._timeout == 15 assert fetcher._max_content_length == 500_000 def test_init_custom(self): fetcher = URLFetcher(timeout=30, max_content_length=100_000) assert fetcher._timeout == 30 assert fetcher._max_content_length == 100_000 def test_fetch_fails_for_nonexistent_host(self): fetcher = URLFetcher(timeout=2) result = fetcher.fetch("http://this-should-not-exist-xyz.invalid/") assert isinstance(result, dict) assert result["success"] is False assert result["url"] == "http://this-should-not-exist-xyz.invalid/" def test_fetch_result_keys(self): fetcher = URLFetcher(timeout=1) result = fetcher.fetch("http://nope.invalid") for key in ("success", "url", "title", "content", "error"): assert key in result def test_fetch_success_mock(self): fetcher = URLFetcher() mock_html = b"Hello World
" mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.headers.get = lambda key, default="": ( "text/html; charset=utf-8" if "Content-Type" in key else default ) mock_response.read.return_value = mock_html with patch("urllib.request.urlopen", return_value=mock_response): result = fetcher.fetch("https://example.com") assert result["success"] is True assert "Hello World" in result["content"] assert result["title"] == "Test Page" def test_fetch_unsupported_content_type(self): fetcher = URLFetcher() mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.headers.get = lambda key, default="": ( "application/pdf" if "Content-Type" in key else default ) with patch("urllib.request.urlopen", return_value=mock_response): result = fetcher.fetch("https://example.com/file.pdf") assert result["success"] is False assert "Unsupported content type" in result["error"] def test_fetch_http_error(self): fetcher = URLFetcher() with patch("urllib.request.urlopen", side_effect=urllib.error.HTTPError( url="http://example.com", code=404, msg="Not Found", hdrs=email.message.Message(), fp=None )): result = fetcher.fetch("http://example.com") assert result["success"] is False assert "HTTP Error 404" in result["error"] def test_fetch_url_error(self): fetcher = URLFetcher() with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("Name resolution failed")): result = fetcher.fetch("http://doesnotexist.invalid") assert result["success"] is False assert "URL Error" in result["error"] def test_fetch_timeout(self): fetcher = URLFetcher(timeout=1) with patch("urllib.request.urlopen", side_effect=TimeoutError("timed out")): result = fetcher.fetch("http://example.com") assert result["success"] is False assert "timed out" in result["error"].lower() def test_fetch_text_plain_content_type(self): fetcher = URLFetcher() mock_html = b"Hello plain text" mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.headers.get = lambda key, default="": ( "text/plain" if "Content-Type" in key else default ) mock_response.read.return_value = mock_html with patch("urllib.request.urlopen", return_value=mock_response): result = fetcher.fetch("https://example.com/text.txt") assert result["success"] is True assert "Hello plain text" in result["content"] # ═══════════════════════════════════════════════════════════════ # DuckDuckGoProvider # ═══════════════════════════════════════════════════════════════ class TestDuckDuckGoProvider: def test_init_defaults(self): provider = DuckDuckGoProvider() assert provider._timeout == 10 def test_init_custom(self): provider = DuckDuckGoProvider(timeout=30) assert provider._timeout == 30 def test_search_with_abstract(self): provider = DuckDuckGoProvider() ddg_response = { "Heading": "Python", "Abstract": "A programming language", "AbstractURL": "https://python.org", "RelatedTopics": [], "Results": [], } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(ddg_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("python", max_results=5) assert len(results) == 1 assert results[0]["title"] == "Python" assert results[0]["snippet"] == "A programming language" def test_search_with_related_topics(self): provider = DuckDuckGoProvider() ddg_response = { "Heading": "", "Abstract": "", "AbstractURL": "", "RelatedTopics": [ {"Text": "Result 1", "FirstURL": "https://example.com/1"}, {"Text": "Result 2", "FirstURL": "https://example.com/2"}, ], "Results": [], } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(ddg_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("test", max_results=5) assert len(results) == 2 def test_search_network_error_returns_empty(self): provider = DuckDuckGoProvider() with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("failed")): results = provider.search("test") assert results == [] def test_search_excludes_non_dict_topics(self): provider = DuckDuckGoProvider() ddg_response = { "Heading": "", "Abstract": "", "AbstractURL": "", "RelatedTopics": [ "not a dict", {"Text": "", "FirstURL": ""}, {"Text": "Valid topic", "FirstURL": "https://valid.com"}, ], "Results": [], } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(ddg_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("test", max_results=10) assert any(r["snippet"] == "Valid topic" for r in results) def test_search_respects_max_results(self): provider = DuckDuckGoProvider() ddg_response = { "Heading": "", "Abstract": "", "AbstractURL": "", "RelatedTopics": [{"Text": f"Topic {i}", "FirstURL": f"https://ex.com/{i}"} for i in range(10)], "Results": [], } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(ddg_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("test", max_results=3) assert len(results) <= 3 # ═══════════════════════════════════════════════════════════════ # SerperProvider # ═══════════════════════════════════════════════════════════════ class TestSerperProvider: def test_init(self): provider = SerperProvider(api_key="test-key") assert provider._api_key == "test-key" assert provider._timeout == 10 def test_search_organic(self): provider = SerperProvider(api_key="test-key") serper_response = { "organic": [ {"title": "Result 1", "link": "https://example.com/1", "snippet": "Snippet 1"}, {"title": "Result 2", "link": "https://example.com/2", "snippet": "Snippet 2"}, ] } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(serper_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("test query", max_results=5) assert len(results) == 2 assert results[0]["title"] == "Result 1" def test_search_with_answer_box(self): provider = SerperProvider(api_key="test-key") serper_response = { "organic": [ {"title": "Result", "link": "https://example.com", "snippet": "info"} ], "answerBox": { "title": "Direct Answer", "link": "https://answer.com", "answer": "The answer is 42", } } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(serper_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("query", max_results=5) # Answer box should be inserted at position 0 assert results[0]["title"] == "Direct Answer" def test_search_network_error_returns_empty(self): provider = SerperProvider(api_key="key") with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("failed")): results = provider.search("test") assert results == [] # ═══════════════════════════════════════════════════════════════ # TavilyProvider # ═══════════════════════════════════════════════════════════════ class TestTavilyProvider: def test_init(self): provider = TavilyProvider(api_key="tavily-key") assert provider._api_key == "tavily-key" assert provider._include_answer is True def test_search_with_answer(self): provider = TavilyProvider(api_key="key") tavily_response = { "answer": "The answer is 42", "results": [ {"title": "Page 1", "url": "https://example.com/1", "content": "Content 1"}, {"title": "Page 2", "url": "https://example.com/2", "content": "Content 2"}, ] } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(tavily_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("test", max_results=5) assert results[0]["title"] == "Tavily AI Answer" assert len(results) == 3 def test_search_no_answer(self): provider = TavilyProvider(api_key="key") tavily_response = { "results": [ {"title": "Page", "url": "https://example.com", "content": "Content"} ] } mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.read.return_value = json.dumps(tavily_response).encode("utf-8") with patch("urllib.request.urlopen", return_value=mock_response): results = provider.search("test", max_results=5) assert len(results) == 1 assert results[0]["title"] == "Page" def test_search_network_error_returns_empty(self): provider = TavilyProvider(api_key="key") with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("failed")): results = provider.search("test") assert results == [] # ═══════════════════════════════════════════════════════════════ # WebSearchTool # ═══════════════════════════════════════════════════════════════ class TestWebSearchToolInit: def test_init_default_provider(self): tool = WebSearchTool() assert isinstance(tool._provider, DuckDuckGoProvider) def test_init_custom_provider(self): provider = MockProvider() tool = WebSearchTool(provider=provider) assert tool._provider is provider def test_name_property(self): tool = WebSearchTool() assert tool.name == "web_search" def test_description_property(self): tool = WebSearchTool() desc = tool.description assert isinstance(desc, str) assert len(desc) > 0 def test_parameters_schema(self): tool = WebSearchTool() schema = tool.parameters_schema assert schema["type"] == "object" assert "query" in schema["properties"] assert "url" in schema["properties"] assert "action" in schema["properties"] def test_parameters_schema_with_selenium(self): tool = WebSearchTool() tool._use_selenium = True schema = tool.parameters_schema assert "selector" in schema["properties"] assert "js_code" in schema["properties"] class TestWebSearchToolExecute: def test_execute_search_with_query(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(query="python programming") assert result.success is True assert "Result 1" in result.output def test_execute_no_action_no_query_no_url(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute() assert result.success is False assert result.error def test_execute_search_empty_results(self): tool = WebSearchTool(provider=EmptyProvider()) result = tool.execute(query="something obscure") assert result.success is True assert "No results found" in result.output def test_execute_fetch_with_url(self): tool = WebSearchTool(provider=MockProvider()) mock_fetch_result = { "success": True, "url": "https://example.com", "title": "Test", "content": "Test content here", } with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result): result = tool.execute(url="https://example.com") assert result.success is True assert "Test content here" in result.output def test_execute_fetch_action_no_url(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="fetch") assert result.success is False assert result.error def test_execute_fetch_action_with_url(self): tool = WebSearchTool(provider=MockProvider()) mock_fetch_result = { "success": True, "url": "https://example.com", "title": "Page", "content": "Page content", } with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result): result = tool.execute(action="fetch", url="https://example.com") assert result.success is True def test_execute_fetch_failure(self): tool = WebSearchTool(provider=MockProvider()) mock_fetch_result = { "success": False, "url": "https://example.com", "title": "", "content": "", "error": "Connection refused", } with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result): result = tool.execute(action="fetch", url="https://example.com") assert result.success is False def test_execute_click_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="click", selector=".button") assert result.success is False assert result.error is not None assert "Selenium" in result.error def test_execute_fill_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="fill", selector="input", value="test") assert result.success is False assert result.error is not None assert "Selenium" in result.error def test_execute_extract_links_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="extract_links") assert result.success is False assert result.error is not None assert "Selenium" in result.error def test_execute_execute_js_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="execute_js", js_code="return 1") assert result.success is False assert result.error is not None assert "Selenium" in result.error def test_execute_execute_js_no_code(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="execute_js") assert result.success is False def test_execute_crawl_without_url(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="crawl") assert result.success is False def test_execute_crawl_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="crawl", url="https://example.com") assert result.success is False assert result.error is not None assert "Selenium" in result.error def test_execute_get_content_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="get_content") assert result.success is False def test_execute_search_action_explicit(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="search", query="test") assert result.success is True def test_execute_search_action_no_query(self): tool = WebSearchTool(provider=MockProvider()) result = tool.execute(action="search") assert result.success is False def test_execute_with_fetch_content(self): provider = MockProvider() tool = WebSearchTool(provider=provider, fetch_content=True) mock_fetch_result = { "success": True, "url": "https://example.com/1", "title": "Example", "content": "Page content here for fetching", } with patch.object(tool._fetcher, "fetch", return_value=mock_fetch_result): result = tool.execute(query="test query") assert result.success is True def test_execute_max_results_clipped(self): """max_results is capped at 10 in the implementation.""" tool = WebSearchTool(provider=MockProvider()) result = tool.execute(query="test", max_results=100) assert result.success is True class TestWebSearchToolFormatting: def test_format_search_results_empty(self): tool = WebSearchTool(provider=MockProvider()) formatted = tool._format_search_results([]) assert "No results found" in formatted def test_format_search_results_with_results(self): tool = WebSearchTool(provider=MockProvider()) results = [ {"title": "Test Title", "url": "https://example.com", "snippet": "A snippet"}, ] formatted = tool._format_search_results(results) assert "Test Title" in formatted assert "https://example.com" in formatted assert "A snippet" in formatted def test_format_search_results_with_content(self): tool = WebSearchTool(provider=MockProvider(), max_content_length=200) results = [ {"title": "Title", "url": "https://example.com", "snippet": "snap", "content": "Page content"}, ] formatted = tool._format_search_results(results, with_content=True) assert "Page content" in formatted def test_format_search_results_content_truncated(self): tool = WebSearchTool(provider=MockProvider(), max_content_length=5) results = [ {"title": "T", "url": "https://x.com", "snippet": "", "content": "A" * 100}, ] formatted = tool._format_search_results(results, with_content=True) assert "truncated" in formatted.lower() class TestWebSearchToolSeleniumCheck: def test_require_selenium_raises_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) assert tool._selenium_fetcher is None with pytest.raises(RuntimeError, match="Selenium"): tool._require_selenium("click") def test_context_manager(self): tool = WebSearchTool(provider=MockProvider()) with tool as t: assert t is tool def test_close_without_selenium(self): tool = WebSearchTool(provider=MockProvider()) tool.close() # Should not raise class TestCreateWebSearchToolFactory: def test_default_provider(self): tool = _create_web_search_tool() assert isinstance(tool._provider, DuckDuckGoProvider) def test_serper_provider_with_key(self): tool = _create_web_search_tool(provider="serper", api_key="my-key") assert isinstance(tool._provider, SerperProvider) def test_serper_provider_no_key_falls_back_to_ddg(self): tool = _create_web_search_tool(provider="serper") assert isinstance(tool._provider, DuckDuckGoProvider) def test_tavily_provider_with_key(self): tool = _create_web_search_tool(provider="tavily", api_key="my-key") assert isinstance(tool._provider, TavilyProvider) def test_tavily_provider_no_key_falls_back_to_ddg(self): tool = _create_web_search_tool(provider="tavily") assert isinstance(tool._provider, DuckDuckGoProvider) def test_duckduckgo_provider_explicit(self): tool = _create_web_search_tool(provider="duckduckgo") assert isinstance(tool._provider, DuckDuckGoProvider) def test_ddg_alias(self): tool = _create_web_search_tool(provider="ddg") assert isinstance(tool._provider, DuckDuckGoProvider) def test_unknown_provider_falls_back_to_ddg(self): tool = _create_web_search_tool(provider="unknown_xyz") assert isinstance(tool._provider, DuckDuckGoProvider) def test_serper_provider_serper_api_key_param(self): tool = _create_web_search_tool(provider="serper", serper_api_key="key") assert isinstance(tool._provider, SerperProvider) def test_tavily_provider_tavily_api_key_param(self): tool = _create_web_search_tool(provider="tavily", tavily_api_key="key") assert isinstance(tool._provider, TavilyProvider) # ═══════════════════════════════════════════════════════════════ # SeleniumFetcher — initialization only (no real browser) # ═══════════════════════════════════════════════════════════════ class TestSeleniumFetcherInit: def test_init_defaults(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() assert fetcher._headless is True assert fetcher._browser == "chrome" assert fetcher._wait_timeout == 15 assert fetcher._page_load_timeout == 30 assert fetcher._scroll_to_bottom is False assert fetcher._driver is None def test_init_custom(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher( headless=False, browser="firefox", wait_timeout=30, scroll_to_bottom=True, ) assert fetcher._headless is False assert fetcher._browser == "firefox" assert fetcher._wait_timeout == 30 assert fetcher._scroll_to_bottom is True def test_ensure_dependencies_import_error(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() with patch("builtins.__import__", side_effect=ImportError("No selenium")), pytest.raises(ImportError): fetcher._ensure_dependencies() def test_ensure_dependencies_with_selenium(self): """If selenium is available, _ensure_dependencies should not raise.""" from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() try: import selenium # noqa: F401 fetcher._ensure_dependencies() # Should not raise except ImportError: pytest.skip("selenium not installed") def test_close_no_driver(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() fetcher.close() # Should not raise assert fetcher._driver is None def test_close_with_mock_driver(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() mock_driver = MagicMock() fetcher._driver = mock_driver fetcher.close() mock_driver.quit.assert_called_once() assert fetcher._driver is None def test_context_manager(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() with fetcher as f: assert f is fetcher def test_create_driver_invalid_browser(self): from tools.web_search import SeleniumFetcher try: import selenium # noqa: F401 except ImportError: pytest.skip("selenium not installed") fetcher = SeleniumFetcher(browser="ie") with pytest.raises(ValueError, match="Unsupported browser"): fetcher._create_driver() def test_get_driver_creates_if_none(self): from tools.web_search import SeleniumFetcher fetcher = SeleniumFetcher() with patch.object(fetcher, "_create_driver", return_value=MagicMock()): driver = fetcher._get_driver() assert driver is not None # ═══════════════════════════════════════════════════════════════ # URLFetcher — charset detection # ═══════════════════════════════════════════════════════════════ class TestURLFetcherCharset: def test_custom_charset_in_content_type(self): """Test charset extraction from content-type header.""" fetcher = URLFetcher() mock_html = "Hello World".encode("latin-1") mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.headers.get = lambda key, default="": ( "text/html; charset=latin-1" if "Content-Type" in key else default ) mock_response.read.return_value = mock_html with patch("urllib.request.urlopen", return_value=mock_response): result = fetcher.fetch("https://example.com") assert result["success"] is True def test_unicode_decode_error_fallback(self): """Test fallback when charset decoding fails.""" fetcher = URLFetcher() mock_html = b"\xff\xfe Hello World" mock_response = MagicMock() mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.headers.get = lambda key, default="": ( "text/html; charset=utf-16" if "Content-Type" in key else default ) mock_response.read.return_value = mock_html with patch("urllib.request.urlopen", return_value=mock_response): result = fetcher.fetch("https://example.com") # Should succeed even if charset is tricky assert "success" in result def test_value_error_in_fetch(self): """Test handling of ValueError in fetch.""" fetcher = URLFetcher() with patch("urllib.request.urlopen", side_effect=ValueError("bad url")): result = fetcher.fetch("not-a-url") assert result["success"] is False def test_main_content_extraction(self): """Test that main/article content is extracted.""" fetcher = URLFetcher() html_content = b"""