Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| import json | |
| import pytest | |
| from agent.core.tools import create_builtin_tools | |
| from agent.tools import web_search_tool | |
| class _FakeResponse: | |
| def __init__(self, text: str, url: str = "https://html.duckduckgo.com/html/?q=x"): | |
| self.text = text | |
| self.url = url | |
| def _content_block(output: dict): | |
| return next(item for item in output["results"] if isinstance(item, dict))["content"] | |
def test_web_search_extracts_duckduckgo_results_and_filters_domains(monkeypatch):
    """Check the outgoing request and the domain filtering of ``execute_web_search``.

    ``requests.get`` is replaced by a fake that records its arguments and
    returns a page with two ``result__a`` links. The search base URL is
    overridden through the environment variable named by
    ``WEB_SEARCH_BASE_URL_ENV``.
    """
    seen = {}

    def fake_get(url, headers, timeout, allow_redirects):
        # Record exactly what the tool passed so the request can be asserted below.
        seen.update(
            {
                "url": url,
                "user_agent": headers["User-Agent"],
                "timeout": timeout,
                "allow_redirects": allow_redirects,
            }
        )
        return _FakeResponse(
            """
            <html><body>
            <a class="result__a" href="https://docs.rs/reqwest">Reqwest docs</a>
            <a class="result__a" href="https://example.com/blocked">Blocked result</a>
            </body></html>
            """,
            url,
        )

    monkeypatch.setenv(
        web_search_tool.WEB_SEARCH_BASE_URL_ENV, "http://search.test/search"
    )
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)

    # Domains are deliberately given with a scheme, mixed case and a trailing
    # slash; the expected output below requires them to still match.
    output = web_search_tool.execute_web_search(
        "rust web search",
        allowed_domains=["https://DOCS.rs/"],
        blocked_domains=["HTTPS://EXAMPLE.COM"],
    )

    assert seen == {
        "url": "http://search.test/search?q=rust+web+search",
        "user_agent": "clawd-rust-tools/0.1",
        "timeout": 20,
        "allow_redirects": True,
    }
    assert output["query"] == "rust web search"
    # Only the docs.rs hit is expected to survive the filters.
    assert _content_block(output) == [
        {"title": "Reqwest docs", "url": "https://docs.rs/reqwest"}
    ]
    # The first results entry is a text summary that asks for a Sources section.
    assert "Include a Sources section" in output["results"][0]
def test_web_search_decodes_duckduckgo_redirects():
    """Check that ``extract_search_hits`` unwraps a ``/l/?uddg=...`` redirect link.

    The anchor's ``href`` carries the percent-encoded target in its ``uddg``
    query parameter. The expected hit holds the decoded target URL and the
    anchor text with surrounding whitespace removed.
    """
    hits = web_search_tool.extract_search_hits(
        """
        <a class="result__a"
           href="/l/?uddg=https%3A%2F%2Fexample.org%2Fpaper%3Fx%3D1&rut=abc">
          Example Paper
        </a>
        """
    )

    assert hits == [
        web_search_tool.SearchHit(
            title="Example Paper",
            url="https://example.org/paper?x=1",
        )
    ]
def test_web_search_generic_fallback_dedupes_and_rejects_bad_base_url(monkeypatch):
    """Check plain-anchor extraction with de-duplication, then a malformed base URL.

    Phase 1: the fake page has no ``result__a`` links, only plain anchors, two
    of which share a URL. The expected output keeps the first occurrence of
    each URL.
    Phase 2: with the base-URL environment variable set to ``://bad-base-url``,
    ``execute_web_search`` is expected to raise ``ValueError``.
    """

    def fake_get(url, headers, timeout, allow_redirects):
        return _FakeResponse(
            """
            <html><body>
            <a href="https://example.com/one">Example One</a>
            <a href="https://example.com/one">Duplicate Example One</a>
            <a href="https://docs.rs/tokio">Tokio Docs</a>
            </body></html>
            """,
            url,
        )

    monkeypatch.setenv(
        web_search_tool.WEB_SEARCH_BASE_URL_ENV, "http://search.test/fallback"
    )
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)

    output = web_search_tool.execute_web_search("generic links")

    # The duplicate link to https://example.com/one must be dropped.
    assert _content_block(output) == [
        {"title": "Example One", "url": "https://example.com/one"},
        {"title": "Tokio Docs", "url": "https://docs.rs/tokio"},
    ]

    # A base URL with no scheme must be rejected.
    monkeypatch.setenv(web_search_tool.WEB_SEARCH_BASE_URL_ENV, "://bad-base-url")
    with pytest.raises(ValueError):
        web_search_tool.execute_web_search("generic links")
# NOTE(review): no async marker is visible on this coroutine test; presumably the
# project's pytest configuration enables an async plugin mode — confirm.
async def test_web_search_handler_returns_pretty_json(monkeypatch):
    """Exercise ``web_search_handler`` validation and its hand-off to ``to_thread``.

    ``execute_web_search`` is replaced by a stub returning a fixed payload, and
    ``asyncio.to_thread`` by a recorder that runs the callable inline, so no
    network access or worker thread is involved.
    """
    to_thread_calls = []

    async def fake_to_thread(func, /, *args, **kwargs):
        # Record the delegation, then run synchronously in the test's own task.
        to_thread_calls.append((func, args, kwargs))
        return func(*args, **kwargs)

    monkeypatch.setattr(
        web_search_tool,
        "execute_web_search",
        lambda **kwargs: {
            "query": kwargs["query"],
            "results": [
                "No web search results matched the query 'x'.",
                {"content": []},
            ],
            "durationSeconds": 0.1,
        },
    )
    monkeypatch.setattr(web_search_tool.asyncio, "to_thread", fake_to_thread)

    # A one-character query is rejected with a length message.
    text, ok = await web_search_tool.web_search_handler({"query": "x"})
    assert ok is False
    assert "at least 2 characters" in text

    # A valid query succeeds and the returned text parses as JSON.
    text, ok = await web_search_tool.web_search_handler(
        {"query": "valid query"}, tool_call_id="call_123"
    )
    assert ok is True
    parsed = json.loads(text)
    assert parsed["query"] == "valid query"
    # Index 0 being this call implies the rejected query above never reached
    # to_thread. The handler forwards tool_call_id as the tool_use_id keyword.
    assert to_thread_calls[0][0] is web_search_tool.execute_web_search
    assert to_thread_calls[0][2]["tool_use_id"] == "call_123"

    # allowed_domains given as a bare string instead of a list is rejected.
    text, ok = await web_search_tool.web_search_handler(
        {"query": "valid query", "allowed_domains": "docs.rs"}
    )
    assert ok is False
    assert "allowed_domains must be an array of strings" in text

    # A None query is rejected.
    text, ok = await web_search_tool.web_search_handler({"query": None})
    assert ok is False
    assert "query string" in text
def test_web_search_is_registered_for_llm():
    """Check that the built-in tool set exposes ``web_search`` with ``query`` required."""
    tools = create_builtin_tools(local_mode=True)
    # Index by tool name so the lookup below does not depend on list order.
    specs = {tool.name: tool for tool in tools}

    assert "web_search" in specs
    assert specs["web_search"].parameters["required"] == ["query"]