Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
File size: 5,156 Bytes
import json
import pytest
from agent.core.tools import create_builtin_tools
from agent.tools import web_search_tool
class _FakeResponse:
def __init__(self, text: str, url: str = "https://html.duckduckgo.com/html/?q=x"):
self.text = text
self.url = url
def _content_block(output: dict):
return next(item for item in output["results"] if isinstance(item, dict))["content"]
def test_web_search_extracts_duckduckgo_results_and_filters_domains(monkeypatch):
    """Check the outgoing request and the domain filtering of ``execute_web_search``.

    ``requests.get`` is replaced by a stub that records its arguments and
    returns a page with two ``result__a`` links. The test then asserts the
    recorded request (URL built from the env-configured base, user agent,
    timeout, redirects) and that only the ``docs.rs`` hit is left once the
    allowed/blocked domain lists are applied.
    """
    captured = {}
    # Kept flush-left so the fixture text is exactly the markup being parsed.
    page = """
<html><body>
<a class="result__a" href="https://docs.rs/reqwest">Reqwest docs</a>
<a class="result__a" href="https://example.com/blocked">Blocked result</a>
</body></html>
"""

    def fake_get(url, headers, timeout, allow_redirects):
        # Remember how the tool invoked requests.get so it can be asserted on.
        captured.update(
            url=url,
            user_agent=headers["User-Agent"],
            timeout=timeout,
            allow_redirects=allow_redirects,
        )
        return _FakeResponse(page, url)

    monkeypatch.setenv(
        web_search_tool.WEB_SEARCH_BASE_URL_ENV, "http://search.test/search"
    )
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)

    # Domain filters are passed with mixed case, scheme and trailing slash.
    output = web_search_tool.execute_web_search(
        "rust web search",
        allowed_domains=["https://DOCS.rs/"],
        blocked_domains=["HTTPS://EXAMPLE.COM"],
    )

    expected_request = {
        "url": "http://search.test/search?q=rust+web+search",
        "user_agent": "clawd-rust-tools/0.1",
        "timeout": 20,
        "allow_redirects": True,
    }
    assert captured == expected_request
    assert output["query"] == "rust web search"
    assert _content_block(output) == [
        {"title": "Reqwest docs", "url": "https://docs.rs/reqwest"}
    ]
    # The first results entry is expected to contain the sources instruction.
    assert "Include a Sources section" in output["results"][0]
def test_web_search_decodes_duckduckgo_redirects():
    """Check that ``extract_search_hits`` unwraps a ``/l/?uddg=...`` redirect link.

    The anchor's ``href`` carries the percent-encoded target in its ``uddg``
    query parameter; the expected hit holds the decoded target URL and the
    anchor text without surrounding whitespace.
    """
    # Kept flush-left so the fixture text is exactly the markup being parsed.
    redirect_markup = """
<a class="result__a"
href="/l/?uddg=https%3A%2F%2Fexample.org%2Fpaper%3Fx%3D1&rut=abc">
Example Paper
</a>
"""
    hits = web_search_tool.extract_search_hits(redirect_markup)

    expected_hit = web_search_tool.SearchHit(
        title="Example Paper",
        url="https://example.org/paper?x=1",
    )
    assert hits == [expected_hit]
def test_web_search_generic_fallback_dedupes_and_rejects_bad_base_url(monkeypatch):
    """Check plain-anchor extraction, URL de-duplication, and base-URL validation.

    The stubbed page has no ``result__a`` links, only plain anchors, two of
    which share a URL. The test expects one hit per distinct URL (keeping the
    first title), and then expects ``ValueError`` once the base-URL environment
    variable is set to a malformed value.
    """
    # Kept flush-left so the fixture text is exactly the markup being parsed.
    page = """
<html><body>
<a href="https://example.com/one">Example One</a>
<a href="https://example.com/one">Duplicate Example One</a>
<a href="https://docs.rs/tokio">Tokio Docs</a>
</body></html>
"""

    def fake_get(url, headers, timeout, allow_redirects):
        # Always serve the same page, echoing back the requested URL.
        return _FakeResponse(page, url)

    base_url_env = web_search_tool.WEB_SEARCH_BASE_URL_ENV
    monkeypatch.setenv(base_url_env, "http://search.test/fallback")
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)

    output = web_search_tool.execute_web_search("generic links")
    expected_hits = [
        {"title": "Example One", "url": "https://example.com/one"},
        {"title": "Tokio Docs", "url": "https://docs.rs/tokio"},
    ]
    assert _content_block(output) == expected_hits

    # Second half: a malformed base URL must be rejected.
    monkeypatch.setenv(base_url_env, "://bad-base-url")
    with pytest.raises(ValueError):
        web_search_tool.execute_web_search("generic links")
@pytest.mark.asyncio
async def test_web_search_handler_returns_pretty_json(monkeypatch):
    """Exercise ``web_search_handler`` argument validation and its JSON output.

    ``execute_web_search`` and ``asyncio.to_thread`` are both replaced with
    stubs, so no real search runs. Four handler calls are checked in order:
    a too-short query, a valid query with a tool call id, a non-list
    ``allowed_domains``, and a ``None`` query.
    """
    recorded_calls = []

    async def fake_to_thread(func, /, *args, **kwargs):
        # Run the callable inline while recording what the handler offloaded.
        recorded_calls.append((func, args, kwargs))
        return func(*args, **kwargs)

    def fake_execute(**kwargs):
        # Canned search output that echoes the query it was called with.
        return {
            "query": kwargs["query"],
            "results": [
                "No web search results matched the query 'x'.",
                {"content": []},
            ],
            "durationSeconds": 0.1,
        }

    monkeypatch.setattr(web_search_tool, "execute_web_search", fake_execute)
    monkeypatch.setattr(web_search_tool.asyncio, "to_thread", fake_to_thread)

    # 1) A one-character query is rejected.
    message, succeeded = await web_search_tool.web_search_handler({"query": "x"})
    assert succeeded is False
    assert "at least 2 characters" in message

    # 2) A valid query succeeds and yields JSON text.
    message, succeeded = await web_search_tool.web_search_handler(
        {"query": "valid query"}, tool_call_id="call_123"
    )
    assert succeeded is True
    payload = json.loads(message)
    assert payload["query"] == "valid query"
    # Index 0 is this valid call, so the rejected query above was never
    # passed to to_thread.
    offloaded_func, _positional, offloaded_kwargs = recorded_calls[0]
    assert offloaded_func is web_search_tool.execute_web_search
    assert offloaded_kwargs["tool_use_id"] == "call_123"

    # 3) allowed_domains given as a bare string is rejected.
    message, succeeded = await web_search_tool.web_search_handler(
        {"query": "valid query", "allowed_domains": "docs.rs"}
    )
    assert succeeded is False
    assert "allowed_domains must be an array of strings" in message

    # 4) A None query is rejected.
    message, succeeded = await web_search_tool.web_search_handler({"query": None})
    assert succeeded is False
    assert "query string" in message
def test_web_search_is_registered_for_llm():
    """Check that the built-in tools include ``web_search`` requiring only ``query``.

    Tools are created with ``local_mode=True`` and indexed by their ``name``
    attribute before the assertions run.
    """
    by_name = {}
    for tool in create_builtin_tools(local_mode=True):
        by_name[tool.name] = tool

    assert "web_search" in by_name
    web_search_spec = by_name["web_search"]
    assert web_search_spec.parameters["required"] == ["query"]
|