Aksel Joonas Reedi committed on
Commit
72f615f
·
unverified ·
1 Parent(s): bce8a45

Add web search so the agent can cite current sources (#159)

Browse files

The agent had research tools for papers, docs, and repositories, but no direct current-web lookup. This ports Claw Code's WebSearch behavior into a Python tool that searches DuckDuckGo HTML, extracts citeable title/URL hits, applies domain filters, and returns the same JSON-shaped result payload for model consumption.

Constraint: The implementation was prepared in a writable checkout because the primary working tree was sandbox read-only during this session.

Rejected: Add a new search API dependency | the Claw implementation uses the DuckDuckGo HTML endpoint and the repo already has requests.

Confidence: medium

Scope-risk: narrow

Directive: Keep the output schema stable unless the agent prompt/tool consumers are updated together.

Tested: pytest tests/unit/test_web_search_tool.py tests/unit/test_malformed_args_recovery.py -q

Tested: python -m compileall agent/tools/web_search_tool.py agent/core/tools.py agent/tools/research_tool.py agent/tools/__init__.py tests/unit/test_web_search_tool.py

Not-tested: Live DuckDuckGo network response beyond mocked HTML parser coverage.

agent/core/tools.py CHANGED
@@ -51,6 +51,7 @@ from agent.tools.papers_tool import HF_PAPERS_TOOL_SPEC, hf_papers_handler
51
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
52
  from agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler
53
  from agent.tools.sandbox_tool import get_sandbox_tools
 
54
 
55
  # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
56
  # from agent.tools.private_hf_repo_tools import (
@@ -311,6 +312,12 @@ def create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]:
311
  parameters=HF_PAPERS_TOOL_SPEC["parameters"],
312
  handler=hf_papers_handler,
313
  ),
 
 
 
 
 
 
314
  # Dataset inspection tool (unified)
315
  ToolSpec(
316
  name=HF_INSPECT_DATASET_TOOL_SPEC["name"],
 
51
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
52
  from agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler
53
  from agent.tools.sandbox_tool import get_sandbox_tools
54
+ from agent.tools.web_search_tool import WEB_SEARCH_TOOL_SPEC, web_search_handler
55
 
56
  # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
57
  # from agent.tools.private_hf_repo_tools import (
 
312
  parameters=HF_PAPERS_TOOL_SPEC["parameters"],
313
  handler=hf_papers_handler,
314
  ),
315
+ ToolSpec(
316
+ name=WEB_SEARCH_TOOL_SPEC["name"],
317
+ description=WEB_SEARCH_TOOL_SPEC["description"],
318
+ parameters=WEB_SEARCH_TOOL_SPEC["parameters"],
319
+ handler=web_search_handler,
320
+ ),
321
  # Dataset inspection tool (unified)
322
  ToolSpec(
323
  name=HF_INSPECT_DATASET_TOOL_SPEC["name"],
agent/tools/__init__.py CHANGED
@@ -20,6 +20,7 @@ from agent.tools.github_read_file import (
20
  )
21
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
22
  from agent.tools.types import ToolResult
 
23
 
24
  __all__ = [
25
  "ToolResult",
@@ -36,4 +37,6 @@ __all__ = [
36
  "github_search_code_handler",
37
  "HF_INSPECT_DATASET_TOOL_SPEC",
38
  "hf_inspect_dataset_handler",
 
 
39
  ]
 
20
  )
21
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
22
  from agent.tools.types import ToolResult
23
+ from agent.tools.web_search_tool import WEB_SEARCH_TOOL_SPEC, web_search_handler
24
 
25
  __all__ = [
26
  "ToolResult",
 
37
  "github_search_code_handler",
38
  "HF_INSPECT_DATASET_TOOL_SPEC",
39
  "hf_inspect_dataset_handler",
40
+ "WEB_SEARCH_TOOL_SPEC",
41
+ "web_search_handler",
42
  ]
agent/tools/research_tool.py CHANGED
@@ -37,6 +37,7 @@ RESEARCH_TOOL_NAMES = {
37
  "github_find_examples",
38
  "github_list_repos",
39
  "github_read_file",
 
40
  "hf_inspect_dataset",
41
  "hf_repo_files",
42
  }
@@ -102,6 +103,8 @@ tell you what actually works.
102
  - `explore_hf_docs(endpoint)`: Search docs for a library. Endpoints: trl, transformers, datasets, peft, accelerate, trackio, vllm, inference-endpoints, etc.
103
  - `fetch_hf_docs(url)`: Fetch full page content from explore results
104
  - `find_hf_api(query=..., tag=...)`: Find REST API endpoints
 
 
105
 
106
  ## Hub repo inspection
107
  - `hf_repo_files`: List/read files in any HF repo (model, dataset, space)
@@ -426,7 +429,7 @@ async def research_handler(
426
  await _log(f"▸ {tool_name} {args_str}")
427
 
428
  output, _success = await session.tool_router.call_tool(
429
- tool_name, tool_args, session=session
430
  )
431
  _tool_uses += 1
432
  await _log(f"tools:{_tool_uses}")
 
37
  "github_find_examples",
38
  "github_list_repos",
39
  "github_read_file",
40
+ "web_search",
41
  "hf_inspect_dataset",
42
  "hf_repo_files",
43
  }
 
103
  - `explore_hf_docs(endpoint)`: Search docs for a library. Endpoints: trl, transformers, datasets, peft, accelerate, trackio, vllm, inference-endpoints, etc.
104
  - `fetch_hf_docs(url)`: Fetch full page content from explore results
105
  - `find_hf_api(query=..., tag=...)`: Find REST API endpoints
106
+ - `web_search(query=..., allowed_domains=[...], blocked_domains=[...])`:
107
+ Search the current web when papers/docs/GitHub are not enough.
108
 
109
  ## Hub repo inspection
110
  - `hf_repo_files`: List/read files in any HF repo (model, dataset, space)
 
429
  await _log(f"▸ {tool_name} {args_str}")
430
 
431
  output, _success = await session.tool_router.call_tool(
432
+ tool_name, tool_args, session=session, tool_call_id=tc.id
433
  )
434
  _tool_uses += 1
435
  await _log(f"tools:{_tool_uses}")
agent/tools/web_search_tool.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DuckDuckGo HTML web search tool.
2
+
3
+ This mirrors Claw Code's Rust WebSearch behavior: fetch DuckDuckGo's HTML
4
+ endpoint, extract result links, optionally filter domains, and return a
5
+ JSON payload the model can cite.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import html
12
+ import json
13
+ import os
14
+ import time
15
+ from dataclasses import dataclass
16
+ from html.parser import HTMLParser
17
+ from typing import Any
18
+ from urllib.parse import parse_qsl, parse_qs, urlencode, urlparse, urlunparse
19
+
20
+ import requests
21
+
22
+ DEFAULT_SEARCH_URL = "https://html.duckduckgo.com/html/"
23
+ WEB_SEARCH_BASE_URL_ENV = "CLAWD_WEB_SEARCH_BASE_URL"
24
+ USER_AGENT = "clawd-rust-tools/0.1"
25
+ REQUEST_TIMEOUT_SECONDS = 20
26
+ MAX_RESULTS = 8
27
+
28
+
29
@dataclass(frozen=True)
class SearchHit:
    """One citeable search result: a display title and the URL it points to."""

    title: str
    url: str

    def as_json(self) -> dict[str, str]:
        """Return the hit as a plain ``title``/``url`` mapping for JSON output."""
        return dict(title=self.title, url=self.url)
36
+
37
+
38
+ class _AnchorParser(HTMLParser):
39
+ def __init__(self, *, require_result_class: bool) -> None:
40
+ super().__init__(convert_charrefs=True)
41
+ self.require_result_class = require_result_class
42
+ self.hits: list[tuple[str, str]] = []
43
+ self._active_href: str | None = None
44
+ self._active_text: list[str] = []
45
+
46
+ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
47
+ if tag.lower() != "a":
48
+ return
49
+ attr_map = {key.lower(): value or "" for key, value in attrs}
50
+ href = attr_map.get("href")
51
+ if not href:
52
+ return
53
+ if self.require_result_class and "result__a" not in attr_map.get("class", ""):
54
+ return
55
+ self._active_href = href
56
+ self._active_text = []
57
+
58
+ def handle_data(self, data: str) -> None:
59
+ if self._active_href is not None:
60
+ self._active_text.append(data)
61
+
62
+ def handle_entityref(self, name: str) -> None:
63
+ if self._active_href is not None:
64
+ self._active_text.append(f"&{name};")
65
+
66
+ def handle_charref(self, name: str) -> None:
67
+ if self._active_href is not None:
68
+ self._active_text.append(f"&#{name};")
69
+
70
+ def handle_endtag(self, tag: str) -> None:
71
+ if tag.lower() != "a" or self._active_href is None:
72
+ return
73
+ title = collapse_whitespace(html.unescape("".join(self._active_text))).strip()
74
+ self.hits.append((self._active_href, title))
75
+ self._active_href = None
76
+ self._active_text = []
77
+
78
+
79
def build_search_url(query: str) -> str:
    """Return the search endpoint URL with ``q=<query>`` appended.

    The endpoint comes from the environment variable named by
    ``WEB_SEARCH_BASE_URL_ENV`` and falls back to ``DEFAULT_SEARCH_URL``.
    Query parameters already present on the base URL are preserved.

    Raises:
        ValueError: if the base URL is not http(s) or has no network location.
    """
    configured = os.environ.get(WEB_SEARCH_BASE_URL_ENV, DEFAULT_SEARCH_URL)
    parts = urlparse(configured)
    is_http = parts.scheme in ("http", "https")
    if not (is_http and parts.netloc):
        raise ValueError(f"invalid search base URL: {configured}")

    params = [*parse_qsl(parts.query, keep_blank_values=True), ("q", query)]
    encoded = urlencode(params)
    return urlunparse(parts._replace(query=encoded))
88
+
89
+
90
def collapse_whitespace(value: str) -> str:
    """Replace each whitespace run with one space and drop leading/trailing whitespace."""
    words = value.split()
    return " ".join(words)
92
+
93
+
94
def decode_duckduckgo_redirect(url: str) -> str | None:
    """Resolve a result ``href`` to the URL it ultimately points at.

    ``url`` is expected to be an already entity-decoded attribute value
    (which is what ``HTMLParser`` hands to ``handle_starttag``).

    * Absolute ``http(s)://`` URLs are returned unchanged.
    * Scheme-relative (``//host/...``) and root-relative (``/...``) hrefs are
      made absolute (``https:`` / ``https://duckduckgo.com``). If the result
      is a ``/l`` or ``/l/`` redirect, the value of its ``uddg`` query
      parameter is returned; otherwise the absolute URL itself is returned.
    * Anything else yields ``None``.

    The value is deliberately not passed through ``html.unescape``: the href
    has already been decoded once, and ``parse_qs`` percent-decodes ``uddg``.
    Unescaping again corrupts query strings whose parameter names look like
    legacy entities (``&copy=``, ``&reg=``, ``&not=`` ...).
    """
    if url.startswith(("http://", "https://")):
        return url
    if url.startswith("//"):
        absolute = f"https:{url}"
    elif url.startswith("/"):
        absolute = f"https://duckduckgo.com{url}"
    else:
        return None

    parsed = urlparse(absolute)
    if parsed.path in {"/l", "/l/"}:
        targets = parse_qs(parsed.query).get("uddg")
        if targets:
            return targets[0]
    return absolute
110
+
111
+
112
def _extract_links(search_html: str, *, require_result_class: bool) -> list[SearchHit]:
    """Parse ``search_html`` and return its anchors as ``SearchHit`` objects.

    Anchors with an empty title are dropped, as are anchors whose resolved
    target is missing or is not an ``http``/``https`` URL.
    """
    collector = _AnchorParser(require_result_class=require_result_class)
    collector.feed(search_html)

    found: list[SearchHit] = []
    for href, label in collector.hits:
        # Untitled anchors are never resolved or reported.
        target = decode_duckduckgo_redirect(href) if label else None
        if target and target.startswith(("http://", "https://")):
            found.append(SearchHit(title=label, url=target))
    return found
126
+
127
+
128
def extract_search_hits(search_html: str) -> list[SearchHit]:
    """Return hits from anchors marked with the ``result__a`` class only."""
    result_hits = _extract_links(search_html, require_result_class=True)
    return result_hits
130
+
131
+
132
def extract_search_hits_from_generic_links(search_html: str) -> list[SearchHit]:
    """Return hits from every anchor in the page, regardless of its class."""
    any_anchor_hits = _extract_links(search_html, require_result_class=False)
    return any_anchor_hits
134
+
135
+
136
def normalize_domain_filter(domain: str) -> str:
    """Reduce a domain-filter entry to a bare lowercase host name.

    If the entry parses as a URL with both a scheme and a host, the host is
    used; otherwise the trimmed entry itself is. Leading dots and trailing
    slashes are removed from the result.
    """
    text = domain.strip()
    parts = urlparse(text)
    host = text
    if parts.scheme and parts.hostname:
        host = parts.hostname
    cleaned = host.strip().lstrip(".").rstrip("/")
    return cleaned.lower()
141
+
142
+
143
def host_matches_list(url: str, domains: list[str]) -> bool:
    """Report whether the host of ``url`` equals, or is a subdomain of, any entry.

    Entries are normalized with ``normalize_domain_filter``; entries that
    normalize to an empty string never match. A URL without a host never
    matches.
    """
    hostname = urlparse(url).hostname
    if not hostname:
        return False

    target = hostname.lower()
    candidates = (normalize_domain_filter(entry) for entry in domains)
    return any(
        bool(candidate) and (target == candidate or target.endswith(f".{candidate}"))
        for candidate in candidates
    )
155
+
156
+
157
def dedupe_hits(hits: list[SearchHit]) -> list[SearchHit]:
    """Drop hits whose URL was already seen, keeping the first occurrence and order."""
    first_by_url: dict[str, SearchHit] = {}
    for candidate in hits:
        # setdefault keeps the earliest hit for each URL; dicts preserve
        # insertion order, so the output order matches first appearance.
        first_by_url.setdefault(candidate.url, candidate)
    return list(first_by_url.values())
166
+
167
+
168
def execute_web_search(
    query: str,
    allowed_domains: list[str] | None = None,
    blocked_domains: list[str] | None = None,
    tool_use_id: str = "web_search_1",
) -> dict[str, Any]:
    """Run one web search and return the JSON-shaped result payload.

    Steps: fetch the search page, extract ``result__a`` hits (falling back to
    all anchors when none are found), apply the allow/block domain filters,
    de-duplicate by URL and keep at most ``MAX_RESULTS`` hits.

    Returns:
        A dict with ``query``, ``results`` (a summary string followed by a
        ``{"tool_use_id", "content"}`` block) and ``durationSeconds``.

    Raises:
        ValueError: propagated from ``build_search_url`` for a bad base URL.
        Any exception raised by ``requests.get`` is propagated unchanged.
    """
    clock_start = time.monotonic()
    request_url = build_search_url(query)
    page = requests.get(
        request_url,
        headers={"User-Agent": USER_AGENT},
        timeout=REQUEST_TIMEOUT_SECONDS,
        allow_redirects=True,
    )

    found = extract_search_hits(page.text)
    if not found:
        # No classed result anchors: fall back to every link on the page,
        # provided the final (or requested) URL has a host.
        final_url = page.url or request_url
        if urlparse(final_url).hostname:
            found = extract_search_hits_from_generic_links(page.text)

    if allowed_domains is not None:
        found = [hit for hit in found if host_matches_list(hit.url, allowed_domains)]
    if blocked_domains is not None:
        found = [hit for hit in found if not host_matches_list(hit.url, blocked_domains)]

    top_hits = dedupe_hits(found)[:MAX_RESULTS]
    if top_hits:
        bullets = "\n".join(f"- [{hit.title}]({hit.url})" for hit in top_hits)
        summary = (
            f"Search results for {query!r}. Include a Sources section in the final answer.\n"
            f"{bullets}"
        )
    else:
        summary = f"No web search results matched the query {query!r}."

    citation_block = {
        "tool_use_id": tool_use_id,
        "content": [hit.as_json() for hit in top_hits],
    }
    return {
        "query": query,
        "results": [summary, citation_block],
        "durationSeconds": time.monotonic() - clock_start,
    }
213
+
214
+
215
# Tool registration metadata: name, model-facing description and the JSON
# schema of the accepted arguments. The two domain filters share one shape
# and differ only in key and wording, so they are generated from a table.
WEB_SEARCH_TOOL_SPEC = {
    "name": "web_search",
    "description": "Search the web for current information and return cited results.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "minLength": 2},
            **{
                key: {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": f"Optional {noun} of domains or URLs. Subdomains match.",
                }
                for key, noun in (
                    ("allowed_domains", "allowlist"),
                    ("blocked_domains", "blocklist"),
                )
            },
        },
        "required": ["query"],
        "additionalProperties": False,
    },
}
237
+
238
+
239
+ def _optional_string_list(arguments: dict[str, Any], key: str) -> list[str] | None:
240
+ value = arguments.get(key)
241
+ if value is None:
242
+ return None
243
+ if not isinstance(value, list) or not all(isinstance(item, str) for item in value):
244
+ raise ValueError(f"{key} must be an array of strings")
245
+ return value
246
+
247
+
248
async def web_search_handler(
    arguments: dict[str, Any],
    session: Any = None,
    tool_call_id: str | None = None,
    **_kw: Any,
) -> tuple[str, bool]:
    """Tool entry point: validate arguments, run the search, return ``(text, ok)``.

    On success ``text`` is the search payload serialized as indented JSON.
    On any failure (invalid arguments or an exception during the search)
    ``text`` is an ``Error...`` message and ``ok`` is ``False``. The blocking
    search runs in a worker thread via ``asyncio.to_thread``.
    """
    raw_query = arguments.get("query", "")
    if not isinstance(raw_query, str):
        return "Error: web_search requires a query string with at least 2 characters.", False

    query = raw_query.strip()
    if len(query) < 2:
        return "Error: web_search requires a query with at least 2 characters.", False

    try:
        # Filter validation happens inside the try so that a malformed list
        # is reported as a tool error rather than raised to the caller.
        allowed = _optional_string_list(arguments, "allowed_domains")
        blocked = _optional_string_list(arguments, "blocked_domains")
        payload = await asyncio.to_thread(
            execute_web_search,
            query=query,
            allowed_domains=allowed,
            blocked_domains=blocked,
            tool_use_id=tool_call_id or "web_search_1",
        )
    except Exception as exc:
        return f"Error executing web search: {exc}", False

    return json.dumps(payload, indent=2), True
tests/unit/test_web_search_tool.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from agent.core.tools import create_builtin_tools
6
+ from agent.tools import web_search_tool
7
+
8
+
9
+ class _FakeResponse:
10
+ def __init__(self, text: str, url: str = "https://html.duckduckgo.com/html/?q=x"):
11
+ self.text = text
12
+ self.url = url
13
+
14
+
15
+ def _content_block(output: dict):
16
+ return next(item for item in output["results"] if isinstance(item, dict))["content"]
17
+
18
+
19
def test_web_search_extracts_duckduckgo_results_and_filters_domains(monkeypatch):
    """Classed result links are extracted, then narrowed by allow/block lists.

    Also pins the outgoing request: URL built from the env override, the
    User-Agent header, the timeout and redirect following.
    """
    captured = {}
    page = """
        <html><body>
        <a class="result__a" href="https://docs.rs/reqwest">Reqwest docs</a>
        <a class="result__a" href="https://example.com/blocked">Blocked result</a>
        </body></html>
        """

    def fake_get(url, headers, timeout, allow_redirects):
        captured["url"] = url
        captured["user_agent"] = headers["User-Agent"]
        captured["timeout"] = timeout
        captured["allow_redirects"] = allow_redirects
        return _FakeResponse(page, url)

    monkeypatch.setenv(web_search_tool.WEB_SEARCH_BASE_URL_ENV, "http://search.test/search")
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)

    result = web_search_tool.execute_web_search(
        "rust web search",
        allowed_domains=["https://DOCS.rs/"],
        blocked_domains=["HTTPS://EXAMPLE.COM"],
    )

    expected_request = {
        "url": "http://search.test/search?q=rust+web+search",
        "user_agent": "clawd-rust-tools/0.1",
        "timeout": 20,
        "allow_redirects": True,
    }
    assert captured == expected_request
    assert result["query"] == "rust web search"
    assert _content_block(result) == [
        {"title": "Reqwest docs", "url": "https://docs.rs/reqwest"}
    ]
    summary = result["results"][0]
    assert "Include a Sources section" in summary
61
+
62
+
63
def test_web_search_decodes_duckduckgo_redirects():
    """A ``/l/?uddg=...`` redirect href resolves to its percent-decoded target."""
    markup = """
        <a class="result__a"
           href="/l/?uddg=https%3A%2F%2Fexample.org%2Fpaper%3Fx%3D1&amp;rut=abc">
          Example Paper
        </a>
        """
    expected = web_search_tool.SearchHit(
        title="Example Paper",
        url="https://example.org/paper?x=1",
    )

    assert web_search_tool.extract_search_hits(markup) == [expected]
79
+
80
+
81
def test_web_search_generic_fallback_dedupes_and_rejects_bad_base_url(monkeypatch):
    """Unclassed links are used as a fallback and de-duplicated by URL.

    A second phase checks that a malformed base URL override raises
    ``ValueError``.
    """
    page = """
        <html><body>
        <a href="https://example.com/one">Example One</a>
        <a href="https://example.com/one">Duplicate Example One</a>
        <a href="https://docs.rs/tokio">Tokio Docs</a>
        </body></html>
        """

    def fake_get(url, headers, timeout, allow_redirects):
        return _FakeResponse(page, url)

    env_name = web_search_tool.WEB_SEARCH_BASE_URL_ENV
    monkeypatch.setenv(env_name, "http://search.test/fallback")
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)

    result = web_search_tool.execute_web_search("generic links")

    assert _content_block(result) == [
        {"title": "Example One", "url": "https://example.com/one"},
        {"title": "Tokio Docs", "url": "https://docs.rs/tokio"},
    ]

    # Phase two: an override without scheme/host must be rejected.
    monkeypatch.setenv(env_name, "://bad-base-url")
    with pytest.raises(ValueError):
        web_search_tool.execute_web_search("generic links")
107
+
108
+
109
@pytest.mark.asyncio
async def test_web_search_handler_returns_pretty_json(monkeypatch):
    """Handler validates input, offloads the search and returns indented JSON.

    ``execute_web_search`` and ``asyncio.to_thread`` are both replaced so no
    network or thread is involved; the fake ``to_thread`` records its calls.
    """
    to_thread_calls = []

    async def fake_to_thread(func, /, *args, **kwargs):
        to_thread_calls.append((func, args, kwargs))
        return func(*args, **kwargs)

    monkeypatch.setattr(
        web_search_tool,
        "execute_web_search",
        lambda **kwargs: {
            "query": kwargs["query"],
            "results": ["No web search results matched the query 'x'.", {"content": []}],
            "durationSeconds": 0.1,
        },
    )
    monkeypatch.setattr(web_search_tool.asyncio, "to_thread", fake_to_thread)

    # Too-short query: rejected before any search is attempted.
    text, ok = await web_search_tool.web_search_handler({"query": "x"})

    assert ok is False
    assert "at least 2 characters" in text
    assert to_thread_calls == []

    # Valid query: payload comes back as JSON and the call id is forwarded.
    text, ok = await web_search_tool.web_search_handler(
        {"query": "valid query"}, tool_call_id="call_123"
    )

    assert ok is True
    parsed = json.loads(text)
    assert parsed["query"] == "valid query"
    # The test name promises pretty JSON: pin the indent=2 formatting.
    assert text == json.dumps(parsed, indent=2)
    assert to_thread_calls[0][0] is web_search_tool.execute_web_search
    assert to_thread_calls[0][2]["tool_use_id"] == "call_123"

    # Domain filter of the wrong type: reported as a tool error.
    text, ok = await web_search_tool.web_search_handler(
        {"query": "valid query", "allowed_domains": "docs.rs"}
    )

    assert ok is False
    assert "allowed_domains must be an array of strings" in text

    # Non-string query: rejected with the "query string" message.
    text, ok = await web_search_tool.web_search_handler({"query": None})

    assert ok is False
    assert "query string" in text
154
+
155
+
156
def test_web_search_is_registered_for_llm():
    """``web_search`` is among the built-in tools and requires only ``query``."""
    by_name = {}
    for tool in create_builtin_tools(local_mode=True):
        by_name[tool.name] = tool

    assert "web_search" in by_name
    web_search = by_name["web_search"]
    assert web_search.parameters["required"] == ["query"]