File size: 5,156 Bytes
72f615f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754345f
 
 
72f615f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754345f
 
 
72f615f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754345f
 
 
 
72f615f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import json

import pytest

from agent.core.tools import create_builtin_tools
from agent.tools import web_search_tool


class _FakeResponse:
    def __init__(self, text: str, url: str = "https://html.duckduckgo.com/html/?q=x"):
        self.text = text
        self.url = url


def _content_block(output: dict):
    return next(item for item in output["results"] if isinstance(item, dict))["content"]


def test_web_search_extracts_duckduckgo_results_and_filters_domains(monkeypatch):
    """Run a search against a stubbed ``requests.get`` serving ``result__a`` links.

    Verifies the arguments passed to the HTTP call, that only the hit
    surviving the allowed/blocked domain filters is reported, and that the
    first result entry contains the "Include a Sources section" text.
    """
    seen = {}
    page = """
            <html><body>
              <a class="result__a" href="https://docs.rs/reqwest">Reqwest docs</a>
              <a class="result__a" href="https://example.com/blocked">Blocked result</a>
            </body></html>
            """

    def fake_get(url, headers, timeout, allow_redirects):
        # Record what the tool passed so it can be asserted after the call.
        seen.update(
            url=url,
            user_agent=headers["User-Agent"],
            timeout=timeout,
            allow_redirects=allow_redirects,
        )
        return _FakeResponse(page, url)

    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)
    monkeypatch.setenv(
        web_search_tool.WEB_SEARCH_BASE_URL_ENV, "http://search.test/search"
    )

    # Filters are deliberately given with a scheme, mixed case and a trailing
    # slash rather than as bare host names.
    output = web_search_tool.execute_web_search(
        "rust web search",
        allowed_domains=["https://DOCS.rs/"],
        blocked_domains=["HTTPS://EXAMPLE.COM"],
    )

    expected_request = {
        "url": "http://search.test/search?q=rust+web+search",
        "user_agent": "clawd-rust-tools/0.1",
        "timeout": 20,
        "allow_redirects": True,
    }
    expected_hits = [{"title": "Reqwest docs", "url": "https://docs.rs/reqwest"}]

    assert seen == expected_request
    assert output["query"] == "rust web search"
    assert _content_block(output) == expected_hits
    assert "Include a Sources section" in output["results"][0]


def test_web_search_decodes_duckduckgo_redirects():
    """A ``/l/?uddg=...`` redirect link is reported as its decoded target URL.

    The anchor text is also expected to come back without the surrounding
    whitespace present in the markup.
    """
    markup = """
        <a class="result__a"
           href="/l/?uddg=https%3A%2F%2Fexample.org%2Fpaper%3Fx%3D1&amp;rut=abc">
          Example Paper
        </a>
        """
    expected_hit = web_search_tool.SearchHit(
        title="Example Paper",
        url="https://example.org/paper?x=1",
    )

    hits = web_search_tool.extract_search_hits(markup)

    assert hits == [expected_hit]


def test_web_search_generic_fallback_dedupes_and_rejects_bad_base_url(monkeypatch):
    """Plain anchors are collected once per URL; a malformed base URL raises.

    The stubbed page contains ordinary ``<a>`` tags (no ``result__a`` class),
    two of which share the same href. Afterwards the base-URL environment
    variable is set to an invalid value and a ``ValueError`` is expected.
    """
    page = """
            <html><body>
              <a href="https://example.com/one">Example One</a>
              <a href="https://example.com/one">Duplicate Example One</a>
              <a href="https://docs.rs/tokio">Tokio Docs</a>
            </body></html>
            """

    def fake_get(url, headers, timeout, allow_redirects):
        return _FakeResponse(page, url)

    base_url_env = web_search_tool.WEB_SEARCH_BASE_URL_ENV
    monkeypatch.setattr(web_search_tool.requests, "get", fake_get)
    monkeypatch.setenv(base_url_env, "http://search.test/fallback")

    output = web_search_tool.execute_web_search("generic links")

    # The second anchor repeats the first URL, so only its first title survives.
    expected_hits = [
        {"title": "Example One", "url": "https://example.com/one"},
        {"title": "Tokio Docs", "url": "https://docs.rs/tokio"},
    ]
    assert _content_block(output) == expected_hits

    monkeypatch.setenv(base_url_env, "://bad-base-url")
    with pytest.raises(ValueError):
        web_search_tool.execute_web_search("generic links")


@pytest.mark.asyncio
async def test_web_search_handler_returns_pretty_json(monkeypatch):
    """Exercise ``web_search_handler`` with one valid and three invalid inputs.

    ``execute_web_search`` and ``asyncio.to_thread`` are both stubbed, so no
    network access or worker thread is involved. A valid query must yield
    JSON-parsable text and forward the tool call id as ``tool_use_id``; a
    too-short query, a non-list ``allowed_domains`` and a ``None`` query must
    each return ``ok is False`` with a matching message.
    """
    recorded_calls = []

    def fake_execute(**kwargs):
        return {
            "query": kwargs["query"],
            "results": [
                "No web search results matched the query 'x'.",
                {"content": []},
            ],
            "durationSeconds": 0.1,
        }

    async def fake_to_thread(func, /, *args, **kwargs):
        # Run inline, but remember how the handler dispatched the work.
        recorded_calls.append((func, args, kwargs))
        return func(*args, **kwargs)

    monkeypatch.setattr(web_search_tool, "execute_web_search", fake_execute)
    monkeypatch.setattr(web_search_tool.asyncio, "to_thread", fake_to_thread)

    # One-character query.
    text, ok = await web_search_tool.web_search_handler({"query": "x"})

    assert ok is False
    assert "at least 2 characters" in text

    # Valid query with a tool call id.
    text, ok = await web_search_tool.web_search_handler(
        {"query": "valid query"}, tool_call_id="call_123"
    )

    assert ok is True
    parsed = json.loads(text)
    assert parsed["query"] == "valid query"
    dispatched_func, _dispatched_args, dispatched_kwargs = recorded_calls[0]
    assert dispatched_func is web_search_tool.execute_web_search
    assert dispatched_kwargs["tool_use_id"] == "call_123"

    # allowed_domains given as a bare string instead of a list.
    text, ok = await web_search_tool.web_search_handler(
        {"query": "valid query", "allowed_domains": "docs.rs"}
    )

    assert ok is False
    assert "allowed_domains must be an array of strings" in text

    # Query is None.
    text, ok = await web_search_tool.web_search_handler({"query": None})

    assert ok is False
    assert "query string" in text


def test_web_search_is_registered_for_llm():
    """The built-in tool list (local mode) exposes ``web_search`` requiring ``query``."""
    specs = {}
    for tool in create_builtin_tools(local_mode=True):
        specs[tool.name] = tool

    assert "web_search" in specs
    web_search_spec = specs["web_search"]
    assert web_search_spec.parameters["required"] == ["query"]