b2230765034 committed on
Commit
fb5275d
·
1 Parent(s): 1bd7efb

stage3: real-search adapter + integration tests (with httpx mocking)

Browse files
src/agents/__pycache__/research_agent.cpython-313.pyc CHANGED
Binary files a/src/agents/__pycache__/research_agent.cpython-313.pyc and b/src/agents/__pycache__/research_agent.cpython-313.pyc differ
 
src/agents/research_agent.py CHANGED
@@ -8,17 +8,23 @@ from dataclasses import dataclass, field
8
  from typing import Any
9
  from .base import BaseAgent
10
 
 
 
 
 
 
 
11
 
12
  @dataclass
13
  class ResearchAgent(BaseAgent):
14
  """
15
  Agent that performs research/search tasks.
16
 
17
- Currently uses simulated search results.
18
- Will be upgraded to real web search in later stages.
19
  """
20
  role: str = "research"
21
  tools: list[str] = field(default_factory=lambda: ["web_search", "document_fetch"])
 
22
 
23
  async def run(self, input: dict[str, Any]) -> dict[str, Any]:
24
  """
@@ -33,13 +39,20 @@ class ResearchAgent(BaseAgent):
33
  query = input.get("query", "")
34
  self.log(f"Researching: {query}")
35
 
36
- # Simulated search results (will be replaced with real search in Stage 3)
37
- simulated_results = self._simulate_search(query)
 
 
 
 
 
 
 
38
 
39
  return {
40
  "agent": "research",
41
  "query": query,
42
- "results": simulated_results
43
  }
44
 
45
  def _simulate_search(self, query: str) -> list[dict[str, str]]:
 
8
  from typing import Any
9
  from .base import BaseAgent
10
 
11
+ # Import the searcher tool
12
+ import sys
13
+ import os
14
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
15
+ from tools.searcher import search as web_search
16
+
17
 
18
  @dataclass
19
  class ResearchAgent(BaseAgent):
20
  """
21
  Agent that performs research/search tasks.
22
 
23
+ Uses the searcher tool for web search with fallback to simulated results.
 
24
  """
25
  role: str = "research"
26
  tools: list[str] = field(default_factory=lambda: ["web_search", "document_fetch"])
27
+ use_real_search: bool = True # Flag to enable/disable real search
28
 
29
  async def run(self, input: dict[str, Any]) -> dict[str, Any]:
30
  """
 
39
  query = input.get("query", "")
40
  self.log(f"Researching: {query}")
41
 
42
+ # Use real search or simulated based on flag
43
+ if self.use_real_search:
44
+ try:
45
+ search_results = await web_search(query, max_results=5)
46
+ except Exception as e:
47
+ self.log(f"Search failed, using simulation: {e}", level="warning")
48
+ search_results = self._simulate_search(query)
49
+ else:
50
+ search_results = self._simulate_search(query)
51
 
52
  return {
53
  "agent": "research",
54
  "query": query,
55
+ "results": search_results
56
  }
57
 
58
  def _simulate_search(self, query: str) -> list[dict[str, str]]:
src/tools/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Tools module
src/tools/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (160 Bytes). View file
 
src/tools/__pycache__/searcher.cpython-313.pyc ADDED
Binary file (10.3 kB). View file
 
src/tools/searcher.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Web Search Tool
3
+ ================
4
+ Abstraction layer for web search functionality.
5
+ Supports real search via DuckDuckGo HTML scraping or API services,
6
+ with fallback to simulated results.
7
+ """
8
+
9
+ import os
10
+ import re
11
+ import logging
12
+ from typing import Optional
13
+ from dataclasses import dataclass
14
+
15
+ try:
16
+ import httpx
17
+ HTTPX_AVAILABLE = True
18
+ except ImportError:
19
+ HTTPX_AVAILABLE = False
20
+
21
+ logging.basicConfig(level=logging.INFO)
22
+ logger = logging.getLogger("Searcher")
23
+
24
+
25
@dataclass
class SearchResult:
    """One hit produced by a search backend: a title, a URL and a snippet."""
    title: str
    url: str
    snippet: str

    def to_dict(self) -> dict[str, str]:
        """Return the hit as a mapping with keys ``title``, ``url``, ``snippet``."""
        return {key: getattr(self, key) for key in ("title", "url", "snippet")}
38
+
39
+
40
class SearchConfig:
    """Configuration for search behavior."""

    # Name of the environment variable holding the Serper API key
    SERPER_API_KEY_ENV = "SERPER_API_KEY"

    # DuckDuckGo HTML endpoint (no API key needed)
    DUCKDUCKGO_HTML_URL = "https://html.duckduckgo.com/html/"

    # Passed as ``timeout=`` to httpx.AsyncClient by the search functions
    REQUEST_TIMEOUT = 10.0

    # Result cap; NOTE(review): not referenced by the search functions in
    # this module, which take ``max_results`` as an argument instead.
    MAX_RESULTS = 5

    @classmethod
    def get_api_key(cls) -> Optional[str]:
        """
        Get the API key from the environment if available.

        Returns:
            The key with surrounding whitespace removed, or None when the
            variable is unset, empty, or contains only whitespace.
        """
        raw_key = os.environ.get(cls.SERPER_API_KEY_ENV)
        if raw_key is None:
            return None
        # A blank value (e.g. ``SERPER_API_KEY=`` in a .env file) is not a
        # usable credential, so report it as "not configured".
        return raw_key.strip() or None

    @classmethod
    def has_api_key(cls) -> bool:
        """Check whether a non-blank API key is configured."""
        return cls.get_api_key() is not None
64
+
65
+
66
async def search(query: str, max_results: int = 5) -> list[dict[str, str]]:
    """
    Run a web search, trying each available backend in turn.

    Order of attempts:
        1. Serper.dev API, only when SearchConfig reports an API key
        2. DuckDuckGo HTML scraping, only when httpx could be imported
        3. Simulated results as the last resort

    A backend that raises is logged as a warning and skipped; a backend
    that returns nothing is skipped silently.

    Args:
        query: The search query string
        max_results: Maximum number of results to return

    Returns:
        List of search result dictionaries with title, url, snippet
    """
    logger.info(f"Searching for: {query}")

    serper_configured = SearchConfig.has_api_key()
    if serper_configured:
        # Backend 1: Serper API
        try:
            results = await _search_serper(query, max_results)
            if results:
                logger.info(f"Serper returned {len(results)} results")
                return results
        except Exception as e:
            logger.warning(f"Serper search failed: {e}")

    if HTTPX_AVAILABLE:
        # Backend 2: DuckDuckGo HTML scraping
        try:
            results = await _search_duckduckgo(query, max_results)
            if results:
                logger.info(f"DuckDuckGo returned {len(results)} results")
                return results
        except Exception as e:
            logger.warning(f"DuckDuckGo search failed: {e}")

    # Backend 3: nothing real worked, hand back simulated data
    logger.info("Using simulated search results")
    simulated = _simulate_search(query, max_results)
    return simulated
107
+
108
+
109
async def _search_serper(query: str, max_results: int) -> list[dict[str, str]]:
    """
    Query the Serper.dev search API.

    POSTs the query to https://google.serper.dev/search and converts each
    entry of the response's "organic" list into a title/url/snippet dict
    (the url is read from the entry's "link" field).

    Args:
        query: Search query
        max_results: Max results to return

    Returns:
        List of search results

    Raises:
        RuntimeError: httpx could not be imported
        ValueError: no API key is configured
        Whatever ``response.raise_for_status()`` raises for an error status
    """
    if not HTTPX_AVAILABLE:
        raise RuntimeError("httpx not available")

    api_key = SearchConfig.get_api_key()
    if not api_key:
        raise ValueError("SERPER_API_KEY not set")

    request_headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json"
    }
    payload = {"q": query, "num": max_results}

    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
        response = await client.post(
            "https://google.serper.dev/search",
            headers=request_headers,
            json=payload
        )
        response.raise_for_status()
        data = response.json()

    organic_hits = data.get("organic", [])[:max_results]
    return [
        {
            "title": hit.get("title", ""),
            "url": hit.get("link", ""),
            "snippet": hit.get("snippet", "")
        }
        for hit in organic_hits
    ]
148
+
149
+
150
# Any anchor element: group 1 is the raw attribute text, group 2 the inner HTML.
_DDG_ANCHOR_RE = re.compile(r'<a\b([^>]*)>(.*?)</a>', re.IGNORECASE | re.DOTALL)
_DDG_CLASS_RE = re.compile(r'(?:^|\s)class="([^"]*)"', re.IGNORECASE)
_DDG_HREF_RE = re.compile(r'(?:^|\s)href="([^"]*)"', re.IGNORECASE)
_DDG_TAG_RE = re.compile(r'<[^>]+>')


def _ddg_text(fragment: str) -> str:
    """Turn an HTML fragment into plain text: drop tags, decode entities, strip."""
    from html import unescape

    # Tags are removed before decoding so an escaped "&lt;b&gt;" stays literal text.
    return unescape(_DDG_TAG_RE.sub("", fragment)).strip()


def _parse_duckduckgo_html(page: str, max_results: int) -> list[dict[str, str]]:
    """Extract up to ``max_results`` title/url/snippet dicts from a DuckDuckGo HTML page."""
    from html import unescape
    from urllib.parse import unquote

    results: list[dict[str, str]] = []
    current = None  # the result a following snippet anchor belongs to

    for attrs, inner in _DDG_ANCHOR_RE.findall(page):
        class_match = _DDG_CLASS_RE.search(attrs)
        classes = class_match.group(1).split() if class_match else []

        if "result__a" in classes:
            if len(results) >= max_results:
                break
            current = None
            href_match = _DDG_HREF_RE.search(attrs)
            if href_match is None:
                continue

            # Attribute values are HTML-escaped ("&amp;" between parameters).
            url = unescape(href_match.group(1))

            # Clean up URL (DuckDuckGo wraps targets in a redirect link that
            # carries the real address in the "uddg" parameter)
            redirect = re.search(r'uddg=([^&]+)', url)
            if redirect:
                url = unquote(redirect.group(1))

            current = {"title": _ddg_text(inner), "url": url, "snippet": ""}
            results.append(current)
        elif "result__snippet" in classes:
            # Attach the snippet to the result it follows rather than pairing
            # by list index, so a result without a snippet cannot shift the
            # snippets of the results after it.
            if current is not None and not current["snippet"]:
                current["snippet"] = _ddg_text(inner)

    return results


async def _search_duckduckgo(query: str, max_results: int) -> list[dict[str, str]]:
    """
    Search using DuckDuckGo HTML endpoint (no API key needed).

    POSTs the query as form data to SearchConfig.DUCKDUCKGO_HTML_URL and
    parses the returned page. Titles and snippets may contain nested markup
    (for example ``<b>`` highlighting) and HTML entities; both are reduced
    to plain text.

    Args:
        query: Search query
        max_results: Max results to return; zero or less yields an empty list

    Returns:
        List of search results

    Raises:
        RuntimeError: httpx could not be imported
        Whatever ``response.raise_for_status()`` raises for an error status
    """
    if not HTTPX_AVAILABLE:
        raise RuntimeError("httpx not available")

    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
        response = await client.post(
            SearchConfig.DUCKDUCKGO_HTML_URL,
            data={"q": query},
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            }
        )
        response.raise_for_status()
        page = response.text

    return _parse_duckduckgo_html(page, max_results)
202
+
203
+
204
def _simulate_search(query: str, max_results: int) -> list[dict[str, str]]:
    """
    Generate simulated search results for testing/fallback.

    Five fixed templates are filled in with the query; their URLs all point
    at example.* hosts and use the query with spaces replaced by hyphens.

    Args:
        query: Search query
        max_results: Max results to return; zero or a negative value yields
            an empty list, values above five yield all five results

    Returns:
        List of simulated search results
    """
    # A negative bound must not reach the slice below: ``[:-1]`` would hand
    # back four results instead of none.
    if max_results <= 0:
        return []

    slug = query.replace(' ', '-')

    base_results = [
        {
            "title": f"Research findings on {query}",
            "url": f"https://research.example.com/{slug}",
            "snippet": f"Comprehensive research and analysis on {query}. "
                       "Expert insights and latest developments."
        },
        {
            "title": f"Understanding {query}: A Complete Guide",
            "url": f"https://guide.example.org/{slug}",
            "snippet": f"Everything you need to know about {query}. "
                       "Detailed explanations and practical examples."
        },
        {
            "title": f"Latest developments in {query}",
            "url": f"https://news.example.com/topics/{slug}",
            "snippet": f"Stay updated with the latest news about {query}. "
                       "Breaking stories and expert commentary."
        },
        {
            "title": f"{query} - Academic perspectives",
            "url": f"https://academic.example.edu/{slug}",
            "snippet": f"Academic research and peer-reviewed studies on {query}. "
                       "Citations and methodology included."
        },
        {
            "title": f"Practical applications of {query}",
            "url": f"https://apply.example.io/{slug}",
            "snippet": f"How to apply {query} in real-world scenarios. "
                       "Case studies and implementation guides."
        }
    ]

    return base_results[:max_results]
249
+
250
+
251
+ # Synchronous wrapper for non-async contexts
252
def search_sync(query: str, max_results: int = 5) -> list[dict[str, str]]:
    """
    Blocking counterpart of ``search`` for callers without an event loop.

    No network backend is contacted here: the simulated results are
    always what gets returned.
    """
    simulated = _simulate_search(query, max_results)
    return simulated
tests/__pycache__/test_search_integration.cpython-313-pytest-9.0.1.pyc ADDED
Binary file (29.4 kB). View file
 
tests/test_search_integration.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Search Integration Tests
3
+ =========================
4
+ Tests for the web search functionality with mocking.
5
+ """
6
+
7
+ import pytest
8
+ import sys
9
+ import os
10
+ from pathlib import Path
11
+ from unittest.mock import AsyncMock, patch, MagicMock
12
+
13
+ # Add src to path
14
+ sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
15
+
16
+ from tools.searcher import (
17
+ search,
18
+ _simulate_search,
19
+ _search_duckduckgo,
20
+ SearchConfig
21
+ )
22
+ from agents.research_agent import ResearchAgent
23
+ from orchestrator import Orchestrator
24
+
25
+
26
class TestSearcher:
    """Checks for the simulated backend and for the fallback path of ``search``."""

    def test_simulate_search_returns_results(self):
        """Simulated hits are a list of dicts carrying title, url and snippet."""
        hits = _simulate_search("AI healthcare", max_results=3)

        assert isinstance(hits, list)
        assert len(hits) == 3

        for hit in hits:
            # All three keys must exist; title and url must be non-empty.
            assert "title" in hit
            assert "url" in hit
            assert "snippet" in hit
            assert len(hit["title"]) > 0
            assert len(hit["url"]) > 0

    def test_simulate_search_respects_max_results(self):
        """The number of simulated hits follows ``max_results``."""
        two_hits = _simulate_search("test query", max_results=2)
        assert len(two_hits) == 2

        five_hits = _simulate_search("test query", max_results=5)
        assert len(five_hits) == 5

    @pytest.mark.asyncio
    async def test_search_fallback_to_simulation(self):
        """``search`` hands back simulated data when the real backend raises."""
        # Empty environment: no API key, so the Serper branch is not taken.
        with patch.dict(os.environ, {}, clear=True):
            # The DuckDuckGo backend is replaced by a mock that always fails.
            with patch('tools.searcher._search_duckduckgo', new_callable=AsyncMock) as mock_ddg:
                mock_ddg.side_effect = Exception("Network error")

                hits = await search("test query")

                assert isinstance(hits, list)
                assert len(hits) > 0
                # Simulated URLs live on example.* hosts.
                first_url = hits[0]["url"]
                assert "example" in first_url.lower()
66
+
67
+
68
class TestSearcherWithMockedHTTP:
    """Tests with mocked HTTP responses."""

    @pytest.mark.asyncio
    async def test_search_with_mocked_response(self):
        """Test search with a mocked successful DuckDuckGo backend."""
        mock_results = [
            {"title": "Mocked Result 1", "url": "https://mock.com/1", "snippet": "Mocked snippet 1"},
            {"title": "Mocked Result 2", "url": "https://mock.com/2", "snippet": "Mocked snippet 2"}
        ]

        # Keep the test hermetic:
        #  - an empty environment guarantees no SERPER_API_KEY, so search()
        #    cannot call the real Serper API before reaching DuckDuckGo;
        #  - HTTPX_AVAILABLE is forced on so the mocked backend is reached
        #    even on a machine where httpx is not installed.
        with patch.dict(os.environ, {}, clear=True):
            with patch('tools.searcher.HTTPX_AVAILABLE', True):
                with patch('tools.searcher._search_duckduckgo', new_callable=AsyncMock) as mock_ddg:
                    mock_ddg.return_value = mock_results

                    results = await search("mocked query")

                    assert results == mock_results
                    # search() forwards the query and its default max_results.
                    mock_ddg.assert_called_once_with("mocked query", 5)

    @pytest.mark.asyncio
    async def test_research_agent_uses_searcher(self):
        """Test that ResearchAgent properly uses the searcher."""
        mock_results = [
            {"title": "Agent Search Result", "url": "https://agent.test/1", "snippet": "Test snippet"}
        ]

        # Patch the name as imported into the agent module, which is the
        # one ResearchAgent.run looks up.
        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
            mock_search.return_value = mock_results

            agent = ResearchAgent(use_real_search=True)
            result = await agent.run({"query": "test medical AI"})

            assert result["agent"] == "research"
            assert result["query"] == "test medical AI"
            assert result["results"] == mock_results
            mock_search.assert_called_once_with("test medical AI", max_results=5)

    @pytest.mark.asyncio
    async def test_research_agent_fallback_on_error(self):
        """Test that ResearchAgent falls back to simulation on error."""
        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
            mock_search.side_effect = Exception("Search service unavailable")

            agent = ResearchAgent(use_real_search=True)
            result = await agent.run({"query": "fallback test"})

            # The real search must have been attempted before falling back.
            mock_search.assert_called_once()

            # Should still return results (from simulation)
            assert result["agent"] == "research"
            assert isinstance(result["results"], list)
            assert len(result["results"]) > 0
118
+
119
+
120
class TestOrchestratorWithSearchIntegration:
    """Integration tests for orchestrator with search."""

    @pytest.mark.asyncio
    async def test_orchestrator_with_mocked_search(self):
        """Test full orchestrator pipeline with mocked search."""
        mock_results = [
            {"title": "Orchestrator Test", "url": "https://test.com", "snippet": "Integration test"}
        ]

        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
            mock_search.return_value = mock_results

            orchestrator = Orchestrator()
            result = await orchestrator.run_task({"query": "integration test"})

            assert "steps" in result
            assert len(result["steps"]) >= 2

            # First step should be research
            research_step = result["steps"][0]
            assert research_step["agent"] == "research"
            assert research_step["output"]["results"] == mock_results

    @pytest.mark.asyncio
    async def test_orchestrator_produces_merkle_hashes(self):
        """Test that orchestrator produces valid hashes for each step."""
        mock_results = [
            {"title": "Hash Test", "url": "https://test.com/hash", "snippet": "Hash test"}
        ]

        # Search is mocked here as well: without it the research step would
        # issue a real web request, making the test slow and dependent on
        # network access.
        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
            mock_search.return_value = mock_results

            orchestrator = Orchestrator()
            result = await orchestrator.run_task({"query": "hash test"})

        # Guard against a vacuous pass: the loop below checks nothing when
        # there are no steps.
        assert result["steps"]

        for step in result["steps"]:
            assert "hash" in step
            assert len(step["hash"]) == 64  # SHA256 hex length
            assert all(c in '0123456789abcdef' for c in step["hash"])
154
+
155
+
156
class TestSearchConfig:
    """Checks how SearchConfig reads the API key from the environment."""

    def test_config_reads_env_variable(self):
        """A key present in the environment is returned and reported as set."""
        test_key = "test_api_key_12345"
        fake_env = {"SERPER_API_KEY": test_key}

        with patch.dict(os.environ, fake_env):
            assert SearchConfig.get_api_key() == test_key
            assert SearchConfig.has_api_key() is True

    def test_config_returns_none_when_no_key(self):
        """With an emptied environment there is no key to report."""
        # clear=True empties os.environ for the duration of the block, so
        # SERPER_API_KEY is guaranteed to be absent.
        with patch.dict(os.environ, {}, clear=True):
            assert SearchConfig.get_api_key() is None
            assert SearchConfig.has_api_key() is False
174
+
175
+
176
if __name__ == "__main__":
    # Propagate pytest's exit status so that running this file directly
    # reports failures to the shell / CI instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))