b2230765034 committed
Commit: fb5275d
Parent(s): 1bd7efb
stage3: real-search adapter + integration tests (with httpx mocking)
Files changed:
- src/agents/__pycache__/research_agent.cpython-313.pyc +0 -0
- src/agents/research_agent.py +18 -5
- src/tools/__init__.py +1 -0
- src/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- src/tools/__pycache__/searcher.cpython-313.pyc +0 -0
- src/tools/searcher.py +257 -0
- tests/__pycache__/test_search_integration.cpython-313-pytest-9.0.1.pyc +0 -0
- tests/test_search_integration.py +177 -0
src/agents/__pycache__/research_agent.cpython-313.pyc
CHANGED
Binary files a/src/agents/__pycache__/research_agent.cpython-313.pyc and b/src/agents/__pycache__/research_agent.cpython-313.pyc differ
src/agents/research_agent.py
CHANGED

@@ -8,17 +8,23 @@ from dataclasses import dataclass, field
 from typing import Any
 from .base import BaseAgent
 
+# Import the searcher tool
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+from tools.searcher import search as web_search
+
 
 @dataclass
 class ResearchAgent(BaseAgent):
     """
     Agent that performs research/search tasks.
 
-
-    Will be upgraded to real web search in later stages.
+    Uses the searcher tool for web search with fallback to simulated results.
     """
     role: str = "research"
     tools: list[str] = field(default_factory=lambda: ["web_search", "document_fetch"])
+    use_real_search: bool = True  # Flag to enable/disable real search
 
     async def run(self, input: dict[str, Any]) -> dict[str, Any]:
         """
@@ -33,13 +39,20 @@ class ResearchAgent(BaseAgent):
         query = input.get("query", "")
         self.log(f"Researching: {query}")
 
-        #
-
+        # Use real search or simulated based on flag
+        if self.use_real_search:
+            try:
+                search_results = await web_search(query, max_results=5)
+            except Exception as e:
+                self.log(f"Search failed, using simulation: {e}", level="warning")
+                search_results = self._simulate_search(query)
+        else:
+            search_results = self._simulate_search(query)
 
         return {
             "agent": "research",
             "query": query,
-            "results":
+            "results": search_results
         }
 
     def _simulate_search(self, query: str) -> list[dict[str, str]]:
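For orientation, a minimal usage sketch (not part of this commit) of driving the updated agent from an asyncio entry point. It assumes BaseAgent needs no constructor arguments beyond the dataclass fields visible in this diff and that src/ is on the import path, as the tests below arrange; the query string is illustrative.

import asyncio

from agents.research_agent import ResearchAgent

async def main() -> None:
    # use_real_search=False forces the simulated path; True tries the searcher tool first
    agent = ResearchAgent(use_real_search=False)
    result = await agent.run({"query": "AI in healthcare"})  # illustrative query
    for item in result["results"]:
        print(item["title"], "->", item["url"])

if __name__ == "__main__":
    asyncio.run(main())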
src/tools/__init__.py
ADDED

@@ -0,0 +1 @@
+# Tools module
src/tools/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (160 Bytes)

src/tools/__pycache__/searcher.cpython-313.pyc
ADDED
Binary file (10.3 kB)
src/tools/searcher.py
ADDED

@@ -0,0 +1,257 @@
+"""
+Web Search Tool
+================
+Abstraction layer for web search functionality.
+Supports real search via DuckDuckGo HTML scraping or API services,
+with fallback to simulated results.
+"""
+
+import os
+import re
+import logging
+from typing import Optional
+from dataclasses import dataclass
+
+try:
+    import httpx
+    HTTPX_AVAILABLE = True
+except ImportError:
+    HTTPX_AVAILABLE = False
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("Searcher")
+
+
+@dataclass
+class SearchResult:
+    """Represents a single search result."""
+    title: str
+    url: str
+    snippet: str
+
+    def to_dict(self) -> dict[str, str]:
+        return {
+            "title": self.title,
+            "url": self.url,
+            "snippet": self.snippet
+        }
+
+
+class SearchConfig:
+    """Configuration for search behavior."""
+
+    # Environment variable for API key (if using paid service)
+    SERPER_API_KEY_ENV = "SERPER_API_KEY"
+
+    # DuckDuckGo HTML endpoint (no API key needed)
+    DUCKDUCKGO_HTML_URL = "https://html.duckduckgo.com/html/"
+
+    # Timeout settings
+    REQUEST_TIMEOUT = 10.0
+
+    # Rate limiting
+    MAX_RESULTS = 5
+
+    @classmethod
+    def get_api_key(cls) -> Optional[str]:
+        """Get API key from environment if available."""
+        return os.environ.get(cls.SERPER_API_KEY_ENV)
+
+    @classmethod
+    def has_api_key(cls) -> bool:
+        """Check if API key is configured."""
+        return cls.get_api_key() is not None
+
+
+async def search(query: str, max_results: int = 5) -> list[dict[str, str]]:
+    """
+    Perform a web search and return results.
+
+    This function tries multiple search strategies:
+    1. If SERPER_API_KEY is set, use Serper.dev API
+    2. Otherwise, try DuckDuckGo HTML scraping
+    3. If all else fails, return simulated results
+
+    Args:
+        query: The search query string
+        max_results: Maximum number of results to return
+
+    Returns:
+        List of search result dictionaries with title, url, snippet
+    """
+    logger.info(f"Searching for: {query}")
+
+    # Strategy 1: Try Serper API if configured
+    if SearchConfig.has_api_key():
+        try:
+            results = await _search_serper(query, max_results)
+            if results:
+                logger.info(f"Serper returned {len(results)} results")
+                return results
+        except Exception as e:
+            logger.warning(f"Serper search failed: {e}")
+
+    # Strategy 2: Try DuckDuckGo HTML scraping
+    if HTTPX_AVAILABLE:
+        try:
+            results = await _search_duckduckgo(query, max_results)
+            if results:
+                logger.info(f"DuckDuckGo returned {len(results)} results")
+                return results
+        except Exception as e:
+            logger.warning(f"DuckDuckGo search failed: {e}")
+
+    # Strategy 3: Fallback to simulated results
+    logger.info("Using simulated search results")
+    return _simulate_search(query, max_results)
+
+
+async def _search_serper(query: str, max_results: int) -> list[dict[str, str]]:
+    """
+    Search using Serper.dev API.
+
+    Args:
+        query: Search query
+        max_results: Max results to return
+
+    Returns:
+        List of search results
+    """
+    if not HTTPX_AVAILABLE:
+        raise RuntimeError("httpx not available")
+
+    api_key = SearchConfig.get_api_key()
+    if not api_key:
+        raise ValueError("SERPER_API_KEY not set")
+
+    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
+        response = await client.post(
+            "https://google.serper.dev/search",
+            headers={
+                "X-API-KEY": api_key,
+                "Content-Type": "application/json"
+            },
+            json={"q": query, "num": max_results}
+        )
+        response.raise_for_status()
+        data = response.json()
+
+    results = []
+    for item in data.get("organic", [])[:max_results]:
+        results.append({
+            "title": item.get("title", ""),
+            "url": item.get("link", ""),
+            "snippet": item.get("snippet", "")
+        })
+
+    return results
+
+
+async def _search_duckduckgo(query: str, max_results: int) -> list[dict[str, str]]:
+    """
+    Search using DuckDuckGo HTML endpoint (no API key needed).
+
+    Args:
+        query: Search query
+        max_results: Max results to return
+
+    Returns:
+        List of search results
+    """
+    if not HTTPX_AVAILABLE:
+        raise RuntimeError("httpx not available")
+
+    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
+        response = await client.post(
+            SearchConfig.DUCKDUCKGO_HTML_URL,
+            data={"q": query},
+            headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+            }
+        )
+        response.raise_for_status()
+        html = response.text
+
+    # Parse results from HTML using regex (simple extraction)
+    results = []
+
+    # Find result blocks
+    result_pattern = r'<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([^<]*)</a>'
+    snippet_pattern = r'<a[^>]*class="result__snippet"[^>]*>([^<]*)</a>'
+
+    urls_titles = re.findall(result_pattern, html)
+    snippets = re.findall(snippet_pattern, html)
+
+    for i, (url, title) in enumerate(urls_titles[:max_results]):
+        snippet = snippets[i] if i < len(snippets) else ""
+
+        # Clean up URL (DuckDuckGo uses redirects)
+        if "uddg=" in url:
+            url_match = re.search(r'uddg=([^&]+)', url)
+            if url_match:
+                from urllib.parse import unquote
+                url = unquote(url_match.group(1))
+
+        results.append({
+            "title": title.strip(),
+            "url": url,
+            "snippet": snippet.strip()
+        })
+
+    return results
+
+
+def _simulate_search(query: str, max_results: int) -> list[dict[str, str]]:
+    """
+    Generate simulated search results for testing/fallback.
+
+    Args:
+        query: Search query
+        max_results: Max results to return
+
+    Returns:
+        List of simulated search results
+    """
+    base_results = [
+        {
+            "title": f"Research findings on {query}",
+            "url": f"https://research.example.com/{query.replace(' ', '-')}",
+            "snippet": f"Comprehensive research and analysis on {query}. "
+                       f"Expert insights and latest developments."
+        },
+        {
+            "title": f"Understanding {query}: A Complete Guide",
+            "url": f"https://guide.example.org/{query.replace(' ', '-')}",
+            "snippet": f"Everything you need to know about {query}. "
+                       f"Detailed explanations and practical examples."
+        },
+        {
+            "title": f"Latest developments in {query}",
+            "url": f"https://news.example.com/topics/{query.replace(' ', '-')}",
+            "snippet": f"Stay updated with the latest news about {query}. "
+                       f"Breaking stories and expert commentary."
+        },
+        {
+            "title": f"{query} - Academic perspectives",
+            "url": f"https://academic.example.edu/{query.replace(' ', '-')}",
+            "snippet": f"Academic research and peer-reviewed studies on {query}. "
+                       f"Citations and methodology included."
+        },
+        {
+            "title": f"Practical applications of {query}",
+            "url": f"https://apply.example.io/{query.replace(' ', '-')}",
+            "snippet": f"How to apply {query} in real-world scenarios. "
+                       f"Case studies and implementation guides."
+        }
+    ]
+
+    return base_results[:max_results]
+
+
+# Synchronous wrapper for non-async contexts
+def search_sync(query: str, max_results: int = 5) -> list[dict[str, str]]:
+    """
+    Synchronous version of search for non-async contexts.
+    Falls back to simulated results.
+    """
+    return _simulate_search(query, max_results)
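A hedged sketch of calling the new tool directly, outside the agent. The function names, fallback order, and the SERPER_API_KEY variable come from the file above; the query strings are illustrative, and setting the key is optional (without it the DuckDuckGo strategy and then the simulation are used).

import asyncio
from tools.searcher import search, search_sync

async def demo() -> None:
    # Tries Serper (if SERPER_API_KEY is set), then DuckDuckGo, then simulation
    results = await search("vector databases", max_results=3)
    for r in results:
        print(f"{r['title']} -> {r['url']}")

if __name__ == "__main__":
    asyncio.run(demo())
    # Non-async callers can use search_sync(), which currently returns simulated results only
    print(search_sync("vector databases", max_results=2))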
tests/__pycache__/test_search_integration.cpython-313-pytest-9.0.1.pyc
ADDED
Binary file (29.4 kB)
tests/test_search_integration.py
ADDED

@@ -0,0 +1,177 @@
+"""
+Search Integration Tests
+=========================
+Tests for the web search functionality with mocking.
+"""
+
+import pytest
+import sys
+import os
+from pathlib import Path
+from unittest.mock import AsyncMock, patch, MagicMock
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
+
+from tools.searcher import (
+    search,
+    _simulate_search,
+    _search_duckduckgo,
+    SearchConfig
+)
+from agents.research_agent import ResearchAgent
+from orchestrator import Orchestrator
+
+
+class TestSearcher:
+    """Tests for the searcher module."""
+
+    def test_simulate_search_returns_results(self):
+        """Test that simulated search returns valid results."""
+        results = _simulate_search("AI healthcare", max_results=3)
+
+        assert isinstance(results, list)
+        assert len(results) == 3
+
+        for result in results:
+            assert "title" in result
+            assert "url" in result
+            assert "snippet" in result
+            assert len(result["title"]) > 0
+            assert len(result["url"]) > 0
+
+    def test_simulate_search_respects_max_results(self):
+        """Test that max_results parameter is respected."""
+        results = _simulate_search("test query", max_results=2)
+        assert len(results) == 2
+
+        results = _simulate_search("test query", max_results=5)
+        assert len(results) == 5
+
+    @pytest.mark.asyncio
+    async def test_search_fallback_to_simulation(self):
+        """Test that search falls back to simulation when real search fails."""
+        # Without API key and with mocked failed HTTP, should fallback
+        with patch.dict(os.environ, {}, clear=True):
+            # Mock httpx to raise an exception
+            with patch('tools.searcher._search_duckduckgo', new_callable=AsyncMock) as mock_ddg:
+                mock_ddg.side_effect = Exception("Network error")
+
+                results = await search("test query")
+
+                assert isinstance(results, list)
+                assert len(results) > 0
+                # Should have simulated results
+                assert "example" in results[0]["url"].lower()
+
+
+class TestSearcherWithMockedHTTP:
+    """Tests with mocked HTTP responses."""
+
+    @pytest.mark.asyncio
+    async def test_search_with_mocked_response(self):
+        """Test search with a mocked successful HTTP response."""
+        mock_results = [
+            {"title": "Mocked Result 1", "url": "https://mock.com/1", "snippet": "Mocked snippet 1"},
+            {"title": "Mocked Result 2", "url": "https://mock.com/2", "snippet": "Mocked snippet 2"}
+        ]
+
+        with patch('tools.searcher._search_duckduckgo', new_callable=AsyncMock) as mock_ddg:
+            mock_ddg.return_value = mock_results
+
+            results = await search("mocked query")
+
+            assert results == mock_results
+            mock_ddg.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_research_agent_uses_searcher(self):
+        """Test that ResearchAgent properly uses the searcher."""
+        mock_results = [
+            {"title": "Agent Search Result", "url": "https://agent.test/1", "snippet": "Test snippet"}
+        ]
+
+        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
+            mock_search.return_value = mock_results
+
+            agent = ResearchAgent(use_real_search=True)
+            result = await agent.run({"query": "test medical AI"})
+
+            assert result["agent"] == "research"
+            assert result["query"] == "test medical AI"
+            assert result["results"] == mock_results
+            mock_search.assert_called_once_with("test medical AI", max_results=5)
+
+    @pytest.mark.asyncio
+    async def test_research_agent_fallback_on_error(self):
+        """Test that ResearchAgent falls back to simulation on error."""
+        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
+            mock_search.side_effect = Exception("Search service unavailable")
+
+            agent = ResearchAgent(use_real_search=True)
+            result = await agent.run({"query": "fallback test"})
+
+            # Should still return results (from simulation)
+            assert result["agent"] == "research"
+            assert isinstance(result["results"], list)
+            assert len(result["results"]) > 0
+
+
+class TestOrchestratorWithSearchIntegration:
+    """Integration tests for orchestrator with search."""
+
+    @pytest.mark.asyncio
+    async def test_orchestrator_with_mocked_search(self):
+        """Test full orchestrator pipeline with mocked search."""
+        mock_results = [
+            {"title": "Orchestrator Test", "url": "https://test.com", "snippet": "Integration test"}
+        ]
+
+        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
+            mock_search.return_value = mock_results
+
+            orchestrator = Orchestrator()
+            result = await orchestrator.run_task({"query": "integration test"})
+
+            assert "steps" in result
+            assert len(result["steps"]) >= 2
+
+            # First step should be research
+            research_step = result["steps"][0]
+            assert research_step["agent"] == "research"
+            assert research_step["output"]["results"] == mock_results
+
+    @pytest.mark.asyncio
+    async def test_orchestrator_produces_merkle_hashes(self):
+        """Test that orchestrator produces valid hashes for each step."""
+        orchestrator = Orchestrator()
+        result = await orchestrator.run_task({"query": "hash test"})
+
+        for step in result["steps"]:
+            assert "hash" in step
+            assert len(step["hash"]) == 64  # SHA256 hex length
+            assert all(c in '0123456789abcdef' for c in step["hash"])
+
+
+class TestSearchConfig:
+    """Tests for search configuration."""
+
+    def test_config_reads_env_variable(self):
+        """Test that config properly reads API key from environment."""
+        test_key = "test_api_key_12345"
+
+        with patch.dict(os.environ, {"SERPER_API_KEY": test_key}):
+            assert SearchConfig.get_api_key() == test_key
+            assert SearchConfig.has_api_key() is True
+
+    def test_config_returns_none_when_no_key(self):
+        """Test that config returns None when no API key is set."""
+        with patch.dict(os.environ, {}, clear=True):
+            # Remove the key if it exists
+            os.environ.pop("SERPER_API_KEY", None)
+            assert SearchConfig.get_api_key() is None
+            assert SearchConfig.has_api_key() is False
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
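The tests above mock at the _search_duckduckgo boundary; below is a complementary sketch (not part of this commit) that mocks httpx itself to exercise the HTML-parsing path. It assumes httpx and pytest-asyncio are installed; the fake HTML, the fake client, and the test name are hand-written stand-ins shaped to match the regexes in src/tools/searcher.py.

import pytest
from unittest.mock import patch

from tools.searcher import _search_duckduckgo

# Hand-written HTML fragment matching the result__a / result__snippet patterns
FAKE_HTML = (
    '<a class="result__a" href="https://example.com/page">Example Title</a>'
    '<a class="result__snippet">Example snippet text</a>'
)

class _FakeResponse:
    text = FAKE_HTML
    def raise_for_status(self) -> None:
        pass

class _FakeAsyncClient:
    def __init__(self, *args, **kwargs) -> None:
        pass
    async def __aenter__(self):
        return self
    async def __aexit__(self, *exc) -> bool:
        return False
    async def post(self, *args, **kwargs) -> _FakeResponse:
        return _FakeResponse()

@pytest.mark.asyncio
async def test_duckduckgo_parsing_with_mocked_httpx():
    """The parser should extract title, url, and snippet from the fake HTML."""
    with patch("tools.searcher.httpx.AsyncClient", _FakeAsyncClient):
        results = await _search_duckduckgo("anything", max_results=5)

    assert results == [{
        "title": "Example Title",
        "url": "https://example.com/page",
        "snippet": "Example snippet text",
    }]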