Spaces:

MCP-1st-Birthday
/

AgentMask

Sleeping

App Files Files Community

b2230765034 committed on Nov 30, 2025

Commit

fb5275d

1 Parent(s): 1bd7efb

stage3: real-search adapter + integration tests (with httpx mocking)

Browse files

Files changed (8) hide show

src/agents/__pycache__/research_agent.cpython-313.pyc +0 -0
src/agents/research_agent.py +18 -5
src/tools/__init__.py +1 -0
src/tools/__pycache__/__init__.cpython-313.pyc +0 -0
src/tools/__pycache__/searcher.cpython-313.pyc +0 -0
src/tools/searcher.py +257 -0
tests/__pycache__/test_search_integration.cpython-313-pytest-9.0.1.pyc +0 -0
tests/test_search_integration.py +177 -0

src/agents/__pycache__/research_agent.cpython-313.pyc CHANGED Viewed

Binary files a/src/agents/__pycache__/research_agent.cpython-313.pyc and b/src/agents/__pycache__/research_agent.cpython-313.pyc differ

src/agents/research_agent.py CHANGED Viewed

@@ -8,17 +8,23 @@ from dataclasses import dataclass, field
 from typing import Any
 from .base import BaseAgent
 @dataclass
 class ResearchAgent(BaseAgent):
     """
     Agent that performs research/search tasks.
-    Currently uses simulated search results.
-    Will be upgraded to real web search in later stages.
     """
     role: str = "research"
     tools: list[str] = field(default_factory=lambda: ["web_search", "document_fetch"])
     async def run(self, input: dict[str, Any]) -> dict[str, Any]:
         """
@@ -33,13 +39,20 @@ class ResearchAgent(BaseAgent):
         query = input.get("query", "")
         self.log(f"Researching: {query}")
-        # Simulated search results (will be replaced with real search in Stage 3)
-        simulated_results = self._simulate_search(query)
         return {
             "agent": "research",
             "query": query,
-            "results": simulated_results
         }
     def _simulate_search(self, query: str) -> list[dict[str, str]]:

 from typing import Any
 from .base import BaseAgent
+# Import the searcher tool
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+from tools.searcher import search as web_search
 @dataclass
 class ResearchAgent(BaseAgent):
     """
     Agent that performs research/search tasks.
+    Uses the searcher tool for web search with fallback to simulated results.
     """
     role: str = "research"
     tools: list[str] = field(default_factory=lambda: ["web_search", "document_fetch"])
+    use_real_search: bool = True  # Flag to enable/disable real search
     async def run(self, input: dict[str, Any]) -> dict[str, Any]:
         """
         query = input.get("query", "")
         self.log(f"Researching: {query}")
+        # Use real search or simulated based on flag
+        if self.use_real_search:
+            try:
+                search_results = await web_search(query, max_results=5)
+            except Exception as e:
+                self.log(f"Search failed, using simulation: {e}", level="warning")
+                search_results = self._simulate_search(query)
+        else:
+            search_results = self._simulate_search(query)
         return {
             "agent": "research",
             "query": query,
+            "results": search_results
         }
     def _simulate_search(self, query: str) -> list[dict[str, str]]:

src/tools/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Tools module

src/tools/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (160 Bytes). View file

src/tools/__pycache__/searcher.cpython-313.pyc ADDED Viewed

Binary file (10.3 kB). View file

src/tools/searcher.py ADDED Viewed

	@@ -0,0 +1,257 @@

+"""
+Web Search Tool
+================
+Abstraction layer for web search functionality.
+Supports real search via DuckDuckGo HTML scraping or API services,
+with fallback to simulated results.
+"""
+import os
+import re
+import logging
+from typing import Optional
+from dataclasses import dataclass
+# httpx is treated as an optional dependency: record whether the import
+# succeeded so the HTTP-backed search functions can check HTTPX_AVAILABLE
+# before touching the module.
+try:
+    import httpx
+    HTTPX_AVAILABLE = True
+except ImportError:
+    HTTPX_AVAILABLE = False
+# NOTE(review): basicConfig() runs as an import-time side effect of this
+# module; confirm that configuring logging from here is intended.
+logging.basicConfig(level=logging.INFO)
+# Module-wide logger, registered under the fixed name "Searcher".
+logger = logging.getLogger("Searcher")
+@dataclass
+class SearchResult:
+    """Represents a single search result."""
+    title: str
+    url: str
+    snippet: str
+    def to_dict(self) -> dict[str, str]:
+        return {
+            "title": self.title,
+            "url": self.url,
+            "snippet": self.snippet
+        }
+class SearchConfig:
+    """Configuration for search behavior."""
+    # Environment variable for API key (if using paid service)
+    SERPER_API_KEY_ENV = "SERPER_API_KEY"
+    # DuckDuckGo HTML endpoint (no API key needed)
+    DUCKDUCKGO_HTML_URL = "https://html.duckduckgo.com/html/"
+    # Timeout settings
+    REQUEST_TIMEOUT = 10.0
+    # Rate limiting
+    MAX_RESULTS = 5
+    @classmethod
+    def get_api_key(cls) -> Optional[str]:
+        """Get API key from environment if available."""
+        return os.environ.get(cls.SERPER_API_KEY_ENV)
+    @classmethod
+    def has_api_key(cls) -> bool:
+        """Check if API key is configured."""
+        return cls.get_api_key() is not None
+async def search(query: str, max_results: int = 5) -> list[dict[str, str]]:
+    """
+    Perform a web search and return results.
+    This function tries multiple search strategies:
+    1. If SERPER_API_KEY is set, use Serper.dev API
+    2. Otherwise, try DuckDuckGo HTML scraping
+    3. If all else fails, return simulated results
+    Args:
+        query: The search query string
+        max_results: Maximum number of results to return
+    Returns:
+        List of search result dictionaries with title, url, snippet
+    """
+    logger.info(f"Searching for: {query}")
+    # Strategy 1: Try Serper API if configured
+    if SearchConfig.has_api_key():
+        try:
+            results = await _search_serper(query, max_results)
+            if results:
+                logger.info(f"Serper returned {len(results)} results")
+                return results
+        except Exception as e:
+            logger.warning(f"Serper search failed: {e}")
+    # Strategy 2: Try DuckDuckGo HTML scraping
+    if HTTPX_AVAILABLE:
+        try:
+            results = await _search_duckduckgo(query, max_results)
+            if results:
+                logger.info(f"DuckDuckGo returned {len(results)} results")
+                return results
+        except Exception as e:
+            logger.warning(f"DuckDuckGo search failed: {e}")
+    # Strategy 3: Fallback to simulated results
+    logger.info("Using simulated search results")
+    return _simulate_search(query, max_results)
+async def _search_serper(query: str, max_results: int) -> list[dict[str, str]]:
+    """
+    Search using Serper.dev API.
+    Args:
+        query: Search query
+        max_results: Max results to return
+    Returns:
+        List of search results
+    """
+    if not HTTPX_AVAILABLE:
+        raise RuntimeError("httpx not available")
+    api_key = SearchConfig.get_api_key()
+    if not api_key:
+        raise ValueError("SERPER_API_KEY not set")
+    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
+        response = await client.post(
+            "https://google.serper.dev/search",
+            headers={
+                "X-API-KEY": api_key,
+                "Content-Type": "application/json"
+            },
+            json={"q": query, "num": max_results}
+        )
+        response.raise_for_status()
+        data = response.json()
+    results = []
+    for item in data.get("organic", [])[:max_results]:
+        results.append({
+            "title": item.get("title", ""),
+            "url": item.get("link", ""),
+            "snippet": item.get("snippet", "")
+        })
+    return results
+async def _search_duckduckgo(query: str, max_results: int) -> list[dict[str, str]]:
+    """
+    Search using DuckDuckGo HTML endpoint (no API key needed).
+    Args:
+        query: Search query
+        max_results: Max results to return
+    Returns:
+        List of search results
+    """
+    if not HTTPX_AVAILABLE:
+        raise RuntimeError("httpx not available")
+    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
+        response = await client.post(
+            SearchConfig.DUCKDUCKGO_HTML_URL,
+            data={"q": query},
+            headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+            }
+        )
+        response.raise_for_status()
+        html = response.text
+    # Parse results from HTML using regex (simple extraction)
+    results = []
+    # Find result blocks
+    result_pattern = r'<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([^<]*)</a>'
+    snippet_pattern = r'<a[^>]*class="result__snippet"[^>]*>([^<]*)</a>'
+    urls_titles = re.findall(result_pattern, html)
+    snippets = re.findall(snippet_pattern, html)
+    for i, (url, title) in enumerate(urls_titles[:max_results]):
+        snippet = snippets[i] if i < len(snippets) else ""
+        # Clean up URL (DuckDuckGo uses redirects)
+        if "uddg=" in url:
+            url_match = re.search(r'uddg=([^&]+)', url)
+            if url_match:
+                from urllib.parse import unquote
+                url = unquote(url_match.group(1))
+        results.append({
+            "title": title.strip(),
+            "url": url,
+            "snippet": snippet.strip()
+        })
+    return results
+def _simulate_search(query: str, max_results: int) -> list[dict[str, str]]:
+    """
+    Generate simulated search results for testing/fallback.
+    Args:
+        query: Search query
+        max_results: Max results to return
+    Returns:
+        List of simulated search results
+    """
+    base_results = [
+        {
+            "title": f"Research findings on {query}",
+            "url": f"https://research.example.com/{query.replace(' ', '-')}",
+            "snippet": f"Comprehensive research and analysis on {query}. "
+                       f"Expert insights and latest developments."
+        },
+        {
+            "title": f"Understanding {query}: A Complete Guide",
+            "url": f"https://guide.example.org/{query.replace(' ', '-')}",
+            "snippet": f"Everything you need to know about {query}. "
+                       f"Detailed explanations and practical examples."
+        },
+        {
+            "title": f"Latest developments in {query}",
+            "url": f"https://news.example.com/topics/{query.replace(' ', '-')}",
+            "snippet": f"Stay updated with the latest news about {query}. "
+                       f"Breaking stories and expert commentary."
+        },
+        {
+            "title": f"{query} - Academic perspectives",
+            "url": f"https://academic.example.edu/{query.replace(' ', '-')}",
+            "snippet": f"Academic research and peer-reviewed studies on {query}. "
+                       f"Citations and methodology included."
+        },
+        {
+            "title": f"Practical applications of {query}",
+            "url": f"https://apply.example.io/{query.replace(' ', '-')}",
+            "snippet": f"How to apply {query} in real-world scenarios. "
+                       f"Case studies and implementation guides."
+        }
+    ]
+    return base_results[:max_results]
+# Synchronous wrapper for non-async contexts
+def search_sync(query: str, max_results: int = 5) -> list[dict[str, str]]:
+    """
+    Synchronous version of search for non-async contexts.
+    Falls back to simulated results.
+    """
+    return _simulate_search(query, max_results)

tests/__pycache__/test_search_integration.cpython-313-pytest-9.0.1.pyc ADDED Viewed

Binary file (29.4 kB). View file

tests/test_search_integration.py ADDED Viewed

	@@ -0,0 +1,177 @@

+"""
+Search Integration Tests
+=========================
+Tests for the web search functionality with mocking.
+"""
+import pytest
+import sys
+import os
+from pathlib import Path
+from unittest.mock import AsyncMock, patch, MagicMock
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
+from tools.searcher import (
+    search,
+    _simulate_search,
+    _search_duckduckgo,
+    SearchConfig
+)
+from agents.research_agent import ResearchAgent
+from orchestrator import Orchestrator
+class TestSearcher:
+    """Tests for the searcher module."""
+    def test_simulate_search_returns_results(self):
+        """Test that simulated search returns valid results."""
+        results = _simulate_search("AI healthcare", max_results=3)
+        assert isinstance(results, list)
+        assert len(results) == 3
+        for result in results:
+            assert "title" in result
+            assert "url" in result
+            assert "snippet" in result
+            assert len(result["title"]) > 0
+            assert len(result["url"]) > 0
+    def test_simulate_search_respects_max_results(self):
+        """Test that max_results parameter is respected."""
+        results = _simulate_search("test query", max_results=2)
+        assert len(results) == 2
+        results = _simulate_search("test query", max_results=5)
+        assert len(results) == 5
+    @pytest.mark.asyncio
+    async def test_search_fallback_to_simulation(self):
+        """Test that search falls back to simulation when real search fails."""
+        # Without API key and with mocked failed HTTP, should fallback
+        with patch.dict(os.environ, {}, clear=True):
+            # Mock httpx to raise an exception
+            with patch('tools.searcher._search_duckduckgo', new_callable=AsyncMock) as mock_ddg:
+                mock_ddg.side_effect = Exception("Network error")
+                results = await search("test query")
+                assert isinstance(results, list)
+                assert len(results) > 0
+                # Should have simulated results
+                assert "example" in results[0]["url"].lower()
+class TestSearcherWithMockedHTTP:
+    """Tests with mocked HTTP responses."""
+    @pytest.mark.asyncio
+    async def test_search_with_mocked_response(self):
+        """Test search with a mocked successful HTTP response."""
+        mock_results = [
+            {"title": "Mocked Result 1", "url": "https://mock.com/1", "snippet": "Mocked snippet 1"},
+            {"title": "Mocked Result 2", "url": "https://mock.com/2", "snippet": "Mocked snippet 2"}
+        ]
+        with patch('tools.searcher._search_duckduckgo', new_callable=AsyncMock) as mock_ddg:
+            mock_ddg.return_value = mock_results
+            results = await search("mocked query")
+            assert results == mock_results
+            mock_ddg.assert_called_once()
+    @pytest.mark.asyncio
+    async def test_research_agent_uses_searcher(self):
+        """Test that ResearchAgent properly uses the searcher."""
+        mock_results = [
+            {"title": "Agent Search Result", "url": "https://agent.test/1", "snippet": "Test snippet"}
+        ]
+        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
+            mock_search.return_value = mock_results
+            agent = ResearchAgent(use_real_search=True)
+            result = await agent.run({"query": "test medical AI"})
+            assert result["agent"] == "research"
+            assert result["query"] == "test medical AI"
+            assert result["results"] == mock_results
+            mock_search.assert_called_once_with("test medical AI", max_results=5)
+    @pytest.mark.asyncio
+    async def test_research_agent_fallback_on_error(self):
+        """Test that ResearchAgent falls back to simulation on error."""
+        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
+            mock_search.side_effect = Exception("Search service unavailable")
+            agent = ResearchAgent(use_real_search=True)
+            result = await agent.run({"query": "fallback test"})
+            # Should still return results (from simulation)
+            assert result["agent"] == "research"
+            assert isinstance(result["results"], list)
+            assert len(result["results"]) > 0
+class TestOrchestratorWithSearchIntegration:
+    """Integration tests for orchestrator with search."""
+    @pytest.mark.asyncio
+    async def test_orchestrator_with_mocked_search(self):
+        """Test full orchestrator pipeline with mocked search."""
+        mock_results = [
+            {"title": "Orchestrator Test", "url": "https://test.com", "snippet": "Integration test"}
+        ]
+        with patch('agents.research_agent.web_search', new_callable=AsyncMock) as mock_search:
+            mock_search.return_value = mock_results
+            orchestrator = Orchestrator()
+            result = await orchestrator.run_task({"query": "integration test"})
+            assert "steps" in result
+            assert len(result["steps"]) >= 2
+            # First step should be research
+            research_step = result["steps"][0]
+            assert research_step["agent"] == "research"
+            assert research_step["output"]["results"] == mock_results
+    @pytest.mark.asyncio
+    async def test_orchestrator_produces_merkle_hashes(self):
+        """Test that orchestrator produces valid hashes for each step."""
+        orchestrator = Orchestrator()
+        result = await orchestrator.run_task({"query": "hash test"})
+        for step in result["steps"]:
+            assert "hash" in step
+            assert len(step["hash"]) == 64  # SHA256 hex length
+            assert all(c in '0123456789abcdef' for c in step["hash"])
+class TestSearchConfig:
+    """Tests for search configuration."""
+    def test_config_reads_env_variable(self):
+        """Test that config properly reads API key from environment."""
+        test_key = "test_api_key_12345"
+        with patch.dict(os.environ, {"SERPER_API_KEY": test_key}):
+            assert SearchConfig.get_api_key() == test_key
+            assert SearchConfig.has_api_key() is True
+    def test_config_returns_none_when_no_key(self):
+        """Test that config returns None when no API key is set."""
+        with patch.dict(os.environ, {}, clear=True):
+            # Remove the key if it exists
+            os.environ.pop("SERPER_API_KEY", None)
+            assert SearchConfig.get_api_key() is None
+            assert SearchConfig.has_api_key() is False
+# Allow running this file directly: hand it to pytest in verbose mode.
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])