Agent_Course_Final_Assignment

Sleeping

App Files Files Community

Chris committed on May 29, 2025

Commit

e107ea2

1 Parent(s): a178cd6

Final 6.7.3

Browse files

Files changed (10) hide show

.gitignore +1 -0
requirements.txt +1 -0
src/agents/__pycache__/router.cpython-310.pyc +0 -0
src/agents/__pycache__/web_researcher.cpython-310.pyc +0 -0
src/agents/router.py +2 -2
src/agents/web_researcher.py +78 -13
src/app.py +4 -4
src/requirements.txt +1 -0
src/tools/__pycache__/web_search_tool.cpython-310.pyc +0 -0
src/tools/web_search_tool.py +255 -95

.gitignore CHANGED Viewed

@@ -8,3 +8,4 @@ debug_*.py
 *_debug*.py
 tests/
 *.log

 *_debug*.py
 tests/
 *.log
+gaia_evaluation_cjb97*

requirements.txt CHANGED Viewed

@@ -6,6 +6,7 @@ beautifulsoup4==4.13.0
 certifi==2025.4.26
 charset-normalizer==3.4.2
 click==8.2.1
 exceptiongroup==1.3.0
 fastapi==0.115.12
 ffmpy==0.5.0

 certifi==2025.4.26
 charset-normalizer==3.4.2
 click==8.2.1
+duckduckgo-search==6.3.4
 exceptiongroup==1.3.0
 fastapi==0.115.12
 ffmpy==0.5.0

src/agents/__pycache__/router.cpython-310.pyc CHANGED Viewed

Binary files a/src/agents/__pycache__/router.cpython-310.pyc and b/src/agents/__pycache__/router.cpython-310.pyc differ

src/agents/__pycache__/web_researcher.cpython-310.pyc CHANGED Viewed

Binary files a/src/agents/__pycache__/web_researcher.cpython-310.pyc and b/src/agents/__pycache__/web_researcher.cpython-310.pyc differ

src/agents/router.py CHANGED Viewed

@@ -317,8 +317,8 @@ class RouterAgent:
         """
         try:
-            # Use router model for this analysis
-            tier = ModelTier.ROUTER if state.complexity_assessment != "complex" else ModelTier.MAIN
             result = self.llm_client.generate(prompt, tier=tier, max_tokens=200)
             if result.success:

         """
         try:
+            # Use main model (32B) for better routing decisions instead of 7B router model
+            tier = ModelTier.MAIN  # Always use 32B model for routing to improve classification accuracy
             result = self.llm_client.generate(prompt, tier=tier, max_tokens=200)
             if result.success:

src/agents/web_researcher.py CHANGED Viewed

@@ -414,24 +414,89 @@ class WebResearchAgent:
         return ' '.join(topic_words[:3]) if topic_words else "topic"
     def _extract_search_terms(self, question: str) -> str:
-        """Extract search terms from question"""
-        # Remove question words and common phrases
-        stop_phrases = [
-            'what is', 'what are', 'who is', 'who are', 'when is', 'when was',
-            'where is', 'where are', 'how is', 'how are', 'why is', 'why are',
-            'tell me about', 'find information about', 'search for'
         ]
-        clean_question = question.lower()
-        for phrase in stop_phrases:
-            clean_question = clean_question.replace(phrase, '')
-        # Remove punctuation and extra spaces
-        clean_question = re.sub(r'[?.,!]', '', clean_question)
-        clean_question = re.sub(r'\s+', ' ', clean_question).strip()
-        return clean_question
     def _extract_youtube_info(self, question: str) -> str:
         """Extract YouTube URL or search terms"""

         return ' '.join(topic_words[:3]) if topic_words else "topic"
     def _extract_search_terms(self, question: str) -> str:
+        """Extract focused search terms from question to avoid length limits"""
+        # Handle different question types more intelligently
+        question_lower = question.lower()
+        # For questions about specific people, places, things - extract key entities
+        # Look for quoted phrases first (highest priority)
+        quoted_terms = re.findall(r'"([^"]+)"', question)
+        if quoted_terms:
+            # Use the first quoted phrase as it's usually the most important
+            main_term = quoted_terms[0]
+            # Add year if present
+            years = re.findall(r'\b(19|20)\d{2}\b', question)
+            if years:
+                return f"{main_term} {years[0]}"
+            return main_term
+        # Extract proper nouns and key entities
+        # Look for capitalized words (likely proper nouns)
+        proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', question)
+        # Extract years and numbers (often important)
+        years = re.findall(r'\b(19|20)\d{2}\b', question)
+        numbers = re.findall(r'\b\d+\b', question)
+        # Remove very common stop words and question patterns
+        stop_patterns = [
+            r'\b(?:what|who|when|where|why|how|is|are|was|were|do|does|did|can|could|would|should|will)\b',
+            r'\b(?:the|a|an|and|or|but|in|on|at|to|for|of|with|by|from|about)\b',
+            r'\b(?:please|could|you|tell|me|find|search|for|give|provide|list|show)\b',
+            r'\b(?:information|details|data|facts|answer)\b',
+            r'[?.,!]+',  # Punctuation
         ]
+        # Clean the question
+        clean_question = question
+        for pattern in stop_patterns:
+            clean_question = re.sub(pattern, ' ', clean_question, flags=re.IGNORECASE)
+        # Extract remaining meaningful words
+        words = clean_question.split()
+        meaningful_words = []
+        for word in words:
+            word = word.strip()
+            if len(word) > 2 and word.isalpha():  # Only alphabetic words longer than 2 chars
+                meaningful_words.append(word)
+        # Build search terms prioritizing important elements
+        search_terms = []
+        # Add proper nouns first (most specific)
+        for noun in proper_nouns[:2]:  # Max 2 proper nouns
+            if len(' '.join(search_terms + [noun])) <= 100:  # Conservative length limit
+                search_terms.append(noun)
+        # Add years/numbers
+        for year in years[:1]:  # Max 1 year
+            if len(' '.join(search_terms + [year])) <= 100:
+                search_terms.append(year)
+        # Add meaningful words until we reach a reasonable length
+        for word in meaningful_words[:5]:  # Max 5 additional words
+            potential_query = ' '.join(search_terms + [word])
+            if len(potential_query) <= 100:  # Keep well under 250 char limit
+                search_terms.append(word)
+            else:
+                break
+        # Fallback if nothing found
+        if not search_terms:
+            # Take first few words of the original question
+            first_words = question.split()[:5]  # First 5 words max
+            search_terms = [w for w in first_words if w.isalpha() and len(w) > 2]
+        result = ' '.join(search_terms)
+        # Final length check and truncation
+        if len(result) > 100:
+            result = result[:100].rsplit(' ', 1)[0]
+        logger.info(f"📝 Extracted search terms: '{result}' from question: '{question[:50]}...'")
+        return result
     def _extract_youtube_info(self, question: str) -> str:
         """Extract YouTube URL or search terms"""

src/app.py CHANGED Viewed

@@ -1755,15 +1755,15 @@ Please click the "Sign in with Hugging Face" button above to access GAIA evaluat
         ### 🔧 System Architecture
         **LangGraph Multi-Agent Workflow:**
-        - **Router Agent**: Classifies questions and selects appropriate specialized agents
-        - **Web Research Agent**: Handles Wikipedia searches and web research with Tavily API + Wikipedia fallback
         - **File Processing Agent**: Processes uploaded files (CSV, images, code, audio)
         - **Reasoning Agent**: Handles mathematical calculations and logical reasoning
         - **Synthesizer Agent**: Combines results from multiple agents into final answers
         **Models Used**: Qwen 2.5 (7B/32B/72B) with intelligent tier selection for optimal cost/performance
-        **Tools Available**: Wikipedia API, Tavily web search (with Wikipedia fallback), mathematical calculator, multi-format file processor
         ### 📈 Performance Metrics
         - **Success Rate**: 30%+ expected on GAIA benchmark with full authentication
@@ -1771,7 +1771,7 @@ Please click the "Sign in with Hugging Face" button above to access GAIA evaluat
         - **Cost Efficiency**: $0.01-0.40 per question depending on model tier selection
         - **Architecture**: Multi-agent LangGraph orchestration with intelligent synthesis
         - **Reliability**: Robust error handling and graceful degradation within workflow
-        - **Web Search**: Reliable Tavily API with Wikipedia fallback (no rate limiting issues)
         ### 🎯 Authentication Requirements
         - **HF_TOKEN Environment Variable**: Best performance with full access to Qwen models

         ### 🔧 System Architecture
         **LangGraph Multi-Agent Workflow:**
+        - **Router Agent**: Classifies questions and selects appropriate specialized agents (using 32B model for better accuracy)
+        - **Web Research Agent**: Multi-engine search with DuckDuckGo (primary), Tavily API (secondary), Wikipedia (fallback)
         - **File Processing Agent**: Processes uploaded files (CSV, images, code, audio)
         - **Reasoning Agent**: Handles mathematical calculations and logical reasoning
         - **Synthesizer Agent**: Combines results from multiple agents into final answers
         **Models Used**: Qwen 2.5 (7B/32B/72B) with intelligent tier selection for optimal cost/performance
+        **Tools Available**: Multi-engine web search (DuckDuckGo + Tavily + Wikipedia), mathematical calculator, multi-format file processor
         ### 📈 Performance Metrics
         - **Success Rate**: 30%+ expected on GAIA benchmark with full authentication
         - **Cost Efficiency**: $0.01-0.40 per question depending on model tier selection
         - **Architecture**: Multi-agent LangGraph orchestration with intelligent synthesis
         - **Reliability**: Robust error handling and graceful degradation within workflow
+        - **Web Search**: 3-tier search system (DuckDuckGo → Tavily → Wikipedia) with smart query optimization
         ### 🎯 Authentication Requirements
         - **HF_TOKEN Environment Variable**: Best performance with full access to Qwen models

src/requirements.txt CHANGED Viewed

@@ -10,6 +10,7 @@ huggingface-hub==0.32.2
 transformers==4.52.3
 wikipedia-api==0.7.1
 wikipedia==1.4.0
 # OAuth dependencies for Gradio
 itsdangerous>=2.0.0

 transformers==4.52.3
 wikipedia-api==0.7.1
 wikipedia==1.4.0
+duckduckgo-search==6.3.4
 # OAuth dependencies for Gradio
 itsdangerous>=2.0.0

src/tools/__pycache__/web_search_tool.cpython-310.pyc CHANGED Viewed

Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ

src/tools/web_search_tool.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
 Web Search Tool for GAIA Agent System
-Handles web searches using Tavily API (primary) and Wikipedia (fallback)
 """
 import re
@@ -36,8 +36,8 @@ class WebSearchResult:
 class WebSearchTool(BaseTool):
     """
-    Web search tool using Tavily API (primary) and Wikipedia (fallback)
-    Much more reliable than DuckDuckGo with no rate limiting issues
     """
     def __init__(self):
@@ -50,14 +50,43 @@ class WebSearchTool(BaseTool):
         })
         self.session.timeout = 10
-        # Initialize Tavily client if API key is available
         self.tavily_api_key = os.getenv("TAVILY_API_KEY")
         self.use_tavily = self.tavily_api_key is not None
         if self.use_tavily:
-            logger.info("✅ Tavily API key found - using Tavily for web search")
-        else:
-            logger.info("ℹ️ No Tavily API key found - will use Wikipedia fallback only")
     def _execute_impl(self, input_data: Any, **kwargs) -> Dict[str, Any]:
         """
@@ -95,24 +124,208 @@ class WebSearchTool(BaseTool):
         """Check if text is a URL"""
         return bool(re.match(r'https?://', text))
     def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
         """
-        Search the web using Tavily API (primary) or Wikipedia (fallback)
         """
-        # Try Tavily first if API key is available
         if self.use_tavily:
             try:
-                return self._search_with_tavily(query, limit, extract_content)
             except Exception as e:
-                logger.warning(f"Tavily search failed, falling back to Wikipedia: {e}")
         # Fallback to Wikipedia search
-        return self._search_with_wikipedia(query, limit)
     def _search_with_tavily(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
         """
-        Search using Tavily Search API - much more reliable than DuckDuckGo
         """
         try:
             logger.info(f"🔍 Tavily search for: {query}")
@@ -129,7 +342,7 @@ class WebSearchTool(BaseTool):
                 "include_answer": False,
                 "include_images": False,
                 "include_raw_content": extract_content,
-                "max_results": min(limit, 10)  # Tavily supports up to 10 results
             }
             # Make API request
@@ -167,43 +380,41 @@ class WebSearchTool(BaseTool):
                     "search_engine": "tavily"
                 }
             else:
-                logger.warning("Tavily returned no results, trying Wikipedia fallback")
-                return self._search_with_wikipedia(query, limit)
         except requests.exceptions.RequestException as e:
             logger.error(f"Tavily API request failed: {e}")
-            # Fall back to Wikipedia
-            return self._search_with_wikipedia(query, limit)
         except Exception as e:
             logger.error(f"Tavily search error: {e}")
-            # Fall back to Wikipedia
             return self._search_with_wikipedia(query, limit)
     def _search_with_wikipedia(self, query: str, limit: int = 5) -> Dict[str, Any]:
         """
-        Search using Wikipedia as fallback - very reliable and no rate limits
         """
         try:
             logger.info(f"📚 Wikipedia search for: {query}")
-            # Try to import wikipedia library
-            try:
-                import wikipedia
-            except ImportError:
-                return {
-                    "query": query,
-                    "found": False,
-                    "message": "❌ No search engines available. Install 'wikipedia' package or configure Tavily API key.",
-                    "results": []
-                }
-            wikipedia.set_lang("en")
-            # Clean up query for Wikipedia search
-            search_terms = query.replace("site:", "").strip()
             # Search Wikipedia pages
-            wiki_results = wikipedia.search(search_terms, results=min(limit * 2, 10))
             if not wiki_results:
                 return {
@@ -222,7 +433,7 @@ class WebSearchTool(BaseTool):
                     break
                 try:
-                    page = wikipedia.page(page_title)
                     summary = page.summary[:300] + "..." if len(page.summary) > 300 else page.summary
                     web_result = WebSearchResult(
@@ -234,11 +445,11 @@ class WebSearchTool(BaseTool):
                     results.append(web_result.to_dict())
                     processed += 1
-                except wikipedia.exceptions.DisambiguationError as e:
                     # Try the first suggestion from disambiguation
                     try:
                         if e.options:
-                            page = wikipedia.page(e.options[0])
                             summary = page.summary[:300] + "..." if len(page.summary) > 300 else page.summary
                             web_result = WebSearchResult(
@@ -252,7 +463,7 @@ class WebSearchTool(BaseTool):
                     except:
                         continue
-                except wikipedia.exceptions.PageError:
                     # Page doesn't exist, skip
                     continue
                 except Exception as e:
@@ -284,7 +495,7 @@ class WebSearchTool(BaseTool):
             return {
                 "query": query,
                 "found": False,
-                "message": f"Search failed: {str(e)}",
                 "results": [],
                 "error_type": "search_failure"
             }
@@ -397,57 +608,6 @@ class WebSearchTool(BaseTool):
         combined_content = re.sub(r' +', ' ', combined_content)  # Multiple spaces
         return combined_content.strip()[:5000]  # Limit to 5000 characters
-    def search_youtube_metadata(self, query: str) -> Dict[str, Any]:
-        """
-        Specialized search for YouTube video information
-        """
-        try:
-            # Search specifically for YouTube videos
-            youtube_query = f"site:youtube.com {query}"
-            # Use the same search logic but filter for YouTube results
-            search_result = self._search_web(youtube_query, limit=3)
-            if not search_result.get('found'):
-                return search_result
-            youtube_results = []
-            for result in search_result.get('results', []):
-                if 'youtube.com/watch' in result.get('url', ''):
-                    video_id = self._extract_youtube_id(result['url'])
-                    youtube_result = {
-                        "title": result.get('title', 'No title'),
-                        "url": result.get('url', ''),
-                        "description": result.get('snippet', 'No description'),
-                        "video_id": video_id
-                    }
-                    youtube_results.append(youtube_result)
-            return {
-                "query": query,
-                "found": len(youtube_results) > 0,
-                "results": youtube_results,
-                "message": f"Found {len(youtube_results)} YouTube videos"
-            }
-        except Exception as e:
-            raise Exception(f"YouTube search failed: {str(e)}")
-    def _extract_youtube_id(self, url: str) -> str:
-        """Extract YouTube video ID from URL"""
-        patterns = [
-            r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
-            r'(?:embed\/)([0-9A-Za-z_-]{11})',
-            r'(?:youtu\.be\/)([0-9A-Za-z_-]{11})'
-        ]
-        for pattern in patterns:
-            match = re.search(pattern, url)
-            if match:
-                return match.group(1)
-        return ""
 def test_web_search_tool():
     """Test the web search tool with various queries"""
@@ -456,10 +616,10 @@ def test_web_search_tool():
     # Test cases
     test_cases = [
         "Python programming tutorial",
-        "https://en.wikipedia.org/wiki/Machine_learning",
-        {"query": "artificial intelligence news", "action": "search", "limit": 3},
-        {"query": "https://www.python.org", "action": "extract"},
-        {"query": "OpenAI ChatGPT", "action": "search", "limit": 2, "extract_content": True}
     ]
     print("🧪 Testing Web Search Tool...")

 #!/usr/bin/env python3
 """
 Web Search Tool for GAIA Agent System
+Handles web searches using DuckDuckGo (primary), Tavily API (secondary), and Wikipedia (fallback)
 """
 import re
 class WebSearchTool(BaseTool):
     """
+    Web search tool using DuckDuckGo (primary), Tavily API (secondary), and Wikipedia (fallback)
+    Provides multiple search engine options for reliability
     """
     def __init__(self):
         })
         self.session.timeout = 10
+        # Initialize search engines
         self.tavily_api_key = os.getenv("TAVILY_API_KEY")
         self.use_tavily = self.tavily_api_key is not None
+        # Try to import DuckDuckGo
+        try:
+            from duckduckgo_search import DDGS
+            self.ddgs = DDGS()
+            self.use_duckduckgo = True
+            logger.info("✅ DuckDuckGo search initialized")
+        except ImportError:
+            logger.warning("⚠️ DuckDuckGo search not available - install duckduckgo-search package")
+            self.use_duckduckgo = False
+        # Try to import Wikipedia
+        try:
+            import wikipedia
+            self.wikipedia = wikipedia
+            self.use_wikipedia = True
+            logger.info("✅ Wikipedia search initialized")
+        except ImportError:
+            logger.warning("⚠️ Wikipedia search not available - install wikipedia package")
+            self.use_wikipedia = False
         if self.use_tavily:
+            logger.info("✅ Tavily API key found - using as secondary search")
+        # Search engine priority: DuckDuckGo -> Tavily -> Wikipedia
+        search_engines = []
+        if self.use_duckduckgo:
+            search_engines.append("DuckDuckGo")
+        if self.use_tavily:
+            search_engines.append("Tavily")
+        if self.use_wikipedia:
+            search_engines.append("Wikipedia")
+        logger.info(f"🔍 Available search engines: {', '.join(search_engines)}")
     def _execute_impl(self, input_data: Any, **kwargs) -> Dict[str, Any]:
         """
         """Check if text is a URL"""
         return bool(re.match(r'https?://', text))
+    def _extract_search_terms(self, query: str, max_length: int = 250) -> str:
+        """
+        Extract key search terms from a potentially long query
+        """
+        # If query is short enough, use as-is
+        if len(query) <= max_length:
+            return query
+        # Remove common stop words and extract key terms
+        stop_words = {
+            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
+            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
+            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those',
+            'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
+            'what', 'where', 'when', 'why', 'how', 'which', 'who', 'whose', 'whom',
+            'please', 'could', 'you', 'tell', 'me', 'find', 'search', 'for', 'about'
+        }
+        # Split into words and filter
+        words = re.findall(r'\b\w+\b', query.lower())
+        key_words = [word for word in words if word not in stop_words and len(word) > 2]
+        # Keep important phrases and entities
+        # Look for quoted phrases, proper nouns, numbers, dates
+        important_patterns = [
+            r'"[^"]*"',  # Quoted phrases
+            r'\b[A-Z][a-z]*(?:\s+[A-Z][a-z]*)*\b',  # Proper nouns
+            r'\b\d{4}\b',  # Years
+            r'\b\d+\b',   # Numbers
+        ]
+        important_terms = []
+        for pattern in important_patterns:
+            matches = re.findall(pattern, query)
+            important_terms.extend(matches)
+        # Combine key words and important terms
+        search_terms = []
+        # Add important terms first (they're usually more specific)
+        for term in important_terms:
+            if len(' '.join(search_terms + [term])) <= max_length:
+                search_terms.append(term)
+        # Add key words until we hit the limit
+        for word in key_words:
+            potential_query = ' '.join(search_terms + [word])
+            if len(potential_query) <= max_length:
+                search_terms.append(word)
+            else:
+                break
+        result = ' '.join(search_terms)
+        # If still too long, truncate
+        if len(result) > max_length:
+            result = result[:max_length].rsplit(' ', 1)[0]
+        # If we ended up with nothing, use first part of original query
+        if not result.strip():
+            result = query[:max_length].rsplit(' ', 1)[0]
+        if result != query:
+            logger.info(f"📝 Extracted search terms: '{result}' from '{query[:100]}...'")
+        return result
     def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
         """
+        Search the web using available search engines in priority order
+
+        The query is first shortened with _extract_search_terms (250-char cap).
+        DuckDuckGo, Tavily and Wikipedia are then tried in that order, each only
+        when its use_* flag is set. An exception from DuckDuckGo or Tavily is
+        logged and the next engine is tried; the Wikipedia call is not guarded
+        here. When no engine is enabled, a "found": False result is returned.
         """
+        # Extract search terms to avoid length issues
+        search_query = self._extract_search_terms(query, max_length=250)
+        # Try DuckDuckGo first (most comprehensive for general web search)
+        if self.use_duckduckgo:
+            try:
+                return self._search_with_duckduckgo(search_query, limit, extract_content)
+            except Exception as e:
+                logger.warning(f"DuckDuckGo search failed, trying Tavily: {e}")
+        # Try Tavily if DuckDuckGo fails and API key is available
         if self.use_tavily:
             try:
+                return self._search_with_tavily(search_query, limit, extract_content)
             except Exception as e:
+                logger.warning(f"Tavily search failed, trying Wikipedia: {e}")
         # Fallback to Wikipedia search
+        if self.use_wikipedia:
+            return self._search_with_wikipedia(search_query, limit)
+        # No search engines available
+        # (the result echoes the caller's original query, not the shortened one)
+        return {
+            "query": query,
+            "found": False,
+            "message": "❌ No search engines available. Please install required packages.",
+            "results": []
+        }
+    def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
+        """
+        Search using DuckDuckGo - primary search engine
+        """
+        try:
+            logger.info(f"🦆 DuckDuckGo search for: {query}")
+            # Add retry logic for DuckDuckGo rate limiting
+            max_retries = 3
+            retry_delay = 2
+            for attempt in range(max_retries):
+                try:
+                    # Use DuckDuckGo text search
+                    ddg_results = list(self.ddgs.text(query, max_results=min(limit, 10)))
+                    if not ddg_results:
+                        if attempt < max_retries - 1:
+                            logger.warning(f"DuckDuckGo returned no results, retrying in {retry_delay}s...")
+                            time.sleep(retry_delay)
+                            retry_delay *= 2
+                            continue
+                        else:
+                            logger.warning("DuckDuckGo returned no results after retries")
+                            # Fall back to other search engines
+                            return self._search_with_fallback(query, limit)
+                    break
+                except Exception as e:
+                    if "rate limit" in str(e).lower() or "429" in str(e):
+                        if attempt < max_retries - 1:
+                            logger.warning(f"DuckDuckGo rate limited, retrying in {retry_delay}s...")
+                            time.sleep(retry_delay)
+                            retry_delay *= 2
+                            continue
+                        else:
+                            logger.warning("DuckDuckGo rate limited after retries, using fallback")
+                            return self._search_with_fallback(query, limit)
+                    else:
+                        raise
+            # Process DuckDuckGo results
+            results = []
+            for result in ddg_results:
+                web_result = WebSearchResult(
+                    title=result.get('title', 'No title'),
+                    url=result.get('href', ''),
+                    snippet=result.get('body', 'No description'),
+                    content=''  # DuckDuckGo doesn't provide full content
+                )
+                results.append(web_result.to_dict())
+            # Extract content if requested
+            if extract_content and results:
+                for result in results[:2]:  # Only extract from first 2 results to save time
+                    try:
+                        content_result = self._extract_content_from_url(result['url'])
+                        if content_result.get('found'):
+                            result['content'] = content_result.get('content', '')[:1000]
+                    except:
+                        pass  # Skip content extraction errors
+            logger.info(f"✅ DuckDuckGo found {len(results)} results")
+            return {
+                "query": query,
+                "found": True,
+                "results": results,
+                "total_results": len(results),
+                "message": f"Found {len(results)} results via DuckDuckGo",
+                "search_engine": "duckduckgo"
+            }
+        except Exception as e:
+            logger.error(f"DuckDuckGo search error: {e}")
+            # Fall back to other search engines
+            return self._search_with_fallback(query, limit)
+    def _search_with_fallback(self, query: str, limit: int) -> Dict[str, Any]:
+        """Try fallback search engines"""
+        # Try Tavily if available
+        if self.use_tavily:
+            try:
+                return self._search_with_tavily(query, limit, False)
+            except Exception as e:
+                logger.warning(f"Tavily fallback failed: {e}")
+        # Try Wikipedia as last resort
+        if self.use_wikipedia:
+            return self._search_with_wikipedia(query, limit)
+        return {
+            "query": query,
+            "found": False,
+            "message": "All search engines failed",
+            "results": []
+        }
     def _search_with_tavily(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
         """
+        Search using Tavily Search API - secondary search engine
         """
         try:
             logger.info(f"🔍 Tavily search for: {query}")
                 "include_answer": False,
                 "include_images": False,
                 "include_raw_content": extract_content,
+                "max_results": min(limit, 10)
             }
             # Make API request
                     "search_engine": "tavily"
                 }
             else:
+                logger.warning("Tavily returned no results")
+                # Fall back to Wikipedia
+                if self.use_wikipedia:
+                    return self._search_with_wikipedia(query, limit)
         except requests.exceptions.RequestException as e:
             logger.error(f"Tavily API request failed: {e}")
         except Exception as e:
             logger.error(f"Tavily search error: {e}")
+        # Fall back to Wikipedia if Tavily fails
+        if self.use_wikipedia:
             return self._search_with_wikipedia(query, limit)
+        return {
+            "query": query,
+            "found": False,
+            "message": "Tavily search failed and no fallback available",
+            "results": []
+        }
     def _search_with_wikipedia(self, query: str, limit: int = 5) -> Dict[str, Any]:
         """
+        Search using Wikipedia - fallback search engine for factual information
         """
         try:
             logger.info(f"📚 Wikipedia search for: {query}")
+            self.wikipedia.set_lang("en")
+            # Clean up query for Wikipedia search and ensure it's not too long
+            search_terms = self._extract_search_terms(query, max_length=100)  # Wikipedia has stricter limits
             # Search Wikipedia pages
+            wiki_results = self.wikipedia.search(search_terms, results=min(limit * 2, 10))
             if not wiki_results:
                 return {
                     break
                 try:
+                    page = self.wikipedia.page(page_title)
                     summary = page.summary[:300] + "..." if len(page.summary) > 300 else page.summary
                     web_result = WebSearchResult(
                     results.append(web_result.to_dict())
                     processed += 1
+                except self.wikipedia.exceptions.DisambiguationError as e:
                     # Try the first suggestion from disambiguation
                     try:
                         if e.options:
+                            page = self.wikipedia.page(e.options[0])
                             summary = page.summary[:300] + "..." if len(page.summary) > 300 else page.summary
                             web_result = WebSearchResult(
                     except:
                         continue
+                except self.wikipedia.exceptions.PageError:
                     # Page doesn't exist, skip
                     continue
                 except Exception as e:
             return {
                 "query": query,
                 "found": False,
+                "message": f"Wikipedia search failed: {str(e)}",
                 "results": [],
                 "error_type": "search_failure"
             }
         combined_content = re.sub(r' +', ' ', combined_content)  # Multiple spaces
         return combined_content.strip()[:5000]  # Limit to 5000 characters
 def test_web_search_tool():
     """Test the web search tool with various queries"""
     # Test cases
     test_cases = [
         "Python programming tutorial",
+        "Mercedes Sosa studio albums 2000 2009",
+        "artificial intelligence recent developments",
+        "climate change latest research",
+        "https://en.wikipedia.org/wiki/Machine_learning"
     ]
     print("🧪 Testing Web Search Tool...")