Agent_Course_Final_Assignment

Sleeping

App Files Files Community

Chris commited on May 29, 2025

Commit

6c60f72

1 Parent(s): 5ec1e1b

Final 7.1.3

Browse files

Files changed (6) hide show

src/agents/__pycache__/web_researcher.cpython-310.pyc +0 -0
src/agents/web_researcher.py +163 -113
src/tools/__pycache__/final_answer_tool.cpython-310.pyc +0 -0
src/tools/__pycache__/web_search_tool.cpython-310.pyc +0 -0
src/tools/final_answer_tool.py +60 -8
src/tools/web_search_tool.py +100 -70

src/agents/__pycache__/web_researcher.cpython-310.pyc CHANGED Viewed

Binary files a/src/agents/__pycache__/web_researcher.cpython-310.pyc and b/src/agents/__pycache__/web_researcher.cpython-310.pyc differ

src/agents/web_researcher.py CHANGED Viewed

@@ -413,90 +413,114 @@ class WebResearchAgent:
         return ' '.join(topic_words[:3]) if topic_words else "topic"
-    def _extract_search_terms(self, question: str) -> str:
-        """Extract focused search terms from question to avoid length limits"""
-        # Handle different question types more intelligently
-        question_lower = question.lower()
-        # For questions about specific people, places, things - extract key entities
-        # Look for quoted phrases first (highest priority)
-        quoted_terms = re.findall(r'"([^"]+)"', question)
-        if quoted_terms:
-            # Use the first quoted phrase as it's usually the most important
-            main_term = quoted_terms[0]
-            # Add year if present
-            years = re.findall(r'\b(19|20)\d{2}\b', question)
-            if years:
-                return f"{main_term} {years[0]}"
-            return main_term
-        # Extract proper nouns and key entities
-        # Look for capitalized words (likely proper nouns)
-        proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', question)
-        # Extract years and numbers (often important)
         years = re.findall(r'\b(19|20)\d{2}\b', question)
-        numbers = re.findall(r'\b\d+\b', question)
-        # Remove very common stop words and question patterns
-        stop_patterns = [
-            r'\b(?:what|who|when|where|why|how|is|are|was|were|do|does|did|can|could|would|should|will)\b',
-            r'\b(?:the|a|an|and|or|but|in|on|at|to|for|of|with|by|from|about)\b',
-            r'\b(?:please|could|you|tell|me|find|search|for|give|provide|list|show)\b',
-            r'\b(?:information|details|data|facts|answer)\b',
-            r'[?.,!]+',  # Punctuation
-        ]
-        # Clean the question
-        clean_question = question
-        for pattern in stop_patterns:
-            clean_question = re.sub(pattern, ' ', clean_question, flags=re.IGNORECASE)
-        # Extract remaining meaningful words
-        words = clean_question.split()
-        meaningful_words = []
-        for word in words:
-            word = word.strip()
-            if len(word) > 2 and word.isalpha():  # Only alphabetic words longer than 2 chars
-                meaningful_words.append(word)
-        # Build search terms prioritizing important elements
         search_terms = []
-        # Add proper nouns first (most specific)
-        for noun in proper_nouns[:2]:  # Max 2 proper nouns
-            if len(' '.join(search_terms + [noun])) <= 100:  # Conservative length limit
-                search_terms.append(noun)
-        # Add years/numbers
-        for year in years[:1]:  # Max 1 year
-            if len(' '.join(search_terms + [year])) <= 100:
-                search_terms.append(year)
-        # Add meaningful words until we reach a reasonable length
-        for word in meaningful_words[:5]:  # Max 5 additional words
-            potential_query = ' '.join(search_terms + [word])
-            if len(potential_query) <= 100:  # Keep well under 250 char limit
                 search_terms.append(word)
-            else:
-                break
-        # Fallback if nothing found
-        if not search_terms:
-            # Take first few words of the original question
-            first_words = question.split()[:5]  # First 5 words max
-            search_terms = [w for w in first_words if w.isalpha() and len(w) > 2]
-        result = ' '.join(search_terms)
-        # Final length check and truncation
-        if len(result) > 100:
-            result = result[:100].rsplit(' ', 1)[0]
-        logger.info(f"📝 Extracted search terms: '{result}' from question: '{question[:50]}...'")
-        return result
     def _extract_youtube_info(self, question: str) -> str:
         """Extract YouTube URL or search terms"""
@@ -578,53 +602,79 @@ class WebResearchAgent:
     def _analyze_web_search_result(self, state: GAIAAgentState, web_result: ToolResult) -> AgentResult:
         """Analyze web search results"""
-        search_results = web_result.result['results']
-        # Combine top results for analysis
-        combined_content = []
-        for i, result in enumerate(search_results[:3], 1):
-            combined_content.append(f"Result {i}: {result['title']}")
-            combined_content.append(f"URL: {result['url']}")
-            combined_content.append(f"Description: {result['snippet']}")
-            combined_content.append("")
-        analysis_prompt = f"""
-        Based on these web search results, please answer the following question:
-        Question: {state.question}
-        Search Results:
-        {chr(10).join(combined_content)}
-        Please provide a direct answer based on the most relevant information.
-        """
-        model_tier = ModelTier.MAIN
-        llm_result = self.llm_client.generate(analysis_prompt, tier=model_tier, max_tokens=400)
-        if llm_result.success:
-            return AgentResult(
-                agent_role=AgentRole.WEB_RESEARCHER,
-                success=True,
-                result=llm_result.response,
-                confidence=0.75,
-                reasoning=f"Analyzed {len(search_results)} web search results",
-                tools_used=[web_result],
-                model_used=llm_result.model_used,
-                processing_time=web_result.execution_time + llm_result.response_time,
-                cost_estimate=llm_result.cost_estimate
-            )
         else:
-            # Fallback to first result description
-            first_result = search_results[0] if search_results else {}
             return AgentResult(
                 agent_role=AgentRole.WEB_RESEARCHER,
-                success=True,
-                result=first_result.get('snippet', 'Web search completed'),
-                confidence=0.50,
-                reasoning="Web search completed but analysis failed",
                 tools_used=[web_result],
-                model_used="fallback",
                 processing_time=web_result.execution_time,
                 cost_estimate=0.0
             )

         return ' '.join(topic_words[:3]) if topic_words else "topic"
+    def _extract_search_terms(self, question: str, max_length: int = 100) -> str:
+        """
+        Extract optimized search terms from question
+        Prioritizes important terms while staying under length limits
+        """
+        # Clean the question first
+        clean_question = re.sub(r'[^\w\s\-]', ' ', question.lower())
+        words = clean_question.split()
+        # Remove common stop words but keep question words
+        stop_words = {
+            'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
+            'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
+            'should', 'may', 'might', 'must', 'shall', 'can', 'to', 'of', 'in',
+            'on', 'at', 'by', 'for', 'with', 'from', 'as', 'but', 'or', 'and',
+            'if', 'then', 'than', 'this', 'that', 'these', 'those', 'i', 'you',
+            'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them'
+        }
+        # Keep important question words
+        question_words = {'who', 'what', 'when', 'where', 'why', 'how', 'which'}
+        # Priority terms (always include if present)
+        priority_terms = []
+        # Extract quoted phrases first
+        quoted_phrases = re.findall(r'"([^"]*)"', question)
+        for phrase in quoted_phrases:
+            if len(phrase.strip()) > 0:
+                priority_terms.append(phrase.strip())
+        # Extract proper nouns (capitalized words)
+        proper_nouns = []
+        for word in question.split():
+            clean_word = re.sub(r'[^\w]', '', word)
+            if clean_word and clean_word[0].isupper() and len(clean_word) > 1:
+                proper_nouns.append(clean_word)
+        # Extract years (4-digit numbers)
         years = re.findall(r'\b(19|20)\d{2}\b', question)
+        # Extract other important numbers (but not random ones)
+        important_numbers = re.findall(r'\b\d{1,4}\b', question)
+        # Filter out years and common numbers from important numbers to avoid duplication
+        common_numbers = {'19', '20', '1', '2', '3', '4', '5', '10'}  # Filter out very common numbers
+        important_numbers = [num for num in important_numbers if num not in years and num not in common_numbers]
+        # Build search terms with priority
         search_terms = []
+        # Add quoted phrases (highest priority)
+        search_terms.extend(priority_terms)
+        # Add proper nouns (high priority)
+        search_terms.extend(proper_nouns[:5])  # Limit to avoid duplication
+        # Add question words if present
+        for word in words:
+            if word in question_words and word not in search_terms:
                 search_terms.append(word)
+        # Add years
+        search_terms.extend(years[:2])  # Limit to 2 years max
+        # Add other important terms
+        for word in words:
+            if (word not in stop_words and
+                word not in search_terms and
+                len(word) > 2 and
+                not word.isdigit()):  # Avoid random numbers
+                search_terms.append(word)
+                # Stop if we have enough terms
+                if len(' '.join(search_terms)) > max_length - 20:
+                    break
+        # Add a few important numbers if space allows
+        if len(' '.join(search_terms)) < max_length - 10:
+            search_terms.extend(important_numbers[:2])
+        # Join and clean up
+        search_query = ' '.join(search_terms)
+        # Remove duplicates while preserving order
+        seen = set()
+        unique_terms = []
+        for term in search_terms:
+            if term.lower() not in seen:
+                seen.add(term.lower())
+                unique_terms.append(term)
+        # Final cleanup and length check
+        final_query = ' '.join(unique_terms)
+        if len(final_query) > max_length:
+            # Truncate to fit
+            truncated_terms = []
+            current_length = 0
+            for term in unique_terms:
+                if current_length + len(term) + 1 <= max_length:
+                    truncated_terms.append(term)
+                    current_length += len(term) + 1
+                else:
+                    break
+            final_query = ' '.join(truncated_terms)
+        logger.info(f"📝 Optimized search terms: '{final_query}' from question: '{question[:50]}...'")
+        return final_query
     def _extract_youtube_info(self, question: str) -> str:
         """Extract YouTube URL or search terms"""
     def _analyze_web_search_result(self, state: GAIAAgentState, web_result: ToolResult) -> AgentResult:
         """Analyze web search results"""
+        search_data = web_result.result
+        # Handle new search result format
+        if search_data.get('success') and search_data.get('results'):
+            search_results = search_data['results']
+            # Convert WebSearchResult objects to dictionaries if needed
+            if search_results and hasattr(search_results[0], 'to_dict'):
+                search_results = [r.to_dict() for r in search_results]
+            # Combine top results for analysis
+            combined_content = []
+            for i, result in enumerate(search_results[:3], 1):
+                combined_content.append(f"Result {i}: {result.get('title', 'No title')}")
+                combined_content.append(f"URL: {result.get('url', 'No URL')}")
+                combined_content.append(f"Description: {result.get('snippet', result.get('content', 'No description'))[:200]}")
+                combined_content.append(f"Source: {result.get('source', 'Unknown')}")
+                combined_content.append("")
+            analysis_prompt = f"""
+            Based on these web search results, please answer the following question:
+            Question: {state.question}
+            Search Query: {search_data.get('query', 'N/A')}
+            Search Engine: {search_data.get('source', 'Unknown')}
+            Results Found: {search_data.get('count', len(search_results))}
+            Search Results:
+            {chr(10).join(combined_content)}
+            Please provide a direct answer based on the most relevant information.
+            """
+            model_tier = ModelTier.COMPLEX  # Use 72B model for better analysis
+            llm_result = self.llm_client.generate(analysis_prompt, tier=model_tier, max_tokens=400)
+            if llm_result.success:
+                return AgentResult(
+                    agent_role=AgentRole.WEB_RESEARCHER,
+                    success=True,
+                    result=llm_result.response,
+                    confidence=0.80,  # Higher confidence with better model
+                    reasoning=f"Analyzed {len(search_results)} web search results using {search_data.get('source', 'search engine')}",
+                    tools_used=[web_result],
+                    model_used=llm_result.model_used,
+                    processing_time=web_result.execution_time + llm_result.response_time,
+                    cost_estimate=llm_result.cost_estimate
+                )
+            else:
+                # Fallback to first result description
+                first_result = search_results[0] if search_results else {}
+                return AgentResult(
+                    agent_role=AgentRole.WEB_RESEARCHER,
+                    success=True,
+                    result=first_result.get('snippet', first_result.get('content', 'Web search completed')),
+                    confidence=0.50,
+                    reasoning="Web search completed but analysis failed",
+                    tools_used=[web_result],
+                    model_used="fallback",
+                    processing_time=web_result.execution_time,
+                    cost_estimate=0.0
+                )
         else:
+            # Handle search failure or empty results
             return AgentResult(
                 agent_role=AgentRole.WEB_RESEARCHER,
+                success=False,
+                result="Web search returned no useful results",
+                confidence=0.20,
+                reasoning=f"Search failed or empty: {search_data.get('note', 'Unknown reason')}",
                 tools_used=[web_result],
+                model_used="none",
                 processing_time=web_result.execution_time,
                 cost_estimate=0.0
             )

src/tools/__pycache__/final_answer_tool.cpython-310.pyc CHANGED Viewed

Binary files a/src/tools/__pycache__/final_answer_tool.cpython-310.pyc and b/src/tools/__pycache__/final_answer_tool.cpython-310.pyc differ

src/tools/__pycache__/web_search_tool.cpython-310.pyc CHANGED Viewed

Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ

src/tools/final_answer_tool.py CHANGED Viewed

@@ -93,7 +93,7 @@ EXTRACTION RULES:
 """
         # Add type-specific rules
-        if "mathematical" in question_type.lower() or any(word in question.lower() for word in ["how many", "count", "number", "calculate"]):
             base_prompt += """
 - If asking for a count/number: respond with ONLY the number (e.g., "5", "23", "0")
 - If asking for calculation: respond with ONLY the result (e.g., "42", "3.14", "100")
@@ -155,6 +155,9 @@ Extract the precise answer NOW:"""
             "result:",
             "response:",
             "conclusion:",
         ]
         for prefix in prefixes_to_remove:
@@ -167,18 +170,67 @@ Extract the precise answer NOW:"""
         if answer.startswith("'") and answer.endswith("'"):
             answer = answer[1:-1]
-        # Handle specific formatting based on question type
-        if "mathematical" in question_type.lower():
-            # Extract just the number for mathematical questions
-            number_match = re.search(r'-?\d+(?:\.\d+)?', answer)
-            if number_match:
-                answer = number_match.group()
-        elif "text_manipulation" in question_type.lower():
             # For reversed text questions, ensure clean output
             if len(answer.split()) == 1:  # Single word answer
                 answer = answer.lower()
         # Remove any trailing punctuation that's not part of the answer
         answer = answer.rstrip('.,!?;:')

 """
         # Add type-specific rules
+        if "mathematical" in question_type.lower() or any(word in question.lower() for word in ["how many", "count", "number", "albums"]):
             base_prompt += """
 - If asking for a count/number: respond with ONLY the number (e.g., "5", "23", "0")
 - If asking for calculation: respond with ONLY the result (e.g., "42", "3.14", "100")
             "result:",
             "response:",
             "conclusion:",
+            "based on",
+            "according to",
+            "from the",
         ]
         for prefix in prefixes_to_remove:
         if answer.startswith("'") and answer.endswith("'"):
             answer = answer[1:-1]
+        # AGGRESSIVE LENGTH ENFORCEMENT FOR GAIA
+        # If answer is too long, extract the core information
+        if len(answer) > 50:
+            # For different question types, extract differently
+            if "mathematical" in question_type.lower() or any(word in question.lower() for word in ["how many", "count", "number", "albums"]):
+                # Extract just the number for mathematical questions
+                number_match = re.search(r'-?\d+(?:\.\d+)?', answer)
+                if number_match:
+                    answer = number_match.group()
+            elif "name" in question_type.lower() or any(word in question.lower() for word in ["who", "name"]):
+                # Extract just the name (first few words)
+                words = answer.split()
+                if len(words) > 3:
+                    answer = ' '.join(words[:3])  # Keep only first 3 words for names
+            elif "location" in question_type.lower() or any(word in question.lower() for word in ["where", "city", "country"]):
+                # Extract just the location name
+                words = answer.split()
+                if len(words) > 2:
+                    answer = ' '.join(words[:2])  # Keep only first 2 words for locations
+            elif "yes_no" in question_type.lower() or any(word in answer.lower() for word in ["yes", "no", "true", "false"]):
+                # Extract yes/no/true/false
+                if any(word in answer.lower() for word in ["yes", "no", "true", "false"]):
+                    for word in answer.lower().split():
+                        if word in ["yes", "no", "true", "false"]:
+                            answer = word
+                            break
+            else:
+                # For other types, take first sentence or clause
+                sentences = re.split(r'[.!?]', answer)
+                if sentences:
+                    answer = sentences[0].strip()
+                    # If still too long, take first clause
+                    if len(answer) > 30:
+                        clauses = re.split(r'[,;:]', answer)
+                        if clauses:
+                            answer = clauses[0].strip()
+        # Handle specific formatting based on question type
+        if "text_manipulation" in question_type.lower():
             # For reversed text questions, ensure clean output
             if len(answer.split()) == 1:  # Single word answer
                 answer = answer.lower()
+        # Final aggressive truncation if still too long
+        if len(answer) > 40:
+            # Split into words and take as many as fit
+            words = answer.split()
+            truncated_words = []
+            current_length = 0
+            for word in words:
+                if current_length + len(word) + 1 <= 40:
+                    truncated_words.append(word)
+                    current_length += len(word) + 1
+                else:
+                    break
+            if truncated_words:
+                answer = ' '.join(truncated_words)
+            else:
+                # Last resort - take first 40 characters
+                answer = answer[:40].strip()
         # Remove any trailing punctuation that's not part of the answer
         answer = answer.rstrip('.,!?;:')

src/tools/web_search_tool.py CHANGED Viewed

@@ -20,18 +20,20 @@ logger = logging.getLogger(__name__)
 class WebSearchResult:
     """Container for web search results"""
-    def __init__(self, title: str, url: str, snippet: str, content: str = ""):
         self.title = title
         self.url = url
         self.snippet = snippet
         self.content = content
     def to_dict(self) -> Dict[str, str]:
         return {
             "title": self.title,
             "url": self.url,
             "snippet": self.snippet,
-            "content": self.content[:1500] + "..." if len(self.content) > 1500 else self.content
         }
 class WebSearchTool(BaseTool):
@@ -246,53 +248,78 @@ class WebSearchTool(BaseTool):
                     title=result.get('title', 'No title'),
                     url=result.get('href', ''),
                     snippet=result.get('body', 'No description'),
-                    content=''  # DuckDuckGo doesn't provide full content
                 )
-                results.append(web_result.to_dict())
-            # Extract content if requested
-            if extract_content and results:
-                for result in results[:2]:  # Only extract from first 2 results to save time
-                    try:
-                        content_result = self._extract_content_from_url(result['url'])
-                        if content_result.get('found'):
-                            result['content'] = content_result.get('content', '')[:1000]
-                    except:
-                        pass  # Skip content extraction errors
             logger.info(f"✅ DuckDuckGo found {len(results)} results")
             return {
-                "query": query,
-                "found": True,
-                "results": results,
-                "total_results": len(results),
-                "message": f"Found {len(results)} results via DuckDuckGo",
-                "search_engine": "duckduckgo"
             }
         except Exception as e:
-            logger.warning(f"DuckDuckGo search failed: {str(e)[:100]}")
-            # Fall back to other search engines immediately
             return self._search_with_fallback(query, limit)
-    def _search_with_fallback(self, query: str, limit: int) -> Dict[str, Any]:
-        """Try fallback search engines"""
-        # Try Tavily if available
-        if self.use_tavily:
             try:
-                return self._search_with_tavily(query, limit, False)
             except Exception as e:
-                logger.warning(f"Tavily fallback failed: {e}")
-        # Try Wikipedia as last resort
-        if self.use_wikipedia:
-            return self._search_with_wikipedia(query, limit)
         return {
-            "query": query,
-            "found": False,
-            "message": "All search engines failed",
-            "results": []
         }
     def _search_with_tavily(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
@@ -339,17 +366,16 @@ class WebSearchTool(BaseTool):
                     snippet=result.get('content', 'No description'),
                     content=result.get('raw_content', '') if extract_content else ''
                 )
-                results.append(web_result.to_dict())
             if results:
                 logger.info(f"✅ Tavily found {len(results)} results")
                 return {
-                    "query": query,
-                    "found": True,
-                    "results": results,
-                    "total_results": len(results),
-                    "message": f"Found {len(results)} results via Tavily Search API",
-                    "search_engine": "tavily"
                 }
             else:
                 logger.warning("Tavily returned no results")
@@ -367,10 +393,12 @@ class WebSearchTool(BaseTool):
             return self._search_with_wikipedia(query, limit)
         return {
-            "query": query,
-            "found": False,
-            "message": "Tavily search failed and no fallback available",
-            "results": []
         }
     def _search_with_wikipedia(self, query: str, limit: int = 5) -> Dict[str, Any]:
@@ -390,11 +418,12 @@ class WebSearchTool(BaseTool):
             if not wiki_results:
                 return {
-                    "query": query,
-                    "found": False,
-                    "message": "No Wikipedia articles found for this query",
-                    "results": [],
-                    "search_engine": "wikipedia"
                 }
             results = []
@@ -414,7 +443,7 @@ class WebSearchTool(BaseTool):
                         snippet=summary,
                         content=page.summary[:1000] + "..." if len(page.summary) > 1000 else page.summary
                     )
-                    results.append(web_result.to_dict())
                     processed += 1
                 except self.wikipedia.exceptions.DisambiguationError as e:
@@ -430,7 +459,7 @@ class WebSearchTool(BaseTool):
                                 snippet=summary,
                                 content=page.summary[:1000] + "..." if len(page.summary) > 1000 else page.summary
                             )
-                            results.append(web_result.to_dict())
                             processed += 1
                     except:
                         continue
@@ -446,30 +475,31 @@ class WebSearchTool(BaseTool):
             if results:
                 logger.info(f"✅ Wikipedia found {len(results)} results")
                 return {
-                    "query": query,
-                    "found": True,
-                    "results": results,
-                    "total_results": len(results),
-                    "message": f"Found {len(results)} Wikipedia articles",
-                    "search_engine": "wikipedia"
                 }
             else:
                 return {
-                    "query": query,
-                    "found": False,
-                    "message": "No accessible Wikipedia articles found for this query",
-                    "results": [],
-                    "search_engine": "wikipedia"
                 }
         except Exception as e:
             logger.error(f"Wikipedia search failed: {e}")
             return {
-                "query": query,
-                "found": False,
-                "message": f"Wikipedia search failed: {str(e)}",
-                "results": [],
-                "error_type": "search_failure"
             }
     def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
@@ -603,7 +633,7 @@ def test_web_search_tool():
             if result.success:
                 print(f"✅ Success: {result.result.get('message', 'No message')}")
-                search_engine = result.result.get('search_engine', 'unknown')
                 print(f"   Search engine: {search_engine}")
                 if result.result.get('found'):

 class WebSearchResult:
     """Container for web search results"""
+    def __init__(self, title: str, url: str, snippet: str, content: str = "", source: str = ""):
         self.title = title
         self.url = url
         self.snippet = snippet
         self.content = content
+        self.source = source
     def to_dict(self) -> Dict[str, str]:
         return {
             "title": self.title,
             "url": self.url,
             "snippet": self.snippet,
+            "content": self.content[:1500] + "..." if len(self.content) > 1500 else self.content,
+            "source": self.source
         }
 class WebSearchTool(BaseTool):
                     title=result.get('title', 'No title'),
                     url=result.get('href', ''),
                     snippet=result.get('body', 'No description'),
+                    source='DuckDuckGo'
                 )
+                results.append(web_result)
             logger.info(f"✅ DuckDuckGo found {len(results)} results")
             return {
+                'success': True,
+                'results': results,
+                'source': 'DuckDuckGo',
+                'query': query,
+                'count': len(results)
             }
         except Exception as e:
+            logger.warning(f"DuckDuckGo search failed: {str(e)}")
+            # Don't log the full exception details to avoid spam
             return self._search_with_fallback(query, limit)
+    def _search_with_fallback(self, query: str, limit: int = 5) -> Dict[str, Any]:
+        """Enhanced fallback search when DuckDuckGo fails"""
+        logger.info(f"🔄 Using fallback search engines for: {query}")
+        # Try Tavily API first if available
+        if hasattr(self, 'tavily') and self.tavily:
             try:
+                logger.info("📡 Trying Tavily API search")
+                tavily_result = self.tavily.search(query, max_results=limit)
+                if tavily_result and 'results' in tavily_result:
+                    results = []
+                    for result in tavily_result['results'][:limit]:
+                        web_result = WebSearchResult(
+                            title=result.get('title', 'No title'),
+                            url=result.get('url', ''),
+                            snippet=result.get('content', 'No description'),
+                            source='Tavily'
+                        )
+                        results.append(web_result)
+                    if results:
+                        logger.info(f"✅ Tavily found {len(results)} results")
+                        return {
+                            'success': True,
+                            'results': results,
+                            'source': 'Tavily',
+                            'query': query,
+                            'count': len(results)
+                        }
             except Exception as e:
+                logger.warning(f"Tavily search failed: {str(e)}")
+        # Fall back to Wikipedia search
+        logger.info("📚 Wikipedia search for: " + query)
+        try:
+            wiki_results = self._search_wikipedia(query, limit)
+            if wiki_results and wiki_results.get('success'):
+                logger.info(f"✅ Wikipedia found {wiki_results.get('count', 0)} results")
+                return wiki_results
+        except Exception as e:
+            logger.warning(f"Wikipedia fallback failed: {str(e)}")
+        # Final fallback - return empty but successful result to allow processing to continue
+        logger.warning("All search engines failed, returning empty results")
         return {
+            'success': True,
+            'results': [],
+            'source': 'none',
+            'query': query,
+            'count': 0,
+            'note': 'All search engines failed'
         }
     def _search_with_tavily(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
                     snippet=result.get('content', 'No description'),
                     content=result.get('raw_content', '') if extract_content else ''
                 )
+                results.append(web_result)
             if results:
                 logger.info(f"✅ Tavily found {len(results)} results")
                 return {
+                    'success': True,
+                    'results': results,
+                    'source': 'Tavily',
+                    'query': query,
+                    'count': len(results)
                 }
             else:
                 logger.warning("Tavily returned no results")
             return self._search_with_wikipedia(query, limit)
         return {
+            'success': False,
+            'results': [],
+            'source': 'Tavily',
+            'query': query,
+            'count': 0,
+            'note': 'Tavily search failed and no fallback available'
         }
     def _search_with_wikipedia(self, query: str, limit: int = 5) -> Dict[str, Any]:
             if not wiki_results:
                 return {
+                    'success': False,
+                    'results': [],
+                    'source': 'Wikipedia',
+                    'query': query,
+                    'count': 0,
+                    'note': 'No Wikipedia articles found for this query'
                 }
             results = []
                         snippet=summary,
                         content=page.summary[:1000] + "..." if len(page.summary) > 1000 else page.summary
                     )
+                    results.append(web_result)
                     processed += 1
                 except self.wikipedia.exceptions.DisambiguationError as e:
                                 snippet=summary,
                                 content=page.summary[:1000] + "..." if len(page.summary) > 1000 else page.summary
                             )
+                            results.append(web_result)
                             processed += 1
                     except:
                         continue
             if results:
                 logger.info(f"✅ Wikipedia found {len(results)} results")
                 return {
+                    'success': True,
+                    'results': results,
+                    'source': 'Wikipedia',
+                    'query': query,
+                    'count': len(results)
                 }
             else:
                 return {
+                    'success': False,
+                    'results': [],
+                    'source': 'Wikipedia',
+                    'query': query,
+                    'count': 0,
+                    'note': 'No accessible Wikipedia articles found for this query'
                 }
         except Exception as e:
             logger.error(f"Wikipedia search failed: {e}")
             return {
+                'success': False,
+                'results': [],
+                'source': 'Wikipedia',
+                'query': query,
+                'count': 0,
+                'note': f"Wikipedia search failed: {str(e)}"
             }
     def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
             if result.success:
                 print(f"✅ Success: {result.result.get('message', 'No message')}")
+                search_engine = result.result.get('source', 'unknown')
                 print(f"   Search engine: {search_engine}")
                 if result.result.get('found'):