Chris committed on
Commit
6afa67b
·
1 Parent(s): b55bafd

Final 7.6.3

Browse files
src/agents/router.py CHANGED
@@ -24,49 +24,43 @@ class RouterAgent:
24
 
25
  def process(self, state: GAIAAgentState) -> GAIAAgentState:
26
  """
27
- Enhanced routing with multi-phase problem decomposition
28
  """
29
- logger.info("🧭 Router: Starting multi-phase question analysis")
30
- state.add_processing_step("Router: Multi-phase analysis initiated")
31
 
32
  try:
33
- # Phase 1: Structural Analysis
34
- structural_analysis = self._analyze_question_structure(state.question)
35
- state.add_processing_step(f"Router: Structure = {structural_analysis['type']}")
36
 
37
- # Phase 2: Information Requirements Analysis
38
- info_requirements = self._analyze_information_needs(state.question, structural_analysis)
39
- state.add_processing_step(f"Router: Needs = {info_requirements['primary_need']}")
40
 
41
- # Phase 3: Strategy Planning
42
- execution_strategy = self._plan_execution_strategy(state.question, structural_analysis, info_requirements)
43
- state.add_processing_step(f"Router: Strategy = {execution_strategy['approach']}")
44
 
45
- # Phase 4: Agent Selection and Sequencing
46
- agent_sequence = self._select_agent_sequence(execution_strategy, info_requirements)
47
-
48
- # Store analysis in state for agents to use
49
  state.router_analysis = {
50
- 'structural': structural_analysis,
51
- 'requirements': info_requirements,
52
- 'strategy': execution_strategy,
53
- 'sequence': agent_sequence
54
  }
55
 
56
- logger.info(f"✅ Routing complete: {structural_analysis['type']} -> {agent_sequence}")
57
- state.add_processing_step(f"Router: Selected agents = {agent_sequence}")
58
 
59
- # Set agent sequence for workflow
60
- state.agent_sequence = agent_sequence
61
  return state
62
 
63
  except Exception as e:
64
- error_msg = f"Router analysis failed: {str(e)}"
65
  logger.error(error_msg)
66
  state.add_error(error_msg)
67
 
68
  # Fallback to basic routing
69
- state.agent_sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
 
 
 
70
  return state
71
 
72
  def route_question(self, state: GAIAAgentState) -> GAIAAgentState:
@@ -826,4 +820,302 @@ REASONING: [brief explanation]
826
  sequence.remove('synthesizer')
827
  sequence.append('synthesizer')
828
 
829
- return sequence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def process(self, state: GAIAAgentState) -> GAIAAgentState:
26
  """
27
+ Enhanced router processing with improved classification and planning
28
  """
29
+ logger.info("🧭 Router: Starting enhanced multi-phase analysis")
30
+ state.add_processing_step("Router: Enhanced multi-phase question analysis")
31
 
32
  try:
33
+ # Enhanced classification
34
+ classification_result = self._classify_question_enhanced(state.question)
 
35
 
36
+ state.question_type = classification_result['question_type']
37
+ state.routing_decision = classification_result['reasoning']
 
38
 
39
+ # Select agents based on enhanced classification
40
+ agents = self._select_agents_for_type(classification_result)
41
+ state.selected_agents = agents
42
 
43
+ # Store enhanced analysis for downstream agents
 
 
 
44
  state.router_analysis = {
45
+ 'classification': classification_result,
46
+ 'selected_agents': [a.value for a in agents],
47
+ 'confidence': classification_result['confidence']
 
48
  }
49
 
50
+ logger.info(f"✅ Enhanced routing: {classification_result['type']} -> {[a.value for a in agents]}")
 
51
 
 
 
52
  return state
53
 
54
  except Exception as e:
55
+ error_msg = f"Enhanced router analysis failed: {str(e)}"
56
  logger.error(error_msg)
57
  state.add_error(error_msg)
58
 
59
  # Fallback to basic routing
60
+ state.question_type = QuestionType.GENERAL_INQUIRY
61
+ state.selected_agents = [AgentRole.WEB_RESEARCHER, AgentRole.REASONING_AGENT, AgentRole.SYNTHESIZER]
62
+ state.routing_decision = f"Enhanced routing failed, using fallback: {error_msg}"
63
+
64
  return state
65
 
66
  def route_question(self, state: GAIAAgentState) -> GAIAAgentState:
 
820
  sequence.remove('synthesizer')
821
  sequence.append('synthesizer')
822
 
823
+ return sequence
824
+
825
+ def _classify_question_enhanced(self, question: str) -> Dict[str, Any]:
826
+ """Enhanced question classification using better pattern matching and LLM analysis"""
827
+
828
+ question_lower = question.lower()
829
+
830
+ # Enhanced pattern classification
831
+ pattern_classification = self._classify_by_enhanced_patterns(question_lower, question)
832
+
833
+ # LLM-based classification for complex cases
834
+ llm_classification = self._classify_with_llm(question)
835
+
836
+ # Combine both approaches
837
+ final_classification = self._combine_classifications(pattern_classification, llm_classification, question)
838
+
839
+ logger.info(f"🤖 Enhanced classification: Pattern={pattern_classification['type']}, LLM={llm_classification['type']}, Final={final_classification['type']}")
840
+
841
+ return final_classification
842
+
843
+ def _classify_by_enhanced_patterns(self, question_lower: str, original_question: str) -> Dict[str, Any]:
844
+ """Enhanced pattern-based classification with better accuracy"""
845
+
846
+ # Mathematical/counting questions (high confidence patterns)
847
+ mathematical_patterns = [
848
+ r'\bhow many\b',
849
+ r'\bcount\b.*\b(of|the)\b',
850
+ r'\bnumber of\b',
851
+ r'\btotal\b.*\b(of|number)\b',
852
+ r'\bcalculate\b',
853
+ r'\bsum\b.*\bof\b',
854
+ r'\bhow much\b',
855
+ r'\bquantity\b'
856
+ ]
857
+
858
+ if any(re.search(pattern, question_lower) for pattern in mathematical_patterns):
859
+ # Check for temporal constraints
860
+ temporal_indicators = ['between', 'from', 'during', 'in', r'\b(19|20)\d{2}\b']
861
+ has_temporal = any(re.search(indicator, question_lower) for indicator in temporal_indicators)
862
+
863
+ return {
864
+ 'type': 'mathematical',
865
+ 'confidence': 0.9,
866
+ 'subtype': 'temporal_counting' if has_temporal else 'general_counting',
867
+ 'reasoning': 'Strong mathematical/counting indicators found'
868
+ }
869
+
870
+ # Text manipulation questions
871
+ text_manipulation_patterns = [
872
+ r'\bopposite\b',
873
+ r'\breverse\b',
874
+ r'\bbackwards\b',
875
+ r'\bdecode\b',
876
+ r'\btranslate\b',
877
+ r'\bconvert\b',
878
+ r'\.rewsna', # Common in reversed text questions
879
+ r'\bcipher\b',
880
+ r'\bencrypt\b'
881
+ ]
882
+
883
+ if any(re.search(pattern, question_lower) for pattern in text_manipulation_patterns):
884
+ return {
885
+ 'type': 'text_manipulation',
886
+ 'confidence': 0.85,
887
+ 'subtype': 'text_processing',
888
+ 'reasoning': 'Text manipulation patterns detected'
889
+ }
890
+
891
+ # File/code processing questions
892
+ file_patterns = [
893
+ r'\battached\b.*\b(file|image|document|excel|csv|python|code)\b',
894
+ r'\bfile\b.*\b(contains|attached|uploaded)\b',
895
+ r'\b(image|photo|picture)\b.*\b(shows|contains|attached)\b',
896
+ r'\bcode\b.*\b(attached|file|script)\b',
897
+ r'\bspreadsheet\b',
898
+ r'\b\.py\b|\b\.csv\b|\b\.xlsx\b|\b\.png\b|\b\.jpg\b'
899
+ ]
900
+
901
+ if any(re.search(pattern, question_lower) for pattern in file_patterns):
902
+ return {
903
+ 'type': 'file_processing',
904
+ 'confidence': 0.9,
905
+ 'subtype': 'file_analysis',
906
+ 'reasoning': 'File processing indicators found'
907
+ }
908
+
909
+ # Web research questions (specific indicators)
910
+ web_research_patterns = [
911
+ r'\bwikipedia\b.*\barticle\b',
912
+ r'\bfeatured article\b',
913
+ r'\bpromoted\b.*\b(in|during)\b.*\b(19|20)\d{2}\b',
914
+ r'\bnominated\b.*\bby\b',
915
+ r'\byoutube\b.*\bvideo\b',
916
+ r'\bwatch\?v=\b',
917
+ r'\bhttps?://\b',
918
+ r'\bwebsite\b|\burl\b'
919
+ ]
920
+
921
+ if any(re.search(pattern, question_lower) for pattern in web_research_patterns):
922
+ return {
923
+ 'type': 'web_research',
924
+ 'confidence': 0.8,
925
+ 'subtype': 'specific_lookup',
926
+ 'reasoning': 'Web-specific content indicators found'
927
+ }
928
+
929
+ # Reasoning/analysis questions
930
+ reasoning_patterns = [
931
+ r'\banalyze\b|\banalysis\b',
932
+ r'\bcompare\b|\bcomparison\b',
933
+ r'\bexplain\b|\bexplanation\b',
934
+ r'\bwhy\b.*\b(is|are|was|were|do|does|did)\b',
935
+ r'\bhow\b.*\b(does|do|did|can|could|would)\b',
936
+ r'\bwhat.*difference\b',
937
+ r'\bwhat.*relationship\b'
938
+ ]
939
+
940
+ if any(re.search(pattern, question_lower) for pattern in reasoning_patterns):
941
+ return {
942
+ 'type': 'reasoning',
943
+ 'confidence': 0.7,
944
+ 'subtype': 'analytical_reasoning',
945
+ 'reasoning': 'Reasoning/analysis patterns detected'
946
+ }
947
+
948
+ # General factual questions
949
+ factual_patterns = [
950
+ r'\bwho\b.*\b(is|was|are|were)\b',
951
+ r'\bwhat\b.*\b(is|was|are|were)\b',
952
+ r'\bwhen\b.*\b(did|was|were|is|are)\b',
953
+ r'\bwhere\b.*\b(is|was|are|were)\b',
954
+ r'\bwhich\b.*\b(is|was|are|were)\b'
955
+ ]
956
+
957
+ if any(re.search(pattern, question_lower) for pattern in factual_patterns):
958
+ return {
959
+ 'type': 'factual_lookup',
960
+ 'confidence': 0.6,
961
+ 'subtype': 'general_factual',
962
+ 'reasoning': 'General factual question patterns'
963
+ }
964
+
965
+ # Default classification
966
+ return {
967
+ 'type': 'general',
968
+ 'confidence': 0.4,
969
+ 'subtype': 'unclassified',
970
+ 'reasoning': 'No specific patterns matched'
971
+ }
972
+
973
+ def _classify_with_llm(self, question: str) -> Dict[str, Any]:
974
+ """LLM-based classification for complex questions"""
975
+
976
+ classification_prompt = f"""
977
+ Analyze this question and classify it into one of these categories:
978
+
979
+ Categories:
980
+ - mathematical: Questions asking for counts, calculations, quantities
981
+ - text_manipulation: Questions involving text reversal, encoding, word puzzles
982
+ - file_processing: Questions about attached files, images, code, data
983
+ - web_research: Questions requiring web search, Wikipedia lookup, current information
984
+ - reasoning: Questions requiring analysis, comparison, logical deduction
985
+ - factual_lookup: Simple fact-based questions about people, places, events
986
+
987
+ Question: {question}
988
+
989
+ Respond with just the category name and a brief reason (max 10 words).
990
+ Format: category_name: reason
991
+
992
+ Classification:"""
993
+
994
+ try:
995
+ llm_result = self.llm_client.generate(
996
+ classification_prompt,
997
+ tier=ModelTier.ROUTER, # Use fast model for classification
998
+ max_tokens=50
999
+ )
1000
+
1001
+ if llm_result.success:
1002
+ response = llm_result.response.strip().lower()
1003
+
1004
+ # Parse the response
1005
+ if ':' in response:
1006
+ category, reason = response.split(':', 1)
1007
+ category = category.strip()
1008
+ reason = reason.strip()
1009
+ else:
1010
+ category = response.split()[0] if response.split() else 'general'
1011
+ reason = 'llm classification'
1012
+
1013
+ # Validate category
1014
+ valid_categories = ['mathematical', 'text_manipulation', 'file_processing', 'web_research', 'reasoning', 'factual_lookup']
1015
+ if category not in valid_categories:
1016
+ category = 'general'
1017
+
1018
+ return {
1019
+ 'type': category,
1020
+ 'confidence': 0.7,
1021
+ 'reasoning': f'LLM: {reason}'
1022
+ }
1023
+ else:
1024
+ return {
1025
+ 'type': 'general',
1026
+ 'confidence': 0.3,
1027
+ 'reasoning': 'LLM classification failed'
1028
+ }
1029
+
1030
+ except Exception as e:
1031
+ logger.warning(f"LLM classification failed: {e}")
1032
+ return {
1033
+ 'type': 'general',
1034
+ 'confidence': 0.3,
1035
+ 'reasoning': 'LLM classification error'
1036
+ }
1037
+
1038
+ def _combine_classifications(self, pattern_result: Dict[str, Any], llm_result: Dict[str, Any], question: str) -> Dict[str, Any]:
1039
+ """Combine pattern and LLM classifications for final decision"""
1040
+
1041
+ pattern_type = pattern_result['type']
1042
+ pattern_confidence = pattern_result['confidence']
1043
+ llm_type = llm_result['type']
1044
+ llm_confidence = llm_result['confidence']
1045
+
1046
+ # If pattern matching has high confidence, trust it
1047
+ if pattern_confidence >= 0.8:
1048
+ final_type = pattern_type
1049
+ final_confidence = pattern_confidence
1050
+ reasoning = f"High confidence pattern match: {pattern_result['reasoning']}"
1051
+
1052
+ # If both agree, boost confidence
1053
+ elif pattern_type == llm_type:
1054
+ final_type = pattern_type
1055
+ final_confidence = min(0.95, (pattern_confidence + llm_confidence) / 2 + 0.1)
1056
+ reasoning = f"Pattern and LLM agree: {pattern_type}"
1057
+
1058
+ # If they disagree, use the one with higher confidence
1059
+ elif pattern_confidence > llm_confidence:
1060
+ final_type = pattern_type
1061
+ final_confidence = pattern_confidence * 0.9 # Slight penalty for disagreement
1062
+ reasoning = f"Pattern-based: {pattern_result['reasoning']}"
1063
+ else:
1064
+ final_type = llm_type
1065
+ final_confidence = llm_confidence * 0.9 # Slight penalty for disagreement
1066
+ reasoning = f"LLM-based: {llm_result['reasoning']}"
1067
+
1068
+ # Map to question types
1069
+ type_mapping = {
1070
+ 'mathematical': QuestionType.QUANTITATIVE_ANALYSIS,
1071
+ 'text_manipulation': QuestionType.TEXT_MANIPULATION,
1072
+ 'file_processing': QuestionType.FILE_PROCESSING,
1073
+ 'web_research': QuestionType.WEB_RESEARCH,
1074
+ 'reasoning': QuestionType.COMPLEX_REASONING,
1075
+ 'factual_lookup': QuestionType.FACTUAL_LOOKUP,
1076
+ 'general': QuestionType.GENERAL_INQUIRY
1077
+ }
1078
+
1079
+ question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
1080
+
1081
+ return {
1082
+ 'type': final_type,
1083
+ 'question_type': question_type,
1084
+ 'confidence': final_confidence,
1085
+ 'reasoning': reasoning,
1086
+ 'pattern_result': pattern_result,
1087
+ 'llm_result': llm_result
1088
+ }
1089
+
1090
+ def _select_agents_for_type(self, classification_result: Dict[str, Any]) -> List[AgentRole]:
1091
+ """Select appropriate agents based on enhanced classification"""
1092
+
1093
+ question_type = classification_result['type']
1094
+ confidence = classification_result['confidence']
1095
+
1096
+ # Agent selection based on question type
1097
+ if question_type == 'mathematical':
1098
+ agents = [AgentRole.WEB_RESEARCHER, AgentRole.REASONING_AGENT]
1099
+ elif question_type == 'text_manipulation':
1100
+ agents = [AgentRole.REASONING_AGENT]
1101
+ elif question_type == 'file_processing':
1102
+ agents = [AgentRole.FILE_PROCESSOR, AgentRole.REASONING_AGENT]
1103
+ elif question_type == 'web_research':
1104
+ agents = [AgentRole.WEB_RESEARCHER]
1105
+ elif question_type == 'reasoning':
1106
+ agents = [AgentRole.REASONING_AGENT, AgentRole.WEB_RESEARCHER]
1107
+ elif question_type == 'factual_lookup':
1108
+ agents = [AgentRole.WEB_RESEARCHER]
1109
+ else:
1110
+ # General questions - try multiple approaches
1111
+ agents = [AgentRole.WEB_RESEARCHER, AgentRole.REASONING_AGENT]
1112
+
1113
+ # Always add synthesizer
1114
+ agents.append(AgentRole.SYNTHESIZER)
1115
+
1116
+ # If confidence is low, add more agents for better coverage
1117
+ if confidence < 0.6:
1118
+ if AgentRole.WEB_RESEARCHER not in agents:
1119
+ agents.insert(-1, AgentRole.WEB_RESEARCHER) # Insert before synthesizer
1120
+
1121
+ return agents
src/agents/web_researcher.py CHANGED
@@ -589,23 +589,165 @@ class WebResearchAgent:
589
  return self._create_failure_result("YouTube research failed")
590
 
591
  def _research_web_general(self, state: GAIAAgentState) -> AgentResult:
592
- """General web search research"""
593
 
 
594
  search_terms = self._extract_search_terms(state.question)
595
 
596
- logger.info(f"Web search for: {search_terms}")
597
 
598
- # Perform web search
599
- web_result = self.web_search_tool.execute({
600
- "query": search_terms,
601
- "action": "search",
602
- "limit": 5
603
- })
604
 
605
  if web_result.success and web_result.result.get('found'):
606
- return self._analyze_web_search_result(state, web_result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  else:
608
- return self._create_failure_result("Web search failed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
 
610
  def _research_url_content(self, state: GAIAAgentState) -> AgentResult:
611
  """Extract and analyze content from specific URLs"""
@@ -760,128 +902,58 @@ class WebResearchAgent:
760
 
761
  return ' '.join(topic_words[:3]) if topic_words else "topic"
762
 
763
- def _extract_search_terms(self, question: str, max_length: int = 100) -> str:
764
  """
765
- Extract optimized search terms from question
766
- Prioritizes important terms while staying under length limits
767
  """
 
 
768
 
769
- # Clean the question first
770
- clean_question = re.sub(r'[^\w\s\-]', ' ', question.lower())
771
- words = clean_question.split()
772
-
773
- # Remove common stop words but keep question words
774
- stop_words = {
775
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
776
- 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
777
- 'should', 'may', 'might', 'must', 'shall', 'can', 'to', 'of', 'in',
778
- 'on', 'at', 'by', 'for', 'with', 'from', 'as', 'but', 'or', 'and',
779
- 'if', 'then', 'than', 'this', 'that', 'these', 'those', 'i', 'you',
780
- 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them'
781
- }
782
 
783
- # Keep important question words
784
- question_words = {'who', 'what', 'when', 'where', 'why', 'how', 'which'}
 
785
 
786
- # Priority terms (always include if present)
787
- priority_terms = []
 
 
 
788
 
789
- # Extract quoted phrases first
790
- quoted_phrases = re.findall(r'"([^"]*)"', question)
791
- for phrase in quoted_phrases:
792
- if len(phrase.strip()) > 0:
793
- priority_terms.append(phrase.strip())
794
 
795
- # Extract years (4-digit numbers) - capture full years, not just prefixes
796
- years = re.findall(r'\b(?:19|20)\d{2}\b', question) # Changed from capturing group to full match
 
797
 
798
- # Extract proper nouns (capitalized words) - exclude numbers
799
- proper_nouns = []
800
- for word in question.split():
801
- clean_word = re.sub(r'[^\w]', '', word)
802
- if (clean_word and
803
- clean_word[0].isupper() and
804
- len(clean_word) > 1 and
805
- not clean_word.isdigit()): # Exclude pure numbers
806
- proper_nouns.append(clean_word)
 
807
 
808
- # Extract other meaningful numbers (but be very selective)
809
- # Only include numbers that are likely meaningful (dates, counts, etc.)
810
- meaningful_numbers = []
811
- number_matches = re.findall(r'\b\d{1,4}\b', question)
812
- for num in number_matches:
813
- # Skip very common/meaningless numbers and years already captured
814
- if (num not in ['1', '2', '3', '4', '5', '10', '20', '19', '21', '22', '23', '24', '25'] and
815
- num not in years and
816
- len(num) > 1): # Require at least 2 digits for meaningful numbers
817
- # Only include if it appears in a meaningful context
818
- if any(context in question.lower() for context in [
819
- f'{num} albums', f'{num} songs', f'{num} years', f'{num} people',
820
- f'{num} times', f'{num} days', f'{num} months', f'episode {num}',
821
- f'season {num}', f'volume {num}', f'part {num}'
822
- ]):
823
- meaningful_numbers.append(num)
824
-
825
- # Build search terms with priority
826
- search_terms = []
827
-
828
- # Add quoted phrases (highest priority)
829
- search_terms.extend(priority_terms)
830
-
831
- # Add proper nouns (high priority)
832
- search_terms.extend(proper_nouns[:5]) # Limit to avoid duplication
833
-
834
- # Add question words if present
835
- for word in words:
836
- if word in question_words and word not in search_terms:
837
- search_terms.append(word)
838
 
839
- # Add years
840
- search_terms.extend(years[:2]) # Limit to 2 years max
 
841
 
842
- # Add other important terms
843
- for word in words:
844
- if (word not in stop_words and
845
- word not in search_terms and
846
- len(word) > 2 and
847
- not word.isdigit()): # Avoid random numbers
848
- search_terms.append(word)
849
-
850
- # Stop if we have enough terms
851
- if len(' '.join(search_terms)) > max_length - 20:
852
- break
853
-
854
- # Add a few important numbers if space allows
855
- if len(' '.join(search_terms)) < max_length - 10:
856
- search_terms.extend(meaningful_numbers[:2])
857
-
858
- # Join and clean up
859
- search_query = ' '.join(search_terms)
860
-
861
- # Remove duplicates while preserving order
862
- seen = set()
863
- unique_terms = []
864
- for term in search_terms:
865
- if term.lower() not in seen:
866
- seen.add(term.lower())
867
- unique_terms.append(term)
868
-
869
- # Final cleanup and length check
870
- final_query = ' '.join(unique_terms)
871
- if len(final_query) > max_length:
872
- # Truncate to fit
873
- truncated_terms = []
874
- current_length = 0
875
- for term in unique_terms:
876
- if current_length + len(term) + 1 <= max_length:
877
- truncated_terms.append(term)
878
- current_length += len(term) + 1
879
- else:
880
- break
881
- final_query = ' '.join(truncated_terms)
882
-
883
- logger.info(f"📝 Optimized search terms: '{final_query}' from question: '{question[:50]}...'")
884
- return final_query
885
 
886
  def _extract_youtube_info(self, question: str) -> str:
887
  """Extract YouTube URL or search terms"""
 
589
  return self._create_failure_result("YouTube research failed")
590
 
591
  def _research_web_general(self, state: GAIAAgentState) -> AgentResult:
592
+ """General web research with enhanced result analysis"""
593
 
594
+ # Extract optimized search terms
595
  search_terms = self._extract_search_terms(state.question)
596
 
597
+ logger.info(f"Web research for: {search_terms}")
598
 
599
+ # Search the web
600
+ search_query = {"query": search_terms, "action": "search", "limit": 5}
601
+ web_result = self.web_search_tool.execute(search_query)
 
 
 
602
 
603
  if web_result.success and web_result.result.get('found'):
604
+ search_data = web_result.result
605
+
606
+ # Enhanced analysis with focused LLM processing
607
+ analysis_prompt = self._create_enhanced_analysis_prompt(state.question, search_data, search_terms)
608
+
609
+ # Use appropriate model tier based on complexity
610
+ model_tier = ModelTier.COMPLEX if state.complexity_assessment == "complex" else ModelTier.MAIN
611
+ llm_result = self.llm_client.generate(analysis_prompt, tier=model_tier, max_tokens=600)
612
+
613
+ if llm_result.success:
614
+ # Parse the LLM response for better confidence assessment
615
+ confidence = self._assess_answer_confidence(llm_result.response, state.question, search_data)
616
+
617
+ return AgentResult(
618
+ agent_role=AgentRole.WEB_RESEARCHER,
619
+ success=True,
620
+ result=llm_result.response,
621
+ confidence=confidence,
622
+ reasoning=f"Enhanced web search analysis of {len(search_data.get('results', []))} sources for '{search_terms}'",
623
+ tools_used=[ToolResult(
624
+ tool_name="web_search",
625
+ success=True,
626
+ result=search_data,
627
+ execution_time=web_result.execution_time
628
+ )],
629
+ model_used=llm_result.model_used,
630
+ processing_time=web_result.execution_time + llm_result.response_time,
631
+ cost_estimate=llm_result.cost_estimate
632
+ )
633
+ else:
634
+ # Fallback to best search result
635
+ results = search_data.get('results', [])
636
+ best_result = results[0] if results else {"title": "No results", "snippet": "No information found"}
637
+
638
+ return AgentResult(
639
+ agent_role=AgentRole.WEB_RESEARCHER,
640
+ success=True,
641
+ result=f"Found: {best_result.get('title', 'Unknown')} - {best_result.get('snippet', 'No description')}",
642
+ confidence=0.4,
643
+ reasoning="Web search completed but analysis failed",
644
+ tools_used=[ToolResult(
645
+ tool_name="web_search",
646
+ success=True,
647
+ result=search_data,
648
+ execution_time=web_result.execution_time
649
+ )],
650
+ model_used="fallback",
651
+ processing_time=web_result.execution_time,
652
+ cost_estimate=0.0
653
+ )
654
  else:
655
+ return self._create_failure_result(f"Web search failed for '{search_terms}': {web_result.result.get('message', 'Unknown error')}")
656
+
657
+ def _create_enhanced_analysis_prompt(self, question: str, search_data: Dict[str, Any], search_terms: str) -> str:
658
+ """Create enhanced analysis prompt for better result processing"""
659
+
660
+ results = search_data.get('results', [])
661
+ search_source = search_data.get('source', 'web')
662
+
663
+ # Format search results concisely
664
+ formatted_results = []
665
+ for i, result in enumerate(results[:4], 1): # Limit to top 4 results
666
+ title = result.get('title', 'No title')
667
+ snippet = result.get('snippet', 'No description')
668
+ url = result.get('url', '')
669
+ source = result.get('source', search_source)
670
+
671
+ formatted_results.append(f"""
672
+ Result {i} ({source}):
673
+ Title: {title}
674
+ Content: {snippet}
675
+ URL: {url}
676
+ """)
677
+
678
+ # Create focused analysis prompt
679
+ prompt = f"""
680
+ You are analyzing web search results to answer a specific question. Provide a direct, accurate answer based on the search findings.
681
+
682
+ Question: {question}
683
+
684
+ Search Terms Used: {search_terms}
685
+
686
+ Search Results:
687
+ {''.join(formatted_results)}
688
+
689
+ Instructions:
690
+ 1. Carefully read through all the search results
691
+ 2. Look for information that directly answers the question
692
+ 3. If you find a clear answer, state it concisely
693
+ 4. If the information is incomplete, state what you found and what's missing
694
+ 5. If you find no relevant information, clearly state that
695
+ 6. For questions asking for specific numbers, dates, or names, be precise
696
+ 7. Always base your answer on the search results provided
697
+
698
+ Provide your analysis and answer:"""
699
+
700
+ return prompt
701
+
702
+ def _assess_answer_confidence(self, answer: str, question: str, search_data: Dict[str, Any]) -> float:
703
+ """Assess confidence in the answer based on various factors"""
704
+
705
+ # Base confidence factors
706
+ confidence = 0.5 # Start with medium confidence
707
+
708
+ # Factor 1: Search result quality
709
+ results = search_data.get('results', [])
710
+ if len(results) >= 3:
711
+ confidence += 0.1 # More results = higher confidence
712
+
713
+ # Factor 2: Source quality
714
+ source = search_data.get('source', 'unknown')
715
+ if source == 'Wikipedia':
716
+ confidence += 0.15 # Wikipedia is generally reliable
717
+ elif source == 'DuckDuckGo':
718
+ confidence += 0.1 # General web search
719
+
720
+ # Factor 3: Answer specificity
721
+ answer_lower = answer.lower()
722
+ if any(indicator in answer_lower for indicator in [
723
+ 'no information', 'not found', 'unclear', 'unable to determine',
724
+ 'cannot find', 'no clear answer', 'insufficient information'
725
+ ]):
726
+ confidence -= 0.2 # Reduce confidence for uncertain answers
727
+
728
+ # Factor 4: Answer contains specific details
729
+ if any(pattern in answer for pattern in [
730
+ re.compile(r'\b\d{4}\b'), # Years
731
+ re.compile(r'\b\d+\b'), # Numbers
732
+ re.compile(r'\b[A-Z][a-z]+\b') # Proper nouns
733
+ ]):
734
+ confidence += 0.1 # Specific details increase confidence
735
+
736
+ # Factor 5: Answer length (very short answers might be incomplete)
737
+ if len(answer.split()) < 5:
738
+ confidence -= 0.1
739
+ elif len(answer.split()) > 50:
740
+ confidence += 0.05 # Detailed answers
741
+
742
+ # Factor 6: Question type matching
743
+ question_lower = question.lower()
744
+ if 'how many' in question_lower and re.search(r'\b\d+\b', answer):
745
+ confidence += 0.15 # Numerical answer to numerical question
746
+ elif any(q_word in question_lower for q_word in ['who', 'what', 'when', 'where']) and len(answer.split()) > 3:
747
+ confidence += 0.1 # Substantial answer to factual question
748
+
749
+ # Ensure confidence stays within bounds
750
+ return max(0.1, min(0.95, confidence))
751
 
752
  def _research_url_content(self, state: GAIAAgentState) -> AgentResult:
753
  """Extract and analyze content from specific URLs"""
 
902
 
903
  return ' '.join(topic_words[:3]) if topic_words else "topic"
904
 
905
+ def _extract_search_terms(self, question: str, max_length: int = 180) -> str:
906
  """
907
+ Improved search term extraction for better web search results
908
+ Prioritizes entities, dates, and specific terms
909
  """
910
+ # Remove common question words first
911
+ question_clean = re.sub(r'\b(what|who|when|where|why|how|is|are|was|were|did|do|does|can|could|should|would|please|tell|me|find|about)\b', '', question.lower())
912
 
913
+ # Extract key patterns first
914
+ entities = []
 
 
 
 
 
 
 
 
 
 
 
915
 
916
+ # Extract quoted phrases (highest priority)
917
+ quoted_phrases = re.findall(r'"([^"]+)"', question)
918
+ entities.extend(quoted_phrases)
919
 
920
+ # Extract proper nouns (names, places, organizations)
921
+ proper_nouns = re.findall(r'\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b', question)
922
+ # Filter out common question words that might be capitalized
923
+ filtered_nouns = [noun for noun in proper_nouns if noun.lower() not in {'you', 'i', 'me', 'my', 'the', 'a', 'an'}]
924
+ entities.extend(filtered_nouns[:4]) # Limit to top 4
925
 
926
+ # Extract years and dates (high priority for temporal questions)
927
+ years = re.findall(r'\b(19|20)\d{2}\b', question)
928
+ entities.extend(years)
 
 
929
 
930
+ # Extract important numbers that might be quantities
931
+ numbers = re.findall(r'\b\d+\b', question)
932
+ entities.extend(numbers[:2]) # Limit to first 2 numbers
933
 
934
+ # If we have good entities, use them primarily
935
+ if entities:
936
+ search_terms = ' '.join(entities[:8]) # Use top 8 entities
937
+ else:
938
+ # Fallback: clean the question and extract key words
939
+ words = question_clean.split()
940
+ # Remove very common words
941
+ stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'up', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that', 'these', 'those', 'many', 'some', 'all', 'any', 'most', 'other', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'now', 'here', 'there', 'then', 'them', 'they', 'their', 'would', 'could', 'should', 'will', 'can', 'may', 'might', 'must'}
942
+ filtered_words = [w for w in words if w.lower() not in stop_words and len(w) > 2]
943
+ search_terms = ' '.join(filtered_words[:10]) # Use top 10 content words
944
 
945
+ # Clean up the search terms
946
+ search_terms = re.sub(r'\s+', ' ', search_terms) # Remove multiple spaces
947
+ search_terms = search_terms.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
948
 
949
+ # Ensure we don't exceed max length
950
+ if len(search_terms) > max_length:
951
+ search_terms = search_terms[:max_length].rsplit(' ', 1)[0] # Cut at word boundary
952
 
953
+ # Log the extraction for debugging
954
+ logger.info(f"📝 Optimized search terms: '{search_terms}' from question: '{question[:100]}...'")
955
+
956
+ return search_terms.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
 
958
  def _extract_youtube_info(self, question: str) -> str:
959
  """Extract YouTube URL or search terms"""
src/tools/final_answer_tool.py CHANGED
@@ -21,248 +21,226 @@ class FinalAnswerTool:
21
  def __init__(self, llm_client: QwenClient):
22
  self.llm_client = llm_client
23
 
24
- def extract_final_answer(self, question: str, agent_results: str, question_type: str = "") -> Dict[str, Any]:
25
  """
26
- Extract the precise final answer in GAIA-compliant format
27
-
28
- Args:
29
- question: The original GAIA question
30
- agent_results: Combined results from multiple agents
31
- question_type: Type of question (for specialized extraction)
32
-
33
- Returns:
34
- Dict with extracted answer, confidence, and reasoning
35
  """
 
 
36
  try:
37
- logger.info("🎯 Extracting GAIA-compliant final answer")
38
-
39
- # Create specialized extraction prompt
40
  extraction_prompt = self._create_extraction_prompt(question, agent_results, question_type)
41
 
42
  # Use 72B model for precise extraction
43
- result = self.llm_client.generate(
44
  extraction_prompt,
45
- tier=ModelTier.COMPLEX, # 72B model
46
- max_tokens=50 # Force concise answers
 
47
  )
48
 
49
- if not result.success:
50
- logger.error("Final answer extraction failed")
 
 
 
 
 
 
51
  return {
52
- "answer": "Processing failed",
53
- "confidence": 0.0,
54
- "reasoning": f"Extraction failed: {result.response}"
 
 
55
  }
56
-
57
- # Parse and clean the extracted answer
58
- extracted_answer = self._clean_answer(result.response, question, question_type)
59
-
60
- # Validate answer format
61
- validation_result = self._validate_answer(extracted_answer, question_type)
62
-
63
- logger.info(f"✅ Final answer extracted: '{extracted_answer}'")
64
-
65
- return {
66
- "answer": extracted_answer,
67
- "confidence": validation_result["confidence"],
68
- "reasoning": f"Extracted using 72B model. Validation: {validation_result['status']}"
69
- }
70
-
71
  except Exception as e:
72
- error_msg = f"Final answer extraction error: {str(e)}"
73
- logger.error(error_msg)
74
- return {
75
- "answer": "Extraction error",
76
- "confidence": 0.0,
77
- "reasoning": error_msg
78
- }
79
 
80
  def _create_extraction_prompt(self, question: str, agent_results: str, question_type: str) -> str:
81
  """Create specialized extraction prompt based on question type"""
82
 
83
- base_prompt = f"""
84
- CRITICAL: This is for GAIA benchmark evaluation using EXACT MATCH comparison.
85
- Your response must be ONLY the precise answer - no explanations, no "FINAL ANSWER:", no extra text.
86
-
87
- Question: {question}
88
-
89
- Agent Analysis Results:
90
- {agent_results}
91
-
92
- EXTRACTION RULES:
93
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- # Add type-specific rules
96
- if "mathematical" in question_type.lower() or any(word in question.lower() for word in ["how many", "count", "number", "albums"]):
97
- base_prompt += """
98
- - If asking for a count/number: respond with ONLY the number (e.g., "5", "23", "0")
99
- - If asking for calculation: respond with ONLY the result (e.g., "42", "3.14", "100")
100
- - No units unless specifically requested in the question
101
- """
102
- elif "text_manipulation" in question_type.lower() or "reverse" in question.lower():
103
- base_prompt += """
104
- - If text is reversed: provide the corrected text
105
- - If asking for opposite: provide ONLY the opposite word (e.g., "right" for opposite of "left")
106
- - If asking to decode: provide ONLY the decoded answer
107
- """
108
- elif "yes" in question.lower() or "true" in question.lower() or "false" in question.lower():
109
- base_prompt += """
110
- - If yes/no question: respond with ONLY "yes" or "no" (lowercase)
111
- - If true/false question: respond with ONLY "true" or "false" (lowercase)
112
- """
113
- elif any(word in question.lower() for word in ["name", "who", "which person"]):
114
- base_prompt += """
115
- - If asking for a name: provide ONLY the name (e.g., "John Smith", "Einstein")
116
- - If asking for first name only: provide ONLY first name (e.g., "John")
117
- - If asking for last name only: provide ONLY last name (e.g., "Smith")
118
- """
119
- elif any(word in question.lower() for word in ["where", "location", "city", "country"]):
120
- base_prompt += """
121
- - If asking for location: provide ONLY the location name (e.g., "Paris", "USA", "New York")
122
- - No additional descriptors unless specifically requested
123
- """
124
  else:
125
- base_prompt += """
126
- - Provide ONLY the direct answer to the question
127
- - No explanations, context, or additional information
128
- - Be as concise as possible while being accurate
129
- """
130
 
131
- base_prompt += """
132
-
133
- EXAMPLES OF CORRECT FORMAT:
134
- - Question: "How many albums?" → Answer: "5"
135
- - Question: "What is the opposite of left?" → Answer: "right"
136
- - Question: "True or false?" → Answer: "true"
137
- - Question: "Who discovered X?" → Answer: "Einstein"
138
- - Question: "Which city?" → Answer: "London"
139
-
140
- Extract the precise answer NOW:"""
141
 
142
- return base_prompt
 
 
 
143
 
144
- def _clean_answer(self, raw_answer: str, question: str, question_type: str) -> str:
145
- """Clean and format the extracted answer"""
146
 
147
- # Remove common unwanted prefixes/suffixes
148
  answer = raw_answer.strip()
149
 
150
- # Remove common prefixes
151
  prefixes_to_remove = [
152
- "the answer is",
153
- "answer:",
154
- "final answer:",
155
- "result:",
156
- "response:",
157
- "conclusion:",
158
- "based on",
159
- "according to",
160
- "from the",
161
  ]
162
 
 
163
  for prefix in prefixes_to_remove:
164
- if answer.lower().startswith(prefix):
165
  answer = answer[len(prefix):].strip()
 
166
 
167
  # Remove quotes if they wrap the entire answer
168
  if answer.startswith('"') and answer.endswith('"'):
169
  answer = answer[1:-1]
170
- if answer.startswith("'") and answer.endswith("'"):
171
  answer = answer[1:-1]
172
 
173
- # AGGRESSIVE LENGTH ENFORCEMENT FOR GAIA
174
- # If answer is too long, extract the core information
175
- if len(answer) > 50:
176
- # For different question types, extract differently
177
- if "mathematical" in question_type.lower() or any(word in question.lower() for word in ["how many", "count", "number", "albums"]):
178
- # Extract just the number for mathematical questions
179
- number_match = re.search(r'-?\d+(?:\.\d+)?', answer)
180
- if number_match:
181
- answer = number_match.group()
182
- elif "name" in question_type.lower() or any(word in question.lower() for word in ["who", "name"]):
183
- # Extract just the name (first few words)
184
- words = answer.split()
185
- if len(words) > 3:
186
- answer = ' '.join(words[:3]) # Keep only first 3 words for names
187
- elif "location" in question_type.lower() or any(word in question.lower() for word in ["where", "city", "country"]):
188
- # Extract just the location name
189
- words = answer.split()
190
- if len(words) > 2:
191
- answer = ' '.join(words[:2]) # Keep only first 2 words for locations
192
- elif "yes_no" in question_type.lower() or any(word in answer.lower() for word in ["yes", "no", "true", "false"]):
193
- # Extract yes/no/true/false
194
- if any(word in answer.lower() for word in ["yes", "no", "true", "false"]):
195
- for word in answer.lower().split():
196
- if word in ["yes", "no", "true", "false"]:
197
- answer = word
198
- break
199
- else:
200
- # For other types, take first sentence or clause
201
- sentences = re.split(r'[.!?]', answer)
202
- if sentences:
203
- answer = sentences[0].strip()
204
- # If still too long, take first clause
205
- if len(answer) > 30:
206
- clauses = re.split(r'[,;:]', answer)
207
- if clauses:
208
- answer = clauses[0].strip()
209
-
210
- # Handle specific formatting based on question type
211
- if "text_manipulation" in question_type.lower():
212
- # For reversed text questions, ensure clean output
213
- if len(answer.split()) == 1: # Single word answer
214
- answer = answer.lower()
215
-
216
- # Final aggressive truncation if still too long
217
- if len(answer) > 40:
218
- # Split into words and take as many as fit
219
- words = answer.split()
220
- truncated_words = []
221
- current_length = 0
222
- for word in words:
223
- if current_length + len(word) + 1 <= 40:
224
- truncated_words.append(word)
225
- current_length += len(word) + 1
226
- else:
227
- break
228
- if truncated_words:
229
- answer = ' '.join(truncated_words)
230
- else:
231
- # Last resort - take first 40 characters
232
- answer = answer[:40].strip()
233
-
234
- # Remove any trailing punctuation that's not part of the answer
235
- answer = answer.rstrip('.,!?;:')
236
 
237
- return answer.strip()
238
-
239
- def _validate_answer(self, answer: str, question_type: str) -> Dict[str, Any]:
240
- """Validate the extracted answer format"""
241
-
242
- if not answer:
243
- return {"status": "empty_answer", "confidence": 0.0}
244
 
245
- # Check length - GAIA answers should be concise
246
- if len(answer) > 100:
247
- return {"status": "too_long", "confidence": 0.3}
 
 
 
248
 
249
- # Type-specific validation
250
- if "mathematical" in question_type.lower():
251
- if re.match(r'^-?\d+(?:\.\d+)?$', answer):
252
- return {"status": "valid_number", "confidence": 0.9}
253
- else:
254
- return {"status": "invalid_number_format", "confidence": 0.5}
255
 
256
- elif "yes_no" in question_type.lower():
257
- if answer.lower() in ["yes", "no", "true", "false"]:
258
- return {"status": "valid_boolean", "confidence": 0.9}
259
- else:
260
- return {"status": "invalid_boolean_format", "confidence": 0.4}
 
261
 
262
- # General validation - prefer short, direct answers
263
- if len(answer) <= 20:
264
- return {"status": "concise_answer", "confidence": 0.8}
265
- elif len(answer) <= 50:
266
- return {"status": "moderate_length", "confidence": 0.6}
267
- else:
268
- return {"status": "long_answer", "confidence": 0.4}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
def __init__(self, llm_client: QwenClient):
    """Keep a reference to the LLM client used for all answer extractions."""
    self.llm_client = llm_client
24
def extract_final_answer(self, question: str, agent_results: str, question_type: str = "general") -> Dict[str, Any]:
    """
    Extract GAIA-compliant final answer with enhanced accuracy.

    Args:
        question: Original GAIA question text.
        agent_results: Combined analysis text produced by the agents.
        question_type: Classified question type; selects the prompt specialization.

    Returns:
        Dict with keys: answer, confidence, reasoning, raw_response,
        validation_passed. Falls back to heuristic extraction when the
        LLM call fails or raises.
    """
    logger.info("🎯 Extracting GAIA-compliant final answer")

    try:
        # Build a type-specialized prompt and ask the most capable model.
        prompt = self._create_extraction_prompt(question, agent_results, question_type)

        generation = self.llm_client.generate(
            prompt,
            tier=ModelTier.COMPLEX,  # Always use most capable model
            max_tokens=100,          # Keep answer concise
            temperature=0.1,         # Lower temperature for consistency
        )

        if not generation.success:
            # LLM call failed -> fall back to simple heuristic extraction.
            return self._fallback_extraction(question, agent_results)

        # Clean, validate, and score the model's raw output.
        raw = generation.response.strip()
        cleaned = self._clean_and_validate_answer(raw, question, question_type)
        score = self._assess_answer_quality(cleaned, question, agent_results, question_type)

        return {
            "answer": cleaned,
            "confidence": score,
            "reasoning": f"Extracted from {question_type} analysis using 72B model",
            "raw_response": raw,
            # GAIA answers must be non-empty and short.
            "validation_passed": 0 < len(cleaned) <= 100,
        }

    except Exception as e:
        logger.error(f"Final answer extraction failed: {e}")
        return self._fallback_extraction(question, agent_results)
64
 
65
  def _create_extraction_prompt(self, question: str, agent_results: str, question_type: str) -> str:
66
  """Create specialized extraction prompt based on question type"""
67
 
68
+ base_instructions = """
69
+ CRITICAL: Extract the exact answer for GAIA benchmark evaluation.
70
+ Your response must be ONLY the answer - no explanations, no prefixes, no extra text.
71
+
72
+ Question: {question}
73
+
74
+ Analysis from agents:
75
+ {agent_results}
76
+
77
+ """
78
+
79
+ # Specialized instructions based on question type
80
+ if question_type == "mathematical" or "how many" in question.lower():
81
+ type_instructions = """
82
+ This is a counting/mathematical question. Respond with ONLY the number.
83
+ Examples of correct responses: "5", "42", "0"
84
+ Do NOT include words like "albums", "songs", "items", etc.
85
+ """
86
+
87
+ elif question_type == "yes_no":
88
+ type_instructions = """
89
+ This is a yes/no question. Respond with ONLY "yes" or "no".
90
+ """
91
+
92
+ elif question_type == "name" or any(word in question.lower() for word in ["who", "name"]):
93
+ type_instructions = """
94
+ This is asking for a name. Respond with ONLY the name requested.
95
+ Examples: "John Smith", "Mike102", "Einstein"
96
+ """
97
+
98
+ elif question_type == "location":
99
+ type_instructions = """
100
+ This is asking for a location. Respond with ONLY the location name.
101
+ Examples: "Paris", "New York", "LIE", "Hanoi"
102
+ """
103
+
104
+ elif question_type == "text_manipulation":
105
+ type_instructions = """
106
+ This involves text manipulation. Respond with ONLY the processed text result.
107
+ Examples: "right", "hello", "12345"
108
+ """
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  else:
111
+ type_instructions = """
112
+ Respond with ONLY the direct answer requested.
113
+ Keep it concise and specific.
114
+ """
 
115
 
116
+ ending_instructions = """
117
+
118
+ EXTRACT ONLY THE ANSWER:"""
 
 
 
 
 
 
 
119
 
120
+ return base_instructions.format(
121
+ question=question,
122
+ agent_results=agent_results[:2000] # Limit input length
123
+ ) + type_instructions + ending_instructions
124
 
125
+ def _clean_and_validate_answer(self, raw_answer: str, question: str, question_type: str) -> str:
126
+ """Clean and validate the extracted answer"""
127
 
128
+ # Remove common prefixes and suffixes
129
  answer = raw_answer.strip()
130
 
131
+ # Remove common answer prefixes
132
  prefixes_to_remove = [
133
+ "final answer:", "answer:", "the answer is:", "result:", "conclusion:",
134
+ "based on", "according to", "therefore", "thus", "so", "hence",
135
+ "final answer is", "the result is", "it is", "this is"
 
 
 
 
 
 
136
  ]
137
 
138
+ answer_lower = answer.lower()
139
  for prefix in prefixes_to_remove:
140
+ if answer_lower.startswith(prefix):
141
  answer = answer[len(prefix):].strip()
142
+ answer_lower = answer.lower()
143
 
144
  # Remove quotes if they wrap the entire answer
145
  if answer.startswith('"') and answer.endswith('"'):
146
  answer = answer[1:-1]
147
+ elif answer.startswith("'") and answer.endswith("'"):
148
  answer = answer[1:-1]
149
 
150
+ # Remove trailing punctuation that's not part of the answer
151
+ while answer and answer[-1] in '.!?:;':
152
+ answer = answer[:-1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ # Special handling for different question types
155
+ if question_type == "mathematical" or "how many" in question.lower():
156
+ # Extract just the number
157
+ numbers = re.findall(r'\b\d+\b', answer)
158
+ if numbers:
159
+ answer = numbers[0]
 
160
 
161
+ elif question_type == "yes_no":
162
+ # Normalize yes/no answers
163
+ if any(word in answer.lower() for word in ['yes', 'true', 'correct', 'right']):
164
+ answer = "yes"
165
+ elif any(word in answer.lower() for word in ['no', 'false', 'incorrect', 'wrong']):
166
+ answer = "no"
167
 
168
+ # Final cleanup
169
+ answer = answer.strip()
 
 
 
 
170
 
171
+ # Ensure answer is not empty
172
+ if not answer:
173
+ # Try to extract from the original raw answer
174
+ words = raw_answer.split()
175
+ if words:
176
+ answer = words[-1] # Take the last word as fallback
177
 
178
+ return answer
179
+
180
+ def _assess_answer_quality(self, answer: str, question: str, agent_results: str, question_type: str) -> float:
181
+ """Assess the quality/confidence of the extracted answer"""
182
+
183
+ confidence = 0.7 # Base confidence
184
+
185
+ # Factor 1: Answer length appropriateness
186
+ if len(answer) == 0:
187
+ return 0.1 # Very low confidence for empty answers
188
+ elif len(answer) > 100:
189
+ confidence -= 0.2 # Too long for GAIA
190
+ elif 1 <= len(answer) <= 50:
191
+ confidence += 0.1 # Good length
192
+
193
+ # Factor 2: Question type matching
194
+ question_lower = question.lower()
195
+
196
+ if ("how many" in question_lower or question_type == "mathematical") and re.match(r'^\d+$', answer):
197
+ confidence += 0.15 # Numeric answer to counting question
198
+ elif ("who" in question_lower or "name" in question_lower) and len(answer.split()) <= 3:
199
+ confidence += 0.1 # Name-like answer to who question
200
+ elif ("where" in question_lower) and len(answer.split()) <= 2:
201
+ confidence += 0.1 # Location-like answer
202
+ elif ("yes or no" in question_lower) and answer.lower() in ["yes", "no"]:
203
+ confidence += 0.15 # Perfect yes/no answer
204
+
205
+ # Factor 3: Answer appears in agent results (indicates it was found)
206
+ if answer.lower() in agent_results.lower():
207
+ confidence += 0.1
208
+
209
+ # Factor 4: Answer specificity
210
+ if re.search(r'\b\d{4}\b', answer): # Contains year
211
+ confidence += 0.05
212
+ if re.search(r'\b[A-Z][a-z]+\b', answer): # Contains proper noun
213
+ confidence += 0.05
214
+
215
+ # Factor 5: Common failure patterns
216
+ failure_indicators = ['unknown', 'unclear', 'not found', 'unable to determine', 'no information']
217
+ if any(indicator in answer.lower() for indicator in failure_indicators):
218
+ confidence -= 0.3
219
+
220
+ return max(0.1, min(0.95, confidence))
221
+
222
+ def _fallback_extraction(self, question: str, agent_results: str) -> Dict[str, Any]:
223
+ """Simple fallback when LLM extraction fails"""
224
+
225
+ # Try to extract a reasonable answer from agent results
226
+ lines = agent_results.split('\n')
227
+
228
+ # Look for lines that might contain answers
229
+ potential_answers = []
230
+ for line in lines:
231
+ line = line.strip()
232
+ if len(line) > 0 and len(line) < 100:
233
+ # Skip lines that are clearly explanatory
234
+ if not any(word in line.lower() for word in ['according', 'based on', 'however', 'therefore', 'because']):
235
+ potential_answers.append(line)
236
+
237
+ # Use the first reasonable answer or a fallback
238
+ answer = potential_answers[0] if potential_answers else "Unable to determine"
239
+
240
+ return {
241
+ "answer": answer,
242
+ "confidence": 0.3,
243
+ "reasoning": "Fallback extraction due to LLM failure",
244
+ "raw_response": agent_results[:100],
245
+ "validation_passed": False
246
+ }
src/tools/web_search_tool.py CHANGED
@@ -126,105 +126,119 @@ class WebSearchTool(BaseTool):
126
  """Check if text is a URL"""
127
  return bool(re.match(r'https?://', text))
128
 
129
- def _extract_search_terms(self, query: str, max_length: int = 250) -> str:
130
  """
131
- Extract key search terms from a potentially long query
 
132
  """
133
- # If query is short enough, use as-is
134
- if len(query) <= max_length:
135
- return query
136
-
137
- # Remove common stop words and extract key terms
138
- stop_words = {
139
- 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
140
- 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
141
- 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those',
142
- 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
143
- 'what', 'where', 'when', 'why', 'how', 'which', 'who', 'whose', 'whom',
144
- 'please', 'could', 'you', 'tell', 'me', 'find', 'search', 'for', 'about'
145
- }
146
-
147
- # Split into words and filter
148
- words = re.findall(r'\b\w+\b', query.lower())
149
- key_words = [word for word in words if word not in stop_words and len(word) > 2]
150
-
151
- # Keep important phrases and entities
152
- # Look for quoted phrases, proper nouns, numbers, dates
153
- important_patterns = [
154
- r'"[^"]*"', # Quoted phrases
155
- r'\b[A-Z][a-z]*(?:\s+[A-Z][a-z]*)*\b', # Proper nouns
156
- r'\b\d{4}\b', # Years
157
- r'\b\d+\b', # Numbers
158
- ]
159
 
160
- important_terms = []
161
- for pattern in important_patterns:
162
- matches = re.findall(pattern, query)
163
- important_terms.extend(matches)
164
 
165
- # Combine key words and important terms
166
- search_terms = []
 
167
 
168
- # Add important terms first (they're usually more specific)
169
- for term in important_terms:
170
- if len(' '.join(search_terms + [term])) <= max_length:
171
- search_terms.append(term)
172
 
173
- # Add key words until we hit the limit
174
- for word in key_words:
175
- potential_query = ' '.join(search_terms + [word])
176
- if len(potential_query) <= max_length:
177
- search_terms.append(word)
178
- else:
179
- break
180
 
181
- result = ' '.join(search_terms)
 
 
182
 
183
- # If still too long, truncate
184
- if len(result) > max_length:
185
- result = result[:max_length].rsplit(' ', 1)[0]
 
 
 
 
 
 
 
186
 
187
- # If we ended up with nothing, use first part of original query
188
- if not result.strip():
189
- result = query[:max_length].rsplit(' ', 1)[0]
190
 
191
- if result != query:
192
- logger.info(f"📝 Extracted search terms: '{result}' from '{query[:100]}...'")
193
 
194
- return result
195
 
196
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
197
  """
198
- Search the web using available search engines in priority order
199
  """
200
 
201
- # Extract search terms to avoid length issues
202
- search_query = self._extract_search_terms(query, max_length=250)
203
 
204
  # Try DuckDuckGo first (most comprehensive for general web search)
205
  if self.use_duckduckgo:
206
  try:
207
- return self._search_with_duckduckgo(search_query, limit, extract_content)
 
 
 
 
 
 
 
 
 
208
  except Exception as e:
209
  logger.warning(f"DuckDuckGo search failed, trying Tavily: {e}")
210
 
211
  # Try Tavily if DuckDuckGo fails and API key is available
212
  if self.use_tavily:
213
  try:
214
- return self._search_with_tavily(search_query, limit, extract_content)
 
 
 
 
 
 
 
 
 
215
  except Exception as e:
216
  logger.warning(f"Tavily search failed, trying Wikipedia: {e}")
217
 
218
  # Fallback to Wikipedia search
219
  if self.use_wikipedia:
220
- return self._search_with_wikipedia(search_query, limit)
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
- # No search engines available
 
223
  return {
224
  "query": query,
225
  "found": False,
226
- "message": "❌ No search engines available. Please install required packages.",
227
- "results": []
 
 
 
228
  }
229
 
230
  def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
 
126
  """Check if text is a URL"""
127
  return bool(re.match(r'https?://', text))
128
 
129
def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
    """
    Extract focused search terms from a question.
    Prioritizes key entities, dates, and specific information.

    Args:
        question: Natural-language question to condense.
        max_length: Maximum length of the returned search string.

    Returns:
        A space-separated string of key entities/terms, cut at a word
        boundary so it never exceeds max_length.
    """
    # Remove common question words first
    question_clean = re.sub(r'\b(what|who|when|where|why|how|is|are|was|were|did|do|does|can|could|should|would)\b', '', question.lower())

    # Extract key patterns first
    entities = []

    # Extract quoted phrases (highest priority)
    quoted_phrases = re.findall(r'"([^"]+)"', question)
    entities.extend(quoted_phrases)

    # Extract proper nouns (names, places, organizations)
    proper_nouns = re.findall(r'\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b', question)
    entities.extend(proper_nouns[:3])  # Limit to top 3

    # Extract years and dates.
    # BUGFIX: the century group must be non-capturing — with r'\b(19|20)\d{2}\b',
    # re.findall returns just the captured "19"/"20" instead of the full year.
    years = re.findall(r'\b(?:19|20)\d{2}\b', question)
    entities.extend(years)

    # Extract numbers that might be important
    numbers = re.findall(r'\b\d+\b', question)
    entities.extend(numbers[:2])  # Limit to first 2 numbers

    # If we have good entities, use them primarily
    if entities:
        search_terms = ' '.join(entities[:6])  # Use top 6 entities
    else:
        # Fallback: clean the question and extract key words
        words = question_clean.split()
        # Remove very common words
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that', 'these', 'those', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves'}
        filtered_words = [w for w in words if w.lower() not in stop_words and len(w) > 2]
        search_terms = ' '.join(filtered_words[:8])  # Use top 8 content words

    # Ensure we don't exceed max length
    if len(search_terms) > max_length:
        search_terms = search_terms[:max_length].rsplit(' ', 1)[0]  # Cut at word boundary

    # Log the extraction for debugging
    logger.info(f"📝 Extracted search terms: '{search_terms}' from question: '{question[:100]}...'")

    return search_terms.strip()
175
 
176
def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
    """
    Search the web using available search engines in priority order
    (DuckDuckGo -> Tavily -> Wikipedia) with improved search terms.

    Args:
        query: Raw query/question text.
        limit: Maximum number of results to request from each engine.
        extract_content: Whether to fetch page content (engine-dependent).

    Returns:
        A normalized dict with keys: success, found, results, query,
        source, total_found (plus a message on total failure).
    """

    # Extract clean search terms from the query
    search_query = self._extract_search_terms(query, max_length=200)

    def _package(raw: Dict[str, Any], source: str) -> Dict[str, Any]:
        # Normalize a backend result into the common response shape.
        # (Previously this dict literal was duplicated for every engine.)
        return {
            'success': True,
            'found': True,
            'results': [r.to_dict() if hasattr(r, 'to_dict') else r for r in raw['results']],
            'query': query,
            'source': source,
            'total_found': raw['count'],
        }

    # Try DuckDuckGo first (most comprehensive for general web search)
    if self.use_duckduckgo:
        try:
            ddg_result = self._search_with_duckduckgo(search_query, limit, extract_content)
            if ddg_result.get('success') and ddg_result.get('count', 0) > 0:
                return _package(ddg_result, 'DuckDuckGo')
        except Exception as e:
            logger.warning(f"DuckDuckGo search failed, trying Tavily: {e}")

    # Try Tavily if DuckDuckGo fails and API key is available
    if self.use_tavily:
        try:
            tavily_result = self._search_with_tavily(search_query, limit, extract_content)
            if tavily_result.get('success') and tavily_result.get('count', 0) > 0:
                return _package(tavily_result, 'Tavily')
        except Exception as e:
            logger.warning(f"Tavily search failed, trying Wikipedia: {e}")

    # Fallback to Wikipedia search
    if self.use_wikipedia:
        try:
            wiki_result = self._search_with_wikipedia(search_query, limit)
            if wiki_result.get('success') and wiki_result.get('count', 0) > 0:
                return _package(wiki_result, 'Wikipedia')
        except Exception as e:
            logger.warning(f"Wikipedia search failed: {e}")

    # No search engines available or all failed
    logger.warning("All search engines failed, returning empty results")
    return {
        "query": query,
        "found": False,
        "success": False,
        "message": "❌ All search engines failed or returned no results.",
        "results": [],
        "source": "none",
        "total_found": 0
    }
243
 
244
  def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]: