Chris committed on
Commit
4d128ff
·
1 Parent(s): 73eb248

Final 7.8.3

Browse files
src/agents/router.py CHANGED
@@ -1067,13 +1067,13 @@ REASONING: [brief explanation]
1067
 
1068
  # Map to question types
1069
  type_mapping = {
1070
- 'mathematical': QuestionType.QUANTITATIVE_ANALYSIS,
1071
  'text_manipulation': QuestionType.TEXT_MANIPULATION,
1072
  'file_processing': QuestionType.FILE_PROCESSING,
1073
  'web_research': QuestionType.WEB_RESEARCH,
1074
- 'reasoning': QuestionType.COMPLEX_REASONING,
1075
- 'factual_lookup': QuestionType.FACTUAL_LOOKUP,
1076
- 'general': QuestionType.GENERAL_INQUIRY
1077
  }
1078
 
1079
  question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
 
1067
 
1068
  # Map to question types
1069
  type_mapping = {
1070
+ 'mathematical': QuestionType.MATHEMATICAL,
1071
  'text_manipulation': QuestionType.TEXT_MANIPULATION,
1072
  'file_processing': QuestionType.FILE_PROCESSING,
1073
  'web_research': QuestionType.WEB_RESEARCH,
1074
+ 'reasoning': QuestionType.REASONING,
1075
+ 'factual_lookup': QuestionType.WEB_RESEARCH, # Map to web_research
1076
+ 'general': QuestionType.UNKNOWN
1077
  }
1078
 
1079
  question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
src/tools/final_answer_tool.py CHANGED
@@ -35,8 +35,7 @@ class FinalAnswerTool:
35
  llm_result = self.llm_client.generate(
36
  extraction_prompt,
37
  tier=ModelTier.COMPLEX, # Always use most capable model
38
- max_tokens=100, # Keep answer concise
39
- temperature=0.1 # Lower temperature for consistency
40
  )
41
 
42
  if llm_result.success:
 
35
  llm_result = self.llm_client.generate(
36
  extraction_prompt,
37
  tier=ModelTier.COMPLEX, # Always use most capable model
38
+ max_tokens=100 # Keep answer concise
 
39
  )
40
 
41
  if llm_result.success:
src/tools/web_search_tool.py CHANGED
@@ -129,49 +129,134 @@ class WebSearchTool(BaseTool):
129
  def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
130
  """
131
  Extract focused search terms from a question
132
- Prioritizes key entities, dates, and specific information
133
  """
134
- # Remove common question words first
135
- question_clean = re.sub(r'\b(what|who|when|where|why|how|is|are|was|were|did|do|does|can|could|should|would)\b', '', question.lower())
136
-
137
- # Extract key patterns first
138
- entities = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Extract quoted phrases (highest priority)
141
- quoted_phrases = re.findall(r'"([^"]+)"', question)
142
- entities.extend(quoted_phrases)
143
 
144
- # Extract proper nouns (names, places, organizations)
145
- proper_nouns = re.findall(r'\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b', question)
146
- entities.extend(proper_nouns[:3]) # Limit to top 3
147
 
148
- # Extract years and dates
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  years = re.findall(r'\b(19|20)\d{2}\b', question)
150
- entities.extend(years)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- # Extract numbers that might be important
153
- numbers = re.findall(r'\b\d+\b', question)
154
- entities.extend(numbers[:2]) # Limit to first 2 numbers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- # If we have good entities, use them primarily
157
- if entities:
158
- search_terms = ' '.join(entities[:6]) # Use top 6 entities
159
  else:
160
- # Fallback: clean the question and extract key words
161
- words = question_clean.split()
162
- # Remove very common words
163
- stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that', 'these', 'those', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves'}
164
- filtered_words = [w for w in words if w.lower() not in stop_words and len(w) > 2]
165
- search_terms = ' '.join(filtered_words[:8]) # Use top 8 content words
 
 
 
 
166
 
167
- # Ensure we don't exceed max length
168
- if len(search_terms) > max_length:
169
- search_terms = search_terms[:max_length].rsplit(' ', 1)[0] # Cut at word boundary
 
170
 
171
- # Log the extraction for debugging
172
- logger.info(f"📝 Extracted search terms: '{search_terms}' from question: '{question[:100]}...'")
173
 
174
- return search_terms.strip()
175
 
176
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
177
  """
 
129
  def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
130
  """
131
  Extract focused search terms from a question
132
+ Intelligently builds search queries prioritizing key information
133
  """
134
+ import re
135
+
136
+ # Special handling for backwards text questions
137
+ if re.search(r'\.rewsna\b|etirw\b|dnatsrednu\b', question.lower()):
138
+ # This is backwards text - reverse it
139
+ words = question.split()
140
+ reversed_words = [word[::-1] for word in words]
141
+ reversed_question = ' '.join(reversed_words)
142
+ return self._extract_search_terms(reversed_question, max_length)
143
+
144
+ # Remove common question starters but keep meaningful content
145
+ clean_question = question
146
+ question_starters = [
147
+ r'^(what|who|when|where|why|how|which|whose)\s+',
148
+ r'\bis\s+the\s+',
149
+ r'\bare\s+the\s+',
150
+ r'\bwas\s+the\s+',
151
+ r'\bwere\s+the\s+',
152
+ r'\bdid\s+the\s+',
153
+ r'\bdo\s+the\s+',
154
+ r'\bcan\s+you\s+',
155
+ r'\bcould\s+you\s+',
156
+ r'\bplease\s+',
157
+ r'\btell\s+me\s+',
158
+ r'\bfind\s+',
159
+ r'\blist\s+',
160
+ ]
161
 
162
+ for starter in question_starters:
163
+ clean_question = re.sub(starter, '', clean_question, flags=re.IGNORECASE)
 
164
 
165
+ # Extract key components in priority order
166
+ search_parts = []
 
167
 
168
+ # 1. Extract quoted phrases (highest priority)
169
+ quoted_phrases = re.findall(r'"([^"]+)"', question)
170
+ for phrase in quoted_phrases[:2]: # Max 2 quoted phrases
171
+ search_parts.append(phrase)
172
+
173
+ # 2. Extract proper nouns and names (high priority)
174
+ # Look for capitalized words that are likely names/places
175
+ proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', question)
176
+ # Filter out common words that might be capitalized
177
+ common_caps = {'The', 'This', 'That', 'These', 'Those', 'In', 'On', 'At', 'To', 'For', 'Of', 'With', 'By'}
178
+ meaningful_nouns = [noun for noun in proper_nouns if noun not in common_caps]
179
+ search_parts.extend(meaningful_nouns[:3]) # Max 3 proper nouns
180
+
181
+ # 3. Extract years and dates (medium priority)
182
  years = re.findall(r'\b(19|20)\d{2}\b', question)
183
+ search_parts.extend(years[:2]) # Max 2 years
184
+
185
+ # 4. Extract specific important keywords based on question context
186
+ important_keywords = []
187
+
188
+ # Look for specific domains/topics
189
+ domain_keywords = {
190
+ 'music': ['album', 'albums', 'song', 'songs', 'artist', 'band', 'music', 'released', 'published'],
191
+ 'sports': ['player', 'team', 'game', 'match', 'season', 'championship', 'league'],
192
+ 'science': ['research', 'study', 'paper', 'journal', 'scientist', 'experiment'],
193
+ 'technology': ['software', 'program', 'code', 'website', 'application', 'system'],
194
+ 'geography': ['country', 'city', 'place', 'location', 'region', 'area'],
195
+ 'history': ['year', 'century', 'period', 'era', 'historical', 'ancient'],
196
+ 'wikipedia': ['wikipedia', 'article', 'featured', 'promoted', 'nomination', 'nominated'],
197
+ 'competition': ['competition', 'contest', 'award', 'winner', 'recipient', 'prize']
198
+ }
199
 
200
+ question_lower = question.lower()
201
+ for domain, keywords in domain_keywords.items():
202
+ for keyword in keywords:
203
+ if keyword in question_lower:
204
+ important_keywords.append(keyword)
205
+
206
+ # Add unique important keywords
207
+ unique_keywords = []
208
+ for keyword in important_keywords:
209
+ if keyword not in [part.lower() for part in search_parts]:
210
+ unique_keywords.append(keyword)
211
+ search_parts.extend(unique_keywords[:3]) # Max 3 domain keywords
212
+
213
+ # 5. Extract key content words (lower priority)
214
+ if len(search_parts) < 4: # Only if we need more terms
215
+ # Remove stop words and get meaningful content
216
+ stop_words = {
217
+ 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
218
+ 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during',
219
+ 'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that',
220
+ 'these', 'those', 'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he',
221
+ 'him', 'his', 'she', 'her', 'it', 'its', 'they', 'them', 'their',
222
+ 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has',
223
+ 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
224
+ 'may', 'might', 'must', 'can'
225
+ }
226
+
227
+ # Extract words, clean them, and filter
228
+ words = re.findall(r'\b\w+\b', clean_question.lower())
229
+ content_words = [w for w in words if w not in stop_words and len(w) > 2]
230
+
231
+ # Add important content words not already included
232
+ for word in content_words[:3]:
233
+ if word not in [part.lower() for part in search_parts]:
234
+ search_parts.append(word)
235
 
236
+ # Build the final search query
237
+ if search_parts:
238
+ search_query = ' '.join(search_parts)
239
  else:
240
+ # Fallback: use first few meaningful words
241
+ words = question.split()[:6]
242
+ search_query = ' '.join(words)
243
+
244
+ # Clean up and ensure reasonable length
245
+ search_query = ' '.join(search_query.split()) # Remove extra whitespace
246
+
247
+ # Truncate at word boundary if too long
248
+ if len(search_query) > max_length:
249
+ search_query = search_query[:max_length].rsplit(' ', 1)[0]
250
 
251
+ # Ensure we have something to search for
252
+ if not search_query.strip():
253
+ search_query = question.split()[:3] # Use first 3 words as absolute fallback
254
+ search_query = ' '.join(search_query)
255
 
256
+ # Log for debugging
257
+ logger.info(f"📝 Extracted search terms: '{search_query}' from question: '{question[:100]}...'")
258
 
259
+ return search_query.strip()
260
 
261
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
262
  """