Spaces:
Sleeping
Sleeping
Chris
committed on
Commit
·
4d128ff
1
Parent(s):
73eb248
Final 7.8.3
Browse files
- src/agents/router.py +4 -4
- src/tools/final_answer_tool.py +1 -2
- src/tools/web_search_tool.py +117 -32
src/agents/router.py
CHANGED
|
@@ -1067,13 +1067,13 @@ REASONING: [brief explanation]
|
|
| 1067 |
|
| 1068 |
# Map to question types
|
| 1069 |
type_mapping = {
|
| 1070 |
-
'mathematical': QuestionType.
|
| 1071 |
'text_manipulation': QuestionType.TEXT_MANIPULATION,
|
| 1072 |
'file_processing': QuestionType.FILE_PROCESSING,
|
| 1073 |
'web_research': QuestionType.WEB_RESEARCH,
|
| 1074 |
-
'reasoning': QuestionType.
|
| 1075 |
-
'factual_lookup': QuestionType.
|
| 1076 |
-
'general': QuestionType.
|
| 1077 |
}
|
| 1078 |
|
| 1079 |
question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
|
|
|
|
| 1067 |
|
| 1068 |
# Map to question types
|
| 1069 |
type_mapping = {
|
| 1070 |
+
'mathematical': QuestionType.MATHEMATICAL,
|
| 1071 |
'text_manipulation': QuestionType.TEXT_MANIPULATION,
|
| 1072 |
'file_processing': QuestionType.FILE_PROCESSING,
|
| 1073 |
'web_research': QuestionType.WEB_RESEARCH,
|
| 1074 |
+
'reasoning': QuestionType.REASONING,
|
| 1075 |
+
'factual_lookup': QuestionType.WEB_RESEARCH, # Map to web_research
|
| 1076 |
+
'general': QuestionType.UNKNOWN
|
| 1077 |
}
|
| 1078 |
|
| 1079 |
question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
|
src/tools/final_answer_tool.py
CHANGED
|
@@ -35,8 +35,7 @@ class FinalAnswerTool:
|
|
| 35 |
llm_result = self.llm_client.generate(
|
| 36 |
extraction_prompt,
|
| 37 |
tier=ModelTier.COMPLEX, # Always use most capable model
|
| 38 |
-
max_tokens=100
|
| 39 |
-
temperature=0.1 # Lower temperature for consistency
|
| 40 |
)
|
| 41 |
|
| 42 |
if llm_result.success:
|
|
|
|
| 35 |
llm_result = self.llm_client.generate(
|
| 36 |
extraction_prompt,
|
| 37 |
tier=ModelTier.COMPLEX, # Always use most capable model
|
| 38 |
+
max_tokens=100 # Keep answer concise
|
|
|
|
| 39 |
)
|
| 40 |
|
| 41 |
if llm_result.success:
|
src/tools/web_search_tool.py
CHANGED
|
@@ -129,49 +129,134 @@ class WebSearchTool(BaseTool):
|
|
| 129 |
def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
|
| 130 |
"""
|
| 131 |
Extract focused search terms from a question
|
| 132 |
-
|
| 133 |
"""
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
entities.extend(quoted_phrases)
|
| 143 |
|
| 144 |
-
# Extract
|
| 145 |
-
|
| 146 |
-
entities.extend(proper_nouns[:3]) # Limit to top 3
|
| 147 |
|
| 148 |
-
# Extract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
years = re.findall(r'\b(19|20)\d{2}\b', question)
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
#
|
| 157 |
-
if
|
| 158 |
-
|
| 159 |
else:
|
| 160 |
-
# Fallback:
|
| 161 |
-
words =
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
-
# Ensure we
|
| 168 |
-
if
|
| 169 |
-
|
|
|
|
| 170 |
|
| 171 |
-
# Log
|
| 172 |
-
logger.info(f"📝 Extracted search terms: '{
|
| 173 |
|
| 174 |
-
return
|
| 175 |
|
| 176 |
def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 177 |
"""
|
|
|
|
| 129 |
def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
    """
    Extract focused search terms from a question

    Builds a compact search query from the most informative parts of the
    question, added in priority order:

    1. quoted phrases (at most 2)
    2. capitalized word sequences that look like names/places (at most 3)
    3. four-digit years starting with 19 or 20 (at most 2)
    4. domain keywords that occur as whole words in the question (at most 3)
    5. remaining content words (at most 3), only when fewer than 4 terms
       were collected by the previous steps

    Terms are de-duplicated case-insensitively. Questions that contain one
    of the reversed marker words are un-reversed before extraction.

    Args:
        question: The natural-language question to distil.
        max_length: Maximum length of the returned query, in characters.

    Returns:
        The whitespace-normalized search query (may be empty when the
        question itself is empty).
    """
    # Function-local import: the top of the file is not visible from here,
    # so do not rely on a module-level ``import re``.
    import re

    # Special handling for backwards text questions.
    # The whole string is reversed (not each word separately) so that word
    # order and multi-word quoted phrases come out right, and it is done
    # in place rather than recursively so a reversed text that still
    # happens to contain a marker cannot recurse forever.
    if re.search(r'\.rewsna\b|etirw\b|dnatsrednu\b', question.lower()):
        question = question[::-1]

    # Remove common question starters but keep meaningful content
    question_starters = (
        r'^(what|who|when|where|why|how|which|whose)\s+',
        r'\bis\s+the\s+',
        r'\bare\s+the\s+',
        r'\bwas\s+the\s+',
        r'\bwere\s+the\s+',
        r'\bdid\s+the\s+',
        r'\bdo\s+the\s+',
        r'\bcan\s+you\s+',
        r'\bcould\s+you\s+',
        r'\bplease\s+',
        r'\btell\s+me\s+',
        r'\bfind\s+',
        r'\blist\s+',
    )
    clean_question = question
    for starter in question_starters:
        clean_question = re.sub(starter, '', clean_question, flags=re.IGNORECASE)

    # Collected terms in priority order, plus a lowercase index used for
    # case-insensitive de-duplication.
    search_parts = []
    seen = set()

    def add_term(term):
        key = term.lower()
        if key and key not in seen:
            seen.add(key)
            search_parts.append(term)

    # 1. Quoted phrases (highest priority)
    for phrase in re.findall(r'"([^"]+)"', question)[:2]:
        add_term(phrase)

    # 2. Proper nouns and names (high priority): runs of capitalized words.
    proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', question)
    # Words that are capitalized only because they start a sentence; the
    # interrogatives are included so "Who ..." / "How ..." are not treated
    # as names.
    non_names = {
        'The', 'This', 'That', 'These', 'Those', 'In', 'On', 'At', 'To',
        'For', 'Of', 'With', 'By',
        'What', 'Who', 'When', 'Where', 'Why', 'How', 'Which', 'Whose',
    }
    meaningful_nouns = [noun for noun in proper_nouns if noun not in non_names]
    for noun in meaningful_nouns[:3]:
        add_term(noun)

    # 3. Years (medium priority). The group must be non-capturing:
    # with a capturing group re.findall returns only the '19'/'20' prefix.
    for year in re.findall(r'\b(?:19|20)\d{2}\b', question)[:2]:
        add_term(year)

    # 4. Domain keywords present in the question
    domain_keywords = {
        'music': ['album', 'albums', 'song', 'songs', 'artist', 'band', 'music', 'released', 'published'],
        'sports': ['player', 'team', 'game', 'match', 'season', 'championship', 'league'],
        'science': ['research', 'study', 'paper', 'journal', 'scientist', 'experiment'],
        'technology': ['software', 'program', 'code', 'website', 'application', 'system'],
        'geography': ['country', 'city', 'place', 'location', 'region', 'area'],
        'history': ['year', 'century', 'period', 'era', 'historical', 'ancient'],
        'wikipedia': ['wikipedia', 'article', 'featured', 'promoted', 'nomination', 'nominated'],
        'competition': ['competition', 'contest', 'award', 'winner', 'recipient', 'prize'],
    }
    # Match whole words only; substring matching would let e.g. 'era'
    # fire on "several" or 'city' on "capacity".
    question_tokens = set(re.findall(r'\b\w+\b', question.lower()))
    unique_keywords = [
        keyword
        for keywords in domain_keywords.values()
        for keyword in keywords
        if keyword in question_tokens and keyword not in seen
    ]
    for keyword in unique_keywords[:3]:
        add_term(keyword)

    # 5. Key content words (lower priority), only if we need more terms
    if len(search_parts) < 4:
        stop_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
            'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during',
            'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that',
            'these', 'those', 'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he',
            'him', 'his', 'she', 'her', 'it', 'its', 'they', 'them', 'their',
            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has',
            'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
            'may', 'might', 'must', 'can',
        }
        words = re.findall(r'\b\w+\b', clean_question.lower())
        content_words = [w for w in words if w not in stop_words and len(w) > 2]
        for word in content_words[:3]:
            add_term(word)

    # Build the final search query
    if search_parts:
        search_query = ' '.join(search_parts)
    else:
        # Fallback: use the first few words of the question
        search_query = ' '.join(question.split()[:6])

    # Collapse runs of whitespace
    search_query = ' '.join(search_query.split())

    # Truncate at a word boundary if too long
    if len(search_query) > max_length:
        search_query = search_query[:max_length].rsplit(' ', 1)[0]

    # Ensure we have something to search for
    if not search_query.strip():
        search_query = ' '.join(question.split()[:3])

    # Log for debugging (`logger` is the module-level logger of this file)
    logger.info(
        "📝 Extracted search terms: '%s' from question: '%s...'",
        search_query,
        question[:100],
    )

    return search_query.strip()
|
| 260 |
|
| 261 |
def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 262 |
"""
|