Chris committed on
Commit
4d128ff
·
1 Parent(s): 73eb248

Final 7.8.3

Browse files
src/agents/router.py CHANGED
@@ -1067,13 +1067,13 @@ REASONING: [brief explanation]
1067
 
1068
  # Map to question types
1069
  type_mapping = {
1070
- 'mathematical': QuestionType.QUANTITATIVE_ANALYSIS,
1071
  'text_manipulation': QuestionType.TEXT_MANIPULATION,
1072
  'file_processing': QuestionType.FILE_PROCESSING,
1073
  'web_research': QuestionType.WEB_RESEARCH,
1074
- 'reasoning': QuestionType.COMPLEX_REASONING,
1075
- 'factual_lookup': QuestionType.FACTUAL_LOOKUP,
1076
- 'general': QuestionType.GENERAL_INQUIRY
1077
  }
1078
 
1079
  question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
 
1067
 
1068
  # Map to question types
1069
  type_mapping = {
1070
+ 'mathematical': QuestionType.MATHEMATICAL,
1071
  'text_manipulation': QuestionType.TEXT_MANIPULATION,
1072
  'file_processing': QuestionType.FILE_PROCESSING,
1073
  'web_research': QuestionType.WEB_RESEARCH,
1074
+ 'reasoning': QuestionType.REASONING,
1075
+ 'factual_lookup': QuestionType.WEB_RESEARCH, # Map to web_research
1076
+ 'general': QuestionType.UNKNOWN
1077
  }
1078
 
1079
  question_type = type_mapping.get(final_type, QuestionType.GENERAL_INQUIRY)
src/tools/final_answer_tool.py CHANGED
@@ -35,8 +35,7 @@ class FinalAnswerTool:
35
  llm_result = self.llm_client.generate(
36
  extraction_prompt,
37
  tier=ModelTier.COMPLEX, # Always use most capable model
38
- max_tokens=100, # Keep answer concise
39
- temperature=0.1 # Lower temperature for consistency
40
  )
41
 
42
  if llm_result.success:
 
35
  llm_result = self.llm_client.generate(
36
  extraction_prompt,
37
  tier=ModelTier.COMPLEX, # Always use most capable model
38
+ max_tokens=100 # Keep answer concise
 
39
  )
40
 
41
  if llm_result.success:
src/tools/web_search_tool.py CHANGED
@@ -129,49 +129,134 @@ class WebSearchTool(BaseTool):
129
  def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
130
  """
131
  Extract focused search terms from a question
132
- Prioritizes key entities, dates, and specific information
133
  """
134
- # Remove common question words first
135
- question_clean = re.sub(r'\b(what|who|when|where|why|how|is|are|was|were|did|do|does|can|could|should|would)\b', '', question.lower())
136
-
137
- # Extract key patterns first
138
- entities = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Extract quoted phrases (highest priority)
141
- quoted_phrases = re.findall(r'"([^"]+)"', question)
142
- entities.extend(quoted_phrases)
143
 
144
- # Extract proper nouns (names, places, organizations)
145
- proper_nouns = re.findall(r'\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b', question)
146
- entities.extend(proper_nouns[:3]) # Limit to top 3
147
 
148
- # Extract years and dates
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  years = re.findall(r'\b(19|20)\d{2}\b', question)
150
- entities.extend(years)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- # Extract numbers that might be important
153
- numbers = re.findall(r'\b\d+\b', question)
154
- entities.extend(numbers[:2]) # Limit to first 2 numbers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- # If we have good entities, use them primarily
157
- if entities:
158
- search_terms = ' '.join(entities[:6]) # Use top 6 entities
159
  else:
160
- # Fallback: clean the question and extract key words
161
- words = question_clean.split()
162
- # Remove very common words
163
- stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that', 'these', 'those', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves'}
164
- filtered_words = [w for w in words if w.lower() not in stop_words and len(w) > 2]
165
- search_terms = ' '.join(filtered_words[:8]) # Use top 8 content words
 
 
 
 
166
 
167
- # Ensure we don't exceed max length
168
- if len(search_terms) > max_length:
169
- search_terms = search_terms[:max_length].rsplit(' ', 1)[0] # Cut at word boundary
 
170
 
171
- # Log the extraction for debugging
172
- logger.info(f"📝 Extracted search terms: '{search_terms}' from question: '{question[:100]}...'")
173
 
174
- return search_terms.strip()
175
 
176
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
177
  """
 
129
  def _extract_search_terms(self, question: str, max_length: int = 200) -> str:
130
  """
131
  Extract focused search terms from a question
132
+ Intelligently builds search queries prioritizing key information
133
  """
134
+ import re
135
+
136
+ # Special handling for backwards text questions
137
+ if re.search(r'\.rewsna\b|etirw\b|dnatsrednu\b', question.lower()):
138
+ # This is backwards text - reverse it
139
+ words = question.split()
140
+ reversed_words = [word[::-1] for word in words]
141
+ reversed_question = ' '.join(reversed_words)
142
+ return self._extract_search_terms(reversed_question, max_length)
143
+
144
+ # Remove common question starters but keep meaningful content
145
+ clean_question = question
146
+ question_starters = [
147
+ r'^(what|who|when|where|why|how|which|whose)\s+',
148
+ r'\bis\s+the\s+',
149
+ r'\bare\s+the\s+',
150
+ r'\bwas\s+the\s+',
151
+ r'\bwere\s+the\s+',
152
+ r'\bdid\s+the\s+',
153
+ r'\bdo\s+the\s+',
154
+ r'\bcan\s+you\s+',
155
+ r'\bcould\s+you\s+',
156
+ r'\bplease\s+',
157
+ r'\btell\s+me\s+',
158
+ r'\bfind\s+',
159
+ r'\blist\s+',
160
+ ]
161
 
162
+ for starter in question_starters:
163
+ clean_question = re.sub(starter, '', clean_question, flags=re.IGNORECASE)
 
164
 
165
+ # Extract key components in priority order
166
+ search_parts = []
 
167
 
168
+ # 1. Extract quoted phrases (highest priority)
169
+ quoted_phrases = re.findall(r'"([^"]+)"', question)
170
+ for phrase in quoted_phrases[:2]: # Max 2 quoted phrases
171
+ search_parts.append(phrase)
172
+
173
+ # 2. Extract proper nouns and names (high priority)
174
+ # Look for capitalized words that are likely names/places
175
+ proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', question)
176
+ # Filter out common words that might be capitalized
177
+ common_caps = {'The', 'This', 'That', 'These', 'Those', 'In', 'On', 'At', 'To', 'For', 'Of', 'With', 'By'}
178
+ meaningful_nouns = [noun for noun in proper_nouns if noun not in common_caps]
179
+ search_parts.extend(meaningful_nouns[:3]) # Max 3 proper nouns
180
+
181
+ # 3. Extract years and dates (medium priority)
182
  years = re.findall(r'\b(19|20)\d{2}\b', question)
183
+ search_parts.extend(years[:2]) # Max 2 years
184
+
185
+ # 4. Extract specific important keywords based on question context
186
+ important_keywords = []
187
+
188
+ # Look for specific domains/topics
189
+ domain_keywords = {
190
+ 'music': ['album', 'albums', 'song', 'songs', 'artist', 'band', 'music', 'released', 'published'],
191
+ 'sports': ['player', 'team', 'game', 'match', 'season', 'championship', 'league'],
192
+ 'science': ['research', 'study', 'paper', 'journal', 'scientist', 'experiment'],
193
+ 'technology': ['software', 'program', 'code', 'website', 'application', 'system'],
194
+ 'geography': ['country', 'city', 'place', 'location', 'region', 'area'],
195
+ 'history': ['year', 'century', 'period', 'era', 'historical', 'ancient'],
196
+ 'wikipedia': ['wikipedia', 'article', 'featured', 'promoted', 'nomination', 'nominated'],
197
+ 'competition': ['competition', 'contest', 'award', 'winner', 'recipient', 'prize']
198
+ }
199
 
200
+ question_lower = question.lower()
201
+ for domain, keywords in domain_keywords.items():
202
+ for keyword in keywords:
203
+ if keyword in question_lower:
204
+ important_keywords.append(keyword)
205
+
206
+ # Add unique important keywords
207
+ unique_keywords = []
208
+ for keyword in important_keywords:
209
+ if keyword not in [part.lower() for part in search_parts]:
210
+ unique_keywords.append(keyword)
211
+ search_parts.extend(unique_keywords[:3]) # Max 3 domain keywords
212
+
213
+ # 5. Extract key content words (lower priority)
214
+ if len(search_parts) < 4: # Only if we need more terms
215
+ # Remove stop words and get meaningful content
216
+ stop_words = {
217
+ 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
218
+ 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during',
219
+ 'before', 'after', 'above', 'below', 'between', 'among', 'this', 'that',
220
+ 'these', 'those', 'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he',
221
+ 'him', 'his', 'she', 'her', 'it', 'its', 'they', 'them', 'their',
222
+ 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has',
223
+ 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
224
+ 'may', 'might', 'must', 'can'
225
+ }
226
+
227
+ # Extract words, clean them, and filter
228
+ words = re.findall(r'\b\w+\b', clean_question.lower())
229
+ content_words = [w for w in words if w not in stop_words and len(w) > 2]
230
+
231
+ # Add important content words not already included
232
+ for word in content_words[:3]:
233
+ if word not in [part.lower() for part in search_parts]:
234
+ search_parts.append(word)
235
 
236
+ # Build the final search query
237
+ if search_parts:
238
+ search_query = ' '.join(search_parts)
239
  else:
240
+ # Fallback: use first few meaningful words
241
+ words = question.split()[:6]
242
+ search_query = ' '.join(words)
243
+
244
+ # Clean up and ensure reasonable length
245
+ search_query = ' '.join(search_query.split()) # Remove extra whitespace
246
+
247
+ # Truncate at word boundary if too long
248
+ if len(search_query) > max_length:
249
+ search_query = search_query[:max_length].rsplit(' ', 1)[0]
250
 
251
+ # Ensure we have something to search for
252
+ if not search_query.strip():
253
+ search_query = question.split()[:3] # Use first 3 words as absolute fallback
254
+ search_query = ' '.join(search_query)
255
 
256
+ # Log for debugging
257
+ logger.info(f"📝 Extracted search terms: '{search_query}' from question: '{question[:100]}...'")
258
 
259
+ return search_query.strip()
260
 
261
  def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
262
  """