Chris committed on
Commit
0b92da3
·
1 Parent(s): 0a9db12

Final 6.9.3

Browse files
src/agents/__pycache__/router.cpython-310.pyc CHANGED
Binary files a/src/agents/__pycache__/router.cpython-310.pyc and b/src/agents/__pycache__/router.cpython-310.pyc differ
 
src/agents/router.py CHANGED
@@ -173,21 +173,21 @@ class RouterAgent:
173
  if question_type == QuestionType.MATHEMATICAL and pattern in [r'\bhow many\b', r'\bhow much\b']:
174
  score += 2 # Boost counting questions
175
  elif question_type == QuestionType.TEXT_MANIPULATION and any(special in pattern for special in ['opposite', 'reverse', 'backwards']):
176
- score += 2 # Reduced from 3 to 2 to avoid over-weighting
177
  if score > 0:
178
  type_scores[question_type] = score
179
 
180
  # Special handling for specific question patterns
181
 
182
- # Detect backwards/scrambled text (strong indicator)
183
- if re.search(r'\.rewsna|tfel|etirw', question_lower):
184
  type_scores[QuestionType.TEXT_MANIPULATION] = type_scores.get(QuestionType.TEXT_MANIPULATION, 0) + 3
185
 
186
  # Detect code execution patterns (strong indicator)
187
  if re.search(r'\bfinal.*output\b|\bnumeric.*output\b|\battached.*code\b', question_lower):
188
  type_scores[QuestionType.CODE_EXECUTION] = type_scores.get(QuestionType.CODE_EXECUTION, 0) + 4
189
 
190
- # Detect mathematical operations with numbers
191
  if re.search(r'\b\d+.*\b(?:studio albums|between|and)\b.*\d+', question_lower):
192
  type_scores[QuestionType.MATHEMATICAL] = type_scores.get(QuestionType.MATHEMATICAL, 0) + 3
193
 
@@ -198,8 +198,20 @@ class RouterAgent:
198
  # Multi-step questions that need research AND calculation
199
  if ('how many' in question_lower or 'how much' in question_lower) and \
200
  any(term in question_lower for term in ['between', 'from', 'during', 'published', 'released']):
 
 
 
 
 
201
  type_scores[QuestionType.WEB_RESEARCH] = type_scores.get(QuestionType.WEB_RESEARCH, 0) + 2
202
- type_scores[QuestionType.MATHEMATICAL] = type_scores.get(QuestionType.MATHEMATICAL, 0) + 2
 
 
 
 
 
 
 
203
 
204
  # Add detected types based on scores
205
  for qtype, score in type_scores.items():
 
173
  if question_type == QuestionType.MATHEMATICAL and pattern in [r'\bhow many\b', r'\bhow much\b']:
174
  score += 2 # Boost counting questions
175
  elif question_type == QuestionType.TEXT_MANIPULATION and any(special in pattern for special in ['opposite', 'reverse', 'backwards']):
176
+ score += 1 # Reduced further to avoid over-weighting
177
  if score > 0:
178
  type_scores[question_type] = score
179
 
180
  # Special handling for specific question patterns
181
 
182
+ # Detect backwards/scrambled text (strong indicator) - only for clearly backwards text
183
+ if re.search(r'\.rewsna\b|etirw\b|dnatsrednu\b', question_lower):
184
  type_scores[QuestionType.TEXT_MANIPULATION] = type_scores.get(QuestionType.TEXT_MANIPULATION, 0) + 3
185
 
186
  # Detect code execution patterns (strong indicator)
187
  if re.search(r'\bfinal.*output\b|\bnumeric.*output\b|\battached.*code\b', question_lower):
188
  type_scores[QuestionType.CODE_EXECUTION] = type_scores.get(QuestionType.CODE_EXECUTION, 0) + 4
189
 
190
+ # Detect mathematical operations with numbers (boost mathematical score)
191
  if re.search(r'\b\d+.*\b(?:studio albums|between|and)\b.*\d+', question_lower):
192
  type_scores[QuestionType.MATHEMATICAL] = type_scores.get(QuestionType.MATHEMATICAL, 0) + 3
193
 
 
198
  # Multi-step questions that need research AND calculation
199
  if ('how many' in question_lower or 'how much' in question_lower) and \
200
  any(term in question_lower for term in ['between', 'from', 'during', 'published', 'released']):
201
+ type_scores[QuestionType.WEB_RESEARCH] = type_scores.get(QuestionType.WEB_RESEARCH, 0) + 3 # Increased from 2
202
+ type_scores[QuestionType.MATHEMATICAL] = type_scores.get(QuestionType.MATHEMATICAL, 0) + 3 # Increased from 2
203
+
204
+ # Detect factual research questions (boost web research)
205
+ if any(pattern in question_lower for pattern in ['who is', 'who was', 'who did', 'what is', 'when did', 'where', 'which']):
206
  type_scores[QuestionType.WEB_RESEARCH] = type_scores.get(QuestionType.WEB_RESEARCH, 0) + 2
207
+
208
+ # Detect image/file references
209
+ if any(term in question_lower for term in ['image', 'picture', 'photo', 'file', 'attached', 'provided']):
210
+ type_scores[QuestionType.FILE_PROCESSING] = type_scores.get(QuestionType.FILE_PROCESSING, 0) + 4 # Increased from 3
211
+
212
+ # Detect Wikipedia-specific questions
213
+ if any(term in question_lower for term in ['wikipedia', 'featured article', 'english wikipedia']):
214
+ type_scores[QuestionType.WIKIPEDIA] = type_scores.get(QuestionType.WIKIPEDIA, 0) + 4
215
 
216
  # Add detected types based on scores
217
  for qtype, score in type_scores.items():
src/app.py CHANGED
@@ -891,9 +891,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
891
  logger.info(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
892
  try:
893
  response = requests.post(submit_url, json=submission_data, timeout=120)
 
 
894
  response.raise_for_status()
895
  result_data = response.json()
896
 
 
 
 
897
  # Calculate execution time
898
  execution_time = time.time() - start_time
899
 
@@ -1536,9 +1541,14 @@ Please log in to access GAIA evaluation with Qwen models and LangGraph workflow.
1536
  logger.info(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
1537
  try:
1538
  response = requests.post(submit_url, json=submission_data, timeout=120)
 
 
1539
  response.raise_for_status()
1540
  result_data = response.json()
1541
 
 
 
 
1542
  # Calculate execution time
1543
  execution_time = time.time() - start_time
1544
 
 
891
  logger.info(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
892
  try:
893
  response = requests.post(submit_url, json=submission_data, timeout=120)
894
+ logger.info(f"📨 Unit 4 API response status: {response.status_code}")
895
+
896
  response.raise_for_status()
897
  result_data = response.json()
898
 
899
+ # Log the actual response for debugging
900
+ logger.info(f"📊 Unit 4 API response data: {result_data}")
901
+
902
  # Calculate execution time
903
  execution_time = time.time() - start_time
904
 
 
1541
  logger.info(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
1542
  try:
1543
  response = requests.post(submit_url, json=submission_data, timeout=120)
1544
+ logger.info(f"📨 Unit 4 API response status: {response.status_code}")
1545
+
1546
  response.raise_for_status()
1547
  result_data = response.json()
1548
 
1549
+ # Log the actual response for debugging
1550
+ logger.info(f"📊 Unit 4 API response data: {result_data}")
1551
+
1552
  # Calculate execution time
1553
  execution_time = time.time() - start_time
1554
 
src/tools/web_search_tool.py CHANGED
@@ -227,45 +227,17 @@ class WebSearchTool(BaseTool):
227
 
228
  def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
229
  """
230
- Search using DuckDuckGo - primary search engine
231
  """
232
  try:
233
  logger.info(f"🦆 DuckDuckGo search for: {query}")
234
 
235
- # Add retry logic for DuckDuckGo rate limiting
236
- max_retries = 3
237
- retry_delay = 2
238
 
239
- for attempt in range(max_retries):
240
- try:
241
- # Use DuckDuckGo text search
242
- ddg_results = list(self.ddgs.text(query, max_results=min(limit, 10)))
243
-
244
- if not ddg_results:
245
- if attempt < max_retries - 1:
246
- logger.warning(f"DuckDuckGo returned no results, retrying in {retry_delay}s...")
247
- time.sleep(retry_delay)
248
- retry_delay *= 2
249
- continue
250
- else:
251
- logger.warning("DuckDuckGo returned no results after retries")
252
- # Fall back to other search engines
253
- return self._search_with_fallback(query, limit)
254
-
255
- break
256
-
257
- except Exception as e:
258
- if "rate limit" in str(e).lower() or "429" in str(e):
259
- if attempt < max_retries - 1:
260
- logger.warning(f"DuckDuckGo rate limited, retrying in {retry_delay}s...")
261
- time.sleep(retry_delay)
262
- retry_delay *= 2
263
- continue
264
- else:
265
- logger.warning("DuckDuckGo rate limited after retries, using fallback")
266
- return self._search_with_fallback(query, limit)
267
- else:
268
- raise
269
 
270
  # Process DuckDuckGo results
271
  results = []
@@ -299,8 +271,8 @@ class WebSearchTool(BaseTool):
299
  }
300
 
301
  except Exception as e:
302
- logger.error(f"DuckDuckGo search error: {e}")
303
- # Fall back to other search engines
304
  return self._search_with_fallback(query, limit)
305
 
306
  def _search_with_fallback(self, query: str, limit: int) -> Dict[str, Any]:
 
227
 
228
  def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
229
  """
230
+ Search using DuckDuckGo - primary search engine with improved error handling
231
  """
232
  try:
233
  logger.info(f"🦆 DuckDuckGo search for: {query}")
234
 
235
+ # Use DuckDuckGo text search - fail fast if there are issues
236
+ ddg_results = list(self.ddgs.text(query, max_results=min(limit, 10)))
 
237
 
238
+ if not ddg_results:
239
+ logger.warning("DuckDuckGo returned no results")
240
+ return self._search_with_fallback(query, limit)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  # Process DuckDuckGo results
243
  results = []
 
271
  }
272
 
273
  except Exception as e:
274
+ logger.warning(f"DuckDuckGo search failed: {str(e)[:100]}")
275
+ # Fall back to other search engines immediately
276
  return self._search_with_fallback(query, limit)
277
 
278
  def _search_with_fallback(self, query: str, limit: int) -> Dict[str, Any]: