Chris committed on
Commit
5a03810
Β·
1 Parent(s): 6c60f72

Final 7.2.3

Browse files
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: GAIA Agent System
3
  emoji: πŸ€–
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.25.2
8
  app_file: ./src/app.py
 
1
  ---
2
  title: GAIA Agent System
3
  emoji: πŸ€–
4
+ colorFrom: yellow
5
+ colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.25.2
8
  app_file: ./src/app.py
src/agents/__pycache__/web_researcher.cpython-310.pyc CHANGED
Binary files a/src/agents/__pycache__/web_researcher.cpython-310.pyc and b/src/agents/__pycache__/web_researcher.cpython-310.pyc differ
 
src/agents/web_researcher.py CHANGED
@@ -445,21 +445,35 @@ class WebResearchAgent:
445
  if len(phrase.strip()) > 0:
446
  priority_terms.append(phrase.strip())
447
 
448
- # Extract proper nouns (capitalized words)
 
 
 
449
  proper_nouns = []
450
  for word in question.split():
451
  clean_word = re.sub(r'[^\w]', '', word)
452
- if clean_word and clean_word[0].isupper() and len(clean_word) > 1:
 
 
 
453
  proper_nouns.append(clean_word)
454
 
455
- # Extract years (4-digit numbers)
456
- years = re.findall(r'\b(19|20)\d{2}\b', question)
457
-
458
- # Extract other important numbers (but not random ones)
459
- important_numbers = re.findall(r'\b\d{1,4}\b', question)
460
- # Filter out years and common numbers from important numbers to avoid duplication
461
- common_numbers = {'19', '20', '1', '2', '3', '4', '5', '10'} # Filter out very common numbers
462
- important_numbers = [num for num in important_numbers if num not in years and num not in common_numbers]
 
 
 
 
 
 
 
 
463
 
464
  # Build search terms with priority
465
  search_terms = []
@@ -492,7 +506,7 @@ class WebResearchAgent:
492
 
493
  # Add a few important numbers if space allows
494
  if len(' '.join(search_terms)) < max_length - 10:
495
- search_terms.extend(important_numbers[:2])
496
 
497
  # Join and clean up
498
  search_query = ' '.join(search_terms)
 
445
  if len(phrase.strip()) > 0:
446
  priority_terms.append(phrase.strip())
447
 
448
+ # Extract years (4-digit numbers) - capture full years, not just prefixes
449
+ years = re.findall(r'\b(?:19|20)\d{2}\b', question) # Changed from capturing group to full match
450
+
451
+ # Extract proper nouns (capitalized words) - exclude numbers
452
  proper_nouns = []
453
  for word in question.split():
454
  clean_word = re.sub(r'[^\w]', '', word)
455
+ if (clean_word and
456
+ clean_word[0].isupper() and
457
+ len(clean_word) > 1 and
458
+ not clean_word.isdigit()): # Exclude pure numbers
459
  proper_nouns.append(clean_word)
460
 
461
+ # Extract other meaningful numbers (but be very selective)
462
+ # Only include numbers that are likely meaningful (dates, counts, etc.)
463
+ meaningful_numbers = []
464
+ number_matches = re.findall(r'\b\d{1,4}\b', question)
465
+ for num in number_matches:
466
+ # Skip very common/meaningless numbers and years already captured
467
+ if (num not in ['1', '2', '3', '4', '5', '10', '20', '19', '21', '22', '23', '24', '25'] and
468
+ num not in years and
469
+ len(num) > 1): # Require at least 2 digits for meaningful numbers
470
+ # Only include if it appears in a meaningful context
471
+ if any(context in question.lower() for context in [
472
+ f'{num} albums', f'{num} songs', f'{num} years', f'{num} people',
473
+ f'{num} times', f'{num} days', f'{num} months', f'episode {num}',
474
+ f'season {num}', f'volume {num}', f'part {num}'
475
+ ]):
476
+ meaningful_numbers.append(num)
477
 
478
  # Build search terms with priority
479
  search_terms = []
 
506
 
507
  # Add a few important numbers if space allows
508
  if len(' '.join(search_terms)) < max_length - 10:
509
+ search_terms.extend(meaningful_numbers[:2])
510
 
511
  # Join and clean up
512
  search_query = ' '.join(search_terms)
src/models/__pycache__/qwen_client.cpython-310.pyc CHANGED
Binary files a/src/models/__pycache__/qwen_client.cpython-310.pyc and b/src/models/__pycache__/qwen_client.cpython-310.pyc differ
 
src/production_deployment_guide.md CHANGED
@@ -312,3 +312,4 @@ With proper deployment and authentication:
312
  - **Deployment**: Ready for immediate HuggingFace Space deployment
313
 
314
  **The GAIA Agent is now a focused, high-performance system using proper AI models and multi-agent orchestration!** πŸŽ‰
 
 
312
  - **Deployment**: Ready for immediate HuggingFace Space deployment
313
 
314
  **The GAIA Agent is now a focused, high-performance system using proper AI models and multi-agent orchestration!** πŸŽ‰
315
+
src/tools/__pycache__/final_answer_tool.cpython-310.pyc CHANGED
Binary files a/src/tools/__pycache__/final_answer_tool.cpython-310.pyc and b/src/tools/__pycache__/final_answer_tool.cpython-310.pyc differ
 
src/tools/__pycache__/web_search_tool.cpython-310.pyc CHANGED
Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ
 
src/tools/final_answer_tool.py CHANGED
@@ -55,7 +55,7 @@ class FinalAnswerTool:
55
  }
56
 
57
  # Parse and clean the extracted answer
58
- extracted_answer = self._clean_answer(result.response, question_type)
59
 
60
  # Validate answer format
61
  validation_result = self._validate_answer(extracted_answer, question_type)
@@ -141,7 +141,7 @@ Extract the precise answer NOW:"""
141
 
142
  return base_prompt
143
 
144
- def _clean_answer(self, raw_answer: str, question_type: str) -> str:
145
  """Clean and format the extracted answer"""
146
 
147
  # Remove common unwanted prefixes/suffixes
 
55
  }
56
 
57
  # Parse and clean the extracted answer
58
+ extracted_answer = self._clean_answer(result.response, question, question_type)
59
 
60
  # Validate answer format
61
  validation_result = self._validate_answer(extracted_answer, question_type)
 
141
 
142
  return base_prompt
143
 
144
+ def _clean_answer(self, raw_answer: str, question: str, question_type: str) -> str:
145
  """Clean and format the extracted answer"""
146
 
147
  # Remove common unwanted prefixes/suffixes
src/tools/web_search_tool.py CHANGED
@@ -229,13 +229,27 @@ class WebSearchTool(BaseTool):
229
 
230
  def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
231
  """
232
- Search using DuckDuckGo - primary search engine with improved error handling
233
  """
234
  try:
235
  logger.info(f"πŸ¦† DuckDuckGo search for: {query}")
236
 
237
- # Use DuckDuckGo text search - fail fast if there are issues
238
- ddg_results = list(self.ddgs.text(query, max_results=min(limit, 10)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
  if not ddg_results:
241
  logger.warning("DuckDuckGo returned no results")
@@ -264,7 +278,10 @@ class WebSearchTool(BaseTool):
264
 
265
  except Exception as e:
266
  logger.warning(f"DuckDuckGo search failed: {str(e)}")
267
- # Don't log the full exception details to avoid spam
 
 
 
268
  return self._search_with_fallback(query, limit)
269
 
270
  def _search_with_fallback(self, query: str, limit: int = 5) -> Dict[str, Any]:
@@ -304,7 +321,7 @@ class WebSearchTool(BaseTool):
304
  # Fall back to Wikipedia search
305
  logger.info("πŸ“š Wikipedia search for: " + query)
306
  try:
307
- wiki_results = self._search_wikipedia(query, limit)
308
  if wiki_results and wiki_results.get('success'):
309
  logger.info(f"βœ… Wikipedia found {wiki_results.get('count', 0)} results")
310
  return wiki_results
 
229
 
230
  def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
231
  """
232
+ Search using DuckDuckGo - primary search engine with improved error handling and rate limiting
233
  """
234
  try:
235
  logger.info(f"πŸ¦† DuckDuckGo search for: {query}")
236
 
237
+ # Add small delay to avoid rate limiting
238
+ time.sleep(0.5)
239
+
240
+ # Use DuckDuckGo text search with retry logic
241
+ max_retries = 2
242
+ for attempt in range(max_retries):
243
+ try:
244
+ ddg_results = list(self.ddgs.text(query, max_results=min(limit, 10)))
245
+ break
246
+ except Exception as retry_error:
247
+ if attempt < max_retries - 1:
248
+ logger.warning(f"DuckDuckGo attempt {attempt + 1} failed, retrying in {2 ** attempt}s: {retry_error}")
249
+ time.sleep(2 ** attempt) # Exponential backoff
250
+ continue
251
+ else:
252
+ raise retry_error
253
 
254
  if not ddg_results:
255
  logger.warning("DuckDuckGo returned no results")
 
278
 
279
  except Exception as e:
280
  logger.warning(f"DuckDuckGo search failed: {str(e)}")
281
+ # Check if it's a rate limiting error and add longer delay
282
+ if "ratelimit" in str(e).lower() or "429" in str(e) or "202" in str(e):
283
+ logger.warning("Rate limiting detected, adding delay before fallback")
284
+ time.sleep(2.0)
285
  return self._search_with_fallback(query, limit)
286
 
287
  def _search_with_fallback(self, query: str, limit: int = 5) -> Dict[str, Any]:
 
321
  # Fall back to Wikipedia search
322
  logger.info("πŸ“š Wikipedia search for: " + query)
323
  try:
324
+ wiki_results = self._search_with_wikipedia(query, limit)
325
  if wiki_results and wiki_results.get('success'):
326
  logger.info(f"βœ… Wikipedia found {wiki_results.get('count', 0)} results")
327
  return wiki_results