Chris
committed on
Commit
·
5a03810
1
Parent(s):
6c60f72
Final 7.2.3
Browse files
- README.md +2 -2
- src/agents/__pycache__/web_researcher.cpython-310.pyc +0 -0
- src/agents/web_researcher.py +25 -11
- src/models/__pycache__/qwen_client.cpython-310.pyc +0 -0
- src/production_deployment_guide.md +1 -0
- src/tools/__pycache__/final_answer_tool.cpython-310.pyc +0 -0
- src/tools/__pycache__/web_search_tool.cpython-310.pyc +0 -0
- src/tools/final_answer_tool.py +2 -2
- src/tools/web_search_tool.py +22 -5
README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
---
|
| 2 |
title: GAIA Agent System
|
| 3 |
emoji: 🤖
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.25.2
|
| 8 |
app_file: ./src/app.py
|
|
|
|
| 1 |
---
|
| 2 |
title: GAIA Agent System
|
| 3 |
emoji: 🤖
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.25.2
|
| 8 |
app_file: ./src/app.py
|
src/agents/__pycache__/web_researcher.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/web_researcher.cpython-310.pyc and b/src/agents/__pycache__/web_researcher.cpython-310.pyc differ
|
|
|
src/agents/web_researcher.py
CHANGED
|
@@ -445,21 +445,35 @@ class WebResearchAgent:
|
|
| 445 |
if len(phrase.strip()) > 0:
|
| 446 |
priority_terms.append(phrase.strip())
|
| 447 |
|
| 448 |
-
# Extract
|
|
|
|
|
|
|
|
|
|
| 449 |
proper_nouns = []
|
| 450 |
for word in question.split():
|
| 451 |
clean_word = re.sub(r'[^\w]', '', word)
|
| 452 |
-
if clean_word and
|
|
|
|
|
|
|
|
|
|
| 453 |
proper_nouns.append(clean_word)
|
| 454 |
|
| 455 |
-
# Extract
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
|
| 464 |
# Build search terms with priority
|
| 465 |
search_terms = []
|
|
@@ -492,7 +506,7 @@ class WebResearchAgent:
|
|
| 492 |
|
| 493 |
# Add a few important numbers if space allows
|
| 494 |
if len(' '.join(search_terms)) < max_length - 10:
|
| 495 |
-
search_terms.extend(
|
| 496 |
|
| 497 |
# Join and clean up
|
| 498 |
search_query = ' '.join(search_terms)
|
|
|
|
| 445 |
if len(phrase.strip()) > 0:
|
| 446 |
priority_terms.append(phrase.strip())
|
| 447 |
|
| 448 |
+
# Extract years (4-digit numbers) - capture full years, not just prefixes
|
| 449 |
+
years = re.findall(r'\b(?:19|20)\d{2}\b', question) # Changed from capturing group to full match
|
| 450 |
+
|
| 451 |
+
# Extract proper nouns (capitalized words) - exclude numbers
|
| 452 |
proper_nouns = []
|
| 453 |
for word in question.split():
|
| 454 |
clean_word = re.sub(r'[^\w]', '', word)
|
| 455 |
+
if (clean_word and
|
| 456 |
+
clean_word[0].isupper() and
|
| 457 |
+
len(clean_word) > 1 and
|
| 458 |
+
not clean_word.isdigit()): # Exclude pure numbers
|
| 459 |
proper_nouns.append(clean_word)
|
| 460 |
|
| 461 |
+
# Extract other meaningful numbers (but be very selective)
|
| 462 |
+
# Only include numbers that are likely meaningful (dates, counts, etc.)
|
| 463 |
+
meaningful_numbers = []
|
| 464 |
+
number_matches = re.findall(r'\b\d{1,4}\b', question)
|
| 465 |
+
for num in number_matches:
|
| 466 |
+
# Skip very common/meaningless numbers and years already captured
|
| 467 |
+
if (num not in ['1', '2', '3', '4', '5', '10', '20', '19', '21', '22', '23', '24', '25'] and
|
| 468 |
+
num not in years and
|
| 469 |
+
len(num) > 1): # Require at least 2 digits for meaningful numbers
|
| 470 |
+
# Only include if it appears in a meaningful context
|
| 471 |
+
if any(context in question.lower() for context in [
|
| 472 |
+
f'{num} albums', f'{num} songs', f'{num} years', f'{num} people',
|
| 473 |
+
f'{num} times', f'{num} days', f'{num} months', f'episode {num}',
|
| 474 |
+
f'season {num}', f'volume {num}', f'part {num}'
|
| 475 |
+
]):
|
| 476 |
+
meaningful_numbers.append(num)
|
| 477 |
|
| 478 |
# Build search terms with priority
|
| 479 |
search_terms = []
|
|
|
|
| 506 |
|
| 507 |
# Add a few important numbers if space allows
|
| 508 |
if len(' '.join(search_terms)) < max_length - 10:
|
| 509 |
+
search_terms.extend(meaningful_numbers[:2])
|
| 510 |
|
| 511 |
# Join and clean up
|
| 512 |
search_query = ' '.join(search_terms)
|
src/models/__pycache__/qwen_client.cpython-310.pyc
CHANGED
|
Binary files a/src/models/__pycache__/qwen_client.cpython-310.pyc and b/src/models/__pycache__/qwen_client.cpython-310.pyc differ
|
|
|
src/production_deployment_guide.md
CHANGED
|
@@ -312,3 +312,4 @@ With proper deployment and authentication:
|
|
| 312 |
- **Deployment**: Ready for immediate HuggingFace Space deployment
|
| 313 |
|
| 314 |
**The GAIA Agent is now a focused, high-performance system using proper AI models and multi-agent orchestration!** π
|
|
|
|
|
|
| 312 |
- **Deployment**: Ready for immediate HuggingFace Space deployment
|
| 313 |
|
| 314 |
**The GAIA Agent is now a focused, high-performance system using proper AI models and multi-agent orchestration!** π
|
| 315 |
+
|
src/tools/__pycache__/final_answer_tool.cpython-310.pyc
CHANGED
|
Binary files a/src/tools/__pycache__/final_answer_tool.cpython-310.pyc and b/src/tools/__pycache__/final_answer_tool.cpython-310.pyc differ
|
|
|
src/tools/__pycache__/web_search_tool.cpython-310.pyc
CHANGED
|
Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ
|
|
|
src/tools/final_answer_tool.py
CHANGED
|
@@ -55,7 +55,7 @@ class FinalAnswerTool:
|
|
| 55 |
}
|
| 56 |
|
| 57 |
# Parse and clean the extracted answer
|
| 58 |
-
extracted_answer = self._clean_answer(result.response, question_type)
|
| 59 |
|
| 60 |
# Validate answer format
|
| 61 |
validation_result = self._validate_answer(extracted_answer, question_type)
|
|
@@ -141,7 +141,7 @@ Extract the precise answer NOW:"""
|
|
| 141 |
|
| 142 |
return base_prompt
|
| 143 |
|
| 144 |
-
def _clean_answer(self, raw_answer: str, question_type: str) -> str:
|
| 145 |
"""Clean and format the extracted answer"""
|
| 146 |
|
| 147 |
# Remove common unwanted prefixes/suffixes
|
|
|
|
| 55 |
}
|
| 56 |
|
| 57 |
# Parse and clean the extracted answer
|
| 58 |
+
extracted_answer = self._clean_answer(result.response, question, question_type)
|
| 59 |
|
| 60 |
# Validate answer format
|
| 61 |
validation_result = self._validate_answer(extracted_answer, question_type)
|
|
|
|
| 141 |
|
| 142 |
return base_prompt
|
| 143 |
|
| 144 |
+
def _clean_answer(self, raw_answer: str, question: str, question_type: str) -> str:
|
| 145 |
"""Clean and format the extracted answer"""
|
| 146 |
|
| 147 |
# Remove common unwanted prefixes/suffixes
|
src/tools/web_search_tool.py
CHANGED
|
@@ -229,13 +229,27 @@ class WebSearchTool(BaseTool):
|
|
| 229 |
|
| 230 |
def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 231 |
"""
|
| 232 |
-
Search using DuckDuckGo - primary search engine with improved error handling
|
| 233 |
"""
|
| 234 |
try:
|
| 235 |
logger.info(f"π¦ DuckDuckGo search for: {query}")
|
| 236 |
|
| 237 |
-
#
|
| 238 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
if not ddg_results:
|
| 241 |
logger.warning("DuckDuckGo returned no results")
|
|
@@ -264,7 +278,10 @@ class WebSearchTool(BaseTool):
|
|
| 264 |
|
| 265 |
except Exception as e:
|
| 266 |
logger.warning(f"DuckDuckGo search failed: {str(e)}")
|
| 267 |
-
#
|
|
|
|
|
|
|
|
|
|
| 268 |
return self._search_with_fallback(query, limit)
|
| 269 |
|
| 270 |
def _search_with_fallback(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
|
@@ -304,7 +321,7 @@ class WebSearchTool(BaseTool):
|
|
| 304 |
# Fall back to Wikipedia search
|
| 305 |
logger.info("π Wikipedia search for: " + query)
|
| 306 |
try:
|
| 307 |
-
wiki_results = self.
|
| 308 |
if wiki_results and wiki_results.get('success'):
|
| 309 |
logger.info(f"β
Wikipedia found {wiki_results.get('count', 0)} results")
|
| 310 |
return wiki_results
|
|
|
|
| 229 |
|
| 230 |
def _search_with_duckduckgo(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 231 |
"""
|
| 232 |
+
Search using DuckDuckGo - primary search engine with improved error handling and rate limiting
|
| 233 |
"""
|
| 234 |
try:
|
| 235 |
logger.info(f"π¦ DuckDuckGo search for: {query}")
|
| 236 |
|
| 237 |
+
# Add small delay to avoid rate limiting
|
| 238 |
+
time.sleep(0.5)
|
| 239 |
+
|
| 240 |
+
# Use DuckDuckGo text search with retry logic
|
| 241 |
+
max_retries = 2
|
| 242 |
+
for attempt in range(max_retries):
|
| 243 |
+
try:
|
| 244 |
+
ddg_results = list(self.ddgs.text(query, max_results=min(limit, 10)))
|
| 245 |
+
break
|
| 246 |
+
except Exception as retry_error:
|
| 247 |
+
if attempt < max_retries - 1:
|
| 248 |
+
logger.warning(f"DuckDuckGo attempt {attempt + 1} failed, retrying in {2 ** attempt}s: {retry_error}")
|
| 249 |
+
time.sleep(2 ** attempt) # Exponential backoff
|
| 250 |
+
continue
|
| 251 |
+
else:
|
| 252 |
+
raise retry_error
|
| 253 |
|
| 254 |
if not ddg_results:
|
| 255 |
logger.warning("DuckDuckGo returned no results")
|
|
|
|
| 278 |
|
| 279 |
except Exception as e:
|
| 280 |
logger.warning(f"DuckDuckGo search failed: {str(e)}")
|
| 281 |
+
# Check if it's a rate limiting error and add longer delay
|
| 282 |
+
if "ratelimit" in str(e).lower() or "429" in str(e) or "202" in str(e):
|
| 283 |
+
logger.warning("Rate limiting detected, adding delay before fallback")
|
| 284 |
+
time.sleep(2.0)
|
| 285 |
return self._search_with_fallback(query, limit)
|
| 286 |
|
| 287 |
def _search_with_fallback(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
|
|
|
| 321 |
# Fall back to Wikipedia search
|
| 322 |
logger.info("π Wikipedia search for: " + query)
|
| 323 |
try:
|
| 324 |
+
wiki_results = self._search_with_wikipedia(query, limit)
|
| 325 |
if wiki_results and wiki_results.get('success'):
|
| 326 |
logger.info(f"β
Wikipedia found {wiki_results.get('count', 0)} results")
|
| 327 |
return wiki_results
|