Spaces:
Sleeping
Sleeping
Chris
commited on
Commit
·
83178da
1
Parent(s):
3a3e679
Final 5.8.3
Browse files- src/__pycache__/app.cpython-310.pyc +0 -0
- src/app.py +344 -41
- src/production_deployment_guide.md +27 -4
- src/tools/__pycache__/web_search_tool.cpython-310.pyc +0 -0
- src/tools/web_search_tool.py +73 -27
src/__pycache__/app.cpython-310.pyc
CHANGED
|
Binary files a/src/__pycache__/app.cpython-310.pyc and b/src/__pycache__/app.cpython-310.pyc differ
|
|
|
src/app.py
CHANGED
|
@@ -13,6 +13,9 @@ import pandas as pd
|
|
| 13 |
from typing import Optional, Tuple, Dict
|
| 14 |
import tempfile
|
| 15 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Configure logging
|
| 18 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -25,11 +28,207 @@ from models.qwen_client import QwenClient
|
|
| 25 |
# Constants for Unit 4 API
|
| 26 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
class GAIAAgentApp:
|
| 29 |
"""Production GAIA Agent Application with Unit 4 API integration"""
|
| 30 |
|
| 31 |
def __init__(self, hf_token: Optional[str] = None):
|
| 32 |
"""Initialize the application with optional HF token"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
# Try main QwenClient first
|
| 35 |
from models.qwen_client import QwenClient
|
|
@@ -39,23 +238,30 @@ class GAIAAgentApp:
|
|
| 39 |
# Test if client is working
|
| 40 |
test_result = self.llm_client.generate("Test", max_tokens=5)
|
| 41 |
if not test_result.success:
|
| 42 |
-
logger.
|
| 43 |
raise Exception("Main client not working")
|
| 44 |
|
| 45 |
self.initialized = True
|
| 46 |
logger.info("✅ GAIA Agent system initialized with main client")
|
| 47 |
|
| 48 |
except Exception as e:
|
| 49 |
-
logger.
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
self.initialized = False
|
| 60 |
|
| 61 |
@classmethod
|
|
@@ -92,7 +298,7 @@ class GAIAAgentApp:
|
|
| 92 |
"""
|
| 93 |
|
| 94 |
if not self.initialized:
|
| 95 |
-
return "❌ System not initialized", "
|
| 96 |
|
| 97 |
if not question.strip():
|
| 98 |
return "❌ Please provide a question", "", ""
|
|
@@ -302,6 +508,32 @@ def check_oauth_scopes(oauth_token: str) -> Dict[str, any]:
|
|
| 302 |
|
| 303 |
def format_auth_status(profile: gr.OAuthProfile | None) -> str:
|
| 304 |
"""Format authentication status for display in UI"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
if not profile:
|
| 306 |
return """
|
| 307 |
### 🔐 Authentication Status: Not Logged In
|
|
@@ -311,6 +543,8 @@ Please log in to access GAIA evaluation features.
|
|
| 311 |
**What you can do:**
|
| 312 |
- ✅ Manual question testing (limited functionality)
|
| 313 |
- ❌ Official GAIA benchmark evaluation (requires login)
|
|
|
|
|
|
|
| 314 |
"""
|
| 315 |
|
| 316 |
username = profile.username
|
|
@@ -360,7 +594,7 @@ Please log in to access GAIA evaluation features.
|
|
| 360 |
status_parts.extend([
|
| 361 |
"",
|
| 362 |
"💡 **Note**: Your OAuth token has limited scopes (common with Gradio OAuth).",
|
| 363 |
-
"
|
| 364 |
])
|
| 365 |
|
| 366 |
return "\n".join(status_parts)
|
|
@@ -368,18 +602,32 @@ Please log in to access GAIA evaluation features.
|
|
| 368 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 369 |
"""
|
| 370 |
Fetches all questions from Unit 4 API, runs the GAIA Agent on them, submits all answers,
|
| 371 |
-
and displays the results. Also returns updated authentication status.
|
| 372 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
# Get authentication status for display
|
| 374 |
auth_status = format_auth_status(profile)
|
| 375 |
|
| 376 |
# Get space info for code submission
|
| 377 |
space_id = os.getenv("SPACE_ID")
|
| 378 |
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
username = f"{profile.username}"
|
| 381 |
oauth_token = getattr(profile, 'oauth_token', None) or getattr(profile, 'token', None)
|
| 382 |
-
logger.info(f"User logged in: {username},
|
| 383 |
|
| 384 |
# Check if OAuth token has sufficient scopes
|
| 385 |
if oauth_token:
|
|
@@ -397,29 +645,27 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 397 |
|
| 398 |
except Exception as e:
|
| 399 |
logger.warning(f"⚠️ Could not validate OAuth token: {e}")
|
| 400 |
-
|
| 401 |
else:
|
| 402 |
-
logger.info("User not logged in.")
|
| 403 |
-
return "Please
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
api_url = DEFAULT_API_URL
|
| 406 |
questions_url = f"{api_url}/questions"
|
| 407 |
submit_url = f"{api_url}/submit"
|
| 408 |
|
| 409 |
-
# 1. Instantiate GAIA Agent with
|
| 410 |
try:
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
agent = GAIAAgentApp.create_with_oauth_token(oauth_token)
|
| 414 |
-
else:
|
| 415 |
-
logger.info("Creating GAIA Agent with fallback authentication (limited OAuth scopes detected)")
|
| 416 |
-
agent = GAIAAgentApp() # This will automatically fallback to SimpleClient
|
| 417 |
|
| 418 |
if not agent.initialized:
|
| 419 |
-
return "Error: GAIA Agent failed to initialize
|
| 420 |
except Exception as e:
|
| 421 |
logger.error(f"Error instantiating agent: {e}")
|
| 422 |
-
return f"Error initializing GAIA Agent: {e}", None, auth_status
|
| 423 |
|
| 424 |
# Agent code URL
|
| 425 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
|
|
@@ -433,17 +679,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 433 |
questions_data = response.json()
|
| 434 |
if not questions_data:
|
| 435 |
logger.error("Fetched questions list is empty.")
|
| 436 |
-
return "Fetched questions list is empty or invalid format.", None, auth_status
|
| 437 |
logger.info(f"Fetched {len(questions_data)} questions.")
|
| 438 |
except requests.exceptions.RequestException as e:
|
| 439 |
logger.error(f"Error fetching questions: {e}")
|
| 440 |
-
return f"Error fetching questions: {e}", None, auth_status
|
| 441 |
except requests.exceptions.JSONDecodeError as e:
|
| 442 |
logger.error(f"Error decoding JSON response from questions endpoint: {e}")
|
| 443 |
-
return f"Error decoding server response for questions: {e}", None, auth_status
|
| 444 |
except Exception as e:
|
| 445 |
logger.error(f"An unexpected error occurred fetching questions: {e}")
|
| 446 |
-
return f"An unexpected error occurred fetching questions: {e}", None, auth_status
|
| 447 |
|
| 448 |
# 3. Run GAIA Agent
|
| 449 |
results_log = []
|
|
@@ -478,7 +724,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 478 |
|
| 479 |
if not answers_payload:
|
| 480 |
logger.error("GAIA Agent did not produce any answers to submit.")
|
| 481 |
-
return "GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), auth_status
|
| 482 |
|
| 483 |
# 4. Prepare Submission
|
| 484 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
@@ -491,16 +737,37 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 491 |
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 492 |
response.raise_for_status()
|
| 493 |
result_data = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
final_status = (
|
| 495 |
f"🎉 GAIA Agent Submission Successful!\n"
|
| 496 |
f"User: {result_data.get('username')}\n"
|
| 497 |
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 498 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 499 |
-
f"
|
|
|
|
|
|
|
| 500 |
)
|
| 501 |
logger.info("Submission successful.")
|
| 502 |
results_df = pd.DataFrame(results_log)
|
| 503 |
-
return final_status, results_df, auth_status
|
| 504 |
except requests.exceptions.HTTPError as e:
|
| 505 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 506 |
try:
|
|
@@ -511,22 +778,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 511 |
status_message = f"Submission Failed: {error_detail}"
|
| 512 |
logger.error(status_message)
|
| 513 |
results_df = pd.DataFrame(results_log)
|
| 514 |
-
return status_message, results_df, auth_status
|
| 515 |
except requests.exceptions.Timeout:
|
| 516 |
status_message = "Submission Failed: The request timed out."
|
| 517 |
logger.error(status_message)
|
| 518 |
results_df = pd.DataFrame(results_log)
|
| 519 |
-
return status_message, results_df, auth_status
|
| 520 |
except requests.exceptions.RequestException as e:
|
| 521 |
status_message = f"Submission Failed: Network error - {e}"
|
| 522 |
logger.error(status_message)
|
| 523 |
results_df = pd.DataFrame(results_log)
|
| 524 |
-
return status_message, results_df, auth_status
|
| 525 |
except Exception as e:
|
| 526 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 527 |
logger.error(status_message)
|
| 528 |
results_df = pd.DataFrame(results_log)
|
| 529 |
-
return status_message, results_df, auth_status
|
| 530 |
|
| 531 |
def create_interface():
|
| 532 |
"""Create the Gradio interface with both Unit 4 API and manual testing"""
|
|
@@ -864,6 +1131,29 @@ def create_interface():
|
|
| 864 |
label="Questions and GAIA Agent Answers",
|
| 865 |
wrap=True
|
| 866 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
|
| 868 |
gr.Markdown("---")
|
| 869 |
|
|
@@ -932,9 +1222,22 @@ def create_interface():
|
|
| 932 |
)
|
| 933 |
|
| 934 |
# Event handlers for Unit 4 API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 935 |
unit4_run_button.click(
|
| 936 |
-
fn=
|
| 937 |
-
outputs=[unit4_status_output, unit4_results_table, auth_status_display
|
|
|
|
| 938 |
)
|
| 939 |
|
| 940 |
# Refresh authentication status
|
|
|
|
| 13 |
from typing import Optional, Tuple, Dict
|
| 14 |
import tempfile
|
| 15 |
from pathlib import Path
|
| 16 |
+
import json
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
import csv
|
| 19 |
|
| 20 |
# Configure logging
|
| 21 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 28 |
# Constants for Unit 4 API
|
| 29 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 30 |
|
| 31 |
+
class GAIAResultLogger:
|
| 32 |
+
"""
|
| 33 |
+
Logger for GAIA evaluation results with export functionality
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
def __init__(self):
|
| 37 |
+
self.results_dir = Path("results")
|
| 38 |
+
self.results_dir.mkdir(exist_ok=True)
|
| 39 |
+
|
| 40 |
+
def log_evaluation_results(self, username: str, questions_data: list, results_log: list,
|
| 41 |
+
final_result: dict, execution_time: float) -> dict:
|
| 42 |
+
"""
|
| 43 |
+
Log complete evaluation results to multiple formats
|
| 44 |
+
Returns paths to generated files
|
| 45 |
+
"""
|
| 46 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 47 |
+
base_filename = f"gaia_evaluation_{username}_{timestamp}"
|
| 48 |
+
|
| 49 |
+
files_created = {}
|
| 50 |
+
|
| 51 |
+
try:
|
| 52 |
+
# 1. CSV Export (for easy sharing)
|
| 53 |
+
csv_path = self.results_dir / f"{base_filename}.csv"
|
| 54 |
+
self._save_csv_results(csv_path, results_log, final_result)
|
| 55 |
+
files_created["csv"] = str(csv_path)
|
| 56 |
+
|
| 57 |
+
# 2. Detailed JSON Export
|
| 58 |
+
json_path = self.results_dir / f"{base_filename}.json"
|
| 59 |
+
detailed_results = self._create_detailed_results(
|
| 60 |
+
username, questions_data, results_log, final_result, execution_time, timestamp
|
| 61 |
+
)
|
| 62 |
+
self._save_json_results(json_path, detailed_results)
|
| 63 |
+
files_created["json"] = str(json_path)
|
| 64 |
+
|
| 65 |
+
# 3. Summary Report
|
| 66 |
+
summary_path = self.results_dir / f"{base_filename}_summary.md"
|
| 67 |
+
self._save_summary_report(summary_path, detailed_results)
|
| 68 |
+
files_created["summary"] = str(summary_path)
|
| 69 |
+
|
| 70 |
+
logger.info(f"✅ Results logged to {len(files_created)} files: {list(files_created.keys())}")
|
| 71 |
+
|
| 72 |
+
except Exception as e:
|
| 73 |
+
logger.error(f"❌ Error logging results: {e}")
|
| 74 |
+
files_created["error"] = str(e)
|
| 75 |
+
|
| 76 |
+
return files_created
|
| 77 |
+
|
| 78 |
+
def _save_csv_results(self, path: Path, results_log: list, final_result: dict):
|
| 79 |
+
"""Save results in CSV format for easy sharing"""
|
| 80 |
+
with open(path, 'w', newline='', encoding='utf-8') as csvfile:
|
| 81 |
+
if not results_log:
|
| 82 |
+
return
|
| 83 |
+
|
| 84 |
+
fieldnames = list(results_log[0].keys()) + ['Correct', 'Score']
|
| 85 |
+
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
| 86 |
+
|
| 87 |
+
# Header
|
| 88 |
+
writer.writeheader()
|
| 89 |
+
|
| 90 |
+
# Add overall results info
|
| 91 |
+
score = final_result.get('score', 'N/A')
|
| 92 |
+
correct_count = final_result.get('correct_count', 'N/A')
|
| 93 |
+
total_attempted = final_result.get('total_attempted', len(results_log))
|
| 94 |
+
|
| 95 |
+
# Write each result
|
| 96 |
+
for i, row in enumerate(results_log):
|
| 97 |
+
row_data = row.copy()
|
| 98 |
+
row_data['Correct'] = 'Unknown' # We don't get individual correct/incorrect from API
|
| 99 |
+
row_data['Score'] = f"{score}% ({correct_count}/{total_attempted})" if i == 0 else ""
|
| 100 |
+
writer.writerow(row_data)
|
| 101 |
+
|
| 102 |
+
def _create_detailed_results(self, username: str, questions_data: list, results_log: list,
|
| 103 |
+
final_result: dict, execution_time: float, timestamp: str) -> dict:
|
| 104 |
+
"""Create comprehensive results dictionary"""
|
| 105 |
+
return {
|
| 106 |
+
"metadata": {
|
| 107 |
+
"username": username,
|
| 108 |
+
"timestamp": timestamp,
|
| 109 |
+
"execution_time_seconds": execution_time,
|
| 110 |
+
"total_questions": len(questions_data),
|
| 111 |
+
"total_processed": len(results_log),
|
| 112 |
+
"system_info": {
|
| 113 |
+
"gradio_version": "4.44.0",
|
| 114 |
+
"python_version": "3.x",
|
| 115 |
+
"space_id": os.getenv("SPACE_ID", "local"),
|
| 116 |
+
"space_host": os.getenv("SPACE_HOST", "local")
|
| 117 |
+
}
|
| 118 |
+
},
|
| 119 |
+
"evaluation_results": {
|
| 120 |
+
"overall_score": final_result.get('score', 'N/A'),
|
| 121 |
+
"correct_count": final_result.get('correct_count', 'N/A'),
|
| 122 |
+
"total_attempted": final_result.get('total_attempted', len(results_log)),
|
| 123 |
+
"success_rate": f"{final_result.get('score', 0)}%",
|
| 124 |
+
"api_message": final_result.get('message', 'No message'),
|
| 125 |
+
"submission_successful": 'score' in final_result
|
| 126 |
+
},
|
| 127 |
+
"question_details": [
|
| 128 |
+
{
|
| 129 |
+
"index": i + 1,
|
| 130 |
+
"task_id": item.get("task_id"),
|
| 131 |
+
"question": item.get("question"),
|
| 132 |
+
"level": item.get("Level", "Unknown"),
|
| 133 |
+
"file_name": item.get("file_name", ""),
|
| 134 |
+
"submitted_answer": next(
|
| 135 |
+
(r["Submitted Answer"] for r in results_log if r.get("Task ID") == item.get("task_id")),
|
| 136 |
+
"No answer"
|
| 137 |
+
),
|
| 138 |
+
"question_length": len(item.get("question", "")),
|
| 139 |
+
"answer_length": len(next(
|
| 140 |
+
(r["Submitted Answer"] for r in results_log if r.get("Task ID") == item.get("task_id")),
|
| 141 |
+
""
|
| 142 |
+
))
|
| 143 |
+
}
|
| 144 |
+
for i, item in enumerate(questions_data)
|
| 145 |
+
],
|
| 146 |
+
"processing_summary": {
|
| 147 |
+
"questions_by_level": self._analyze_questions_by_level(questions_data),
|
| 148 |
+
"questions_with_files": len([q for q in questions_data if q.get("file_name")]),
|
| 149 |
+
"average_question_length": sum(len(q.get("question", "")) for q in questions_data) / len(questions_data) if questions_data else 0,
|
| 150 |
+
"average_answer_length": sum(len(r.get("Submitted Answer", "")) for r in results_log) / len(results_log) if results_log else 0,
|
| 151 |
+
"processing_time_per_question": execution_time / len(results_log) if results_log else 0
|
| 152 |
+
},
|
| 153 |
+
"raw_results_log": results_log,
|
| 154 |
+
"api_response": final_result
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
def _analyze_questions_by_level(self, questions_data: list) -> dict:
|
| 158 |
+
"""Analyze question distribution by level"""
|
| 159 |
+
level_counts = {}
|
| 160 |
+
for q in questions_data:
|
| 161 |
+
level = q.get("Level", "Unknown")
|
| 162 |
+
level_counts[level] = level_counts.get(level, 0) + 1
|
| 163 |
+
return level_counts
|
| 164 |
+
|
| 165 |
+
def _save_json_results(self, path: Path, detailed_results: dict):
|
| 166 |
+
"""Save detailed results in JSON format"""
|
| 167 |
+
with open(path, 'w', encoding='utf-8') as jsonfile:
|
| 168 |
+
json.dump(detailed_results, jsonfile, indent=2, ensure_ascii=False)
|
| 169 |
+
|
| 170 |
+
def _save_summary_report(self, path: Path, detailed_results: dict):
|
| 171 |
+
"""Save human-readable summary report"""
|
| 172 |
+
metadata = detailed_results["metadata"]
|
| 173 |
+
results = detailed_results["evaluation_results"]
|
| 174 |
+
summary = detailed_results["processing_summary"]
|
| 175 |
+
|
| 176 |
+
report = f"""# GAIA Agent Evaluation Report
|
| 177 |
+
|
| 178 |
+
## Summary
|
| 179 |
+
- **User**: {metadata['username']}
|
| 180 |
+
- **Date**: {metadata['timestamp']}
|
| 181 |
+
- **Overall Score**: {results['overall_score']}% ({results['correct_count']}/{results['total_attempted']} correct)
|
| 182 |
+
- **Execution Time**: {metadata['execution_time_seconds']:.2f} seconds
|
| 183 |
+
- **Submission Status**: {'✅ Success' if results['submission_successful'] else '❌ Failed'}
|
| 184 |
+
|
| 185 |
+
## Question Analysis
|
| 186 |
+
- **Total Questions**: {metadata['total_questions']}
|
| 187 |
+
- **Successfully Processed**: {metadata['total_processed']}
|
| 188 |
+
- **Questions with Files**: {summary['questions_with_files']}
|
| 189 |
+
- **Average Question Length**: {summary['average_question_length']:.0f} characters
|
| 190 |
+
- **Average Answer Length**: {summary['average_answer_length']:.0f} characters
|
| 191 |
+
- **Processing Time per Question**: {summary['processing_time_per_question']:.2f} seconds
|
| 192 |
+
|
| 193 |
+
## Questions by Level
|
| 194 |
+
"""
|
| 195 |
+
|
| 196 |
+
for level, count in summary['questions_by_level'].items():
|
| 197 |
+
report += f"- **Level {level}**: {count} questions\n"
|
| 198 |
+
|
| 199 |
+
report += f"""
|
| 200 |
+
## API Response
|
| 201 |
+
{results['api_message']}
|
| 202 |
+
|
| 203 |
+
## System Information
|
| 204 |
+
- **Space ID**: {metadata['system_info']['space_id']}
|
| 205 |
+
- **Space Host**: {metadata['system_info']['space_host']}
|
| 206 |
+
- **Gradio Version**: {metadata['system_info']['gradio_version']}
|
| 207 |
+
|
| 208 |
+
---
|
| 209 |
+
*Report generated automatically by GAIA Agent System*
|
| 210 |
+
"""
|
| 211 |
+
|
| 212 |
+
with open(path, 'w', encoding='utf-8') as f:
|
| 213 |
+
f.write(report)
|
| 214 |
+
|
| 215 |
+
def get_latest_results(self, username: str = None) -> list:
|
| 216 |
+
"""Get list of latest result files"""
|
| 217 |
+
pattern = f"gaia_evaluation_{username}_*" if username else "gaia_evaluation_*"
|
| 218 |
+
files = list(self.results_dir.glob(pattern))
|
| 219 |
+
files.sort(key=lambda x: x.stat().st_mtime, reverse=True)
|
| 220 |
+
return files[:10] # Return 10 most recent
|
| 221 |
+
|
| 222 |
class GAIAAgentApp:
|
| 223 |
"""Production GAIA Agent Application with Unit 4 API integration"""
|
| 224 |
|
| 225 |
def __init__(self, hf_token: Optional[str] = None):
|
| 226 |
"""Initialize the application with optional HF token"""
|
| 227 |
+
|
| 228 |
+
# Priority order: 1) passed hf_token, 2) HF_TOKEN env var
|
| 229 |
+
if not hf_token:
|
| 230 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 231 |
+
|
| 232 |
try:
|
| 233 |
# Try main QwenClient first
|
| 234 |
from models.qwen_client import QwenClient
|
|
|
|
| 238 |
# Test if client is working
|
| 239 |
test_result = self.llm_client.generate("Test", max_tokens=5)
|
| 240 |
if not test_result.success:
|
| 241 |
+
logger.error(f"❌ Main client test failed: {test_result}")
|
| 242 |
raise Exception("Main client not working")
|
| 243 |
|
| 244 |
self.initialized = True
|
| 245 |
logger.info("✅ GAIA Agent system initialized with main client")
|
| 246 |
|
| 247 |
except Exception as e:
|
| 248 |
+
logger.error(f"❌ Main client failed ({e})")
|
| 249 |
+
|
| 250 |
+
# Only fallback to simple client if no HF token is available
|
| 251 |
+
if not hf_token:
|
| 252 |
+
logger.warning("⚠️ No HF token available, trying simple client...")
|
| 253 |
+
try:
|
| 254 |
+
# Fallback to simple client
|
| 255 |
+
from models.simple_client import SimpleClient
|
| 256 |
+
self.llm_client = SimpleClient(hf_token=hf_token)
|
| 257 |
+
self.workflow = SimpleGAIAWorkflow(self.llm_client)
|
| 258 |
+
self.initialized = True
|
| 259 |
+
logger.info("✅ GAIA Agent system initialized with simple client fallback")
|
| 260 |
+
except Exception as fallback_error:
|
| 261 |
+
logger.error(f"❌ Both main and fallback clients failed: {fallback_error}")
|
| 262 |
+
self.initialized = False
|
| 263 |
+
else:
|
| 264 |
+
logger.error("❌ Main client failed despite having HF token - not falling back to simple client")
|
| 265 |
self.initialized = False
|
| 266 |
|
| 267 |
@classmethod
|
|
|
|
| 298 |
"""
|
| 299 |
|
| 300 |
if not self.initialized:
|
| 301 |
+
return "❌ System not initialized", "", ""
|
| 302 |
|
| 303 |
if not question.strip():
|
| 304 |
return "❌ Please provide a question", "", ""
|
|
|
|
| 508 |
|
| 509 |
def format_auth_status(profile: gr.OAuthProfile | None) -> str:
|
| 510 |
"""Format authentication status for display in UI"""
|
| 511 |
+
|
| 512 |
+
# Check for HF_TOKEN first
|
| 513 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 514 |
+
|
| 515 |
+
if hf_token:
|
| 516 |
+
# HF_TOKEN is available - this is the best case scenario
|
| 517 |
+
return """
|
| 518 |
+
### 🎯 Authentication Status: HF_TOKEN Environment Variable
|
| 519 |
+
|
| 520 |
+
**🚀 FULL SYSTEM CAPABILITIES ENABLED**
|
| 521 |
+
|
| 522 |
+
**Authentication Source**: HF_TOKEN environment variable
|
| 523 |
+
**Scopes**: read, inference (full access)
|
| 524 |
+
|
| 525 |
+
**Available Features:**
|
| 526 |
+
- ✅ **Advanced Model Access**: Full Qwen model capabilities (7B/32B/72B)
|
| 527 |
+
- ✅ **High Performance**: 30%+ expected GAIA score
|
| 528 |
+
- ✅ **Complete Pipeline**: All agents and tools fully functional
|
| 529 |
+
- ✅ **Web Research**: Full DuckDuckGo search capabilities
|
| 530 |
+
- ✅ **File Processing**: Complete multi-format file handling
|
| 531 |
+
- ✅ **Manual Testing**: Individual question processing
|
| 532 |
+
- ✅ **Official Evaluation**: GAIA benchmark submission
|
| 533 |
+
|
| 534 |
+
💡 **Status**: Optimal configuration for GAIA benchmark performance.
|
| 535 |
+
"""
|
| 536 |
+
|
| 537 |
if not profile:
|
| 538 |
return """
|
| 539 |
### 🔐 Authentication Status: Not Logged In
|
|
|
|
| 543 |
**What you can do:**
|
| 544 |
- ✅ Manual question testing (limited functionality)
|
| 545 |
- ❌ Official GAIA benchmark evaluation (requires login)
|
| 546 |
+
|
| 547 |
+
**For Best Performance**: Set HF_TOKEN as a Space secret for full capabilities.
|
| 548 |
"""
|
| 549 |
|
| 550 |
username = profile.username
|
|
|
|
| 594 |
status_parts.extend([
|
| 595 |
"",
|
| 596 |
"💡 **Note**: Your OAuth token has limited scopes (common with Gradio OAuth).",
|
| 597 |
+
"For best performance, set HF_TOKEN as a Space secret for full model access."
|
| 598 |
])
|
| 599 |
|
| 600 |
return "\n".join(status_parts)
|
|
|
|
| 602 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 603 |
"""
|
| 604 |
Fetches all questions from Unit 4 API, runs the GAIA Agent on them, submits all answers,
|
| 605 |
+
and displays the results. Also returns updated authentication status and downloadable files.
|
| 606 |
"""
|
| 607 |
+
start_time = time.time()
|
| 608 |
+
|
| 609 |
+
# Initialize result logger
|
| 610 |
+
result_logger = GAIAResultLogger()
|
| 611 |
+
|
| 612 |
# Get authentication status for display
|
| 613 |
auth_status = format_auth_status(profile)
|
| 614 |
|
| 615 |
# Get space info for code submission
|
| 616 |
space_id = os.getenv("SPACE_ID")
|
| 617 |
|
| 618 |
+
# Priority order for token: 1) HF_TOKEN env var, 2) OAuth token
|
| 619 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 620 |
+
oauth_token = None
|
| 621 |
+
username = "unknown_user"
|
| 622 |
+
|
| 623 |
+
if hf_token:
|
| 624 |
+
logger.info("🎯 Using HF_TOKEN environment variable for authentication")
|
| 625 |
+
oauth_token = hf_token
|
| 626 |
+
username = "hf_token_user"
|
| 627 |
+
elif profile:
|
| 628 |
username = f"{profile.username}"
|
| 629 |
oauth_token = getattr(profile, 'oauth_token', None) or getattr(profile, 'token', None)
|
| 630 |
+
logger.info(f"User logged in: {username}, OAuth token available: {oauth_token is not None}")
|
| 631 |
|
| 632 |
# Check if OAuth token has sufficient scopes
|
| 633 |
if oauth_token:
|
|
|
|
| 645 |
|
| 646 |
except Exception as e:
|
| 647 |
logger.warning(f"⚠️ Could not validate OAuth token: {e}")
|
|
|
|
| 648 |
else:
|
| 649 |
+
logger.info("User not logged in and no HF_TOKEN available.")
|
| 650 |
+
return "Please either login to Hugging Face or set HF_TOKEN environment variable.", None, auth_status, None, None, None
|
| 651 |
+
|
| 652 |
+
if not oauth_token:
|
| 653 |
+
return "No valid authentication token available. Please login or set HF_TOKEN environment variable.", None, auth_status, None, None, None
|
| 654 |
|
| 655 |
api_url = DEFAULT_API_URL
|
| 656 |
questions_url = f"{api_url}/questions"
|
| 657 |
submit_url = f"{api_url}/submit"
|
| 658 |
|
| 659 |
+
# 1. Instantiate GAIA Agent with token
|
| 660 |
try:
|
| 661 |
+
logger.info("🚀 Creating GAIA Agent with authenticated token")
|
| 662 |
+
agent = GAIAAgentApp.create_with_oauth_token(oauth_token)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
|
| 664 |
if not agent.initialized:
|
| 665 |
+
return "Error: GAIA Agent failed to initialize", None, auth_status, None, None, None
|
| 666 |
except Exception as e:
|
| 667 |
logger.error(f"Error instantiating agent: {e}")
|
| 668 |
+
return f"Error initializing GAIA Agent: {e}", None, auth_status, None, None, None
|
| 669 |
|
| 670 |
# Agent code URL
|
| 671 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
|
|
|
|
| 679 |
questions_data = response.json()
|
| 680 |
if not questions_data:
|
| 681 |
logger.error("Fetched questions list is empty.")
|
| 682 |
+
return "Fetched questions list is empty or invalid format.", None, auth_status, None, None, None
|
| 683 |
logger.info(f"Fetched {len(questions_data)} questions.")
|
| 684 |
except requests.exceptions.RequestException as e:
|
| 685 |
logger.error(f"Error fetching questions: {e}")
|
| 686 |
+
return f"Error fetching questions: {e}", None, auth_status, None, None, None
|
| 687 |
except requests.exceptions.JSONDecodeError as e:
|
| 688 |
logger.error(f"Error decoding JSON response from questions endpoint: {e}")
|
| 689 |
+
return f"Error decoding server response for questions: {e}", None, auth_status, None, None, None
|
| 690 |
except Exception as e:
|
| 691 |
logger.error(f"An unexpected error occurred fetching questions: {e}")
|
| 692 |
+
return f"An unexpected error occurred fetching questions: {e}", None, auth_status, None, None, None
|
| 693 |
|
| 694 |
# 3. Run GAIA Agent
|
| 695 |
results_log = []
|
|
|
|
| 724 |
|
| 725 |
if not answers_payload:
|
| 726 |
logger.error("GAIA Agent did not produce any answers to submit.")
|
| 727 |
+
return "GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), auth_status, None, None, None
|
| 728 |
|
| 729 |
# 4. Prepare Submission
|
| 730 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
|
|
| 737 |
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 738 |
response.raise_for_status()
|
| 739 |
result_data = response.json()
|
| 740 |
+
|
| 741 |
+
# Calculate execution time
|
| 742 |
+
execution_time = time.time() - start_time
|
| 743 |
+
|
| 744 |
+
# 6. Log results to files
|
| 745 |
+
logger.info("📝 Logging evaluation results...")
|
| 746 |
+
logged_files = result_logger.log_evaluation_results(
|
| 747 |
+
username=username,
|
| 748 |
+
questions_data=questions_data,
|
| 749 |
+
results_log=results_log,
|
| 750 |
+
final_result=result_data,
|
| 751 |
+
execution_time=execution_time
|
| 752 |
+
)
|
| 753 |
+
|
| 754 |
+
# Prepare download files
|
| 755 |
+
csv_file = logged_files.get("csv")
|
| 756 |
+
json_file = logged_files.get("json")
|
| 757 |
+
summary_file = logged_files.get("summary")
|
| 758 |
+
|
| 759 |
final_status = (
|
| 760 |
f"🎉 GAIA Agent Submission Successful!\n"
|
| 761 |
f"User: {result_data.get('username')}\n"
|
| 762 |
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 763 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 764 |
+
f"Execution Time: {execution_time:.2f} seconds\n"
|
| 765 |
+
f"Message: {result_data.get('message', 'No message received.')}\n\n"
|
| 766 |
+
f"📁 Results saved to {len([f for f in [csv_file, json_file, summary_file] if f])} files for sharing."
|
| 767 |
)
|
| 768 |
logger.info("Submission successful.")
|
| 769 |
results_df = pd.DataFrame(results_log)
|
| 770 |
+
return final_status, results_df, auth_status, csv_file, json_file, summary_file
|
| 771 |
except requests.exceptions.HTTPError as e:
|
| 772 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 773 |
try:
|
|
|
|
| 778 |
status_message = f"Submission Failed: {error_detail}"
|
| 779 |
logger.error(status_message)
|
| 780 |
results_df = pd.DataFrame(results_log)
|
| 781 |
+
return status_message, results_df, auth_status, None, None, None
|
| 782 |
except requests.exceptions.Timeout:
|
| 783 |
status_message = "Submission Failed: The request timed out."
|
| 784 |
logger.error(status_message)
|
| 785 |
results_df = pd.DataFrame(results_log)
|
| 786 |
+
return status_message, results_df, auth_status, None, None, None
|
| 787 |
except requests.exceptions.RequestException as e:
|
| 788 |
status_message = f"Submission Failed: Network error - {e}"
|
| 789 |
logger.error(status_message)
|
| 790 |
results_df = pd.DataFrame(results_log)
|
| 791 |
+
return status_message, results_df, auth_status, None, None, None
|
| 792 |
except Exception as e:
|
| 793 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 794 |
logger.error(status_message)
|
| 795 |
results_df = pd.DataFrame(results_log)
|
| 796 |
+
return status_message, results_df, auth_status, None, None, None
|
| 797 |
|
| 798 |
def create_interface():
|
| 799 |
"""Create the Gradio interface with both Unit 4 API and manual testing"""
|
|
|
|
| 1131 |
label="Questions and GAIA Agent Answers",
|
| 1132 |
wrap=True
|
| 1133 |
)
|
| 1134 |
+
|
| 1135 |
+
# Download section
|
| 1136 |
+
gr.Markdown("### 📁 Download Results")
|
| 1137 |
+
gr.Markdown("After evaluation completes, download your results in different formats:")
|
| 1138 |
+
|
| 1139 |
+
with gr.Row():
|
| 1140 |
+
csv_download = gr.File(
|
| 1141 |
+
label="📊 CSV Results",
|
| 1142 |
+
visible=False,
|
| 1143 |
+
interactive=False
|
| 1144 |
+
)
|
| 1145 |
+
|
| 1146 |
+
json_download = gr.File(
|
| 1147 |
+
label="🔍 Detailed JSON",
|
| 1148 |
+
visible=False,
|
| 1149 |
+
interactive=False
|
| 1150 |
+
)
|
| 1151 |
+
|
| 1152 |
+
summary_download = gr.File(
|
| 1153 |
+
label="📋 Summary Report",
|
| 1154 |
+
visible=False,
|
| 1155 |
+
interactive=False
|
| 1156 |
+
)
|
| 1157 |
|
| 1158 |
gr.Markdown("---")
|
| 1159 |
|
|
|
|
| 1222 |
)
|
| 1223 |
|
| 1224 |
# Event handlers for Unit 4 API
|
| 1225 |
+
def handle_evaluation_results(profile):
|
| 1226 |
+
"""Handle evaluation and update download visibility"""
|
| 1227 |
+
results = run_and_submit_all(profile)
|
| 1228 |
+
status, table, auth_status, csv_file, json_file, summary_file = results
|
| 1229 |
+
|
| 1230 |
+
# Update download file visibility and values
|
| 1231 |
+
csv_update = gr.update(value=csv_file, visible=csv_file is not None)
|
| 1232 |
+
json_update = gr.update(value=json_file, visible=json_file is not None)
|
| 1233 |
+
summary_update = gr.update(value=summary_file, visible=summary_file is not None)
|
| 1234 |
+
|
| 1235 |
+
return status, table, auth_status, csv_update, json_update, summary_update
|
| 1236 |
+
|
| 1237 |
unit4_run_button.click(
|
| 1238 |
+
fn=handle_evaluation_results,
|
| 1239 |
+
outputs=[unit4_status_output, unit4_results_table, auth_status_display,
|
| 1240 |
+
csv_download, json_download, summary_download]
|
| 1241 |
)
|
| 1242 |
|
| 1243 |
# Refresh authentication status
|
src/production_deployment_guide.md
CHANGED
|
@@ -143,18 +143,41 @@ suggested_storage: "small"
|
|
| 143 |
|
| 144 |
### 4. Environment Variables (Space Secrets)
|
| 145 |
|
| 146 |
-
|
| 147 |
-
The system uses OAuth authentication in production.
|
| 148 |
|
| 149 |
-
|
| 150 |
|
| 151 |
```bash
|
| 152 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
LANGCHAIN_TRACING_V2=true # Optional: LangSmith tracing
|
| 154 |
LANGCHAIN_API_KEY=your_key_here # Optional: LangSmith API key
|
| 155 |
LANGCHAIN_PROJECT=gaia-agent # Optional: LangSmith project
|
| 156 |
```
|
| 157 |
|
|
|
|
|
|
|
| 158 |
### 5. Authentication Flow in Production
|
| 159 |
|
| 160 |
```python
|
|
|
|
| 143 |
|
| 144 |
### 4. Environment Variables (Space Secrets)
|
| 145 |
|
| 146 |
+
**🎯 CRITICAL: Set HF_TOKEN for Full Model Access**
|
|
|
|
| 147 |
|
| 148 |
+
To get the **real GAIA Agent performance** (not SimpleClient fallback), you **MUST** set `HF_TOKEN` as a Space secret:
|
| 149 |
|
| 150 |
```bash
|
| 151 |
+
# Required for full model access and GAIA performance
|
| 152 |
+
HF_TOKEN=hf_your_token_here # REQUIRED: Your HuggingFace token
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
**How to set HF_TOKEN:**
|
| 156 |
+
1. Go to your Space settings in HuggingFace
|
| 157 |
+
2. Navigate to "Repository secrets"
|
| 158 |
+
3. Add new secret:
|
| 159 |
+
- **Name**: `HF_TOKEN`
|
| 160 |
+
- **Value**: Your HuggingFace token (from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens))
|
| 161 |
+
|
| 162 |
+
⚠️ **IMPORTANT**: Do NOT set `HF_TOKEN` as a regular environment variable - use Space secrets for security.
|
| 163 |
+
|
| 164 |
+
**Token Requirements:**
|
| 165 |
+
- Token must have **`read`** and **`inference`** scopes
|
| 166 |
+
- Generate token at: https://huggingface.co/settings/tokens
|
| 167 |
+
- Select "Fine-grained" token type
|
| 168 |
+
- Enable both scopes for full functionality
|
| 169 |
+
|
| 170 |
+
**Optional environment variables:**
|
| 171 |
+
|
| 172 |
+
```bash
|
| 173 |
+
# Optional: LangSmith tracing (if you want observability)
|
| 174 |
LANGCHAIN_TRACING_V2=true # Optional: LangSmith tracing
|
| 175 |
LANGCHAIN_API_KEY=your_key_here # Optional: LangSmith API key
|
| 176 |
LANGCHAIN_PROJECT=gaia-agent # Optional: LangSmith project
|
| 177 |
```
|
| 178 |
|
| 179 |
+
**⚠️ DO NOT SET**: The system automatically handles OAuth in production when HF_TOKEN is available.
|
| 180 |
+
|
| 181 |
### 5. Authentication Flow in Production
|
| 182 |
|
| 183 |
```python
|
src/tools/__pycache__/web_search_tool.cpython-310.pyc
CHANGED
|
Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ
|
|
|
src/tools/web_search_tool.py
CHANGED
|
@@ -88,17 +88,21 @@ class WebSearchTool(BaseTool):
|
|
| 88 |
|
| 89 |
def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 90 |
"""
|
| 91 |
-
Search the web using DuckDuckGo with
|
| 92 |
"""
|
| 93 |
-
max_retries = 3
|
| 94 |
-
retry_delay = 1.0
|
| 95 |
|
| 96 |
-
for attempt in range(
|
| 97 |
try:
|
| 98 |
-
logger.info(f"Searching web for: {query} (attempt {attempt + 1}/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
-
# Perform DuckDuckGo search with timeout
|
| 101 |
with DDGS() as ddgs:
|
|
|
|
| 102 |
search_results = list(ddgs.text(
|
| 103 |
keywords=query,
|
| 104 |
max_results=limit,
|
|
@@ -107,10 +111,8 @@ class WebSearchTool(BaseTool):
|
|
| 107 |
))
|
| 108 |
|
| 109 |
if not search_results:
|
| 110 |
-
if attempt <
|
| 111 |
logger.warning(f"No results on attempt {attempt + 1}, retrying...")
|
| 112 |
-
time.sleep(retry_delay)
|
| 113 |
-
retry_delay *= 2 # Exponential backoff
|
| 114 |
continue
|
| 115 |
else:
|
| 116 |
return {
|
|
@@ -154,30 +156,74 @@ class WebSearchTool(BaseTool):
|
|
| 154 |
"total_results": len(results),
|
| 155 |
"message": f"Found {len(results)} web search results"
|
| 156 |
}
|
| 157 |
-
|
| 158 |
except Exception as e:
|
| 159 |
-
|
| 160 |
-
if
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
else:
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
#
|
| 176 |
return {
|
| 177 |
"query": query,
|
| 178 |
"found": False,
|
| 179 |
-
"message": "
|
| 180 |
-
"results": []
|
|
|
|
| 181 |
}
|
| 182 |
|
| 183 |
def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
|
|
|
|
| 88 |
|
| 89 |
def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 90 |
"""
|
| 91 |
+
Search the web using DuckDuckGo with enhanced rate limiting handling
|
| 92 |
"""
|
|
|
|
|
|
|
| 93 |
|
| 94 |
+
for attempt in range(3):
|
| 95 |
try:
|
| 96 |
+
logger.info(f"Searching web for: {query} (attempt {attempt + 1}/3)")
|
| 97 |
+
|
| 98 |
+
# Progressive delays to handle rate limiting
|
| 99 |
+
if attempt > 0:
|
| 100 |
+
delay = 5 * (2 ** (attempt - 1)) # 5s, 10s delays
|
| 101 |
+
logger.info(f"Waiting {delay}s before retry due to rate limiting...")
|
| 102 |
+
time.sleep(delay)
|
| 103 |
|
|
|
|
| 104 |
with DDGS() as ddgs:
|
| 105 |
+
# Use DuckDuckGo search with proper parameters
|
| 106 |
search_results = list(ddgs.text(
|
| 107 |
keywords=query,
|
| 108 |
max_results=limit,
|
|
|
|
| 111 |
))
|
| 112 |
|
| 113 |
if not search_results:
|
| 114 |
+
if attempt < 2:
|
| 115 |
logger.warning(f"No results on attempt {attempt + 1}, retrying...")
|
|
|
|
|
|
|
| 116 |
continue
|
| 117 |
else:
|
| 118 |
return {
|
|
|
|
| 156 |
"total_results": len(results),
|
| 157 |
"message": f"Found {len(results)} web search results"
|
| 158 |
}
|
| 159 |
+
|
| 160 |
except Exception as e:
|
| 161 |
+
error_msg = str(e)
|
| 162 |
+
if "ratelimit" in error_msg.lower() or "rate limit" in error_msg.lower() or "403" in error_msg or "202" in error_msg or "429" in error_msg:
|
| 163 |
+
logger.warning(f"Web search attempt {attempt + 1} failed: {error_msg}")
|
| 164 |
+
if attempt < 2:
|
| 165 |
+
continue
|
| 166 |
else:
|
| 167 |
+
logger.error(f"Web search attempt {attempt + 1} failed with non-rate-limit error: {error_msg}")
|
| 168 |
+
if attempt < 2:
|
| 169 |
+
continue
|
| 170 |
+
|
| 171 |
+
# If all attempts failed, try fallback search strategy
|
| 172 |
+
logger.warning("All DuckDuckGo attempts failed, trying fallback search strategy...")
|
| 173 |
+
return self._fallback_search(query)
|
| 174 |
+
|
| 175 |
+
def _fallback_search(self, query: str) -> Dict[str, Any]:
|
| 176 |
+
"""
|
| 177 |
+
Fallback search strategy when DuckDuckGo is completely unavailable
|
| 178 |
+
"""
|
| 179 |
+
try:
|
| 180 |
+
# Try a simple Wikipedia search as fallback
|
| 181 |
+
import wikipedia
|
| 182 |
+
wikipedia.set_lang("en")
|
| 183 |
+
|
| 184 |
+
# Extract key terms from query for Wikipedia search
|
| 185 |
+
search_terms = query.replace("site:", "").strip()
|
| 186 |
+
|
| 187 |
+
try:
|
| 188 |
+
# Search Wikipedia pages
|
| 189 |
+
wiki_results = wikipedia.search(search_terms, results=3)
|
| 190 |
+
if wiki_results:
|
| 191 |
+
fallback_results = []
|
| 192 |
+
for i, page_title in enumerate(wiki_results[:2], 1):
|
| 193 |
+
try:
|
| 194 |
+
page = wikipedia.page(page_title)
|
| 195 |
+
summary = page.summary[:200] + "..." if len(page.summary) > 200 else page.summary
|
| 196 |
+
|
| 197 |
+
web_result = WebSearchResult(
|
| 198 |
+
title=f"{page_title} (Wikipedia)",
|
| 199 |
+
url=page.url,
|
| 200 |
+
snippet=summary
|
| 201 |
+
)
|
| 202 |
+
fallback_results.append(web_result.to_dict())
|
| 203 |
+
except:
|
| 204 |
+
continue
|
| 205 |
+
|
| 206 |
+
if fallback_results:
|
| 207 |
+
return {
|
| 208 |
+
"query": query,
|
| 209 |
+
"found": True,
|
| 210 |
+
"results": fallback_results,
|
| 211 |
+
"total_results": len(fallback_results),
|
| 212 |
+
"message": f"Using Wikipedia fallback search. Found {len(fallback_results)} results"
|
| 213 |
+
}
|
| 214 |
+
except:
|
| 215 |
+
pass
|
| 216 |
+
|
| 217 |
+
except ImportError:
|
| 218 |
+
pass
|
| 219 |
|
| 220 |
+
# Last resort: return a helpful message
|
| 221 |
return {
|
| 222 |
"query": query,
|
| 223 |
"found": False,
|
| 224 |
+
"message": "❌ Web search failed due to rate limiting. Please try again later or provide the information directly.",
|
| 225 |
+
"results": [],
|
| 226 |
+
"error_type": "search_failure"
|
| 227 |
}
|
| 228 |
|
| 229 |
def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
|