| import os
|
| from typing import Any, Dict, List, Tuple
|
| import pandas as pd
|
| import requests
|
|
|
def fetch_all_questions(timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Fetch all questions from the GAIA benchmark API.

    The base URL is read from the ``GAIA_API_URL`` environment variable and
    the questions are requested from ``<base URL>/questions``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If ``GAIA_API_URL`` is unset or empty, the request fails
            or returns an HTTP error status, or the body cannot be decoded
            as JSON. The message is prefixed with
            ``"Failed to fetch questions: "`` and the underlying error is
            chained as ``__cause__``.
    """
    try:
        api_url = os.getenv("GAIA_API_URL", "")
        if not api_url:
            raise ValueError("GAIA_API_URL environment variable not set")

        # Tolerate a trailing slash in the configured base URL so the
        # request does not go to ".../​/questions".
        base_url = api_url.rstrip("/")
        response = requests.get(f"{base_url}/questions", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Keep the generic exception type callers already catch, but chain
        # the cause so the original traceback is not lost.
        raise Exception(f"Failed to fetch questions: {e}") from e
|
|
|
def run_agent(
    agent: Any,
    questions: List[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Run the agent on all questions and collect results.

    Each question is passed to ``agent.get_answer``. An exception raised by
    the agent for one question is recorded in the log and does not stop the
    remaining questions from being processed.

    Args:
        agent: Object exposing ``get_answer(question)`` (the GaiaAgent
            instance).
        questions: List of question data from the API. The ``"id"`` and
            ``"question"`` keys are read from each entry, defaulting to
            ``"unknown"`` and ``""`` when absent.

    Returns:
        Tuple containing:
            - List of result logs for display, one entry per question with
              the keys ``"Question ID"``, ``"Question"``, ``"Answer"`` and
              ``"Status"``.
            - List of answer payloads for submission, one entry per
              question that produced an answer, with the keys
              ``"question_id"`` and ``"answer"``.
    """
    results_log: List[Dict[str, Any]] = []
    answers_payload: List[Dict[str, Any]] = []

    for question in questions:
        question_id = question.get("id", "unknown")
        question_text = question.get("question", "")

        try:
            answer = agent.get_answer(question)
        except Exception as e:
            # Best effort: log the failure and move on to the next question.
            results_log.append(
                {
                    "Question ID": question_id,
                    "Question": question_text,
                    "Answer": f"Error: {e}",
                    "Status": "Failed",
                }
            )
            continue

        # A plain truthiness test would discard a legitimate numeric answer
        # of 0 / 0.0, so numbers (but not bools) always count as answered.
        is_number = isinstance(answer, (int, float)) and not isinstance(answer, bool)
        has_answer = bool(answer) or is_number

        results_log.append(
            {
                "Question ID": question_id,
                "Question": question_text,
                "Answer": answer if has_answer else "No answer provided",
                "Status": "Success" if has_answer else "Failed",
            }
        )

        if has_answer:
            answers_payload.append(
                {
                    "question_id": question_id,
                    "answer": answer,
                }
            )

    return results_log, answers_payload
|
|
|
def submit_answers(
    submission_data: Dict[str, Any],
    results_log: List[Dict[str, Any]],
    timeout: float = 60.0,
) -> Tuple[str, pd.DataFrame]:
    """Submit answers to the GAIA benchmark API.

    The base URL is read from the ``GAIA_API_URL`` environment variable and
    the payload is POSTed as JSON to ``<base URL>/submit``. Failures are
    reported through the returned status message rather than raised.

    Args:
        submission_data: Dictionary containing submission details; sent as
            the JSON request body.
        results_log: List of result logs for display.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        Tuple containing:
            - Status message string: a success message, or
              ``"Error submitting answers: <reason>"`` on failure.
            - DataFrame of results for display, built from ``results_log``
              in both the success and the failure case.
    """
    # The display frame is identical on success and failure; build it once.
    results_df = pd.DataFrame(results_log)

    try:
        api_url = os.getenv("GAIA_API_URL", "")
        if not api_url:
            raise ValueError("GAIA_API_URL environment variable not set")

        # Tolerate a trailing slash in the configured base URL.
        base_url = api_url.rstrip("/")
        response = requests.post(
            f"{base_url}/submit",
            json=submission_data,
            timeout=timeout,
        )
        response.raise_for_status()
    except Exception as e:
        # Deliberate boundary: surface any failure as a status message so
        # the caller can still show the results table.
        return f"Error submitting answers: {e}", results_df

    return "Answers submitted successfully!", results_df