Spaces:
Sleeping
Sleeping
| import requests | |
| import json | |
| import os | |
| from typing import List, Dict, Any | |
| from gaia_agent import GaiaAgent | |
class GaiaSubmission:
    """Client that fetches questions from an HTTP API, answers them with a
    ``GaiaAgent`` and optionally submits the answers back.

    All network helpers are best-effort: on any failure they print a message
    and return an empty/error value instead of raising.
    """

    # Seconds to wait on the server. ``requests`` waits indefinitely when no
    # timeout is given, which would hang a whole evaluation run.
    DEFAULT_TIMEOUT = 30.0

    def __init__(self, api_base_url: str, api_key: str = None,
                 timeout: float = DEFAULT_TIMEOUT):
        """Store connection settings and create the agent.

        Args:
            api_base_url: Base URL of the API; a trailing ``/`` is stripped.
            api_key: Optional token sent as ``Authorization: Bearer <key>``.
            timeout: Timeout in seconds passed to every ``requests`` call.
        """
        self.api_base_url = api_base_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.agent = GaiaAgent()
        self.headers = {'Content-Type': 'application/json'}
        if api_key:
            self.headers['Authorization'] = f'Bearer {api_key}'

    def get_questions(self) -> List[Dict[str, Any]]:
        """Fetch all questions from ``GET {api_base_url}/questions``.

        Returns:
            The decoded JSON response, or ``[]`` if the request or the
            decoding failed.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/questions",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Error fetching questions: {e}")
            return []

    def get_random_question(self) -> Dict[str, Any]:
        """Fetch one question from ``GET {api_base_url}/random-question``.

        Returns:
            The decoded JSON response, or ``{}`` if the request or the
            decoding failed.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/random-question",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Error fetching random question: {e}")
            return {}

    def download_file(self, task_id: str, file_path: str) -> bool:
        """Download ``GET {api_base_url}/files/{task_id}`` into ``file_path``.

        Args:
            task_id: Task identifier used to build the download URL.
            file_path: Local path the response body is written to (binary).

        Returns:
            ``True`` if the file was written, ``False`` on any error.
        """
        try:
            response = requests.get(
                f"{self.api_base_url}/files/{task_id}",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            with open(file_path, 'wb') as f:
                f.write(response.content)
            return True
        except Exception as e:
            print(f"Error downloading file for task {task_id}: {e}")
            return False

    def submit_answer(self, task_id: str, answer: str,
                      reasoning_trace: str = "") -> Dict[str, Any]:
        """Submit one answer via ``POST {api_base_url}/submit``.

        Args:
            task_id: Identifier of the task being answered.
            answer: Sent as ``model_answer``.
            reasoning_trace: Sent as ``reasoning_trace``.

        Returns:
            The decoded JSON response, or ``{"error": <message>}`` on failure.
        """
        try:
            submission = {
                "task_id": task_id,
                "model_answer": answer,
                "reasoning_trace": reasoning_trace,
            }
            response = requests.post(
                f"{self.api_base_url}/submit",
                headers=self.headers,
                json=submission,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Error submitting answer for task {task_id}: {e}")
            return {"error": str(e)}

    @staticmethod
    def _safe_local_name(file_name: Any) -> str:
        """Reduce an API-supplied file name to its final path component.

        The name comes from a remote server, so directory parts (``../``,
        absolute paths, backslash separators) are dropped to keep downloads
        inside the current working directory.

        Returns:
            The bare file name, or ``''`` when nothing usable remains.
        """
        if not isinstance(file_name, str):
            return ''
        name = os.path.basename(file_name.replace('\\', '/'))
        return '' if name in ('', '.', '..') else name

    def process_single_question(self, question_data: Dict[str, Any]) -> Dict[str, Any]:
        """Download a question's files and run the agent on its text.

        Args:
            question_data: Mapping read for ``task_id``, ``question`` and an
                optional ``files`` list of mappings with a ``filename`` key.

        Returns:
            A dict with ``task_id``, ``question``, ``answer``,
            ``reasoning_trace`` and ``status`` (``"success"`` or ``"error"``).
        """
        task_id = question_data.get('task_id')
        # ``or ''`` also covers an explicit null question in the payload.
        question = question_data.get('question') or ''
        print(f"Processing task {task_id}: {question[:100]}...")

        # Fetch any associated files; a missing or null ``files`` means none.
        # NOTE(review): the download URL depends only on task_id, so several
        # entries for one task hit the same endpoint; confirm against the API.
        for file_info in question_data.get('files') or []:
            raw_name = file_info.get('filename') if isinstance(file_info, dict) else None
            if not raw_name:
                continue
            file_name = self._safe_local_name(raw_name)
            if not file_name:
                # Nothing safe to write to (e.g. the name was only "..").
                print(f"Failed to download file: {raw_name}")
                continue
            if self.download_file(task_id, file_name):
                print(f"Downloaded file: {file_name}")
            else:
                print(f"Failed to download file: {file_name}")

        # Run the agent; any failure becomes an "error" result, not a raise.
        try:
            answer, reasoning_trace = self.agent(question)
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            return {
                "task_id": task_id,
                "question": question,
                "answer": "",
                "reasoning_trace": error_msg,
                "status": "error",
            }

        print(f"Answer: {answer}")
        return {
            "task_id": task_id,
            "question": question,
            "answer": answer,
            "reasoning_trace": reasoning_trace,
            "status": "success",
        }

    def run_evaluation(self, submit_answers: bool = False) -> List[Dict[str, Any]]:
        """Process every question returned by ``get_questions``.

        Args:
            submit_answers: When true, each successful result is submitted
                and the response stored under ``submission_result``.

        Returns:
            One result dict per question, or ``[]`` if none were retrieved.
        """
        questions = self.get_questions()
        if not questions:
            print("No questions retrieved. Exiting.")
            return []

        print(f"Retrieved {len(questions)} questions")
        results = []
        for i, question_data in enumerate(questions, 1):
            print(f"\n--- Question {i}/{len(questions)} ---")
            result = self.process_single_question(question_data)
            results.append(result)

            # Submit the answer if submission is enabled.
            if submit_answers and result['status'] == 'success':
                submission_result = self.submit_answer(
                    result['task_id'],
                    result['answer'],
                    result['reasoning_trace'],
                )
                result['submission_result'] = submission_result
                print(f"Submission result: {submission_result}")
        return results

    def save_results(self, results: List[Dict[str, Any]],
                     filename: str = "gaia_results.json"):
        """Write ``results`` to ``filename`` as indented UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"Results saved to {filename}")

    def save_submission_format(self, results: List[Dict[str, Any]],
                               filename: str = "gaia_submission.jsonl"):
        """Write successful results to ``filename`` as JSON Lines.

        Each line holds ``task_id``, ``model_answer`` and ``reasoning_trace``.
        Results whose ``status`` is missing or not ``"success"`` are skipped.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            for result in results:
                if result.get('status') != 'success':
                    continue
                submission_entry = {
                    "task_id": result['task_id'],
                    "model_answer": result['answer'],
                    "reasoning_trace": result['reasoning_trace'],
                }
                f.write(json.dumps(submission_entry, ensure_ascii=False) + '\n')
        print(f"Submission file saved to {filename}")