Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import os | |
| import time | |
| import requests | |
| from tools import TOOLS | |
| import traceback | |
| from agent import solve_task, load_tasks | |
| from mistral_hf_wrapper import MistralInference | |
# Runtime configuration, read from the environment. Each value is None when
# the corresponding variable is unset.
API_URL = os.environ.get("HF_MISTRAL_ENDPOINT")  # passed to MistralInference
API_TOKEN = os.environ.get("HF_TOKEN")  # passed to MistralInference
USERNAME = os.environ.get("HF_USERNAME")  # sent as "username" on submission
CODE_LINK = os.environ.get("HF_CODE_LINK")  # sent as "agent_code" on submission

# Upper bound on solve attempts per task.
MAX_RETRIES = 3
# Seconds. NOTE(review): not referenced by the code visible in this file --
# confirm whether another module consumes it.
INFERENCE_TIMEOUT = 45
def _solve_with_retries(task, task_id):
    """Return the answer string for one task, trying up to MAX_RETRIES times.

    Exactly one string is returned per task: the stripped model answer on the
    first successful attempt, or ``"ERROR: <last exception>"`` once every
    attempt has failed. Intermediate failures are logged but never turned
    into answers, so the submission never holds duplicate entries for a task.
    """
    last_error = None
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            start = time.time()
            result = solve_task(task, tools=TOOLS)
            duration = time.time() - start
            # Strip before the emptiness check so a whitespace-only reply is
            # retried instead of being submitted as an empty answer.
            answer = (result.get("submitted_answer") or "").strip()
            if not answer:
                raise ValueError("Empty model response")
        except Exception as e:
            # Broad catch is deliberate: one bad task must not abort the run.
            last_error = e
            print(f"[WARN] Task {task_id} attempt {attempt}/{MAX_RETRIES} failed: {e}")
            traceback.print_exc()  # Shows full stack trace
        else:
            print(f"[INFO] Answer in {duration:.1f}s: {answer[:100]}...")
            return answer

    print(f"[ERROR] ❌ Task {task_id} failed after {MAX_RETRIES} attempts")
    return f"ERROR: {str(last_error)}"


def _submit_answers(answers):
    """POST the collected answers to the scoring endpoint and return a status string."""
    try:
        res = requests.post(
            "https://agents-course-unit4-scoring.hf.space/submit",
            headers={"Content-Type": "application/json"},
            json={
                "username": USERNAME,
                "agent_code": CODE_LINK,
                "answers": answers,
            },
            timeout=60,  # Set submission timeout
        )
        if res.ok:
            print("[INFO] Submission successful")
            return json.dumps(res.json(), indent=2)
        return f"Error submitting: {res.status_code} - {res.text}"
    except requests.exceptions.Timeout:
        return " Submission timed out. Try again later."
    except Exception as e:
        # Also covers a non-JSON body on a 2xx response (res.json() raising).
        return f" Submission failed with exception: {e}"


def run_and_submit_all():
    """Solve every loaded task and submit the answers to the leaderboard.

    Returns:
        A human-readable string for the Gradio textbox: the pretty-printed
        JSON response on success, otherwise a message describing why no
        tasks were run or why the submission failed.
    """
    # NOTE(review): this instance is never passed to solve_task; it is kept so
    # that construction still happens as before -- confirm whether it is needed.
    model = MistralInference(api_url=API_URL, api_token=API_TOKEN)

    tasks = load_tasks()
    print(f"[INFO] Loaded {len(tasks)} tasks from metadata.jsonl")
    if not tasks:
        return "No tasks loaded from metadata.jsonl. Make sure the file exists and is valid."

    answers = []
    for i, task in enumerate(tasks):
        task_id = task.get("task_id", f"UNKNOWN-{i}")
        print(f"[INFO] Solving task {i+1}/{len(tasks)}: {task_id}")
        answers.append({
            "task_id": task_id,
            "submitted_answer": _solve_with_retries(task, task_id),
        })
        # Checkpoint after each task so progress survives a crash mid-run.
        with open("partial_answers.json", "w", encoding="utf-8") as f:
            json.dump(answers, f, indent=2)

    print("[INFO] Submitting answers to leaderboard...")
    return _submit_answers(answers)
# Gradio UI: a single no-input action that runs the whole benchmark and shows
# the returned status text in a textbox. Launched as soon as the module runs.
demo = gr.Interface(
    fn=run_and_submit_all,
    inputs=[],
    outputs="textbox",
    title="GAIA Benchmark Agent Submission",
)
demo.launch()