File size: 3,036 Bytes
3df5153
 
3d1b0c5
a6de347
3df5153
a6de347
2cbde76
6a1d4dd
 
3df5153
33cdb30
3d1b0c5
 
94b05f0
3df5153
a6de347
 
 
3d1b0c5
 
 
3df5153
a6de347
 
0617575
 
 
 
6d03c10
a6de347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0617575
a6de347
 
 
 
 
 
 
 
47b1528
 
 
 
 
 
a6de347
 
 
 
 
3d1b0c5
2cbde76
 
 
 
 
 
 
 
 
a6de347
2cbde76
 
a6de347
2cbde76
 
 
3d1b0c5
a6de347
 
2cbde76
a6de347
 
d6ca7ae
d2fa4ad
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import json
import os
import time
import requests
from tools import TOOLS
import traceback
from agent import solve_task, load_tasks
from mistral_hf_wrapper import MistralInference

# Inference endpoint and credentials, read from the environment (None when unset).
API_URL = os.environ.get("HF_MISTRAL_ENDPOINT")
API_TOKEN = os.environ.get("HF_TOKEN")

# Identity fields sent along with the leaderboard submission (None when unset).
USERNAME = os.environ.get("HF_USERNAME")
CODE_LINK = os.environ.get("HF_CODE_LINK")

# Upper bound on solve attempts per task.
MAX_RETRIES = 3
# Per-inference time budget in seconds.
# NOTE(review): not referenced by the code visible in this file -- confirm usage.
INFERENCE_TIMEOUT = 45  # seconds

def _solve_with_retries(task, task_id):
    """Solve one task, retrying up to MAX_RETRIES times, and return ONE answer.

    Args:
        task: Task record as produced by ``load_tasks()``; passed through
            unchanged to ``solve_task``.
        task_id: Identifier used for logging and for the returned entry.

    Returns:
        A dict ``{"task_id": ..., "submitted_answer": ...}``. On success the
        answer is the stripped model answer; if every attempt fails it is
        ``"ERROR: <last exception message>"``.
    """
    last_error = None
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            start = time.time()
            result = solve_task(task, tools=TOOLS)
            duration = time.time() - start

            # Coerce to text so a non-string answer (e.g. the number 0) is
            # neither mistaken for "empty" nor crashes on slicing/strip.
            raw_answer = result.get("submitted_answer")
            answer = "" if raw_answer is None else str(raw_answer).strip()
            if not answer:
                raise ValueError("Empty model response")
        except Exception as e:
            # Broad catch is deliberate: any failure inside the agent should
            # trigger a retry rather than abort the whole run.
            last_error = e
            print(f"[ERROR] ❌ Task {task_id} failed on attempt {attempt}/{MAX_RETRIES}")
            traceback.print_exc()  # Shows full stack trace
        else:
            print(f"[INFO] Answer in {duration:.1f}s: {answer[:100]}...")
            return {"task_id": task_id, "submitted_answer": answer}

    # Only reached when every attempt failed: record a single error entry
    # (previously one entry was appended per failed attempt, producing
    # duplicate task_ids in the submission).
    return {"task_id": task_id, "submitted_answer": f"ERROR: {last_error}"}


def run_and_submit_all():
    """Solve every loaded task and submit the answers to the scoring endpoint.

    Loads tasks via ``load_tasks()``, solves each one (with retries), writes
    the collected answers to ``partial_answers.json`` as a best-effort local
    copy, then POSTs them to the leaderboard.

    Returns:
        A human-readable string for the Gradio textbox: the pretty-printed
        JSON response on success, or a message describing why loading or
        submission failed.
    """
    # Constructed as in the original flow but not referenced below.
    # NOTE(review): confirm whether construction is still required here.
    model = MistralInference(api_url=API_URL, api_token=API_TOKEN)
    tasks = load_tasks()

    print(f"[INFO] Loaded {len(tasks)} tasks from metadata.jsonl")

    if not tasks:
        return "No tasks loaded from metadata.jsonl. Make sure the file exists and is valid."

    answers = []
    for i, task in enumerate(tasks):
        task_id = task.get("task_id", f"UNKNOWN-{i}")
        print(f"[INFO] Solving task {i+1}/{len(tasks)}: {task_id}")
        answers.append(_solve_with_retries(task, task_id))

    # The local dump is a debugging aid; failing to write it must not
    # prevent the actual submission.
    try:
        with open("partial_answers.json", "w", encoding="utf-8") as f:
            json.dump(answers, f, indent=2)
    except OSError as e:
        print(f"[WARN] Could not write partial_answers.json: {e}")

    print("[INFO] Submitting answers to leaderboard...")

    try:
        res = requests.post(
            "https://agents-course-unit4-scoring.hf.space/submit",
            headers={"Content-Type": "application/json"},
            json={
                "username": USERNAME,
                "agent_code": CODE_LINK,
                "answers": answers
            },
            timeout=60  # Set submission timeout
        )
        if res.ok:
            print("[INFO]  Submission successful")
            return json.dumps(res.json(), indent=2)
        else:
            return f"Error submitting: {res.status_code} - {res.text}"

    except requests.exceptions.Timeout:
        return " Submission timed out. Try again later."
    except Exception as e:
        return f" Submission failed with exception: {e}"

# Gradio UI: a no-input form that runs the full solve-and-submit pipeline
# and shows the returned status text in a textbox.
demo = gr.Interface(
    title="GAIA Benchmark Agent Submission",
    fn=run_and_submit_all,
    inputs=[],
    outputs="textbox",
)
demo.launch()