File size: 2,680 Bytes
eccf8e4
3c4371f
e70eacb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10e9b7d
e70eacb
 
 
 
 
 
 
 
 
85d85a6
e70eacb
26e135d
 
85d85a6
e70eacb
 
26e135d
 
85d85a6
e70eacb
85d85a6
 
 
e70eacb
31243f4
e70eacb
31243f4
e70eacb
85d85a6
e70eacb
 
85d85a6
 
 
e70eacb
26e135d
e70eacb
 
26e135d
e80aab9
 
 
 
e70eacb
 
7e4a06b
e70eacb
 
 
 
e80aab9
 
e70eacb
3c4371f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import requests
import pandas as pd
import gradio as gr
from agent import HF_USERNAME, AGENT_CODE_URL, solve_question

# Base URL of the scoring service; endpoint paths such as "/questions",
# "/random-question", "/files/<task_id>" and "/submit" are appended to it below.
GAIA_API_URL = "https://agents-course-unit4-scoring.hf.space"

def get_random_question(*, timeout: float = 30):
    """Fetch one random question from the scoring service.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of ``GET {GAIA_API_URL}/random-question``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(f"{GAIA_API_URL}/random-question", timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error payload to the agent.
    response.raise_for_status()
    return response.json()
def get_questions(*, timeout: float = 30):
    """Fetch the full list of questions from the scoring service.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of ``GET {GAIA_API_URL}/questions``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(f"{GAIA_API_URL}/questions", timeout=timeout)
    # Fail loudly on HTTP errors instead of returning an error payload as questions.
    response.raise_for_status()
    return response.json()
def get_task_file(task_id: str, *, timeout: float = 30) -> bytes | None:
    """Download the file attached to a task, if there is one.

    Args:
        task_id: Identifier inserted into the ``/files/{task_id}`` path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The raw response body when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    res = requests.get(f"{GAIA_API_URL}/files/{task_id}", timeout=timeout)
    # Any status other than 200 is treated as "no file available".
    return res.content if res.status_code == 200 else None
def submit_answer(
    answers: list[dict],
    username: str,
    agent_code: str = AGENT_CODE_URL,
    *,
    timeout: float = 60,
) -> dict:
    """Post a batch of answers to the scoring service.

    Args:
        answers: Answer records to send under the ``"answers"`` key.
        username: Value sent under the ``"username"`` key.
        agent_code: Value sent under the ``"agent_code"`` key.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so the body is returned whatever the status code.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    resp = requests.post(
        f"{GAIA_API_URL}/submit",
        json=payload,
        timeout=timeout,
    )
    return resp.json()
def run_random():
    """Solve one randomly fetched question and print the agent's answer."""
    print("Fetching a random GAIA question…\n")
    # Fetch the task and hand it straight to the agent.
    answer = solve_question(get_random_question())
    print(f"Answer   : {answer}")

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question, then submit all answers for scoring.

    This is a generator. Each yielded pair is a progress update made of a
    status string and either ``None`` or a ``pandas.DataFrame`` holding one
    row (``Task ID``, ``Question``, ``Answer``) per question processed so far.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in. Only ``profile.username`` is read.

    Yields:
        tuple: ``(status_text, results_table)``.
    """
    if not profile:
        yield "Please log in to Hugging Face first.", None
        return

    username = profile.username

    # Report fetch failures as a status message rather than letting the
    # exception escape the generator. ValueError covers an invalid JSON body.
    try:
        questions = get_questions()
    except (requests.RequestException, ValueError) as e:
        yield f"Failed to fetch questions: {e}", None
        return

    # Nothing to solve means nothing to submit.
    if not questions:
        yield "No questions were returned; nothing to submit.", None
        return

    total = len(questions)
    yield f" Fetched {total} questions. Starting agent...", None

    answers, log = [], []

    for i, item in enumerate(questions, start=1):
        task_id, question = item["task_id"], item["question"]
        # Deliberate catch-all: one failing question must not abort the whole
        # run, so the error text is recorded as that question's answer.
        try:
            answer = solve_question(item)
        except Exception as e:
            answer = f"ERROR: {e}"

        answers.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Answer": answer})

        yield f" Progress: {i}/{total} — Last answer: {str(answer)[:80]}", pd.DataFrame(log)

    results = pd.DataFrame(log)

    # Keep the results table visible even when the submission itself fails.
    try:
        resp = submit_answer(answers, username)
    except (requests.RequestException, ValueError) as e:
        yield f"Submission failed: {e}", results
        return

    summary = (f"Done! User: {resp.get('username')} | Score: {resp.get('score')}% | "
               f"Correct: {resp.get('correct_count')}/{resp.get('total_attempted')} | "
               f"{resp.get('message')}")
    yield summary, results



# Build the Gradio UI: a heading, instructions, a login button, the run button,
# and the two output components that display run_and_submit_all's results.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown("Log in with Hugging Face, then click the button to run and submit all answers.")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    # Targets for the (status_text, results_table) pairs the handler yields.
    status = gr.Textbox(label="Status", lines=3, interactive=False)
    table = gr.DataFrame(label="Results", wrap=True)
    # No `inputs` are passed here. NOTE(review): the handler's `profile` argument
    # is presumably injected by Gradio from the login session based on its
    # gr.OAuthProfile annotation — confirm against the Gradio OAuth docs.
    btn.click(fn=run_and_submit_all, outputs=[status, table])

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()