File size: 2,680 Bytes
eccf8e4 3c4371f e70eacb 10e9b7d e70eacb 85d85a6 e70eacb 26e135d 85d85a6 e70eacb 26e135d 85d85a6 e70eacb 85d85a6 e70eacb 31243f4 e70eacb 31243f4 e70eacb 85d85a6 e70eacb 85d85a6 e70eacb 26e135d e70eacb 26e135d e80aab9 e70eacb 7e4a06b e70eacb e80aab9 e70eacb 3c4371f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | import requests
import pandas as pd
import gradio as gr
from agent import HF_USERNAME, AGENT_CODE_URL, solve_question
# Base URL of the scoring service. The functions in this file build their
# endpoints from it: /random-question, /questions, /files/<task_id>, /submit.
GAIA_API_URL = "https://agents-course-unit4-scoring.hf.space"
def get_random_question():
    """Fetch one question from the ``/random-question`` endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(f"{GAIA_API_URL}/random-question", timeout=30)
    # Fail here rather than passing an HTTP error payload on as if it were a task.
    response.raise_for_status()
    return response.json()
def get_questions():
    """Fetch the question set from the ``/questions`` endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(f"{GAIA_API_URL}/questions", timeout=30)
    # Fail here rather than returning an HTTP error payload as if it were questions.
    response.raise_for_status()
    return response.json()
def get_task_file(task_id: str) -> bytes | None:
    """Download the content served at ``/files/<task_id>``.

    Args:
        task_id: Identifier interpolated into the request path.

    Returns:
        The raw response body when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    url = f"{GAIA_API_URL}/files/{task_id}"
    # Bound the wait so a stalled download cannot hang the caller forever.
    res = requests.get(url, timeout=30)
    if res.status_code == 200:
        return res.content
    # Any other status is treated as "no file available".
    return None
def submit_answer(answers: list[dict], username: str, agent_code: str = AGENT_CODE_URL) -> dict:
    """POST a batch of answers to the ``/submit`` endpoint.

    Args:
        answers: Answer records to send under the ``"answers"`` key.
        username: Value sent under the ``"username"`` key.
        agent_code: Value sent under the ``"agent_code"`` key; defaults to
            ``AGENT_CODE_URL``.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so an error body from the server is returned as-is.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    # Bound the wait: without a timeout a stalled server blocks the caller forever.
    resp = requests.post(
        f"{GAIA_API_URL}/submit",
        json=payload,
        timeout=60,
    )
    return resp.json()
def run_random():
    """Fetch one random question, run the agent on it, and print the answer."""
    print("Fetching a random GAIA question…\n")
    result = solve_question(get_random_question())
    print(f"Answer : {result}")
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question and submit the collected answers.

    This is a generator: each yielded value is a ``(status_text, table)``
    pair, where ``table`` is ``None`` or a ``pandas.DataFrame`` with the
    columns ``Task ID``, ``Question`` and ``Answer``.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in. Only ``profile.username`` is read.

    Yields:
        Progress updates while questions are processed, then a final status
        describing the submission result (or the failure that stopped it).
    """
    if not profile:
        yield "Please log in to Hugging Face first.", None
        return

    username = profile.username

    # Report a failed fetch in the status output instead of raising out of
    # the generator.
    try:
        questions = get_questions()
    except (requests.RequestException, ValueError) as e:
        yield f"Error fetching questions: {e}", None
        return

    # The loop below needs a non-empty list of question records; anything
    # else (an empty list, an error object) must not reach submission.
    if not isinstance(questions, list) or not questions:
        yield "No questions were returned by the API; nothing to submit.", None
        return

    total = len(questions)
    yield f" Fetched {total} questions. Starting agent...", None

    answers, log = [], []
    for i, item in enumerate(questions, start=1):
        task_id, question = item["task_id"], item["question"]
        # Deliberately broad: one failing question must not abort the whole
        # run, so the error text is recorded as that question's answer.
        try:
            answer = solve_question(item)
        except Exception as e:
            answer = f"ERROR: {e}"
        answers.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Answer": answer})
        yield f" Progress: {i}/{total} — Last answer: {str(answer)[:80]}", pd.DataFrame(log)

    # Keep the results table visible even if the submission itself fails.
    try:
        resp = submit_answer(answers, username)
    except (requests.RequestException, ValueError) as e:
        yield f"Submission failed: {e}", pd.DataFrame(log)
        return

    final_status = (
        f"Done! User: {resp.get('username')} | Score: {resp.get('score')}% | "
        f"Correct: {resp.get('correct_count')}/{resp.get('total_attempted')} | "
        f"{resp.get('message')}"
    )
    yield final_status, pd.DataFrame(log)
# UI definition: two Markdown headers, a login button, a run button, and two
# output components (status text and results table).
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown("Log in with Hugging Face, then click the button to run and submit all answers.")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    # Non-editable text box; receives the first element of each pair yielded
    # by run_and_submit_all.
    status = gr.Textbox(label="Status", lines=3, interactive=False)
    # Receives the second element of each yielded pair (a DataFrame or None).
    table = gr.DataFrame(label="Results", wrap=True)
    # No explicit inputs are wired to the handler.
    # NOTE(review): the handler's `profile` argument is presumably supplied by
    # Gradio based on its OAuthProfile annotation — confirm against Gradio docs.
    btn.click(fn=run_and_submit_all, outputs=[status, table])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|