# Page header captured together with this file (not part of the program):
#   manasajanj's picture
#   Update app.py
#   85d85a6 verified
import requests
import pandas as pd
import gradio as gr
from agent import HF_USERNAME, AGENT_CODE_URL, solve_question
# Base URL of the GAIA scoring service; the helpers in this file build their
# endpoint URLs (/random-question, /questions, /files/<id>, /submit) from it.
GAIA_API_URL = "https://agents-course-unit4-scoring.hf.space"
def get_random_question(timeout: float = 30):
    """Fetch one randomly chosen question from the scoring API.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of ``GET {GAIA_API_URL}/random-question``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(f"{GAIA_API_URL}/random-question", timeout=timeout)
    # Fail here rather than hand an error payload to the caller as a "question".
    response.raise_for_status()
    return response.json()
def get_questions(timeout: float = 30):
    """Fetch the full question set from the scoring API.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of ``GET {GAIA_API_URL}/questions``
        (presumably a list of question dicts -- confirm against the API).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(f"{GAIA_API_URL}/questions", timeout=timeout)
    # Fail here rather than let an error payload be iterated as questions.
    response.raise_for_status()
    return response.json()
def get_task_file(task_id: str, timeout: float = 30) -> bytes | None:
    """Download the file attached to a task.

    Args:
        task_id: Identifier appended to the ``/files/`` endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The raw response body when the server replies with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    url = f"{GAIA_API_URL}/files/{task_id}"
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    res = requests.get(url, timeout=timeout)
    # Any non-200 status is reported as "no file" rather than as an error.
    if res.status_code != 200:
        return None
    return res.content
def submit_answer(
    answers: list[dict],
    username: str,
    agent_code: str = AGENT_CODE_URL,
    timeout: float = 60,
) -> dict:
    """POST a batch of answers to the scoring API and return its reply.

    Args:
        answers: Answer records to submit, sent as the ``answers`` field.
        username: Sent as the ``username`` field.
        agent_code: Sent as the ``agent_code`` field.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked here, so an error payload is returned as-is.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            body that is not valid JSON.
    """
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    resp = requests.post(
        f"{GAIA_API_URL}/submit",
        json=payload,
        timeout=timeout,
    )
    return resp.json()
def run_random():
    """Solve a single random GAIA question and print the agent's answer."""
    print("Fetching a random GAIA question…\n")
    # Fetch the question and pass it straight to the agent.
    answer = solve_question(get_random_question())
    print(f"Answer : {answer}")
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question, stream progress, then submit.

    Generator used as a Gradio event handler. Every ``yield`` is a
    ``(status_text, results)`` pair, where ``results`` is either ``None``
    or a DataFrame with columns ``Task ID``, ``Question`` and ``Answer``.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Yields:
        Progress updates while the agent runs, then one final status line
        describing the submission result or the reason the run stopped.
    """
    if not profile:
        yield "Please log in to Hugging Face first.", None
        return

    username = profile.username

    # Report network/JSON failures as a status line instead of crashing
    # the handler with a bare traceback.
    try:
        questions = get_questions()
    except (requests.RequestException, ValueError) as e:
        yield f"Failed to fetch questions: {e}", None
        return

    # An error payload (e.g. a dict) or an empty list leaves nothing to run.
    if not isinstance(questions, list) or not questions:
        yield f"No questions to run. Server returned: {str(questions)[:200]}", None
        return

    total = len(questions)
    yield f" Fetched {total} questions. Starting agent...", None

    answers, log = [], []
    for i, item in enumerate(questions, start=1):
        # A malformed entry cannot be tied to an answer; skip it rather than
        # abort the whole run.
        if not isinstance(item, dict) or "task_id" not in item or "question" not in item:
            continue
        task_id, question = item["task_id"], item["question"]
        try:
            answer = solve_question(item)
        except Exception as e:
            # Best effort: one failing question must not lose the others, so
            # the error text is recorded (and submitted) as that answer.
            answer = f"ERROR: {e}"
        answers.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Answer": answer})
        yield f" Progress: {i}/{total} — Last answer: {str(answer)[:80]}", pd.DataFrame(log)

    results = pd.DataFrame(log)
    if not answers:
        yield "The agent produced no answers; nothing was submitted.", results
        return

    try:
        resp = submit_answer(answers, username)
    except (requests.RequestException, ValueError) as e:
        yield f"Submission failed: {e}", results
        return

    # A reply without a score is not a successful grading; show it verbatim
    # rather than formatting a misleading "Done!" line full of None values.
    if not isinstance(resp, dict) or "score" not in resp:
        yield f"Submission was not scored. Server response: {resp}", results
        return

    status = (f"Done! User: {resp.get('username')} | Score: {resp.get('score')}% | "
              f"Correct: {resp.get('correct_count')}/{resp.get('total_attempted')} | "
              f"{resp.get('message')}")
    yield status, results
# Gradio UI: components are created in display order inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown("Log in with Hugging Face, then click the button to run and submit all answers.")
    gr.LoginButton()

    btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(interactive=False, lines=3, label="Status")
    table = gr.DataFrame(wrap=True, label="Results")

    # No explicit inputs are wired. The handler's `profile` argument is
    # presumably supplied by Gradio from the OAuth login -- confirm in the
    # Gradio docs. Each yielded (status, table) pair updates both outputs.
    btn.click(run_and_submit_all, outputs=[status, table])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()