LangGraph_GAIA / app.py
BiGuan's picture
Update app.py
f72f3ee verified
Raw
History Blame Contribute Delete
3 kB
import os
import requests
import pandas as pd
import gradio as gr
from agent import agent
# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Prefix joined with a task's file_name when that name does not start with "http".
GAIA_DATA_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"
# Module-level store for the "payload" and "results" of the most recent run.
LAST = {}
def submit_with_retry(payload):
    """POST the answers payload to the scoring API, retrying transient failures.

    Up to four attempts are made. Connection-level errors and 5xx responses
    are retried after a short, growing pause. A 4xx response is not retried
    (re-sending the same payload cannot succeed), and a response whose body
    is not valid JSON is not retried either, so an already accepted
    submission is never posted twice.

    Args:
        payload: JSON-serialisable submission body.

    Returns:
        ``(True, parsed_json)`` when the server accepts the submission,
        otherwise ``(False, "Failed")``. The reason for a failure is printed.
    """
    import time  # local import: only needed for the pause between retries

    url = f"{DEFAULT_API_URL}/submit"
    max_attempts = 4
    for attempt in range(1, max_attempts + 1):
        try:
            r = requests.post(url, json=payload, timeout=120)
        except requests.RequestException as exc:
            # Network-level problem (timeout, DNS, connection reset): retry.
            print(f"Submit attempt {attempt}/{max_attempts} failed: {exc}")
        else:
            if r.status_code < 500:
                try:
                    r.raise_for_status()
                    return True, r.json()
                except (requests.RequestException, ValueError) as exc:
                    # Client error or unparseable body: retrying will not help.
                    print(f"Submission not accepted: {exc}")
                    return False, "Failed"
            print(
                f"Submit attempt {attempt}/{max_attempts} got "
                f"HTTP {r.status_code}; will retry"
            )
        if attempt < max_attempts:
            time.sleep(2 ** (attempt - 1))  # 1s, 2s, 4s
    return False, "Failed"
def _resolve_file_url(file_name):
    """Return the download URL for a task attachment, or None if there is none."""
    if not file_name:
        return None
    if file_name.startswith("http"):
        return file_name
    return f"{GAIA_DATA_URL}/{file_name}"


def run_and_submit_all(progress=gr.Progress()):
    """Run the agent on every question from the scoring API and submit the answers.

    Requires the ``HF_TOKEN`` and ``SPACE_ID`` environment variables; the
    submitting username is read from ``HF_USERNAME`` (default ``"BiGuan"``).
    The submitted payload and the per-task results are also stored in the
    module-level ``LAST`` dict.

    Args:
        progress: Gradio progress tracker, updated once per question.

    Returns:
        A ``(status_text, results_dataframe)`` tuple. On a configuration or
        fetch error the status text describes the problem and the dataframe
        is empty.
    """
    hf_token = os.getenv("HF_TOKEN")
    space_id = os.getenv("SPACE_ID")
    if not hf_token or not space_id:
        return "❌ Missing HF_TOKEN or SPACE_ID", pd.DataFrame()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch the task list; report failures in the UI instead of raising.
    try:
        r = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        r.raise_for_status()
        questions = r.json()
    except (requests.RequestException, ValueError) as exc:
        return f"❌ Failed to fetch questions: {exc}", pd.DataFrame()
    if not isinstance(questions, list) or not questions:
        return "❌ No questions received", pd.DataFrame()

    total = len(questions)
    results = []
    answers = []
    status_lines = []
    progress(0, desc="Starting...")
    status_lines.append("🚀 Starting evaluation\n")

    for idx, q in enumerate(questions, start=1):
        task_id = q["task_id"]
        question_text = q["question"]
        file_url = _resolve_file_url(q.get("file_name"))

        status_lines.append(f"📋 Processing {idx}/{total}: {question_text}")
        print(f"Processing {idx}/{total}: {question_text}")
        progress(idx / total, desc=f"{idx}/{total}")

        try:
            ans = agent(question_text, files=[file_url] if file_url else None)
        except Exception as exc:
            # Boundary handler: one failing task must not discard the
            # answers already collected for the other tasks.
            print(f"Agent failed on task {task_id}: {exc}")
            status_lines.append(f" ⚠️ Agent error: {exc}")
            ans = None
        # Missing or blank answers fall back to "0", as for empty output.
        ans = ("" if ans is None else str(ans)).strip() or "0"

        results.append({"Task ID": task_id, "Question": question_text, "Answer": ans})
        answers.append({"task_id": task_id, "submitted_answer": ans})
        status_lines.append(f" ✅ Answer: {ans}\n")

    payload = {
        "username": os.getenv("HF_USERNAME", "BiGuan"),
        "agent_code": agent_code,
        "answers": answers,
    }
    LAST["payload"] = payload
    LAST["results"] = results

    ok, data = submit_with_retry(payload)
    if ok:
        final_status = (
            f"✅ Score: {data.get('score')}%\n"
            f"Correct: {data.get('correct_count')}/{data.get('total_attempted')}\n"
        )
    else:
        final_status = "❌ Submission failed"

    full_status = "\n".join(status_lines) + "\n" + final_status
    return full_status, pd.DataFrame(results)
# Single-page UI: a title, a trigger button, a progress log and a results table.
with gr.Blocks() as demo:
    gr.Markdown(value="# GAIA Agent (90‑Point Attempt)")
    btn = gr.Button(
        value="Run Evaluation",
        variant="primary",
    )
    status = gr.Textbox(
        label="Detailed Progress",
        lines=12,
    )
    table = gr.DataFrame(
        label="Results (Task ID, Question, Answer)",
    )
    # The handler takes no UI inputs; its two return values fill the
    # textbox and the table respectively.
    btn.click(
        fn=run_and_submit_all,
        outputs=[status, table],
        queue=True,
        show_progress="full",
    )

# Start the web server at module load, as the original script does.
demo.launch()