Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import pandas as pd | |
| import gradio as gr | |
| from agent import agent | |
# Base URL of the scoring service; the "/questions" and "/submit" endpoints
# are built by appending to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# URL prefix joined with a question's relative ``file_name`` to form the
# attachment URL that is handed to the agent.
GAIA_DATA_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"

# Mutable module-level record of the most recent run: run_and_submit_all
# stores the submission payload under "payload" and the per-question result
# rows under "results".
LAST: dict = {}
def submit_with_retry(payload):
    """POST ``payload`` as JSON to the scoring service's ``/submit`` endpoint.

    Up to four attempts are made. Server errors (HTTP 5xx), network errors and
    undecodable response bodies are retried with an exponential backoff
    (1s, 2s, 4s). Client errors (HTTP 4xx) are not retried, because resending
    the same payload cannot succeed.

    Args:
        payload: JSON-serialisable submission body.

    Returns:
        ``(True, parsed_json)`` on success, ``(False, "Failed")`` otherwise.
        This function never raises; failures are reported on stdout.
    """
    # Local import keeps this function self-contained with respect to the
    # file's top-level import block.
    import time

    url = f"{DEFAULT_API_URL}/submit"
    max_attempts = 4

    for attempt in range(1, max_attempts + 1):
        try:
            r = requests.post(url, json=payload, timeout=120)
            if r.status_code < 500:
                # Raises requests.HTTPError for 4xx, handled below.
                r.raise_for_status()
                return True, r.json()
            error = f"HTTP {r.status_code}"
        except requests.HTTPError as exc:
            # Only reachable for 4xx responses: the request itself is wrong.
            print(f"Submission rejected, not retrying: {exc}")
            return False, "Failed"
        except Exception as exc:
            # Deliberately broad: this is a best-effort boundary that must not
            # propagate into the UI handler. The error is logged, not hidden.
            error = repr(exc)

        print(f"Submit attempt {attempt}/{max_attempts} failed: {error}")
        if attempt < max_attempts:
            time.sleep(2 ** (attempt - 1))

    return False, "Failed"
def _resolve_file_url(file_name):
    """Return the attachment URL for ``file_name``, or None when it is empty."""
    if not file_name:
        return None
    if file_name.startswith("http"):
        # Already an absolute URL; use it unchanged.
        return file_name
    return f"{GAIA_DATA_URL}/{file_name}"


def run_and_submit_all(progress=gr.Progress()):
    """Fetch all questions, answer each with ``agent``, and submit the answers.

    Args:
        progress: Gradio progress tracker, updated once per question.

    Returns:
        A ``(status_text, results_frame)`` tuple. ``status_text`` is the
        per-question log followed by the final score or a failure message;
        ``results_frame`` is a DataFrame with "Task ID", "Question" and
        "Answer" columns (empty when the run aborts before any question is
        answered).

    Side effects:
        Stores the submission payload and result rows in the module-level
        ``LAST`` dict and prints progress to stdout.
    """
    hf_token = os.getenv("HF_TOKEN")
    space_id = os.getenv("SPACE_ID")
    # HF_TOKEN is only checked for presence here; it is not used further in
    # this function.
    if not hf_token or not space_id:
        return "β Missing HF_TOKEN or SPACE_ID", pd.DataFrame()

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # A failed or malformed question fetch is reported in the status box
    # instead of surfacing as an unhandled exception in the UI.
    try:
        r = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        r.raise_for_status()
        questions = r.json()
    except (requests.RequestException, ValueError) as exc:
        return f"Failed to fetch questions: {exc}", pd.DataFrame()
    if not isinstance(questions, list) or not questions:
        return "No questions received from the scoring API", pd.DataFrame()

    total = len(questions)
    results = []
    answers = []
    status_lines = []

    progress(0, desc="Starting...")
    status_lines.append("π Starting evaluation\n")

    for idx, q in enumerate(questions, start=1):
        task_id = q.get("task_id") if isinstance(q, dict) else None
        question_text = q.get("question") if isinstance(q, dict) else None
        if not task_id or question_text is None:
            # Without both fields the item cannot be answered or submitted.
            status_lines.append(f"Skipping malformed question {idx}/{total}\n")
            continue

        file_url = _resolve_file_url(q.get("file_name"))

        status_lines.append(f"π Processing {idx}/{total}: {question_text}")
        print(f"Processing {idx}/{total}: {question_text}")
        progress(idx / total, desc=f"{idx}/{total}")

        # One failing question must not discard the answers already collected.
        try:
            ans = agent(question_text, files=[file_url] if file_url else None)
        except Exception as exc:
            print(f"Agent failed on task {task_id}: {exc!r}")
            status_lines.append(f" Agent error: {exc}")
            ans = None

        # None and blank answers both fall back to "0"; str(None) would
        # otherwise be submitted as the literal text "None".
        ans = ("" if ans is None else str(ans)).strip() or "0"

        results.append({"Task ID": task_id, "Question": question_text, "Answer": ans})
        answers.append({"task_id": task_id, "submitted_answer": ans})
        status_lines.append(f" β Answer: {ans}\n")

    if not answers:
        return "\n".join(status_lines) + "\nNo answers to submit", pd.DataFrame(results)

    payload = {
        "username": os.getenv("HF_USERNAME", "BiGuan"),
        "agent_code": agent_code,
        "answers": answers,
    }
    LAST["payload"] = payload
    LAST["results"] = results

    ok, data = submit_with_retry(payload)
    if ok:
        final_status = f"β Score: {data.get('score')}%\nCorrect: {data.get('correct_count')}/{data.get('total_attempted')}\n"
    else:
        final_status = "β Submission failed"

    full_status = "\n".join(status_lines) + "\n" + final_status
    return full_status, pd.DataFrame(results)
# UI definition: one button that triggers the full evaluation run, a text box
# for the run log, and a table for the per-question results.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent (90βPoint Attempt)")
    btn = gr.Button("Run Evaluation", variant="primary")
    status = gr.Textbox(label="Detailed Progress", lines=12)
    table = gr.DataFrame(label="Results (Task ID, Question, Answer)")
    # The handler takes no UI inputs; its two return values map onto the
    # status text box and the results table, in that order.
    btn.click(run_and_submit_all, outputs=[status, table], queue=True, show_progress="full")

# Start the server only when executed as a script, so importing this module
# does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()