File size: 3,864 Bytes
ebc57d0 d001a5c 960dd3f f3ed293 960dd3f ebc57d0 d001a5c 960dd3f ebc57d0 960dd3f ebc57d0 d001a5c ebc57d0 960dd3f d001a5c 960dd3f ebc57d0 d001a5c 960dd3f ebc57d0 d001a5c ebc57d0 960dd3f b4f9d22 960dd3f d001a5c 960dd3f f3ed293 960dd3f f3ed293 d001a5c 960dd3f d001a5c 960dd3f ebc57d0 d001a5c 960dd3f ebc57d0 960dd3f ebc57d0 d001a5c 960dd3f d001a5c 960dd3f d001a5c 960dd3f d001a5c 960dd3f 1a4e9af ebc57d0 960dd3f ebc57d0 d001a5c 81105ee d001a5c 960dd3f d001a5c 960dd3f 81105ee 960dd3f 81105ee d001a5c 960dd3f 81105ee ebc57d0 b4f9d22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import os
import requests
import pandas as pd
import gradio as gr
from crew import run_crew  # ← your multi-agent logic
# Base URL of the scoring service; evaluate_and_submit appends "/questions"
# and "/submit" to it.
API_URL = "https://agents-course-unit4-scoring.hf.space"
# ─── AGENT WRAPPER ──────────────────────────────────────────────────────────────
class CrewAgent:
    """Callable adapter that hands each question to the crew pipeline."""

    def __call__(self, question: str) -> str:
        """Return whatever ``run_crew`` produces for *question*."""
        # This must go through the real crew logic. No attachment path is
        # forwarded: file_path is always the empty string.
        crew_answer = run_crew(question, file_path="")
        return crew_answer
# Module-level agent instance; evaluate_and_submit calls it once per question.
agent = CrewAgent()
# ─── MAIN HANDLER ───────────────────────────────────────────────────────────────
def evaluate_and_submit(username: str):
    """Run the agent on every benchmark question and submit the answers.

    Args:
        username: Hugging Face username to submit under. Surrounding
            whitespace is ignored; an empty or missing value aborts the run.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable
        message. ``results`` is a DataFrame with "Task ID", "Question" and
        "Answer" columns, or ``None`` when the run stops before any question
        is processed (missing username or failed question fetch).

    Failures are reported through ``status`` instead of being raised, so the
    UI always receives something to display. Per-question debug output is
    printed to stdout.
    """
    username = (username or "").strip()
    if not username:
        return "❌ Please enter your Hugging Face username.", None

    # SPACE_ID is read from the environment; without it no code link is sent.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    )

    # 1) Fetch questions
    try:
        response = requests.get(f"{API_URL}/questions", timeout=30)
        # Surface HTTP errors as such instead of as a JSON decoding failure.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None
    if not isinstance(questions, list):
        return (
            "❌ Failed to fetch questions: unexpected response type "
            f"{type(questions).__name__}",
            None,
        )

    # 2) Answer questions, logging every result
    placeholder_answers = {"this is a default answer.", "", "n/a"}
    answers, log = [], []
    for item in questions:
        try:
            qid, qtxt = item["task_id"], item["question"]
        except (KeyError, TypeError) as e:
            # One malformed entry must not abort the whole evaluation.
            print(f"⚠️ Skipping malformed question entry {item!r}: {e}")
            continue

        try:
            ans = agent(qtxt)
            # Keep non-string results (e.g. numbers) instead of losing them
            # to an AttributeError on .strip(); None becomes an empty answer.
            if not isinstance(ans, str):
                ans = "" if ans is None else str(ans)
            # Debug print:
            print(f"QID: {qid} | Q: {qtxt[:60]}... | Agent Answer: {ans}")
            # Add warning if placeholder detected
            if ans.strip().lower() in placeholder_answers:
                print(f"⚠️ Warning: Agent returned a default/empty answer for QID {qid}.")
        except Exception as e:
            # The error text is submitted as the answer so the task still
            # appears in the results table.
            ans = f"AGENT ERROR: {e}"
            print(f"⚠️ Agent error on QID {qid}: {e}")

        answers.append({"task_id": qid, "submitted_answer": ans})
        log.append({"Task ID": qid, "Question": qtxt, "Answer": ans})

    # Build the results table once and reuse it for logging and the return value.
    results = pd.DataFrame(log)

    # Show part of the DataFrame in the console for debugging (best effort).
    try:
        print("=== First 5 results ===")
        print(results.head())
    except Exception as e:
        print(f"DataFrame print error: {e}")

    if not answers:
        return "⚠️ No answers generated.", results

    # 3) Submit
    try:
        resp = requests.post(
            f"{API_URL}/submit",
            json={"username": username, "agent_code": agent_code, "answers": answers},
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()
        status = (
            "✅ Submission successful!\n"
            f"Score: {data.get('score')} % "
            f"({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"❌ Submission failed: {e}"

    return status, results
# ─── GRADIO UI ──────────────────────────────────────────────────────────────────
# Single-page UI: one username textbox in, a status textbox and a results
# table out, all driven by evaluate_and_submit.
# NOTE(review): the description names a "Run Evaluation & Submit" button, but
# no button label is configured here — confirm the rendered UI matches.
demo = gr.Interface(
    title="GAIA Agent Submission",
    description=(
        "Enter your Hugging Face username and click **Run Evaluation & Submit**. "
        "The app will run your agent on all benchmark questions and send the answers."
    ),
    fn=evaluate_and_submit,
    inputs=gr.Textbox(placeholder="e.g. john-doe", label="Hugging Face username"),
    outputs=[
        gr.Textbox(lines=6, label="Status"),
        gr.DataFrame(label="Submitted Answers"),
    ],
)
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|