|
|
import os |
|
|
import requests |
|
|
import pandas as pd |
|
|
import gradio as gr |
|
|
from crew import run_crew |
|
|
|
|
|
# Base URL of the scoring service. The rest of this file builds its two
# endpoints from it: f"{API_URL}/questions" and f"{API_URL}/submit".
API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
|
|
|
|
class CrewAgent:
    """Callable adapter that answers a question by delegating to ``run_crew``."""

    def __call__(self, question: str, file_path: str = "") -> str:
        """Run the crew on *question* and return its result.

        Args:
            question: Text of the question to answer.
            file_path: Forwarded unchanged to ``run_crew`` as its ``file_path``
                keyword. Defaults to ``""``, the value that used to be
                hard-coded here, so existing one-argument calls behave
                exactly as before.

        Returns:
            Whatever ``run_crew`` returns (annotated as ``str``; the actual
            type is decided by ``run_crew`` -- TODO confirm).
        """
        return run_crew(question, file_path=file_path)


# Module-level instance shared by the evaluation code below.
agent = CrewAgent()
|
|
|
|
|
|
|
|
|
|
|
# Lower-cased, stripped answers that count as "the agent produced nothing".
_PLACEHOLDER_ANSWERS = frozenset({"this is a default answer.", "", "n/a"})


def _fetch_questions():
    """Download the question list, raising on HTTP errors or a non-list payload."""
    resp = requests.get(f"{API_URL}/questions", timeout=30)
    # Without this an HTTP error page with a JSON body would be iterated as
    # if it were the question list.
    resp.raise_for_status()
    questions = resp.json()
    if not isinstance(questions, list):
        raise ValueError(
            f"expected a list of questions, got {type(questions).__name__}"
        )
    return questions


def _answer_question(qid, qtxt):
    """Run the agent on one question and return its answer as a string."""
    try:
        raw = agent(qtxt)
    except Exception as e:  # best effort: one failing question must not stop the run
        print(f"⚠️ Agent error on QID {qid}: {e}")
        return f"AGENT ERROR: {e}"

    # The agent is not guaranteed to hand back a plain str; coerce so that
    # the checks below and the JSON payload always see text.
    ans = "" if raw is None else str(raw)
    print(f"QID: {qid} | Q: {str(qtxt)[:60]}... | Agent Answer: {ans}")
    if ans.strip().lower() in _PLACEHOLDER_ANSWERS:
        print(f"⚠️ Warning: Agent returned a default/empty answer for QID {qid}.")
    return ans


def evaluate_and_submit(username: str):
    """Run the agent on every benchmark question and submit the answers.

    Progress and per-question results are printed for debugging.

    Args:
        username: Hugging Face username to submit under. Surrounding
            whitespace is ignored; an empty value aborts early.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with the columns ``Task ID``,
        ``Question`` and ``Answer``, or ``None`` when the run stopped before
        any question was processed (missing username or failed download).
    """
    username = (username or "").strip()
    if not username:
        return "❌ Please enter your Hugging Face username.", None

    # Link to this Space's code, sent with the submission when SPACE_ID is set.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    )

    try:
        questions = _fetch_questions()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None

    answers, log = [], []
    for item in questions:
        qid = item.get("task_id") if isinstance(item, dict) else None
        qtxt = item.get("question") if isinstance(item, dict) else None
        if qid is None or qtxt is None:
            # A single malformed entry should not abort the whole evaluation.
            print(f"⚠️ Skipping malformed question entry: {item!r}")
            continue
        ans = _answer_question(qid, qtxt)
        answers.append({"task_id": qid, "submitted_answer": ans})
        log.append({"Task ID": qid, "Question": qtxt, "Answer": ans})

    results = pd.DataFrame(log)
    try:
        print("=== First 5 results ===")
        print(results.head())
    except Exception as e:  # debug output only; never fail the run over it
        print(f"DataFrame print error: {e}")

    if not answers:
        return "⚠️ No answers generated.", results

    try:
        resp = requests.post(
            f"{API_URL}/submit",
            json={"username": username, "agent_code": agent_code, "answers": answers},
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()
        status = (
            "✅ Submission successful!\n"
            f"Score: {data.get('score')} % "
            f"({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:  # UI boundary: report the failure instead of raising
        status = f"❌ Submission failed: {e}"

    return status, results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: one username textbox in; a status message and a table of the
# submitted answers out. Both outputs come from evaluate_and_submit.
demo = gr.Interface(
    title="GAIA Agent Submission",
    description=(
        "Enter your Hugging Face username and click **Run Evaluation & Submit**. "
        "The app will run your agent on all benchmark questions and send the answers."
    ),
    fn=evaluate_and_submit,
    inputs=gr.Textbox(label="Hugging Face username", placeholder="e.g. john-doe"),
    outputs=[
        gr.Textbox(label="Status", lines=6),
        gr.DataFrame(label="Submitted Answers"),
    ],
)


# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|