File size: 3,864 Bytes
ebc57d0
 
 
d001a5c
960dd3f
f3ed293
960dd3f
ebc57d0
d001a5c
960dd3f
ebc57d0
 
960dd3f
ebc57d0
d001a5c
ebc57d0
 
960dd3f
 
 
 
d001a5c
960dd3f
ebc57d0
d001a5c
 
 
960dd3f
ebc57d0
d001a5c
ebc57d0
960dd3f
b4f9d22
960dd3f
d001a5c
 
960dd3f
f3ed293
960dd3f
 
 
 
 
 
f3ed293
d001a5c
960dd3f
d001a5c
960dd3f
 
 
 
 
 
 
 
 
ebc57d0
d001a5c
960dd3f
ebc57d0
960dd3f
ebc57d0
d001a5c
 
960dd3f
 
d001a5c
 
960dd3f
d001a5c
960dd3f
 
d001a5c
960dd3f
1a4e9af
ebc57d0
960dd3f
ebc57d0
d001a5c
81105ee
d001a5c
960dd3f
 
d001a5c
 
960dd3f
81105ee
960dd3f
 
81105ee
d001a5c
 
960dd3f
 
 
81105ee
 
ebc57d0
b4f9d22
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
import requests
import pandas as pd
import gradio as gr
from crew import run_crew          # ← your multi-agent logic

# Base URL of the scoring service; the "/questions" and "/submit" paths are
# appended to it when fetching the benchmark and posting answers.
API_URL = "https://agents-course-unit4-scoring.hf.space"


# ─── AGENT WRAPPER ──────────────────────────────────────────────────────────────
class CrewAgent:
    """Callable adapter that answers a question by delegating to ``run_crew``."""

    def __call__(self, question: str) -> str:
        # It MUST use your real crew logic!
        # An empty ``file_path`` is always passed along with the question.
        answer = run_crew(question, file_path="")
        return answer


# Shared instance invoked once per benchmark question by the handler below.
agent = CrewAgent()


# ─── MAIN HANDLER ───────────────────────────────────────────────────────────────
def evaluate_and_submit(username: str):
    """Run the agent on every benchmark question and submit the answers.

    Fetches the question list from ``{API_URL}/questions``, calls the
    module-level ``agent`` on each question text, posts the collected answers
    to ``{API_URL}/submit`` and reports the outcome.  Progress and warnings
    are printed to stdout for debugging.

    Args:
        username: Hugging Face username to submit under.  Surrounding
            whitespace is ignored; ``None`` is treated like an empty string.

    Returns:
        A ``(status, table)`` tuple.  ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per answered question
        (columns ``Task ID``, ``Question``, ``Answer``), or ``None`` when
        nothing was run (missing username or the questions could not be
        fetched).
    """
    # Guard against None as well as blank input so the UI gets a message
    # instead of an AttributeError.
    username = (username or "").strip()
    if not username:
        return "❌  Please enter your Hugging Face username.", None

    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    # 1) Fetch questions
    try:
        resp = requests.get(f"{API_URL}/questions", timeout=30)
        # Surface HTTP errors here rather than iterating over an error body.
        resp.raise_for_status()
        questions = resp.json()
        if not isinstance(questions, list):
            raise ValueError(
                f"expected a list of questions, got {type(questions).__name__}"
            )
    except Exception as e:
        return f"❌  Failed to fetch questions: {e}", None

    # 2) Answer questions, logging every result
    placeholders = {"this is a default answer.", "", "n/a"}
    answers, log = [], []
    for item in questions:
        try:
            qid, qtxt = item["task_id"], item["question"]
        except (KeyError, TypeError) as e:
            # A malformed entry must not abort the whole run.
            print(f"⚠️ Skipping malformed question entry {item!r}: {e}")
            continue

        try:
            raw = agent(qtxt)
        except Exception as e:
            ans = f"AGENT ERROR: {e}"
            print(f"⚠️ Agent error on QID {qid}: {e}")
        else:
            # Coerce to text so a non-string result is kept as the answer
            # instead of tripping the placeholder check below.
            ans = "" if raw is None else str(raw)
            # Debug print:
            print(f"QID: {qid} | Q: {str(qtxt)[:60]}... | Agent Answer: {ans}")
            # Add warning if placeholder detected
            if ans.strip().lower() in placeholders:
                print(f"⚠️ Warning: Agent returned a default/empty answer for QID {qid}.")

        answers.append({"task_id": qid, "submitted_answer": ans})
        log.append({"Task ID": qid, "Question": qtxt, "Answer": ans})

    # Build the results table once; it is both printed and returned.
    results = pd.DataFrame(log)

    # Show part of the DataFrame in the console for debugging; a failure to
    # print must not prevent the submission.
    try:
        print("=== First 5 results ===")
        print(results.head())
    except Exception as e:
        print(f"DataFrame print error: {e}")

    if not answers:
        return "⚠️  No answers generated.", results

    # 3) Submit
    try:
        resp = requests.post(
            f"{API_URL}/submit",
            json={"username": username, "agent_code": agent_code, "answers": answers},
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()
        status = (
            "✅  Submission successful!\n"
            f"Score: {data.get('score')} % "
            f"({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"❌  Submission failed: {e}"

    return status, results



# ─── GRADIO UI ──────────────────────────────────────────────────────────────────
# Single text input: the account name the answers are submitted under.
_username_input = gr.Textbox(label="Hugging Face username", placeholder="e.g. john-doe")

# Two outputs, matching the (status, table) pair returned by the handler.
_result_outputs = [
    gr.Textbox(label="Status", lines=6),
    gr.DataFrame(label="Submitted Answers"),
]

# Help text shown with the form.
_description = (
    "Enter your Hugging Face username and click **Run Evaluation & Submit**. "
    "The app will run your agent on all benchmark questions and send the answers."
)

demo = gr.Interface(
    fn=evaluate_and_submit,
    inputs=_username_input,
    outputs=_result_outputs,
    title="GAIA Agent Submission",
    description=_description,
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()