File size: 5,157 Bytes
126c45b
 
 
3c4371f
10e9b7d
e80aab9
3db6293
126c45b
e80aab9
126c45b
 
 
 
 
 
 
78b82b6
31243f4
126c45b
 
5def130
126c45b
 
 
 
 
 
 
eebecd0
126c45b
 
4f72328
126c45b
5def130
126c45b
 
4f72328
126c45b
 
 
 
 
78b82b6
126c45b
 
78b82b6
126c45b
 
 
 
 
78b82b6
126c45b
 
4f72328
 
126c45b
 
 
 
 
 
 
 
 
 
 
 
4f72328
126c45b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80aab9
 
 
126c45b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import gradio as gr
import requests
import pandas as pd

# --- Constants ---
# Base URL of the course scoring service (serves /questions, accepts /submit).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Read once at import time; if unset, API calls go out with "Bearer None".
HF_TOKEN = os.getenv("HF_TOKEN")  # Make sure your HF read token is set in environment variables

# --- Gaia Agent using Qwen API ---
class GaiaAgentQwen:
    """Minimal agent that answers questions via the HF Inference API.

    Each question is sent as a single text-generation request to the hosted
    model. ``__call__`` never raises: on any failure it returns an
    ``"API ERROR: ..."`` string so the caller can still submit something.
    """

    def __init__(self, model="Qwen/Qwen2.5-Coder-32B-Instruct"):
        # model: HF model repo id, used to build the Inference API endpoint URL.
        self.model = model
        self.api_url = f"https://api-inference.huggingface.co/models/{model}"
        # HF_TOKEN comes from the module-level environment lookup; if it is
        # None the request is effectively unauthenticated.
        self.headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        print(f"GaiaAgentQwen initialized with model {model}")

    def __call__(self, question: str) -> str:
        """Return the model's generated answer for *question* (or an error string)."""
        prompt = f"Answer the following question concisely and correctly:\n{question}"
        # wait_for_model: block until the model is loaded instead of 503-ing.
        payload = {"inputs": prompt, "options": {"wait_for_model": True}}
        try:
            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=60)
            response.raise_for_status()
            data = response.json()
            # Expected success shape: [{"generated_text": "..."}].  Guard the
            # empty-list and non-dict cases explicitly — the original indexed
            # data[0] unconditionally, so an empty list raised IndexError and
            # fell into the broad except with a misleading "API ERROR" message.
            if isinstance(data, list) and data and isinstance(data[0], dict) \
                    and "generated_text" in data[0]:
                return data[0]["generated_text"]
            return str(data)  # fallback, e.g. {"error": ...} payloads
        except Exception as e:
            print(f"Error calling HF Inference API: {e}")
            return f"API ERROR: {e}"

# --- Main function ---
def _fetch_questions(questions_url: str):
    """Fetch the evaluation questions. Returns (questions, error); exactly one is None."""
    try:
        print(f"Fetching questions from: {questions_url}")
        response = requests.get(questions_url, timeout=10)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return None, "Fetched questions list is empty or invalid format."
        print(f"Fetched {len(questions_data)} questions.")
        return questions_data, None
    except Exception as e:
        return None, f"Error fetching questions: {e}"


def _run_agent_on_questions(agent, questions_data):
    """Run *agent* over every question; return (answers_payload, results_log).

    answers_payload holds the dicts the scoring API expects; results_log holds
    the rows shown to the user (including per-task agent errors).
    """
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question", "")
        if not task_id or not question_text:
            # Items missing an id or text cannot be scored — skip them.
            print(f"Skipping invalid question: {item}")
            continue
        try:
            answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
        except Exception as e:
            # Log the failure as a row so the user can see which task broke.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
    return answers_payload, results_log


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the agent on each, submit the answers, and report.

    Parameters:
        profile: OAuth profile injected by Gradio when the user is logged in,
            or None for anonymous users.

    Returns:
        (status_message, results_dataframe_or_None) for the two Gradio outputs.
    """
    api_url = DEFAULT_API_URL
    space_id = os.getenv("SPACE_ID") or "unknown-space"

    username = profile.username if profile else "anonymous"
    if profile:
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")

    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate the Gaia agent
    try:
        agent = GaiaAgentQwen()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Public link to this Space's code, recorded alongside the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code repo: {agent_code}")

    questions_data, error = _fetch_questions(questions_url)
    if error:
        return error, None

    answers_payload, results_log = _run_agent_on_questions(agent, questions_data)

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Submit answers
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
        response = requests.post(submit_url, json=submission_data, timeout=20)
        response.raise_for_status()
        submission_result = response.json()
        print(f"Submission result: {submission_result}")
        # Surface the server's scoring response to the user — the original
        # only printed it to the Space logs and the user never saw their score.
        return (
            f"Submission completed successfully!\nServer response: {submission_result}",
            pd.DataFrame(results_log),
        )
    except Exception as e:
        return f"Error submitting answers: {e}", pd.DataFrame(results_log)

# --- Build Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Gaia Agent Evaluation Runner")
    gr.Markdown("""
    **Instructions:**
    1. Clone this space, then modify the code to define your agent's logic.
    2. Log in to your Hugging Face account using the button below.
    3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see results.

    **Note:** Using the HF API can take a few seconds per question.
    """)
    # The login button enables HF OAuth for this Space; Gradio injects the
    # resulting gr.OAuthProfile into any handler whose parameter is annotated
    # with gr.OAuthProfile.
    login_btn = gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # Fix: do NOT pass the LoginButton in `inputs` — that would feed the
    # button's component value (not an OAuthProfile) to run_and_submit_all.
    # Gradio supplies the profile automatically from the type annotation.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Emit a clear startup banner in the Space logs before launching the UI.
    rule = "-" * 30
    print(f"\n{rule} App Starting {rule}")
    print("Launching Gradio Interface for Gaia Agent Evaluation...")
    demo.launch(debug=True, share=False)