File size: 4,180 Bytes
10e9b7d
 
eccf8e4
3c4371f
91d888c
 
10e9b7d
e80aab9
3db6293
e80aab9
91d888c
31243f4
 
91d888c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31243f4
91d888c
 
 
 
 
 
 
 
 
4021bf3
91d888c
 
3c4371f
7e4a06b
91d888c
7e4a06b
7d65c66
3c4371f
7e4a06b
31243f4
 
e80aab9
91d888c
31243f4
 
 
 
91d888c
36ed51a
3c4371f
7d65c66
eccf8e4
31243f4
7d65c66
31243f4
7d65c66
91d888c
e80aab9
91d888c
7d65c66
 
31243f4
 
 
91d888c
31243f4
7d65c66
 
 
31243f4
91d888c
31243f4
91d888c
7d65c66
e80aab9
7d65c66
e80aab9
 
31243f4
e80aab9
 
91d888c
 
e80aab9
91d888c
7d65c66
91d888c
e80aab9
91d888c
e80aab9
91d888c
7e4a06b
91d888c
 
 
e80aab9
91d888c
e80aab9
 
91d888c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import gradio as gr
import requests
import pandas as pd
# Import smolagents components
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool

# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" endpoint URLs
# used by run_and_submit_all are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Robust Agent Definition ---
class BasicAgent:
    """Callable question-answering agent built around a smolagents ``CodeAgent``.

    Call an instance with a question string; it returns the agent's result
    converted to ``str``, or an ``"Error: ..."`` string if the run raised.
    """

    def __init__(self):
        """Create the model, the tool list, and the ``CodeAgent`` that uses both."""
        print("Initializing High-Performance CodeAgent...")

        # Language model, selected by its model id.
        self.model = HfApiModel(model_id="Qwen/Qwen2.5-72B-Instruct")

        # Tools handed to the agent: web search and web page retrieval.
        self.tools = [DuckDuckGoSearchTool(), VisitWebpageTool()]

        # add_base_tools=True asks the agent to also load its default toolbox.
        self.agent = CodeAgent(tools=self.tools, model=self.model, add_base_tools=True)

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        Args:
            question: The question text; a formatting hint is appended before
                it is passed to the agent.

        Returns:
            ``str(result)`` of the agent run, or ``"Error: <exception>"`` when
            the run (or the conversion of its result) raises.
        """
        # Only the first 100 characters are echoed to keep the log readable.
        preview = question[:100]
        print(f"Agent received question: {preview}...")
        try:
            # The hint nudges the model toward a bare answer for the grader.
            hint = "Final Answer Requirement: Provide ONLY the specific answer requested (number, name, or list) with no extra text."
            answer = self.agent.run(f"{question}\n\n{hint}")
            return str(answer)
        except Exception as err:
            print(f"Agent Error: {err}")
            return f"Error: {err}"

def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Fetch the evaluation questions, answer each with ``BasicAgent``, and submit.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is ``None`` when the
        run stops before the question loop (no login, agent construction
        failure, fetch failure, empty/invalid question list); otherwise it is
        a DataFrame with one row per question the agent was asked.
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = str(profile.username).strip()

    # NOTE: when SPACE_ID is unset the code link below contains the text "None".
    space_id = os.getenv("SPACE_ID")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the agent
    try:
        agent = BasicAgent()
    except Exception as exc:
        return f"Error initializing agent: {exc}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 2. Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as exc:
        return f"Error fetching questions: {exc}", None

    # Anything other than a non-empty list cannot be iterated as questions.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    # 3. Run the agent on every well-formed question
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            print(f"Skipping malformed question entry: {item!r}")
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")
        # An answer without a task id (or a run without a question) is useless
        # to the scorer, so such entries are skipped rather than submitted.
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            submitted_answer = agent(question_text)
        except Exception as exc:
            # Failed runs are shown in the table but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {exc}"})
            continue

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    # Do not POST an empty submission.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare & submit
    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as exc:
        return f"Submission Failed: {exc}", pd.DataFrame(results_log)

    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% \n"
        f"Message: {result_data.get('message')}"
    )
    return final_status, pd.DataFrame(results_log)

# --- Gradio Interface ---
demo = gr.Blocks()

with demo:
    # Page title followed by the login control.
    gr.Markdown(value="# 🚀 Professional Agent Evaluation Runner")
    gr.LoginButton()

    # Trigger button plus the two outputs filled in by run_and_submit_all.
    run_button = gr.Button(value="Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5)
    results_table = gr.DataFrame(label="Live Results Trace", wrap=True)

    # The callback returns (status text, results table) in this order.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    demo.launch()