File size: 3,928 Bytes
3f0e339
36b0556
8ff3469
 
28b77fd
e0509e3
 
8ff3469
d267ada
8098f7a
9dbc28b
 
e0509e3
 
 
8098f7a
e0509e3
 
8f41e23
8098f7a
e0509e3
8f41e23
e0509e3
28b77fd
e0509e3
7f756b6
e0509e3
bf1ac9d
8098f7a
36b0556
e0509e3
28b77fd
e0509e3
 
8098f7a
e0509e3
 
bf1ac9d
8098f7a
bf1ac9d
 
e0509e3
 
9e55565
8ff3469
9dbc28b
8ff3469
28b77fd
e0509e3
28b77fd
 
8ff3469
28b77fd
8ff3469
 
28b77fd
e0509e3
8ff3469
 
 
e0509e3
28b77fd
8ff3469
 
bf1ac9d
8ff3469
 
e0509e3
8ff3469
e0509e3
8098f7a
 
 
 
 
 
8ff3469
 
e0509e3
8ff3469
e0509e3
 
 
 
 
8ff3469
9b5b26a
8ff3469
 
e0509e3
8ff3469
28b77fd
e0509e3
 
 
 
 
8ff3469
e0509e3
9b5b26a
e0509e3
8ff3469
e0509e3
8ff3469
e0509e3
8098f7a
 
 
 
 
 
 
 
 
 
 
cb53552
8ff3469
 
8098f7a
 
28b77fd
8098f7a
28b77fd
e0509e3
8098f7a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel

# Constants
# Base URL of the scoring service. run_and_submit_all appends "/questions" and
# "/submit" to it to build the two endpoints it talks to.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Basic Agent
class BasicAgent:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    The agent uses an OpenAI-served chat model and a DuckDuckGo search tool.
    Calling an instance with a question returns the agent's answer as text.
    """

    def __init__(self, *, model_id: str = "gpt-4o-mini", max_steps: int = 3):
        """Build the underlying model, search tool and agent.

        Args:
            model_id: Model identifier passed to ``OpenAIServerModel``.
            max_steps: Step limit passed to ``CodeAgent`` so a single run
                cannot loop indefinitely.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                missing or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError(
                "OPENAI_API_KEY not set. Add it in HF → Settings → Secrets."
            )

        model = OpenAIServerModel(
            model_id=model_id,
            api_key=api_key,
        )

        self.agent = CodeAgent(
            model=model,
            tools=[DuckDuckGoSearchTool()],
            max_steps=max_steps,  # prevents infinite loops
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        The agent's final answer is not guaranteed to be a string (it can be
        a number, a list, or nothing at all), so it is normalised here to
        honour the declared return type and keep the value safe to place in
        a JSON payload. A missing answer becomes the empty string.
        """
        result = self.agent.run(question)
        if result is None:
            return ""
        return str(result)

# Run + Submit
def run_and_submit_all(
    profile: gr.OAuthProfile | None,
) -> tuple[str, pd.DataFrame | None]:
    """Answer every question from the scoring API and submit the results.

    Steps: check that a user is logged in, build the agent, download the
    question list, run the agent on each question, then POST all answers in
    one submission.

    Args:
        profile: Login profile of the current user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per attempted question (Task ID, Question, Submitted Answer),
        or ``None`` when the run stopped before any question was attempted.
    """
    if not profile:
        return "Please login first.", None
    username = profile.username

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # NOTE(review): when SPACE_ID is unset this link contains the text "None";
    # confirm whether the scoring API needs a real fallback value.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A non-list payload (e.g. an error object) would otherwise crash the
    # loop below with an AttributeError instead of reporting a status.
    if not isinstance(questions_data, list):
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions_data).__name__}).",
            None,
        )

    results_log = []
    answers_payload = []

    for item in questions_data:
        # Skip malformed entries rather than aborting the whole run.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            answer = agent(question_text)
        except Exception as e:
            # Record the failure as the answer so one bad question does not
            # prevent the remaining ones from being attempted and submitted.
            answer = f"AGENT ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": answer,
        })

    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        return "No answers generated.", results_df

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
    except Exception as e:
        return f"Submission failed: {e}", results_df

    final_status = (
        f"Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score', 'N/A')}% "
        f"({result.get('correct_count', '?')}/"
        f"{result.get('total_attempted', '?')})\n"
        f"Message: {result.get('message', '')}"
    )
    return final_status, results_df

# Gradio UI
with gr.Blocks() as demo:
    # Page header followed by the usage notes shown to the user.
    gr.Markdown("# 🤖 Basic Agent Evaluation Runner")
    instructions_md = """
    **Instructions**
    1. Login with your Hugging Face account.
    2. Click **Run Evaluation & Submit All Answers**.
    3. Wait for the agent to finish.

    **Requirements**
    - Uses **OpenAI (gpt-4o-mini)**
    - Requires `OPENAI_API_KEY` in HF Space Secrets
    - Agent is **step-limited** (max 3 steps)
    """
    gr.Markdown(instructions_md)

    # Controls, in display order: login, trigger button, status text, results.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=6,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The callback's two return values map onto the status box and the table.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)