File size: 3,922 Bytes
10e9b7d
 
eccf8e4
3c4371f
2dd087e
10e9b7d
e80aab9
3db6293
e80aab9
4bcd6d4
 
31243f4
2dd087e
 
8a36d69
2dd087e
 
 
 
 
 
 
 
8a36d69
4bcd6d4
 
 
 
 
 
2dd087e
4bcd6d4
 
31243f4
4bcd6d4
2dd087e
1552a23
 
2dd087e
 
1552a23
4bcd6d4
 
 
 
4021bf3
4bcd6d4
 
3c4371f
4bcd6d4
7d65c66
3c4371f
4bcd6d4
7e4a06b
31243f4
 
e80aab9
eccf8e4
4bcd6d4
2dd087e
31243f4
7d65c66
31243f4
e80aab9
4bcd6d4
 
 
 
 
 
2dd087e
 
4bcd6d4
2dd087e
7d65c66
 
31243f4
4bcd6d4
2dd087e
 
 
 
 
4bcd6d4
 
 
 
 
31243f4
e80aab9
 
2dd087e
4bcd6d4
e80aab9
4bcd6d4
 
7d65c66
2dd087e
e80aab9
4bcd6d4
60a3312
2dd087e
7e4a06b
60a3312
2dd087e
4bcd6d4
e80aab9
42aeb7b
e80aab9
 
1552a23
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, VisitWebpageTool

# --- Constants ---
# Base URL of the scoring service. run_and_submit_all() appends "/questions"
# (GET) and "/submit" (POST) to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Agent Definition ---
class AgentArchitect:
    """Callable wrapper around a smolagents ``CodeAgent`` backed by an OpenAI model.

    Call an instance with a question string; it returns the agent's answer
    (or an ``"Error: ..."`` message) as text.
    """

    def __init__(self):
        """Build the model, the tool list and the agent.

        The OpenAI key is read from the ``OPENAI_API_KEY`` environment
        variable. A missing key is only reported on stdout; construction
        continues regardless.
        """
        # Read the secret from the environment rather than hard-coding it.
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            print("CRITICAL: OPENAI_API_KEY is missing. Please add it to your Space Secrets!")

        # The model is reached through LiteLLM instead of Hugging Face billing;
        # gpt-4o-mini was picked by the author as a capable, low-cost option.
        self.model = LiteLLMModel(model_id="gpt-4o-mini", api_key=api_key)

        # Web search plus page fetching are the explicitly supplied tools.
        self.tools = [DuckDuckGoSearchTool(), VisitWebpageTool()]

        self.agent = CodeAgent(tools=self.tools, model=self.model, add_base_tools=True)

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        The question is followed by a fixed instruction block asking for a
        concise final answer only. Any exception raised while building the
        prompt, running the agent or stringifying the result is caught and
        returned as ``"Error: <message>"`` instead of propagating.
        """
        try:
            instructions = (
                "Instructions: Think step-by-step. Solve the problem using your tools. "
                "Provide ONLY the final, concise answer."
            )
            answer = self.agent.run(f"{question}\n\n{instructions}")
            return str(answer)
        except Exception as exc:
            return f"Error: {exc}"

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every evaluation question with the agent and submit the results.

    Fetches the question list from ``{DEFAULT_API_URL}/questions``, runs an
    ``AgentArchitect`` instance on each question, and posts the collected
    answers to ``{DEFAULT_API_URL}/submit``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. NOTE(review): the click handler wires no explicit
            inputs, so this is presumably injected by Gradio based on the
            type annotation -- keep the annotation intact.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with one row per answered
        question, or ``None`` when no answers were produced (not logged in,
        no usable questions, or a failure before the first answer).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    space_id = os.getenv("SPACE_ID")
    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Kept outside the try block so answers that were already produced can
    # still be shown to the user if a later step (e.g. the submit) fails.
    results_log = []

    try:
        agent_instance = AgentArchitect()

        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()

        if not questions_data:
            return "Submission Failed: the questions endpoint returned no questions.", None

        answers_payload = []
        for item in questions_data:
            task_id = item.get("task_id")
            question_text = item.get("question")

            # An entry without an id or a question cannot be tied to a task;
            # skip it rather than submitting an answer keyed by None.
            if not task_id or question_text is None:
                print(f"Skipping malformed question entry: {item}")
                continue

            submitted_answer = agent_instance(question_text)

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

        if not answers_payload:
            return "Submission Failed: no valid questions were available to answer.", None

        agent_code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"
        submission_data = {
            "username": username.strip(),
            "agent_code": agent_code_link,
            "answers": answers_payload,
        }

        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result_data = submit_response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Final Score: {result_data.get('score')}% \n"
            f"Message: {result_data.get('message')}"
        )
        return final_status, pd.DataFrame(results_log)

    except Exception as e:
        # Top-level UI boundary: report the failure as text, but keep whatever
        # answers were collected so the work is not silently discarded.
        partial_results = pd.DataFrame(results_log) if results_log else None
        return f"Submission Failed: {e}", partial_results

# --- Gradio UI ---
# Page layout: components appear in the order they are created inside the
# Blocks context, so the statement order below is the on-screen order.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Professional Agent Evaluator (OpenAI Edition)")
    # Login control; run_and_submit_all() refuses to run without a profile.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
    # Receives the status string (first element of the handler's return value).
    status_output = gr.Textbox(label="Leaderboard Status", lines=4)
    # Receives the per-question results table (second element), or nothing on failure.
    results_table = gr.DataFrame(label="Agent Reasoning Trace", wrap=True)

    # No explicit inputs are wired here; the handler's `profile` argument is
    # presumably supplied by Gradio from its OAuthProfile annotation -- TODO confirm.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()