File size: 4,469 Bytes
45a8d9b
 
 
 
 
4717d11
c6ca9a2
45a8d9b
3c4371f
c6ca9a2
45a8d9b
 
c6ca9a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45a8d9b
 
c6ca9a2
45a8d9b
 
c6ca9a2
45a8d9b
c6ca9a2
45a8d9b
 
 
c6ca9a2
 
45a8d9b
c6ca9a2
 
45a8d9b
c6ca9a2
 
45a8d9b
c6ca9a2
 
 
 
 
 
 
 
45a8d9b
 
 
c6ca9a2
45a8d9b
 
c6ca9a2
 
 
45a8d9b
 
 
c6ca9a2
 
45a8d9b
c6ca9a2
45a8d9b
c6ca9a2
 
 
 
45a8d9b
c6ca9a2
45a8d9b
c6ca9a2
45a8d9b
c6ca9a2
45a8d9b
 
c6ca9a2
45a8d9b
 
c6ca9a2
 
 
45a8d9b
c6ca9a2
 
45a8d9b
c6ca9a2
 
 
 
 
 
 
45a8d9b
c6ca9a2
45a8d9b
 
 
c6ca9a2
45a8d9b
c6ca9a2
45a8d9b
c6ca9a2
 
 
 
 
 
45a8d9b
 
c6ca9a2
 
45a8d9b
c6ca9a2
45a8d9b
 
c6ca9a2
 
 
 
 
45a8d9b
 
c6ca9a2
45a8d9b
c6ca9a2
 
45a8d9b
c6ca9a2
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel

# Base URL of the scoring API; the "/questions" and "/submit" endpoints
# used by run_and_submit_all are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# My agent class
class BasicAgent:
    """Callable agent that answers a question through a search-enabled CodeAgent.

    ``__init__`` sets three attributes: ``model`` (a ``LiteLLMModel``),
    ``search_tool`` (a ``DuckDuckGoSearchTool``) and ``agent`` (a ``CodeAgent``
    built from the other two).
    """

    def __init__(self):
        """Build the model, the search tool and the agent.

        Raises:
            Exception: if the ``hf_token`` environment variable is unset or empty.
        """
        print("Starting agent...")

        # The token is read from the lowercase "hf_token" environment variable.
        hf_token = os.getenv("hf_token")
        if not hf_token:
            raise Exception("Token not found! Check secrets.")

        # Language model, authenticated with the token read above
        self.model = LiteLLMModel(
            api_key=hf_token,
            model_id="huggingface/meta-llama/Llama-3.2-3B-Instruct",
        )

        # Web search tool handed to the agent
        self.search_tool = DuckDuckGoSearchTool()

        # Module names passed to the agent as additional_authorized_imports
        extra_imports = ["requests", "pandas", "numpy", "openpyxl", "xlrd"]
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool],
            additional_authorized_imports=extra_imports,
        )

        print("Agent ready!")

    def __call__(self, question: str) -> str:
        """Return the agent's answer to *question* as a string.

        Any exception raised while running the agent (or while converting its
        result to ``str``) is reported as an ``"Error: ..."`` string instead of
        propagating to the caller.
        """
        preview = question[:50]
        print(f"Question: {preview}...")
        try:
            return str(self.agent.run(question))
        except Exception as err:
            return f"Error: {err!s}"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent over every evaluation question and submit the answers.

    Fetches the question list from ``{DEFAULT_API_URL}/questions``, asks a
    freshly created ``BasicAgent`` each question, then posts the collected
    answers to ``{DEFAULT_API_URL}/submit``.

    Args:
        profile: Profile of the logged-in user, or ``None`` when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with one row per attempted
        question (columns "Task ID", "Question", "Submitted Answer"), or
        ``None`` when the run stopped before any question was attempted.
    """
    # Check if logged in
    if not profile:
        return "Please login first!", None

    username = profile.username
    print(f"Logged in as: {username}")

    # SPACE_ID is only used to build the link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    # URLs
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Initialize agent
    print("Creating agent...")
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error creating agent: {e}", None

    # NOTE: when SPACE_ID is unset this link contains the literal text "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Get questions
    print("Getting questions...")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
        # Reject payloads that are not a JSON array up front; otherwise the
        # loop below would fail with an uncaught AttributeError on item.get().
        if not isinstance(questions, list):
            raise ValueError(
                f"expected a list of questions, got {type(questions).__name__}"
            )
        print(f"Got {len(questions)} questions")
    except Exception as e:
        return f"Error getting questions: {e}", None

    # Answer questions
    print("Answering questions...")
    results = []
    answers = []
    total = len(questions)

    for i, item in enumerate(questions):
        # Skip entries that are not JSON objects instead of crashing on .get()
        if not isinstance(item, dict):
            print(f"Skipping malformed question entry {i+1}/{total}")
            continue

        task_id = item.get("task_id")
        question = item.get("question")

        if not task_id or not question:
            continue

        print(f"Question {i+1}/{total}")

        try:
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            results.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            # A failed question is shown in the results table but not submitted.
            print(f"Error on question {i+1}: {e}")
            results.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"Error: {e}"})

    if not answers:
        return "No answers generated", pd.DataFrame(results)

    # Submit
    print(f"Submitting {len(answers)} answers...")
    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }

    try:
        response = requests.post(submit_url, json=submission, timeout=60)
        response.raise_for_status()
        result = response.json()

        status = (
            f"Submission successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
            f"Message: {result.get('message')}"
        )

        return status, pd.DataFrame(results)

    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# My Agent Evaluation")
    gr.Markdown("Login and click the button to run the evaluation.")

    # Login button; run_and_submit_all refuses to run without a logged-in profile
    gr.LoginButton()

    run_btn = gr.Button("Run Evaluation & Submit")

    # Output widgets for the (status message, results table) pair
    # returned by run_and_submit_all
    status = gr.Textbox(label="Status", lines=5)
    table = gr.DataFrame(label="Results", wrap=True)

    # No `inputs` are wired here; the `profile` argument is presumably supplied
    # by Gradio from the login session via its OAuthProfile annotation -- TODO confirm
    run_btn.click(fn=run_and_submit_all, outputs=[status, table])

# Launch the app only when this file is executed directly
if __name__ == "__main__":
    print("Starting app...")
    demo.launch()