File size: 7,273 Bytes
10e9b7d
 
eccf8e4
3c4371f
68e474c
5d1ae57
68e474c
5d1ae57
 
 
 
de60934
68e474c
10e9b7d
e80aab9
3db6293
e80aab9
de60934
68e474c
 
 
de60934
 
 
 
 
68e474c
 
 
 
de60934
 
 
 
 
68e474c
 
 
 
 
 
 
de60934
 
 
 
 
68e474c
 
 
 
 
 
 
 
 
 
 
5d1ae57
 
 
eb72447
c86304f
 
5d1ae57
c86304f
fd95519
c86304f
7ce9151
 
387c727
fd95519
eb72447
 
5d1ae57
c86304f
68e474c
5d1ae57
 
 
 
eb72447
68e474c
 
 
 
 
 
 
de60934
68e474c
 
eb72447
68e474c
5d1ae57
 
 
68e474c
5d1ae57
 
 
 
 
 
 
 
68e474c
 
 
5d1ae57
31243f4
5d1ae57
eb72447
68e474c
5d1ae57
68e474c
 
5d1ae57
68e474c
 
 
5d1ae57
68e474c
 
 
 
eb72447
de60934
68e474c
5d1ae57
 
68e474c
5d1ae57
68e474c
5d1ae57
eb72447
5d1ae57
68e474c
4021bf3
68e474c
eb72447
68e474c
 
eb72447
 
 
 
 
7e4a06b
31243f4
 
e80aab9
68e474c
31243f4
5d1ae57
31243f4
eb72447
68e474c
36ed51a
3c4371f
68e474c
eccf8e4
eb72447
31243f4
 
7d65c66
eb72447
68e474c
 
 
 
e80aab9
7d65c66
 
5d1ae57
68e474c
5d1ae57
 
31243f4
68e474c
 
eb72447
 
 
31243f4
68e474c
 
 
 
 
eb72447
68e474c
31243f4
68e474c
 
 
 
 
 
 
 
 
e80aab9
eb72447
68e474c
 
 
7d65c66
68e474c
e80aab9
68e474c
 
 
e80aab9
68e474c
e80aab9
c86304f
7e4a06b
5d1ae57
68e474c
de60934
68e474c
de60934
68e474c
 
 
 
 
de60934
 
eb72447
 
3c4371f
68e474c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import os
import gradio as gr
import requests
import pandas as pd
import time
from dotenv import load_dotenv
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, tool

# Load environment variables from a local .env file when one is available.
# This is best-effort: a failure here must not prevent the app from starting,
# but it is reported instead of being silently discarded so that a missing
# GEMINI_API_KEY is easier to diagnose.
try:
    load_dotenv()
except Exception as exc:
    print(f"Warning: could not load .env file: {exc}")

# --- Constants ---
# Base URL of the scoring service; the questions and submit endpoints are
# derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Tool Definitions ---

@tool
def reverse_text(text: str) -> str:
    """Reverses the given text string. Useful for tasks requiring backward reading.

    Args:
        text: The string to be reversed.
    """
    # NOTE(review): the docstring above is kept verbatim because the @tool
    # decorator presumably uses it as the tool description - confirm before
    # rewording it.
    # A slice with step -1 walks the sequence from its last item to its first.
    back_to_front = slice(None, None, -1)
    return text[back_to_front]

@tool
def calculator(expression: str) -> str:
    """Evaluates a mathematical expression (e.g., '2 * 3 + 5') safely.

    Only numeric literals, parentheses, the binary operators
    + - * / // % ** and unary + / - are supported. Anything else (names,
    attribute access, function calls, ...) is rejected with an error message.

    Args:
        expression: The mathematical expression string to evaluate.
    """
    import ast
    import operator

    # The expression is parsed and only whitelisted node types are evaluated.
    # The previous eval() with empty __builtins__ still allowed attribute
    # traversal such as ().__class__.__base__.__subclasses__().
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Cap exponents so inputs like 9 ** 9 ** 9 cannot hang the process.
    max_exponent = 10_000

    def _evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant):
            if isinstance(node.value, (int, float, complex)):
                return node.value
            raise ValueError(f"unsupported literal: {node.value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = _evaluate(node.left)
            right = _evaluate(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # strip() mirrors eval(), which tolerates surrounding whitespace.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(_evaluate(tree))
    except Exception as e:
        # Errors are returned as text rather than raised.
        return f"Error: {e}"

@tool
def download_file(task_id: str) -> str:
    """Downloads a file associated with a Task ID from the evaluation API.

    Text-like files (plain text, CSV, JSON, XML, source code) are returned as
    their decoded contents. Any other file (spreadsheet, image, audio, ...) is
    saved to a temporary file and a message containing its local path is
    returned instead, because binary data cannot be represented as text.

    Args:
        task_id: The unique identifier for the task to fetch the file for.
    """
    import os
    import re
    import tempfile
    from urllib.parse import quote

    try:
        api_url = "https://agents-course-unit4-scoring.hf.space"
        # Quote the id so unexpected characters cannot alter the request path.
        files_url = f"{api_url}/files/{quote(str(task_id), safe='')}"
        response = requests.get(files_url, timeout=30)
        response.raise_for_status()

        # Use the declared media type (without parameters such as charset)
        # to decide whether the payload can be returned as text.
        mime_type = response.headers.get("Content-Type", "").split(";")[0].strip().lower()
        text_like_types = (
            "application/json",
            "application/xml",
            "application/csv",
            "application/x-yaml",
            "application/javascript",
        )
        is_text = (
            not mime_type  # no declared type: keep the historic text behaviour
            or mime_type.startswith("text/")
            or mime_type in text_like_types
            or mime_type.endswith(("+json", "+xml"))
        )
        if is_text:
            return response.text

        # Binary payload: response.text would corrupt it, so write the raw
        # bytes to disk. Keep the original extension when the server supplies
        # a file name, so readers that rely on the suffix still work.
        disposition = response.headers.get("Content-Disposition", "")
        name_match = re.search(r'filename="?([^";]+)"?', disposition)
        suffix = os.path.splitext(name_match.group(1).strip())[1] if name_match else ""
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as saved:
            saved.write(response.content)
        return f"Binary file saved to: {saved.name}"
    except Exception as e:
        # Failures are returned as text rather than raised.
        return f"Error downloading file: {e}"

# --- Agent Definition ---

class GAIAAgent:
    """Callable question-answering agent wrapping a ``CodeAgent`` and a Gemini model.

    ``agent(question)`` returns the answer as a string; failures while running
    the agent are returned as an error string instead of being raised.
    """

    def __init__(self, verbose=False):
        """Create the model, the tool list and the underlying ``CodeAgent``.

        Args:
            verbose: When true, progress messages are printed and the
                ``CodeAgent`` is created with verbosity level 2 instead of 0.
        """
        self.verbose = verbose

        # The key comes from the environment. A missing key is only reported
        # here; construction continues and the value is passed on as-is.
        gemini_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_key:
            print("ERROR: GEMINI_API_KEY not found in environment variables.")

        # Pinned to an explicit, dated model identifier.
        # NOTE(review): an earlier note said the API rejected 'latest'/base
        # aliases on the v1beta endpoint - confirm before switching to an alias.
        model_id = "gemini/gemini-2.5-flash-preview-09-2025"
        if self.verbose:
            print(f"Initializing Agent with model: {model_id}")

        self.model = LiteLLMModel(model_id=model_id, api_key=gemini_key)

        # Web search plus the tools defined in this module.
        self.tools = [DuckDuckGoSearchTool(), reverse_text, calculator, download_file]

        # Modules passed to the CodeAgent as additional authorized imports.
        importable_modules = [
            "requests",
            "bs4",
            "pandas",
            "numpy",
            "scipy",
            "matplotlib",
            "seaborn",
            "sklearn",
            "nltk",
            "PIL",
            "cv2",
            "re",
            "math",
            "time",
        ]

        if self.verbose:
            agent_verbosity = 2
        else:
            agent_verbosity = 0

        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            add_base_tools=True,
            planning_interval=3,
            verbosity_level=agent_verbosity,
            additional_authorized_imports=importable_modules,
        )

    def _is_reversed_text(self, text):
        """Return True when ``text`` looks like it was written backwards.

        The heuristic fires when the stripped text starts with a period, or
        when the text contains "answer" or "please" spelled backwards
        (case-insensitive).
        """
        if text.strip().startswith("."):
            return True
        lowered = text.lower()
        return any(marker in lowered for marker in ("rewsna", "esaelp"))

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the result as a string.

        Args:
            question: The question text; it is un-reversed first when the
                reversed-text heuristic matches.

        Returns:
            ``str()`` of the agent's result, or an
            ``"Error processing question: ..."`` string if the agent raised.
        """
        # Decode reversed questions ourselves before handing them to the agent.
        if self._is_reversed_text(question):
            if self.verbose:
                print("Detected reversed text. Decoding...")
            question = question[::-1]

        # The prompt is assembled line by line; every line carries the same
        # eight-space indent, and the text starts with a newline and ends with
        # an indent-only line.
        pad = " " * 8
        prompt_lines = [
            "",
            f"{pad}QUESTION: {question}",
            pad,
            f"{pad}INSTRUCTIONS:",
            f"{pad}1. Solve the problem step-by-step using Python code.",
            f"{pad}2. If you need information, search for it.",
            f"{pad}3. If you need to calculate something, write code to do it.",
            f"{pad}4. If you need a file, use download_file with the task_id provided.",
            f"{pad}5. FINAL ANSWER: Print the final result strictly as the last line of your output.",
            pad,
        ]
        prompt = "\n".join(prompt_lines)

        try:
            return str(self.agent.run(prompt))
        except Exception as err:
            return f"Error processing question: {err}"

# --- Gradio Logic ---

def run_and_submit_all(profile: gr.OAuthProfile | None, sample_size=0):
    """Run the agent on the evaluation questions and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in (in which case nothing is run).
        sample_size: When greater than 0, only the first ``sample_size``
            questions are run; 0 runs all of them.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per attempted question (empty when the run
        stopped before any question was attempted).
    """
    # Check login
    if profile is None:
        return "Please Login to Hugging Face first!", pd.DataFrame()

    # Determine space ID (used to build the link to the agent's code)
    space_id = os.getenv("SPACE_ID") or "generic/agent-runner"

    username = profile.username
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Initialize Agent
    try:
        agent = GAIAAgent(verbose=True)
    except Exception as e:
        return f"Error initializing agent: {e}", pd.DataFrame()

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch Questions
    try:
        print("Fetching questions...")
        response = requests.get(questions_url, timeout=15)
        # Surface HTTP errors here instead of trying to parse an error body
        # as if it were the question list.
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", pd.DataFrame()

    # The loop below expects a list of dicts; anything else (for example an
    # error object) would otherwise crash outside of any try block.
    if not isinstance(questions_data, list) or not questions_data:
        return "Error fetching questions: the API returned no questions.", pd.DataFrame()

    # Optional Sampling (the slider may deliver a float, hence int())
    sample_size = int(sample_size or 0)
    if sample_size > 0:
        questions_data = questions_data[:sample_size]

    results_log = []
    answers_payload = []

    print(f"Running on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        q_text = item.get("question")

        # Skip malformed entries that lack an id or a question.
        if not task_id or not q_text:
            continue

        try:
            print(f"--- Task {i+1} ---")
            ans = agent(q_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": ans})
            results_log.append({"Task ID": task_id, "Question": q_text, "Answer": ans})

            # Rate limit protection (Wait 2s between questions)
            time.sleep(2)
        except Exception as e:
            results_log.append({"Task ID": task_id, "Error": str(e)})

    # Nothing to send: report it rather than posting an empty submission.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Submit
    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        print("Submitting answers...")
        # A timeout keeps the UI from hanging forever if the scoring service
        # never responds.
        r = requests.post(submit_url, json=submission, timeout=60)
        r.raise_for_status()
        return f"Success! {r.json()}", pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)

def test_single(question):
    """Answer one question with a freshly constructed verbose agent.

    Args:
        question: The question text to pass to the agent.

    Returns:
        Whatever the agent returns for ``question``.
    """
    single_run_agent = GAIAAgent(verbose=True)
    answer = single_run_agent(question)
    return answer

# --- UI ---
# Build the Gradio interface: a title, a login button, one tab for trying a
# single question and one tab for running and submitting the evaluation.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Runner (Gemini Powered)")
    gr.LoginButton()
    
    # "Test" tab: pass the text box content to test_single and show the result.
    with gr.Tab("Test"):
        q_in = gr.Textbox(label="Enter Question")
        btn_test = gr.Button("Run")
        out_test = gr.Textbox(label="Answer")
        btn_test.click(test_single, q_in, out_test)
        
    # "Evaluate" tab: run the agent on the questions and submit the answers.
    with gr.Tab("Evaluate"):
        # A slider value of 0 means "run every question" (see the label).
        slider = gr.Slider(0, 20, value=0, label="Sample Size (0=All)")
        btn_eval = gr.Button("Run & Submit")
        out_status = gr.Textbox(label="Status")
        out_df = gr.DataFrame(label="Results")
        
        # Only the slider is wired as an explicit input.
        # NOTE(review): run_and_submit_all's `profile` parameter is presumably
        # filled in by Gradio from the login session - confirm against the
        # Gradio OAuth documentation.
        btn_eval.click(run_and_submit_all, inputs=[slider], outputs=[out_status, out_df])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()