vissutagunawan committed on
Commit
b419a9b
·
verified ·
1 Parent(s): 81917a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +423 -64
app.py CHANGED
@@ -4,31 +4,336 @@ import requests
4
  import inspect
5
  import pandas as pd
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -38,18 +343,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
44
  except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
- return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
 
 
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
 
51
  # 2. Fetch Questions
52
- print(f"Fetching questions from: {questions_url}")
53
  try:
54
  response = requests.get(questions_url, timeout=15)
55
  response.raise_for_status()
@@ -57,61 +366,87 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
57
  if not questions_data:
58
  print("Fetched questions list is empty.")
59
  return "Fetched questions list is empty or invalid format.", None
60
- print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
  print(f"Response text: {response.text[:500]}")
67
  return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
- print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
 
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
79
  if not task_id or question_text is None:
80
- print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
82
  try:
83
  submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
89
 
90
  if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
  # 4. Prepare Submission
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
99
  # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
  response = requests.post(submit_url, json=submission_data, timeout=60)
103
  response.raise_for_status()
104
  result_data = response.json()
 
 
 
 
 
105
  final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
- print("Submission successful.")
 
 
 
 
 
 
113
  results_df = pd.DataFrame(results_log)
114
  return final_status, results_df
 
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
@@ -119,52 +454,71 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
  except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
  print(status_message)
124
  results_df = pd.DataFrame(results_log)
125
  return status_message, results_df
126
  except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
  print(status_message)
129
  results_df = pd.DataFrame(results_log)
130
  return status_message, results_df
131
  except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
  print(status_message)
134
  results_df = pd.DataFrame(results_log)
135
  return status_message, results_df
136
  except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
  print(status_message)
139
  results_df = pd.DataFrame(results_log)
140
  return status_message, results_df
141
 
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
- with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
- **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
153
 
 
 
 
 
 
 
 
 
154
  ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
  """
159
  )
160
 
161
  gr.LoginButton()
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
 
 
 
 
 
 
 
168
 
169
  run_button.click(
170
  fn=run_and_submit_all,
@@ -172,25 +526,30 @@ with gr.Blocks() as demo:
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
 
 
 
176
  # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"โœ… SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
  else:
184
  print("โ„น๏ธ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"โœ… SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
- print("โ„น๏ธ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
 
193
- print("-"*(60 + len(" App Starting ")) + "\n")
 
 
 
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
  demo.launch(debug=True, share=False)
 
4
  import inspect
5
  import pandas as pd
6
 
7
+ # smolagents imports
8
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, tool
9
+ import re
10
+ from typing import Optional, Union, Any
11
+ import json
12
+ import csv
13
+ import io
14
+ import math
15
+ import statistics
16
+
17
+ # Additional imports for custom tools
18
+ import base64
19
+ from urllib.parse import urlparse
20
+ import mimetypes
21
+
22
  # (Keep Constants as is)
23
  # --- Constants ---
24
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
25
 
26
+ # --- Custom Tools for GAIA Tasks ---
27
+
28
+ @tool
29
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its visible text.

    The page is requested with a desktop-browser User-Agent and a 10 second
    timeout. ``<script>`` and ``<style>`` elements are removed, whitespace is
    collapsed to single spaces, and the text is capped at 8000 characters.
    Any failure (import, network, HTTP status, parsing) is reported as text
    instead of being raised.

    Args:
        url: The URL of the webpage to visit

    Returns:
        The content of the webpage as text, or an error message if the request fails
    """
    try:
        # Imported inside the function so the tool body is self-contained and
        # a missing package is reported through the error string below.
        import requests
        from bs4 import BeautifulSoup

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove script and style elements; their contents are not page text.
        for element in soup(["script", "style"]):
            element.decompose()

        # Without a separator, text from adjacent elements is concatenated
        # with nothing in between (e.g. two list items become "onetwo").
        text = soup.get_text(separator=" ")

        # Collapse the layout whitespace left behind by the markup.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        # Limit text length to avoid token limits
        if len(text) > 8000:
            text = text[:8000] + "... [Content truncated]"

        return text

    except Exception as e:
        return f"Error visiting webpage: {str(e)}"
71
+
72
+ @tool
73
def calculate_math(expression: str) -> str:
    """Evaluates a mathematical expression and returns the result as text.

    Every public name of the ``math`` module (``sqrt``, ``sin``, ``exp``,
    ``floor``, ``pi``, ``e``, ...) can be written bare or with the ``math.``
    prefix. ``log`` is the base-10 logarithm and ``ln`` the natural
    logarithm. The builtins ``abs``, ``round``, ``min``, ``max``, ``sum``,
    ``len`` and ``pow`` are available as well.

    Args:
        expression: A mathematical expression to evaluate (e.g., "2+2", "sqrt(16)", "log(100)")

    Returns:
        The result of the calculation or an error message
    """
    try:
        # Imported inside the function so the tool body is self-contained.
        import math

        expression = expression.strip()

        # SECURITY NOTE: eval() with an empty __builtins__ is NOT a sandbox;
        # attribute chains such as ().__class__.__bases__ can still reach
        # arbitrary objects. Rejecting dunder access closes the usual route,
        # but this must not be treated as safe for hostile input.
        if "__" in expression:
            raise ValueError("double-underscore names are not allowed")

        # Expose the math module's public names directly instead of rewriting
        # the expression text: rewriting turned an already-qualified
        # "math.sqrt" into "math.math.sqrt".
        namespace = {
            name: getattr(math, name)
            for name in dir(math)
            if not name.startswith("_")
        }
        namespace.update({
            "__builtins__": {},
            "math": math,
            "log": math.log10,  # bare log() stays base 10
            "ln": math.log,
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,  # builtin pow, overriding math.pow
        })

        result = eval(expression, namespace)
        return str(result)

    except Exception as e:
        return f"Error in calculation: {str(e)}"
118
+
119
+ @tool
120
def analyze_data(data: str, operation: str = "summary") -> str:
    """Analyzes numerical data and performs statistical operations.

    Input is either a JSON array (``"[1, 2, 3]"``) or comma-separated values
    (``"1, 2, 3"``). Surrounding whitespace is ignored. JSON numbers keep
    their type (ints stay ints); comma-separated values are parsed as floats.

    Args:
        data: Comma-separated numerical data or JSON array
        operation: Type of analysis ("summary", "mean", "median", "std", "count", "sum", "min", "max")

    Returns:
        The result of the data analysis
    """
    try:
        # Imported inside the function so the tool body is self-contained.
        import json
        import statistics

        # Strip first: a padded JSON array (" [1, 2]\n") would otherwise miss
        # the bracket check and fail in the comma-separated parser.
        data = data.strip()

        if data.startswith('[') and data.endswith(']'):
            # JSON array format
            numbers = json.loads(data)
            for value in numbers:
                # bool is a subclass of int, so exclude it explicitly.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise ValueError(f"non-numeric value in JSON array: {value!r}")
        else:
            # Comma-separated format; empty fields are skipped.
            numbers = [float(x.strip()) for x in data.split(',') if x.strip()]

        if not numbers:
            return "No valid numerical data provided"

        if operation == "summary":
            result = {
                "count": len(numbers),
                "sum": sum(numbers),
                "mean": statistics.mean(numbers),
                "median": statistics.median(numbers),
                "min": min(numbers),
                "max": max(numbers),
            }
            # Sample standard deviation is undefined for a single value.
            if len(numbers) > 1:
                result["std"] = statistics.stdev(numbers)
            return json.dumps(result, indent=2)

        if operation == "std":
            return str(statistics.stdev(numbers)) if len(numbers) > 1 else "0"

        single_value_operations = {
            "mean": statistics.mean,
            "median": statistics.median,
            "count": len,
            "sum": sum,
            "min": min,
            "max": max,
        }
        if operation in single_value_operations:
            return str(single_value_operations[operation](numbers))

        return f"Unknown operation: {operation}"

    except Exception as e:
        return f"Error in data analysis: {str(e)}"
176
+
177
+ @tool
178
def extract_numbers(text: str) -> str:
    """Extracts all numbers from a text string.

    Integers and decimals are returned in order of appearance. A leading
    ``-`` is kept as a sign only when it does not directly follow a word
    character or closing bracket, so ranges and dates ("10-20",
    "2020-01-15") yield unsigned parts rather than negative numbers.

    Args:
        text: Text containing numbers

    Returns:
        Comma-separated list of extracted numbers
    """
    try:
        # Imported inside the function so the tool body is self-contained.
        import re

        # Optional sign (only when not glued to a preceding token), digits,
        # optional fractional part.
        pattern = r'(?:(?<![\w)\]])-)?\d+(?:\.\d+)?'
        numbers = re.findall(pattern, text)

        if not numbers:
            return "No numbers found in the text"

        return ', '.join(numbers)

    except Exception as e:
        return f"Error extracting numbers: {str(e)}"
201
+
202
+ @tool
203
def count_items(text: str, item_type: str = "words") -> str:
    """Counts different types of items in text.

    Words are whitespace-separated tokens. Lines follow ``str.splitlines``,
    so a trailing newline does not add a line and empty text has zero lines.
    A sentence ends at ``.``, ``!`` or ``?`` followed by whitespace or the
    end of the text, so decimals such as "3.14" are not split.

    Args:
        text: The text to analyze
        item_type: What to count ("words", "characters", "lines", "sentences")

    Returns:
        The count as a string
    """
    try:
        if item_type == "words":
            return str(len(text.split()))
        if item_type == "characters":
            return str(len(text))
        if item_type == "lines":
            # split('\n') reported 1 line for "" and an extra line for a
            # trailing newline; splitlines() also handles "\r\n".
            return str(len(text.splitlines()))
        if item_type == "sentences":
            # Imported inside the function so the tool body is self-contained.
            import re
            parts = re.split(r'[.!?]+(?:\s+|$)', text)
            sentences = [part.strip() for part in parts if part.strip()]
            return str(len(sentences))
        return f"Unknown item type: {item_type}"

    except Exception as e:
        return f"Error counting items: {str(e)}"
232
+
233
+ # --- Enhanced Agent Definition ---
234
class GAIAAgent:
    """Question-answering agent that wraps a smolagents ``CodeAgent``.

    An instance is callable: ``agent(question)`` sends the question, wrapped
    in a fixed instruction prompt, to the underlying agent and returns the
    cleaned-up answer as a string.
    """

    # Labels that may precede the answer. ANSWER/RESULT require a colon:
    # with an optional colon, "Answering machine" lost its first six letters
    # and "Results vary" became "s vary".
    _LABEL_PATTERNS = (
        re.compile(r'^FINAL\s*ANSWER(?:\s*:|\b)\s*', re.IGNORECASE),
        re.compile(r'^ANSWER\s*:\s*', re.IGNORECASE),
        re.compile(r'^RESULT\s*:\s*', re.IGNORECASE),
    )

    # Instruction prompt; ``{question}`` is filled in per call.
    _PROMPT_TEMPLATE = (
        "You are a helpful AI assistant designed to answer questions accurately and concisely.\n"
        "\n"
        "IMPORTANT INSTRUCTIONS:\n"
        "1. Read the question carefully and understand what is being asked\n"
        "2. Use the available tools when you need external information or calculations\n"
        "3. For mathematical problems, use the calculate_math tool or write Python code\n"
        "4. For web searches, use DuckDuckGoSearchTool and visit_webpage when needed\n"
        "5. Break down complex problems into steps\n"
        "6. Give ONLY the final answer - no explanations, no \"FINAL ANSWER:\" prefix\n"
        "7. Be precise with numbers and dates\n"
        "8. If the answer is a number, return just the number\n"
        "9. If the answer is text, return just the text without quotes\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "Answer:"
    )

    def __init__(self):
        """Create the model, the tool list and the ``CodeAgent``.

        Raises:
            Exception: whatever ``HfApiModel()`` or ``CodeAgent(...)`` raises;
                the error is printed and then propagated to the caller.
        """
        print("GAIAAgent initializing with smolagents...")

        # Initialize the model (using HuggingFace free inference API).
        # The former "fallback" repeated this exact constructor call, so it
        # could not recover from the failure; fail once, visibly.
        try:
            self.model = HfApiModel()
        except Exception as e:
            print(f"❌ Error initializing model: {e}")
            raise
        print("✅ Model initialized successfully")

        # Initialize tools
        self.tools = [
            DuckDuckGoSearchTool(),
            visit_webpage,
            calculate_math,
            analyze_data,
            extract_numbers,
            count_items,
        ]

        # Create the CodeAgent with enhanced capabilities
        try:
            self.agent = CodeAgent(
                tools=self.tools,
                model=self.model,
                additional_authorized_imports=[
                    'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
                    're', 'urllib.parse', 'base64', 'datetime', 'calendar'
                ],
                max_steps=10,  # Allow multiple reasoning steps
                verbosity_level=1  # Reduce verbosity for cleaner output
            )
        except Exception as e:
            print(f"❌ Error initializing agent: {e}")
            raise
        print("✅ GAIA Agent initialized successfully with enhanced tools")

    @staticmethod
    def _clean_answer(text: str) -> str:
        """Strip answer labels and one pair of enclosing quotes from ``text``."""
        cleaned = text.strip()
        for pattern in GAIAAgent._LABEL_PATTERNS:
            cleaned = pattern.sub('', cleaned)

        # Require two characters so a lone quote is not reduced to "".
        if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] and cleaned[0] in "\"'":
            cleaned = cleaned[1:-1]

        return cleaned.strip()

    def __call__(self, question: str) -> str:
        """Process a question and return the answer.

        Args:
            question: The question text to answer.

        Returns:
            The cleaned answer. Non-string agent results are converted with
            ``str()``. If anything raises, the text
            ``"Error processing question: ..."`` is returned instead of
            propagating the exception.
        """
        try:
            print(f"🤖 Processing question: {question[:100]}...")

            result = self.agent.run(self._PROMPT_TEMPLATE.format(question=question))

            if isinstance(result, str):
                answer = self._clean_answer(result)
            else:
                answer = str(result)

            print(f"✅ Agent response: {answer}")
            return answer

        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg
326
+
327
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
328
  """
329
+ Fetches all questions, runs the GAIAAgent on them, submits all answers,
330
  and displays the results.
331
  """
332
  # --- Determine HF Space Runtime URL and Repo URL ---
333
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
334
 
335
  if profile:
336
+ username = f"{profile.username}"
337
  print(f"User logged in: {username}")
338
  else:
339
  print("User not logged in.")
 
343
  questions_url = f"{api_url}/questions"
344
  submit_url = f"{api_url}/submit"
345
 
346
+ # 1. Instantiate Enhanced Agent
347
  try:
348
+ print("๐Ÿš€ Initializing GAIA Agent with smolagents...")
349
+ agent = GAIAAgent()
350
+ print("โœ… Enhanced agent ready for GAIA benchmark!")
351
  except Exception as e:
352
+ error_msg = f"Error initializing agent: {e}"
353
+ print(f"โŒ {error_msg}")
354
+ return error_msg, None
355
+
356
+ # In the case of an app running as a hugging Face space, this link points toward your codebase
357
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
358
+ print(f"Agent code link: {agent_code}")
359
 
360
  # 2. Fetch Questions
361
+ print(f"๐Ÿ“ฅ Fetching questions from: {questions_url}")
362
  try:
363
  response = requests.get(questions_url, timeout=15)
364
  response.raise_for_status()
 
366
  if not questions_data:
367
  print("Fetched questions list is empty.")
368
  return "Fetched questions list is empty or invalid format.", None
369
+ print(f"โœ… Fetched {len(questions_data)} questions from GAIA benchmark.")
370
  except requests.exceptions.RequestException as e:
371
+ print(f"โŒ Error fetching questions: {e}")
372
  return f"Error fetching questions: {e}", None
373
  except requests.exceptions.JSONDecodeError as e:
374
+ print(f"โŒ Error decoding JSON response from questions endpoint: {e}")
375
  print(f"Response text: {response.text[:500]}")
376
  return f"Error decoding server response for questions: {e}", None
377
  except Exception as e:
378
+ print(f"โŒ An unexpected error occurred fetching questions: {e}")
379
  return f"An unexpected error occurred fetching questions: {e}", None
380
 
381
+ # 3. Run Enhanced Agent
382
  results_log = []
383
  answers_payload = []
384
+ print(f"๐Ÿค– Running enhanced GAIA agent on {len(questions_data)} questions...")
385
+
386
+ for i, item in enumerate(questions_data, 1):
387
  task_id = item.get("task_id")
388
  question_text = item.get("question")
389
  if not task_id or question_text is None:
390
+ print(f"โš ๏ธ Skipping item with missing task_id or question: {item}")
391
  continue
392
+
393
+ print(f"\n๐Ÿ“ Processing question {i}/{len(questions_data)} (ID: {task_id})")
394
  try:
395
  submitted_answer = agent(question_text)
396
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
397
+ results_log.append({
398
+ "Task ID": task_id,
399
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
400
+ "Submitted Answer": submitted_answer
401
+ })
402
+ print(f"โœ… Answer for {task_id}: {submitted_answer}")
403
  except Exception as e:
404
+ error_msg = f"AGENT ERROR: {e}"
405
+ print(f"โŒ Error running agent on task {task_id}: {e}")
406
+ answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
407
+ results_log.append({
408
+ "Task ID": task_id,
409
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
410
+ "Submitted Answer": error_msg
411
+ })
412
 
413
  if not answers_payload:
414
+ print("โŒ Agent did not produce any answers to submit.")
415
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
416
 
417
  # 4. Prepare Submission
418
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
419
+ status_update = f"๐Ÿš€ Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
420
  print(status_update)
421
 
422
  # 5. Submit
423
+ print(f"๐Ÿ“ค Submitting {len(answers_payload)} answers to: {submit_url}")
424
  try:
425
  response = requests.post(submit_url, json=submission_data, timeout=60)
426
  response.raise_for_status()
427
  result_data = response.json()
428
+
429
+ score = result_data.get('score', 'N/A')
430
+ correct_count = result_data.get('correct_count', '?')
431
+ total_attempted = result_data.get('total_attempted', '?')
432
+
433
  final_status = (
434
+ f"๐ŸŽ‰ Submission Successful!\n"
435
+ f"๐Ÿ‘ค User: {result_data.get('username')}\n"
436
+ f"๐Ÿ“Š Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
437
+ f"๐ŸŽฏ Target: >30% for certification\n"
438
+ f"๐Ÿ’ฌ Message: {result_data.get('message', 'No message received.')}"
439
  )
440
+
441
+ if isinstance(score, (int, float)) and score >= 30:
442
+ final_status += f"\n๐Ÿ† CONGRATULATIONS! You've achieved the target score of 30%!"
443
+ elif isinstance(score, (int, float)):
444
+ final_status += f"\n๐Ÿ“ˆ Keep improving! You need {30-score:.1f}% more to reach the target."
445
+
446
+ print("โœ… Submission successful!")
447
  results_df = pd.DataFrame(results_log)
448
  return final_status, results_df
449
+
450
  except requests.exceptions.HTTPError as e:
451
  error_detail = f"Server responded with status {e.response.status_code}."
452
  try:
 
454
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
455
  except requests.exceptions.JSONDecodeError:
456
  error_detail += f" Response: {e.response.text[:500]}"
457
+ status_message = f"โŒ Submission Failed: {error_detail}"
458
  print(status_message)
459
  results_df = pd.DataFrame(results_log)
460
  return status_message, results_df
461
  except requests.exceptions.Timeout:
462
+ status_message = "โŒ Submission Failed: The request timed out."
463
  print(status_message)
464
  results_df = pd.DataFrame(results_log)
465
  return status_message, results_df
466
  except requests.exceptions.RequestException as e:
467
+ status_message = f"โŒ Submission Failed: Network error - {e}"
468
  print(status_message)
469
  results_df = pd.DataFrame(results_log)
470
  return status_message, results_df
471
  except Exception as e:
472
+ status_message = f"โŒ An unexpected error occurred during submission: {e}"
473
  print(status_message)
474
  results_df = pd.DataFrame(results_log)
475
  return status_message, results_df
476
 
477
 
478
  # --- Build Gradio Interface using Blocks ---
479
+ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
480
+ gr.Markdown("# ๐Ÿค– Enhanced GAIA Agent Evaluation Runner")
481
  gr.Markdown(
482
  """
483
+ **Enhanced Agent for GAIA Benchmark Certification**
484
 
485
+ This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:
486
+ - ๐Ÿ” **Web Search**: DuckDuckGoSearchTool for finding information
487
+ - ๐ŸŒ **Web Scraping**: Custom webpage visitor for content extraction
488
+ - ๐Ÿงฎ **Mathematics**: Advanced calculation capabilities
489
+ - ๐Ÿ“Š **Data Analysis**: Statistical analysis of numerical data
490
+ - ๐Ÿ”ข **Number Extraction**: Intelligent number parsing from text
491
+ - ๐Ÿ“ **Text Analysis**: Counting and text processing utilities
492
 
493
+ **Instructions:**
494
+ 1. ๐Ÿ”„ **Clone this space** and customize the agent as needed
495
+ 2. ๐Ÿ”‘ **Log in** to your Hugging Face account using the button below
496
+ 3. ๐Ÿš€ **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
497
+ 4. ๐ŸŽฏ **Target**: Score >30% for course certification
498
+
499
+ **Goal**: Answer GAIA level 1 validation questions with exact match precision.
500
+
501
  ---
502
+ โš ๏ธ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
 
 
503
  """
504
  )
505
 
506
  gr.LoginButton()
507
 
508
+ run_button = gr.Button("๐Ÿš€ Run Evaluation & Submit All Answers", variant="primary", size="lg")
509
 
510
+ status_output = gr.Textbox(
511
+ label="๐Ÿ“Š Evaluation Status & Results",
512
+ lines=8,
513
+ interactive=False,
514
+ placeholder="Click the button above to start the evaluation..."
515
+ )
516
+
517
+ results_table = gr.DataFrame(
518
+ label="๐Ÿ“‹ Questions and Agent Responses",
519
+ wrap=True,
520
+ headers=["Task ID", "Question", "Submitted Answer"]
521
+ )
522
 
523
  run_button.click(
524
  fn=run_and_submit_all,
 
526
  )
527
 
528
  if __name__ == "__main__":
529
+ print("\n" + "="*60)
530
+ print("๐Ÿค– ENHANCED GAIA AGENT STARTING UP")
531
+ print("="*60)
532
+
533
  # Check for SPACE_HOST and SPACE_ID at startup for information
534
  space_host_startup = os.getenv("SPACE_HOST")
535
+ space_id_startup = os.getenv("SPACE_ID")
536
 
537
  if space_host_startup:
538
  print(f"โœ… SPACE_HOST found: {space_host_startup}")
539
+ print(f" ๐ŸŒ Runtime URL: https://{space_host_startup}.hf.space")
540
  else:
541
  print("โ„น๏ธ SPACE_HOST environment variable not found (running locally?).")
542
 
543
+ if space_id_startup:
544
  print(f"โœ… SPACE_ID found: {space_id_startup}")
545
+ print(f" ๐Ÿ“ Repo URL: https://huggingface.co/spaces/{space_id_startup}")
546
+ print(f" ๐Ÿ”— Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
547
  else:
548
+ print("โ„น๏ธ SPACE_ID environment variable not found (running locally?).")
549
 
550
+ print("="*60)
551
+ print("๐Ÿš€ Launching Enhanced GAIA Agent Interface...")
552
+ print("๐ŸŽฏ Target: >30% score on GAIA benchmark")
553
+ print("="*60 + "\n")
554
 
 
555
  demo.launch(debug=True, share=False)