lmrkmrcs committed on
Commit
016d5db
·
verified ·
1 Parent(s): 3600a34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -496
app.py CHANGED
@@ -3,673 +3,363 @@ import re
3
  import requests
4
  import gradio as gr
5
  import pandas as pd
6
- from smolagents import CodeAgent, tool, LiteLLMModel
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # ============================================
12
- # CUSTOM TOOLS FOR THE AGENT
13
  # ============================================
14
 
15
- @tool
16
def calculator(expression: str) -> str:
    """
    Performs mathematical calculations on a restricted set of names.

    Args:
        expression: A math expression like "2 + 2", "10 * 5 / 2", "2**10", "sqrt(16)"

    Returns:
        The result of the calculation as a string, or a message starting with "Calculation error:" when the expression cannot be evaluated
    """
    import math

    # Only these names are visible to the evaluated expression.
    allowed_names = {
        "abs": abs, "round": round, "min": min, "max": max,
        "sum": sum, "pow": pow, "len": len,
        "sqrt": math.sqrt, "sin": math.sin, "cos": math.cos,
        "tan": math.tan, "log": math.log, "log10": math.log10,
        "pi": math.pi, "e": math.e, "floor": math.floor, "ceil": math.ceil,
        "factorial": math.factorial,
    }

    try:
        expression = expression.strip()
        if not expression:
            return "Calculation error: empty expression"

        # SECURITY: eval() on model-generated text is not a real sandbox even
        # with empty __builtins__; attribute chains such as
        # ().__class__.__bases__ can reach arbitrary objects. Rejecting dunder
        # access closes that well-known route, but a dedicated expression
        # parser would be the robust replacement.
        if "__" in expression:
            return "Calculation error: double underscores are not allowed"

        result = eval(expression, {"__builtins__": {}}, allowed_names)
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}"
43
-
44
-
45
  @tool
def web_search(query: str) -> str:
    """
    Searches the web using DuckDuckGo and returns relevant results.

    Args:
        query: The search query string

    Returns:
        Search results with titles, URLs, and snippets
    """
    # A blank query cannot match anything; answer without touching the network.
    if not query or not query.strip():
        return "No search results found."

    try:
        # Imported lazily so the module still loads when the package is absent.
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=5))

        if not results:
            return "No search results found."

        output = []
        for i, r in enumerate(results, 1):
            output.append(f"{i}. {r.get('title', 'No title')}")
            output.append(f"   URL: {r.get('href', 'No URL')}")
            output.append(f"   {r.get('body', 'No description')}")
            output.append("")

        return "\n".join(output)
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"Search error: {e}"
75
 
76
 
77
  @tool
def visit_webpage(url: str) -> str:
    """
    Visits a webpage and extracts its text content.

    Args:
        url: The URL of the webpage to visit

    Returns:
        The text content of the webpage (truncated if too long)
    """
    # Give the agent an explicit message instead of a low-level URL error.
    if not url or not url.strip():
        return "Error visiting webpage: empty URL"

    try:
        from bs4 import BeautifulSoup

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

        response = requests.get(url.strip(), headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop page chrome and non-visible content before extracting text.
        for element in soup(['script', 'style', 'nav', 'footer', 'header']):
            element.decompose()

        text = soup.get_text(separator='\n', strip=True)

        if len(text) > 10000:
            text = text[:10000] + "\n...[truncated]"

        return text if text else "Could not extract text from webpage."
    except Exception as e:
        return f"Error visiting webpage: {e}"
111
 
112
 
113
  @tool
114
def wikipedia_search(query: str) -> str:
    """
    Searches Wikipedia and returns the text of the best-matching article.

    Args:
        query: The topic to search on Wikipedia

    Returns:
        Wikipedia article title and plain-text content (truncated if too long)
    """
    if not query or not query.strip():
        return f"No Wikipedia articles found for: {query}"

    try:
        search_url = "https://en.wikipedia.org/w/api.php"
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 3,
        }

        response = requests.get(search_url, params=search_params, timeout=10)
        # Surface HTTP failures directly rather than as a JSON decode error.
        response.raise_for_status()
        data = response.json()

        if not data.get("query", {}).get("search"):
            return f"No Wikipedia articles found for: {query}"

        title = data["query"]["search"][0]["title"]

        # The MediaWiki API treats a boolean parameter as true whenever it is
        # present, whatever its value. Sending "exintro": False therefore
        # still limited the extract to the intro; the parameter is omitted so
        # the whole article is returned.
        content_params = {
            "action": "query",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
            "format": "json",
        }

        response = requests.get(search_url, params=content_params, timeout=10)
        response.raise_for_status()
        pages = response.json().get("query", {}).get("pages", {})

        # Only one title was requested, so the first page entry is the article.
        for page_data in pages.values():
            extract = page_data.get("extract", "No content available")
            if len(extract) > 5000:
                extract = extract[:5000] + "...[truncated]"
            return f"Wikipedia: {title}\n\n{extract}"

        return "Could not retrieve Wikipedia content."
    except Exception as e:
        return f"Wikipedia error: {e}"
164
 
165
 
166
  @tool
167
def get_gaia_file(task_id: str) -> str:
    """
    Downloads a file associated with a GAIA task from the API.
    Use this tool when the question mentions a file or attachment.

    Args:
        task_id: The task ID to get the file for

    Returns:
        Information about the file or its content if text-based
    """
    # Without a task id there is nothing to look up.
    if not task_id or not str(task_id).strip():
        return "No file associated with this task."

    try:
        api_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(api_url, timeout=30)

        if response.status_code == 404:
            return "No file associated with this task."

        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()
        content_disp = response.headers.get('content-disposition', '')

        filename = "unknown"
        if 'filename=' in content_disp:
            # Cut at ';' so trailing header parameters do not leak into the name.
            raw_name = content_disp.split('filename=')[-1].split(';')[0]
            filename = raw_name.strip().strip('"\'') or "unknown"

        # Extension tests are case-insensitive so "DATA.XLSX" is recognised.
        lowered = filename.lower()

        # Handle text files
        if 'text' in content_type or lowered.endswith(('.txt', '.csv', '.json', '.md')):
            content = response.text
            if len(content) > 8000:
                content = content[:8000] + "\n...[truncated]"
            return f"File: {filename}\n\nContent:\n{content}"

        # Handle Python files
        if lowered.endswith('.py'):
            content = response.text
            if len(content) > 8000:
                content = content[:8000] + "\n...[truncated]"
            return f"Python File: {filename}\n\nCode:\n{content}"

        # Handle Excel files
        if lowered.endswith(('.xlsx', '.xls')):
            try:
                import pandas as pd
                from io import BytesIO
                df = pd.read_excel(BytesIO(response.content))
                return f"Excel File: {filename}\n\nData:\n{df.to_string()}"
            except Exception:
                # Was a bare except, which also swallowed KeyboardInterrupt
                # and SystemExit.
                return f"Excel File: {filename}\nNote: Could not parse Excel file."

        # Handle images
        if 'image' in content_type or lowered.endswith(('.png', '.jpg', '.jpeg', '.gif')):
            return f"File: {filename}\nType: Image ({content_type})\nNote: This is an image file. I cannot view images directly, but I can tell you it exists."

        # Handle audio
        if 'audio' in content_type or lowered.endswith(('.mp3', '.wav', '.m4a')):
            return f"File: {filename}\nType: Audio ({content_type})\nNote: This is an audio file. I cannot process audio directly."

        # Handle PDF
        if 'pdf' in content_type or lowered.endswith('.pdf'):
            return f"File: {filename}\nType: PDF document\nNote: This is a PDF file. I cannot read PDFs directly."

        return f"File: {filename}\nType: {content_type}\nSize: {len(response.content)} bytes"

    except Exception as e:
        return f"Error getting file: {e}"
235
 
236
 
237
  @tool
238
def read_file_content(url: str) -> str:
    """
    Downloads and reads content from a file URL.

    Args:
        url: The URL of the file to download and read

    Returns:
        The content of the file as text (truncated if too long)
    """
    # Give the agent an explicit message instead of a low-level URL error.
    if not url or not url.strip():
        return "Error reading file: empty URL"

    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url.strip(), headers=headers, timeout=30)
        response.raise_for_status()

        content = response.text
        if len(content) > 8000:
            content = content[:8000] + "\n...[truncated]"
        return content

    except Exception as e:
        return f"Error reading file: {e}"
260
-
261
-
262
- @tool
263
def reverse_text(text: str) -> str:
    """
    Reverses the given text string character by character.

    Args:
        text: The text to reverse

    Returns:
        The reversed text
    """
    return text[::-1]
274
 
275
 
276
- @tool
277
def count_items(text: str, item_type: str = "words") -> str:
    """
    Counts items in text (words, characters, lines, sentences).

    Args:
        text: The text to analyze
        item_type: What to count - "words", "characters", "lines", or "sentences"

    Returns:
        The count as a string
    """
    item_type = item_type.lower().strip()

    if item_type == "words":
        count = len(text.split())
    elif item_type in ("characters", "chars", "char"):
        count = len(text)
    elif item_type == "lines":
        # splitlines() yields 0 for empty text and ignores a trailing newline,
        # where split('\n') counted a phantom extra line.
        count = len(text.splitlines())
    elif item_type == "sentences":
        # Splitting on terminators leaves an empty tail after the final
        # period; count only non-blank pieces so "A. B." is 2, not 3.
        count = sum(1 for piece in re.split(r'[.!?]+', text) if piece.strip())
    else:
        return f"Unknown item type: {item_type}. Use: words, characters, lines, or sentences."

    return str(count)
302
-
303
-
304
- @tool
305
def extract_numbers(text: str) -> str:
    """
    Extracts all numbers from a text string.

    Args:
        text: The text to extract numbers from

    Returns:
        A comma-separated list of all numbers found in the text
    """
    # The fractional part is matched only when digits follow the dot, so a
    # sentence-ending period ("I have 3.") is not glued onto the number.
    numbers = re.findall(r'-?\d+(?:\.\d+)?', text)
    if not numbers:
        return "No numbers found in the text."
    return f"Numbers found: {', '.join(numbers)}"
319
-
320
-
321
- @tool
322
def sort_list(items: str, order: str = "ascending") -> str:
    """
    Sorts a comma-separated list of items alphabetically or numerically.

    Args:
        items: Comma-separated items to sort (e.g., "banana, apple, cherry")
        order: "ascending" or "descending"

    Returns:
        Sorted items as comma-separated string
    """
    import math

    # Drop empty entries produced by stray or trailing commas.
    item_list = [item.strip() for item in items.split(',') if item.strip()]
    descending = order.lower().strip() == "descending"

    try:
        numeric_list = [float(item) for item in item_list]
        # "inf" and "nan" parse as floats but cannot be rendered as integers
        # (int(inf) raises OverflowError); treat such lists as plain text.
        if not all(math.isfinite(value) for value in numeric_list):
            raise ValueError("non-finite value in list")
    except ValueError:
        return ', '.join(sorted(item_list, reverse=descending))

    numeric_list.sort(reverse=descending)
    # Whole numbers are printed without a trailing ".0".
    return ', '.join(
        str(int(value)) if value.is_integer() else str(value)
        for value in numeric_list
    )
342
-
343
-
344
- @tool
345
def convert_units(value: float, from_unit: str, to_unit: str) -> str:
    """
    Converts between common units of measurement.

    Args:
        value: The numeric value to convert
        from_unit: The source unit (e.g., "km", "miles", "celsius", "kg")
        to_unit: The target unit

    Returns:
        The converted value with units, or a message when the input is invalid or the conversion is unsupported
    """
    conversions = {
        ("km", "miles"): lambda x: x * 0.621371,
        ("miles", "km"): lambda x: x * 1.60934,
        ("m", "feet"): lambda x: x * 3.28084,
        ("feet", "m"): lambda x: x * 0.3048,
        ("cm", "inches"): lambda x: x * 0.393701,
        ("inches", "cm"): lambda x: x * 2.54,
        ("celsius", "fahrenheit"): lambda x: (x * 9 / 5) + 32,
        ("fahrenheit", "celsius"): lambda x: (x - 32) * 5 / 9,
        ("celsius", "kelvin"): lambda x: x + 273.15,
        ("kelvin", "celsius"): lambda x: x - 273.15,
        ("kg", "lbs"): lambda x: x * 2.20462,
        ("lbs", "kg"): lambda x: x * 0.453592,
        ("g", "oz"): lambda x: x * 0.035274,
        ("oz", "g"): lambda x: x * 28.3495,
    }

    # Tool callers may hand the number over as text; coerce it and report a
    # readable message instead of raising TypeError inside a lambda.
    try:
        amount = float(value)
    except (TypeError, ValueError):
        return f"Invalid value: {value!r}. Provide a number to convert."

    source = from_unit.lower().strip()
    target = to_unit.lower().strip()

    if source == target:
        # Converting a unit to itself is the identity.
        result = amount
    elif (source, target) in conversions:
        result = conversions[(source, target)](amount)
    else:
        return f"Conversion from {from_unit} to {to_unit} not supported."

    return f"{value} {from_unit} = {result:.6f} {to_unit}"
380
-
381
-
382
- @tool
383
def get_current_time() -> str:
    """
    Gets the current date and time in UTC.

    Returns:
        The current date and time
    """
    from datetime import datetime, timezone

    # datetime.utcnow() is deprecated and returns a naive value; ask for an
    # aware UTC timestamp instead. The formatted output is unchanged.
    now = datetime.now(timezone.utc)
    return f"Current UTC date/time: {now.strftime('%Y-%m-%d %H:%M:%S')}"
393
-
394
-
395
  # ============================================
396
- # BASIC AGENT CLASS - USING GROQ
397
  # ============================================
398
 
class BasicAgent:
    """GAIA question-answering agent: a CodeAgent driven by Groq's Llama 3.3 70B."""

    # Lead-ins the model tends to put before the answer; matched
    # case-insensitively and removed so only the answer itself is submitted.
    _ANSWER_PREFIXES = (
        "The answer is: ", "The answer is ",
        "Answer: ", "Final answer: ", "Final Answer: ",
        "The final answer is: ", "The final answer is ",
        "FINAL ANSWER: ", "FINAL ANSWER ",
    )

    def __init__(self):
        """Create the Groq-backed model and the tool-equipped agent.

        Raises:
            ValueError: if GROQ_API_KEY is not set in the environment.
        """
        print("Initializing BasicAgent with Groq + Llama 3.3 70B...")

        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            # Fail once at start-up instead of once per question.
            raise ValueError("GROQ_API_KEY not found in environment!")

        # Use Groq with Llama 3.3 70B
        self.model = LiteLLMModel(
            model_id="groq/llama-3.3-70b-versatile",
            api_key=api_key,
        )

        # Create the agent with all tools
        self.agent = CodeAgent(
            model=self.model,
            tools=[
                web_search,
                visit_webpage,
                wikipedia_search,
                calculator,
                get_gaia_file,
                read_file_content,
                reverse_text,
                count_items,
                extract_numbers,
                sort_list,
                convert_units,
                get_current_time,
            ],
            max_steps=15,
            verbosity_level=1,
        )

        print("BasicAgent initialized successfully with Groq!")

    def _build_prompt(self, question: str, task_id: str = None) -> str:
        """Return the instruction prompt; file hints appear only when a task id exists."""
        file_tool_line = ""
        file_instruction = ""
        if task_id:
            # Without a task id the prompt used to advertise
            # get_gaia_file("None"), inviting a pointless tool call.
            file_tool_line = f"\n   - get_gaia_file(\"{task_id}\") if there's an attached file"
            file_instruction = f"""
IMPORTANT: This question may have an associated file.
To check for and read the file, use: get_gaia_file("{task_id}")
Always check for a file first if the question mentions any attachment, file, document, image, or data."""

        return f"""You are solving a GAIA benchmark question. Follow these rules:

1. THINK step by step before answering
2. USE TOOLS when you need information:
   - web_search() for current info or facts
   - wikipedia_search() for encyclopedic knowledge
   - visit_webpage() to read full webpage content
   - calculator() for any math{file_tool_line}
3. VERIFY your answer before submitting
4. Give ONLY the final answer - no explanation
5. Be PRECISE - answers are graded by exact match
{file_instruction}

Question: {question}

Solve this step-by-step, then give your final answer."""

    @staticmethod
    def _clean_answer(raw) -> str:
        """Strip known lead-in phrases and one pair of wrapping quotes from an answer."""
        answer = str(raw).strip()

        for prefix in BasicAgent._ANSWER_PREFIXES:
            if answer.lower().startswith(prefix.lower()):
                answer = answer[len(prefix):].strip()

        # Require two characters so a lone quote is not reduced to "".
        if len(answer) >= 2 and answer[0] == answer[-1] and answer[0] in "\"'":
            answer = answer[1:-1]

        return answer

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer one question.

        Args:
            question: The question text.
            task_id: Optional GAIA task id, used to point the agent at an attached file.

        Returns:
            The cleaned answer, or a string starting with "Error:" if the agent fails.
        """
        print(f"Agent processing: {question[:100]}...")

        try:
            answer = self._clean_answer(self.agent.run(self._build_prompt(question, task_id)))
            print(f"Final answer: {answer[:200]}")
            return answer
        except Exception as e:
            print(f"Agent error: {e}")
            return f"Error: {e}"
492
 
493
 
494
  # ============================================
495
- # RUN AND SUBMIT FUNCTION
496
  # ============================================
497
 
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is logged in.

    Returns:
        A (status message, results DataFrame or None) pair for the two Gradio outputs.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # 3. Run Agent on all questions
    results_log = []
    answers_payload = []
    print(f"\n{'='*60}")
    print(f"Running agent on {len(questions_data)} questions...")
    print(f"{'='*60}\n")

    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            continue

        # Shortened question for the results table; ellipsis only when cut.
        short_question = question_text[:100] + "..." if len(question_text) > 100 else question_text

        print(f"\n[{i+1}/{len(questions_data)}] Task: {task_id}")
        print(f"Question: {question_text[:150]}{'...' if len(question_text) > 150 else ''}")

        try:
            submitted_answer = agent(question_text, task_id=task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": short_question,
                "Answer": submitted_answer[:200],
            })
            print(f"✓ Answer: {submitted_answer[:100]}")
        except Exception as e:
            print(f"Error: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": short_question,
                "Answer": f"ERROR: {e}",
            })

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    # 4. Submit answers
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    print(f"\n{'='*60}")
    print(f"Submitting {len(answers_payload)} answers...")
    print(f"{'='*60}\n")

    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        status_message = f"Submission Failed: {e}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)

    score = result_data.get('score', 'N/A')
    correct = result_data.get('correct_count', '?')
    total = result_data.get('total_attempted', '?')

    final_status = (
        f"✅ Submission Successful!\n\n"
        f"👤 User: {result_data.get('username')}\n"
        f"🎯 Score: {score}% ({correct}/{total} correct)\n\n"
        f"📝 {result_data.get('message', '')}"
    )

    # The score may be missing ('N/A'). float() on it used to raise inside the
    # submission try-block and mislabel a successful submission as failed.
    try:
        numeric_score = float(score)
    except (TypeError, ValueError):
        numeric_score = None

    if numeric_score is not None:
        if numeric_score >= 30:
            final_status += "\n\n🎉 CONGRATULATIONS! You passed the 30% threshold!"
        else:
            final_status += f"\n\n📈 Need {30 - numeric_score}% more to reach 30% passing score."

    print(final_status)
    return final_status, pd.DataFrame(results_log)
615
 
616
 
617
  # ============================================
618
- # GRADIO INTERFACE
619
  # ============================================
620
 
# Gradio page: description, login button, run button, and the two outputs
# (status text and per-question results) filled by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# 🎯 GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Unit 4 Final Project - HuggingFace AI Agents Course**

        This agent uses **Groq + Llama 3.3 70B** with the following tools:

        | Category | Tools |
        |----------|-------|
        | 🔍 **Search** | Web Search, Wikipedia, Visit Webpage |
        | 🧮 **Math** | Calculator, Unit Converter |
        | 📁 **Files** | GAIA File Reader, URL File Reader |
        | 📝 **Text** | Reverse, Count Items, Extract Numbers, Sort List |
        | 🕐 **Utility** | Current Time |

        ---
        **Instructions:**
        1. Make sure `GROQ_API_KEY` is set in Space secrets
        2. Log in with your Hugging Face account
        3. Click the button and wait (~10-15 mins)
        4. You need **30%** to pass!
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")

    status_output = gr.Textbox(label="Status", lines=8, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # No explicit inputs: run_and_submit_all declares a gr.OAuthProfile
    # parameter and takes nothing else.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
657
 
if __name__ == "__main__":
    # Start-up banner with quick configuration checks, then launch the UI.
    print("\n" + "="*60)
    print("🎯 GAIA Agent - Powered by Groq + Llama 3.3 70B")
    print("="*60)

    # Check for API key
    if os.environ.get("GROQ_API_KEY"):
        print("✅ GROQ_API_KEY found")
    else:
        print("⚠️ GROQ_API_KEY not found - add it to Space secrets!")

    space_id = os.getenv("SPACE_ID")
    if space_id:
        print(f"✅ Space: https://huggingface.co/spaces/{space_id}")

    print("="*60 + "\n")

    demo.launch(debug=True, share=False)
 
3
  import requests
4
  import gradio as gr
5
  import pandas as pd
6
+ from smolagents import ToolCallingAgent, tool, LiteLLMModel
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # ============================================
12
+ # TOOLS
13
  # ============================================
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  @tool
def web_search(query: str) -> str:
    """
    Searches the web and returns results.

    Args:
        query: What to search for

    Returns:
        Search results, one per line, with title, URL and snippet
    """
    # A blank query cannot match anything; answer without touching the network.
    if not query or not query.strip():
        return "No results found."

    try:
        # Imported lazily so the module still loads when the package is absent.
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=3))

        if not results:
            return "No results found."

        output = []
        for r in results:
            title = r.get('title', '')
            link = r.get('href', '')
            # Include the URL so the agent has something to hand to
            # visit_webpage; a title and snippet alone leave it nowhere to go.
            heading = f"{title} ({link})" if link else title
            output.append(f"- {heading}: {r.get('body', '')}")

        return "\n".join(output)
    except Exception as e:
        # Tools report failures as text so the agent can react to them.
        return f"Search error: {e}"
38
 
39
 
40
  @tool
def visit_webpage(url: str) -> str:
    """
    Gets text content from a webpage.

    Args:
        url: The webpage URL

    Returns:
        Page text content (at most 5000 characters), or a message starting with "Error:"
    """
    # Give the agent an explicit message instead of a low-level URL error.
    if not url or not url.strip():
        return "Error: empty URL"

    try:
        from bs4 import BeautifulSoup

        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url.strip(), headers=headers, timeout=10)
        # Without this, the body of a 404/500 page was returned to the agent
        # as if it were the requested content.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop non-visible content and page chrome before extracting text.
        for tag in soup(['script', 'style', 'nav', 'footer']):
            tag.decompose()

        text = soup.get_text(separator=' ', strip=True)
        return text[:5000]
    except Exception as e:
        return f"Error: {e}"
62
 
63
 
64
  @tool
65
def wikipedia_search(topic: str) -> str:
    """
    Searches Wikipedia for a topic.

    Args:
        topic: What to look up

    Returns:
        Title and intro summary of the best-matching Wikipedia article
    """
    if not topic or not topic.strip():
        return "No Wikipedia article found."

    try:
        url = "https://en.wikipedia.org/w/api.php"

        # Step 1: find the best-matching article title.
        params = {
            "action": "query",
            "list": "search",
            "srsearch": topic,
            "format": "json",
            "srlimit": 1,
        }
        response = requests.get(url, params=params, timeout=10)
        # Surface HTTP failures directly rather than as a JSON decode error.
        response.raise_for_status()
        data = response.json()

        if not data.get("query", {}).get("search"):
            return "No Wikipedia article found."

        title = data["query"]["search"][0]["title"]

        # Step 2: fetch the plain-text intro of that article.
        params2 = {
            "action": "query",
            "titles": title,
            "prop": "extracts",
            "exintro": True,
            "explaintext": True,
            "format": "json",
        }
        response = requests.get(url, params=params2, timeout=10)
        response.raise_for_status()
        pages = response.json().get("query", {}).get("pages", {})

        # Only one title was requested, so the first page entry is the article.
        for page in pages.values():
            extract = page.get("extract", "")
            if not extract:
                # An empty extract used to be returned as a bare "Title: ".
                return "Could not get content."
            return f"{title}: {extract[:3000]}"
        return "Could not get content."
    except Exception as e:
        return f"Error: {e}"
109
 
110
 
111
  @tool
112
def calculator(expression: str) -> str:
    """
    Calculates a math expression.

    Args:
        expression: Math like "2+2" or "sqrt(16)"

    Returns:
        The result as a string, or a message starting with "Error:" when the expression cannot be evaluated
    """
    import math

    try:
        expression = expression.strip()
        if not expression:
            return "Error: empty expression"

        # SECURITY: eval() on model-generated text is not a real sandbox even
        # with empty __builtins__; attribute chains such as
        # ().__class__.__bases__ can reach arbitrary objects. Rejecting dunder
        # access closes that well-known route, but a dedicated expression
        # parser would be the robust replacement.
        if "__" in expression:
            return "Error: double underscores are not allowed"

        # Only these names are visible to the evaluated expression.
        safe = {"sqrt": math.sqrt, "pow": pow, "abs": abs, "round": round,
                "sin": math.sin, "cos": math.cos, "pi": math.pi, "e": math.e,
                "log": math.log, "floor": math.floor, "ceil": math.ceil}
        return str(eval(expression, {"__builtins__": {}}, safe))
    except Exception as e:
        return f"Error: {e}"
130
+
131
+
132
+ @tool
133
def get_task_file(task_id: str) -> str:
    """
    Gets the file attached to a GAIA task.

    Args:
        task_id: The task ID

    Returns:
        File content or description
    """
    # Without a task id there is nothing to look up.
    if not task_id or not str(task_id).strip():
        return "No file for this task."

    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=20)

        if response.status_code == 404:
            return "No file for this task."
        # Other HTTP errors used to fall through, so an error page body was
        # handed to the agent as if it were the task file.
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()
        disp = response.headers.get('content-disposition', '')

        filename = "file"
        if 'filename=' in disp:
            # Cut at ';' so trailing header parameters do not leak into the name.
            raw_name = disp.split('filename=')[-1].split(';')[0]
            filename = raw_name.strip().strip('"\'') or "file"

        # Extension tests are case-insensitive so "DATA.XLSX" is recognised.
        lowered = filename.lower()

        # Text files
        if 'text' in content_type or lowered.endswith(('.txt', '.csv', '.json', '.py', '.md')):
            content = response.text
            return f"File '{filename}':\n{content[:6000]}"

        # Excel
        if lowered.endswith(('.xlsx', '.xls')):
            try:
                from io import BytesIO
                df = pd.read_excel(BytesIO(response.content))
                return f"Excel '{filename}':\n{df.to_string()}"
            except Exception:
                # Was a bare except, which also swallowed KeyboardInterrupt
                # and SystemExit.
                return f"Excel file: {filename} (could not parse)"

        # Other
        return f"File: {filename} ({content_type}, {len(response.content)} bytes)"

    except Exception as e:
        return f"Error: {e}"
176
 
177
 
178
  @tool
179
def reverse_string(text: str) -> str:
    """
    Reverses a string.

    Args:
        text: Text to reverse

    Returns:
        Reversed text
    """
    return text[::-1]
190
 
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  # ============================================
193
+ # AGENT
194
  # ============================================
195
 
class BasicAgent:
    """GAIA question-answering agent: a ToolCallingAgent driven by Groq's Llama 3.3 70B."""

    # Lead-ins the model tends to put before the answer; matched
    # case-insensitively and removed so only the answer itself is submitted.
    _ANSWER_PREFIXES = ("Answer:", "Final answer:", "The answer is:", "FINAL ANSWER:")

    def __init__(self):
        """Create the Groq-backed model and the tool-equipped agent.

        Raises:
            ValueError: if GROQ_API_KEY is not set in the environment.
        """
        print("Initializing agent with Groq...")

        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            raise ValueError("GROQ_API_KEY not found in environment!")

        self.model = LiteLLMModel(
            model_id="groq/llama-3.3-70b-versatile",
            api_key=api_key,
        )

        self.agent = ToolCallingAgent(
            model=self.model,
            tools=[
                web_search,
                visit_webpage,
                wikipedia_search,
                calculator,
                get_task_file,
                reverse_string,
            ],
            max_steps=8,
            verbosity_level=1,
        )
        print("Agent ready!")

    def _build_prompt(self, question: str, task_id: str = None) -> str:
        """Return the instruction prompt; the file hint appears only when a task id exists."""
        file_hint = ""
        if task_id:
            # Without a task id the prompt used to read get_task_file("None"),
            # inviting a pointless tool call.
            file_hint = f"""If there's a file mentioned, use get_task_file("{task_id}") first.

"""
        return f"""Answer this question. Use tools if needed. Give ONLY the final answer, nothing else.

{file_hint}Question: {question}"""

    @staticmethod
    def _clean_answer(raw) -> str:
        """Strip known lead-in phrases and one pair of wrapping double quotes."""
        answer = str(raw).strip()

        # Clean prefixes
        for prefix in BasicAgent._ANSWER_PREFIXES:
            if answer.lower().startswith(prefix.lower()):
                answer = answer[len(prefix):].strip()

        # Remove quotes; two characters are required so a lone '"' is not
        # reduced to an empty answer.
        if len(answer) >= 2 and answer.startswith('"') and answer.endswith('"'):
            answer = answer[1:-1]

        return answer

    def __call__(self, question: str, task_id: str = None) -> str:
        """Answer one question.

        Args:
            question: The question text.
            task_id: Optional GAIA task id, used to point the agent at an attached file.

        Returns:
            The cleaned answer, or "Unable to determine answer" if the agent fails.
        """
        try:
            result = self.agent.run(self._build_prompt(question, task_id))
            return self._clean_answer(result)
        except Exception as e:
            print(f"Error: {e}")
            return "Unable to determine answer"
249
 
250
 
251
  # ============================================
252
+ # MAIN FUNCTION
253
  # ============================================
254
 
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch the questions, run the agent on each one, submit the answers and report the score.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is logged in.

    Returns:
        A (status message, results DataFrame or None) pair for the two Gradio outputs.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please log in first.", None

    username = profile.username
    print(f"User: {username}")

    # Check API key
    if not os.environ.get("GROQ_API_KEY"):
        return "ERROR: GROQ_API_KEY not set in Space secrets!", None

    # Init agent
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent init failed: {e}", None

    # Get questions
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        # An HTTP error body is JSON too; fail here, not inside the loop.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Failed to get questions: {e}", None

    if not isinstance(questions, list) or not questions:
        return "Failed to get questions: unexpected or empty response", None
    print(f"Got {len(questions)} questions")

    # Process questions
    results = []
    answers = []

    for i, q in enumerate(questions):
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question = q.get("question") or ""

        # An answer without a task id cannot be scored; skip it.
        if not task_id:
            continue

        print(f"\n[{i+1}/{len(questions)}] {question[:80]}...")

        try:
            answer = agent(question, task_id)
            print(f" {answer[:80]}")
        except Exception as e:
            answer = f"Error: {e}"
            print(f" → ERROR: {e}")

        answers.append({"task_id": task_id, "submitted_answer": answer})
        results.append({
            "Q#": i+1,
            # Ellipsis only when the question was actually cut.
            "Question": question[:60] + "..." if len(question) > 60 else question,
            "Answer": answer[:100],
        })

    if not answers:
        return "No answers to submit.", pd.DataFrame(results)

    # Submit
    print(f"\nSubmitting {len(answers)} answers...")

    try:
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
            "answers": answers,
        }
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        # Without this, a rejected submission's error payload was reported
        # as "Submitted! Score: 0%".
        response.raise_for_status()
        result = response.json()
    except Exception as e:
        return f"Submit failed: {e}", pd.DataFrame(results)

    score = result.get('score', 0)
    correct = result.get('correct_count', 0)
    total = result.get('total_attempted', 0)

    # Compare numerically only when the score really is a number.
    try:
        numeric_score = float(score)
    except (TypeError, ValueError):
        numeric_score = None

    if numeric_score is None:
        verdict = ""
    elif numeric_score >= 30:
        verdict = "🎉 PASSED! You got 30%+"
    else:
        verdict = f"Need {30-numeric_score}% more to pass"

    status = f"""✅ Submitted!

Score: {score}% ({correct}/{total} correct)

{verdict}
"""
    return status, pd.DataFrame(results)
 
 
332
 
333
 
334
  # ============================================
335
+ # UI
336
  # ============================================
337
 
# Gradio page: short instructions, login button, run button, and the two
# outputs (status text and per-question results) filled by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# 🎯 GAIA Agent - Unit 4")
    gr.Markdown("""
    **Powered by Groq + Llama 3.3 70B**

    1. Add `GROQ_API_KEY` to Space secrets
    2. Log in below
    3. Click Run
    """)

    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Evaluation", variant="primary")
    status = gr.Textbox(label="Status", lines=6)
    table = gr.DataFrame(label="Results")

    # No explicit inputs: run_and_submit_all declares a gr.OAuthProfile
    # parameter and takes nothing else.
    run_btn.click(run_and_submit_all, outputs=[status, table])
 
 
 
354
 
if __name__ == "__main__":
    # Start-up banner with a quick API-key check, then launch the UI.
    print("=" * 50)
    print("GAIA Agent Starting")
    print("=" * 50)

    if os.environ.get("GROQ_API_KEY"):
        print("✅ GROQ_API_KEY found")
    else:
        print("GROQ_API_KEY missing!")

    demo.launch()