mugwaneza commited on
Commit
5d1ae57
·
1 Parent(s): 0cdc222
Files changed (3) hide show
  1. app.py +224 -67
  2. requirements.txt +8 -6
  3. tools.py +365 -255
app.py CHANGED
@@ -3,8 +3,25 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import re
7
- from tools import web_search, google_web_search, calculatorAndLogics, reverse_text, botany_vegetables_only, youtube_species_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
@@ -12,58 +29,141 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
  # --- Basic Agent Definition ---
14
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
15
- class BasicAgent:
16
- def __init__(self):
17
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def __call__(self, question: str) -> str:
19
- q = (question or "").strip()
20
-
21
- # Route 1: reversed text cue (contains many reversed words or obvious pattern)
22
- if re.search(r"\brewsna\b|\btfel\b|\bs\w+t\b", q[::-1]):
23
- return reverse_text(q)
24
-
25
- # Route 2: strict math/logic expressions (only math chars and spaces)
26
- if re.fullmatch(r"[0-9xX\s\+\-\*\/\^\.\(\)=]+", q):
27
- return calculatorAndLogics(q)
28
-
29
- # Route 3: info retrieval cues
30
- if ("http://" in q or "https://" in q):
31
- if "youtube.com/watch" in q.lower():
32
- # Extract URL from question
33
- url = None
34
- for token in q.split():
35
- if token.startswith("http://") or token.startswith("https://"):
36
- url = token
37
- break
38
- ans = youtube_species_count(url or q)
39
- # For evaluation, return just the number if found
40
- return ans if ans else web_search(q)
41
- # Other URLs Google web search (plain summary)
42
- return google_web_search(q)
43
-
44
- # Route 3b: info retrieval cues without explicit URLs
45
- if any(kw in q.lower() for kw in ["wikipedia", "youtube", "find", "who", "what", "when", "where", "which", "how many", "surname", "city", "first name", "last name"]):
46
- ans = google_web_search(q)
47
- return ans if ans else web_search(q)
48
-
49
- # Route 4: grocery list parsing (mentions list and botanically strict requirement)
50
- if "grocery" in q.lower() and "vegetable" in q.lower():
51
- # Try to extract the comma-separated segment after ':'
52
- parts = q.split(":", 1)
53
- list_text = parts[1] if len(parts) > 1 else q
54
- return botany_vegetables_only(list_text)
55
-
56
- # Default: Google search fallback to ddg
57
- ans = google_web_search(q)
58
- return ans if ans else web_search(q)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def run_and_submit_all( profile: gr.OAuthProfile | None):
61
  """
62
- Fetches all questions, runs the BasicAgent on them, submits all answers,
63
  and displays the results.
 
 
 
64
  """
65
  # --- Determine HF Space Runtime URL and Repo URL ---
66
- space_id = os.getenv("/mugwaneza/agents-course-final") # Get the SPACE_ID for sending link to the code
67
 
68
  if profile:
69
  username= f"{profile.username}"
@@ -78,13 +178,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
78
 
79
  # 1. Instantiate Agent ( modify this part to create your agent)
80
  try:
81
- agent = BasicAgent()
82
  except Exception as e:
83
  print(f"Error instantiating agent: {e}")
84
  return f"Error initializing agent: {e}", None
 
85
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
86
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
87
- print(agent_code)
88
 
89
  # 2. Fetch Questions
90
  print(f"Fetching questions from: {questions_url}")
@@ -110,17 +211,34 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
110
  # 3. Run your Agent
111
  results_log = []
112
  answers_payload = []
 
 
 
 
 
 
 
 
113
  print(f"Running agent on {len(questions_data)} questions...")
114
- for item in questions_data:
115
  task_id = item.get("task_id")
116
  question_text = item.get("question")
117
  if not task_id or question_text is None:
118
  print(f"Skipping item with missing task_id or question: {item}")
119
  continue
120
  try:
 
121
  submitted_answer = agent(question_text)
122
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
123
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
124
  except Exception as e:
125
  print(f"Error running agent on task {task_id}: {e}")
126
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -177,40 +295,79 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
177
  results_df = pd.DataFrame(results_log)
178
  return status_message, results_df
179
 
 
 
 
 
 
 
 
 
180
 
181
  # --- Build Gradio Interface using Blocks ---
182
  with gr.Blocks() as demo:
183
- gr.Markdown("# Basic Agent Evaluation Runner")
184
  gr.Markdown(
185
  """
186
- **Instructions:**
 
 
 
 
187
 
188
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
189
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
190
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
191
 
192
  ---
193
- **Disclaimers:**
194
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
195
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
196
  """
197
  )
198
 
199
  gr.LoginButton()
200
 
201
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
202
 
203
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
204
- # Removed max_rows=10 from DataFrame constructor
205
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
206
 
207
- run_button.click(
208
- fn=run_and_submit_all,
209
- outputs=[status_output, results_table]
210
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  if __name__ == "__main__":
213
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
 
 
 
 
 
 
 
214
  # Check for SPACE_HOST and SPACE_ID at startup for information
215
  space_host_startup = os.getenv("SPACE_HOST")
216
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
@@ -230,5 +387,5 @@ if __name__ == "__main__":
230
 
231
  print("-"*(60 + len(" App Starting ")) + "\n")
232
 
233
- print("Launching Gradio Interface for Basic Agent Evaluation...")
234
- demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from dotenv import load_dotenv
7
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel, HfApiModel
8
+ from tools import (
9
+ ReverseTextTool,
10
+ ExtractTextFromImageTool,
11
+ AnalyzeCSVTool,
12
+ AnalyzeExcelTool,
13
+ DateCalculatorTool,
14
+ DownloadFileTool
15
+ )
16
+
17
+
18
+ # Load environment variables
19
+ try:
20
+ load_dotenv()
21
+ print("Environment variables are loaded from .env file")
22
+ except Exception as e:
23
+ print(f"Could not load .env file - {e}")
24
+
25
 
26
  # (Keep Constants as is)
27
  # --- Constants ---
 
29
 
30
  # --- Basic Agent Definition ---
31
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
32
+ class GAIAAgent:
33
+ def __init__(self, verbose=False):
34
+ self.verbose = verbose
35
+ print("Initializing Agent...")
36
+
37
+ # Get API Key
38
+ api_key = os.environ.get("HF_API_KEY")
39
+ if not api_key:
40
+ raise ValueError("HF API key not found. Please set the HF_API_KEY variable.")
41
+
42
+ # Initialize model with gpt-4o-mini
43
+ model_id = os.environ.get("HF_MODEL_ID", "Qwen/Qwen3-32B")
44
+ print(f"Using HF model: {model_id}")
45
+
46
+ model = HfApiModel(
47
+ model_id=model_id,
48
+ api_key=api_key,
49
+ temperature=0.6
50
+ )
51
+
52
+ # Initializing tools
53
+ search_tool = DuckDuckGoSearchTool()
54
+
55
+ self.tools = [search_tool,
56
+ ReverseTextTool(),
57
+ ExtractTextFromImageTool(),
58
+ AnalyzeCSVTool(),
59
+ AnalyzeExcelTool(),
60
+ DateCalculatorTool(),
61
+ DownloadFileTool()]
62
+
63
+ # Authorised imports
64
+ authorised_imports = ["PyPDF2", "pdf2image", "pillow", "nltk", "sklearn",
65
+ "networkx", "matplotlib", "seaborn", "scipy", "time"]
66
+
67
+ self.agent = CodeAgent(
68
+ tools=self.tools,
69
+ model=model,
70
+ add_base_tools=True,
71
+ planning_interval=3,
72
+ verbosity_level=2 if self.verbose else 0,
73
+ additional_authorized_imports=authorised_imports
74
+ )
75
+
76
+ print("Agent ready to Go!")
77
+
78
+ def _is_reversed_text(self, text):
79
+ """Check if the text appears to be reversed"""
80
+ return(text.startswith(".") or
81
+ ".rewsna eht sa" in text or
82
+ "esrever" in text or
83
+ "sdrawkcab" in text)
84
+
85
+
86
  def __call__(self, question: str) -> str:
87
+ """Process a question and return the answer"""
88
+ if self.verbose:
89
+ print(f"Processing question: {question[:100]}." if len(question) > 100 else f"Processing question: {question}")
90
+
91
+ if self._is_reversed_text(question):
92
+ if self.verbose:
93
+ print("Detected reversed text, it will be hadle accordingly")
94
+
95
+ prompt = f"""
96
+ You are a general AI Assistant. Your purpose is to answer question.
97
+
98
+ This question appears to be in reversed text. Here is the reversed version for clarity:
99
+ {question[::-1]}
100
+
101
+ Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
102
+
103
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
104
+ - If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
105
+ - If you are asked for a string, don't use articles, neither abbreviations(e.g. for cites), and write the digits in plain text unless specified otherwise.
106
+ - If you are asked for a comma separated list, apply the above rules depending of whether the element to be put on the list is a number or a string.
107
+
108
+ IMPORTANT NOTES TO LIMIT COSTS AND PREVENT ERRORS:
109
+ - Use web search sparingly and only when absolutely necessary.
110
+ - Limit to 1-2 web searches per question.
111
+ - If the search fails due to rate limiting, add a 3-5 second delay using time.sleep() before retrying with a different search term.
112
+ - Do not import libraries that aren't available - stick to basic Python and the tools provided.
113
+ - Focus on answering directly with what you already know when possible.
114
+ - If you have made more than 3 attempts to solve a problem, prioritize providing your best guess.
115
+ - Always add a delay of 2-3 seconds between web searches using time.sleep() to avoid rate limiting.
116
+
117
+ Remember to structure your response in Python code format using the final_answer() function.
118
+ """
119
+
120
+ else:
121
+ prompt = f"""
122
+ You are a general AI Assistant. Your purpose is to answer question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
123
+
124
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
125
+ - If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
126
+ - If you are asked for a string, don't use articles, neither abbreviations(e.g. for cites), and write the digits in plain text unless specified otherwise.
127
+ - If you are asked for a comma separated list, apply the above rules depending of whether the element to be put on the list is a number or a string.
128
+
129
+ Question: {question}
130
+
131
+ IMPORTANT NOTES TO LIMIT COSTS AND PREVENT ERRORS:
132
+ - Use web search sparingly and only when absolutely necessary.
133
+ - Limit to 1-2 web searches per question.
134
+ - If the search fails due to rate limiting, add a 3-5 second delay using time.sleep() before retrying with a different search term.
135
+ - Do not import libraries that aren't available - stick to basic Python and the tools provided.
136
+ - Focus on answering directly with what you already know when possible.
137
+ - If you have made more than 3 attempts to solve a problem, prioritize providing your best guess.
138
+ - Always add a delay of 2-3 seconds between web searches using time.sleep() to avoid rate limiting.
139
+
140
+ Remember to structure your response in Python code format using the final_answer() function.
141
+ """
142
+
143
+ try:
144
+ answer = self.agent.run(prompt)
145
+
146
+ if self.verbose:
147
+ print(f"Generated answer: {answer}")
148
+
149
+ return answer
150
+ except Exception as e:
151
+ error_msg = f"Error processing question: {e}"
152
+ if self.verbose:
153
+ print(error_msg)
154
+ return error_msg
155
+
156
 
157
  def run_and_submit_all( profile: gr.OAuthProfile | None):
158
  """
159
+ Fetches all questions, runs the Agent on them, submits all answers,
160
  and displays the results.
161
+
162
+ Args:
163
+ sample_size: Number of questions to process (0 for all questions)
164
  """
165
  # --- Determine HF Space Runtime URL and Repo URL ---
166
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
167
 
168
  if profile:
169
  username= f"{profile.username}"
 
178
 
179
  # 1. Instantiate Agent ( modify this part to create your agent)
180
  try:
181
+ agent = GAIAAgent(verbose=True)
182
  except Exception as e:
183
  print(f"Error instantiating agent: {e}")
184
  return f"Error initializing agent: {e}", None
185
+
186
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
187
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
188
+ print(f"Agent code URL: {agent_code}")
189
 
190
  # 2. Fetch Questions
191
  print(f"Fetching questions from: {questions_url}")
 
211
  # 3. Run your Agent
212
  results_log = []
213
  answers_payload = []
214
+
215
+ # Limit number of questions if sample_size is specified
216
+ # if sample_size > 0 and sample_size < len(questions_data):
217
+ # import random
218
+ # print(f"Using a sample of {sample_size} questions from {len(questions_data)} total questions")
219
+ # questions_data = random.sample(questions_data, sample_size)
220
+
221
+
222
  print(f"Running agent on {len(questions_data)} questions...")
223
+ for i, item in enumerate(questions_data):
224
  task_id = item.get("task_id")
225
  question_text = item.get("question")
226
  if not task_id or question_text is None:
227
  print(f"Skipping item with missing task_id or question: {item}")
228
  continue
229
  try:
230
+ print(f"Processing question {i+1}/{len(questions_data)}: Task ID {task_id}")
231
  submitted_answer = agent(question_text)
232
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
233
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
234
+ print(f"Successfully processed question {i+1}")
235
+
236
+ # Delays next question to avoid rate limiting
237
+ if i< len(questions_data) - 1:
238
+ import time
239
+ print("Waiting 5 seconds before next question:)")
240
+ time.sleep(5)
241
+
242
  except Exception as e:
243
  print(f"Error running agent on task {task_id}: {e}")
244
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
295
  results_df = pd.DataFrame(results_log)
296
  return status_message, results_df
297
 
298
+ def test_single_question(question: str) -> str:
299
+ """Test the agent on a single question"""
300
+ try:
301
+ agent = GAIAAgent(verbose=True)
302
+ answer = agent(question)
303
+ return answer
304
+ except Exception as e:
305
+ return f"Error: {e}"
306
 
307
  # --- Build Gradio Interface using Blocks ---
308
  with gr.Blocks() as demo:
309
+ gr.Markdown("# Agent Evaluation Runner")
310
  gr.Markdown(
311
  """
312
+ ## Instructions:
313
+
314
+ 1. Log in to your Hugging Face account using the button below.
315
+ 2. Test your agent on individual questions in the Testing Tab.
316
+ 3. Run the Evaluation on the GAIA benchmark in teh Evaluation Tab.
317
 
318
+ This agent is designed to achieve a score of at least 30% on teh GAIA Benchmark.
 
 
319
 
320
  ---
321
+ ## Disclaimers:
322
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
 
323
  """
324
  )
325
 
326
  gr.LoginButton()
327
 
328
+ with gr.Tab("Test for a single question"):
329
+ test_input = gr.Textbox(label="Enter a question", lines=3)
330
+ test_output = gr.Textbox(label="Answer", lines=5)
331
+ test_button = gr.Button("Run Test")
332
 
333
+ test_button.click(
334
+ fn=test_single_question,
335
+ inputs = test_input,
336
+ outputs=test_output
337
+ )
338
 
339
+ with gr.Tab("Final Evaluation"):
340
+ with gr.Row():
341
+ sample_size = gr.Slider(
342
+ minimum=0,
343
+ maximum=20,
344
+ value=0,
345
+ step=1,
346
+ label="Sample Size (0 for all questions)",
347
+ info="Set a number to limit how many questions to process (reduces costs)"
348
+ )
349
+
350
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
351
+
352
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
353
+ # Removed max_rows=10 from DataFrame constructor
354
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
355
+
356
+ run_button.click(
357
+ fn=run_and_submit_all,
358
+ outputs=[status_output, results_table]
359
+ )
360
 
361
  if __name__ == "__main__":
362
  print("\n" + "-"*30 + " App Starting " + "-"*30)
363
+
364
+ # Check for API key
365
+ api_key = os.environ.get("HF_API_KEY")
366
+ if not api_key:
367
+ print("WARNING: HF API key is not found. Please set HF_API_KEY environment variable.")
368
+ else:
369
+ print("OpenAI API key was found.")
370
+
371
  # Check for SPACE_HOST and SPACE_ID at startup for information
372
  space_host_startup = os.getenv("SPACE_HOST")
373
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
387
 
388
  print("-"*(60 + len(" App Starting ")) + "\n")
389
 
390
+ print("Launching Gradio Interface for Agent Evaluation...")
391
+ demo.launch(debug=True)
requirements.txt CHANGED
@@ -1,9 +1,11 @@
1
  gradio
 
 
2
  requests
3
- fastapi
4
- datasets
5
- smolagents
6
- sympy
7
  pandas
8
- ddgs
9
- youtube-transcript-api
 
 
 
 
 
1
  gradio
2
+ gradio[oauth]
3
+ itsdangerous
4
  requests
 
 
 
 
5
  pandas
6
+ numpy
7
+ smolagents
8
+ smolagents[openai]
9
+ python-dotenv
10
+ openai>=1.0.0
11
+ litellm
tools.py CHANGED
@@ -1,266 +1,376 @@
1
- from smolagents import tool
2
-
3
- @tool
4
- def web_search(query: str) -> str:
5
- """Search the web and return a plain-text summary.
6
-
7
- Uses DuckDuckGo via `ddgs` and returns concise snippets (no links) prioritizing the top results.
8
-
9
- Args:
10
- query (str): The information need, e.g., a fact question or topic.
11
-
12
- Returns:
13
- str: A short summary synthesized from result snippets. If none available, returns the top result title.
14
- """
15
- try:
16
- from ddgs import DDGS
17
- bodies = []
18
- titles = []
19
- for r in DDGS().text(query, max_results=5):
20
- b = r.get("body") or ""
21
- t = r.get("title") or ""
22
- if b:
23
- bodies.append(b.strip())
24
- if t:
25
- titles.append(t.strip())
26
-
27
- if bodies:
28
- # Build a concise summary from top 2-3 snippets
29
- summary = " ".join(bodies[:3])
30
- # Trim overly long outputs
31
- return summary[:600]
32
- if titles:
33
- return titles[0]
34
- return "No relevant text found."
35
- except Exception as e:
36
- return f"ERROR: web_search failed: {e}"
37
-
38
- @tool
39
- def google_web_search(query: str) -> str:
40
- """Search Google via Serper and return a concise plain-text summary.
41
-
42
- Args:
43
- query (str): The information need (fact question or topic).
44
-
45
- Returns:
46
- str: A short summary synthesized from top result snippets (no links). If none, returns the top title.
47
- """
48
- try:
49
- import os
50
- import requests
51
- api_key = os.getenv("SERPER_API_KEY")
52
- if not api_key:
53
- return "ERROR: SERPER_API_KEY not set"
54
- headers = {"X-API-KEY": api_key, "Content-Type": "application/json"}
55
- payload = {"q": query, "gl": "us", "hl": "en"}
56
- r = requests.post("https://google.serper.dev/search", json=payload, headers=headers, timeout=20)
57
- r.raise_for_status()
58
- data = r.json()
59
- organic = data.get("organic") or []
60
- bodies = []
61
- titles = []
62
- for item in organic[:5]:
63
- snip = item.get("snippet") or ""
64
- title = item.get("title") or ""
65
- if snip:
66
- bodies.append(snip.strip())
67
- if title:
68
- titles.append(title.strip())
69
- if bodies:
70
- return (" ".join(bodies[:3]))[:600]
71
- if titles:
72
- return titles[0]
73
- return "No relevant text found."
74
- except Exception as e:
75
- return f"ERROR: google_web_search failed: {e}"
76
-
77
- @tool
78
- def calculatorAndLogics(expression: str) -> str:
79
- """Perform calculations and basic logic evaluation.
80
-
81
- Supports arithmetic (including parentheses and powers), boolean logic (and/or/not), and solving simple equations.
82
-
83
- Args:
84
- expression (str): A proposition or mathematical expression, e.g., "2*x+3=7" or "True and not False".
85
-
86
- Returns:
87
- str: The result of the calculation or logic operation.
88
- """
89
- try:
90
- import re
91
- from sympy import Eq, simplify, solve, sympify
92
-
93
- expr = expression.strip()
94
- lowered = expr.lower()
95
-
96
- # Handle basic boolean logic like: True and False, not(True or False)
97
- if any(k in lowered for k in [" and ", " or ", " not ", " true", " false"]):
98
- safe_globals = {"__builtins__": {}}
99
- safe_locals = {"True": True, "False": False}
100
- safe_expr = re.sub(r"\btrue\b", "True", lowered)
101
- safe_expr = re.sub(r"\bfalse\b", "False", safe_expr)
102
- result = eval(safe_expr, safe_globals, safe_locals)
103
- return str(result)
104
-
105
- # Solve simple equations like: 2*x + 3 = 7
106
- if "=" in expr:
107
- left, right = expr.split("=", 1)
108
- left_expr = sympify(left)
109
- right_expr = sympify(right)
110
- symbols_in_expr = list(left_expr.free_symbols.union(right_expr.free_symbols))
111
- if symbols_in_expr:
112
- sol = solve(Eq(left_expr, right_expr), symbols_in_expr)
113
- return str(sol)
114
-
115
- # Arithmetic evaluation and simplification
116
- res = simplify(sympify(expr))
117
- return str(res)
118
- except Exception as e:
119
- return f"ERROR: Unable to evaluate expression: {e}"
120
-
121
- @tool
122
- def guest_info_retriever(query: str) -> str:
123
- """Retrieve detailed information about gala guests.
124
-
125
- Args:
126
- query (str): The name or relation of the guest you want information about.
127
-
128
- Returns:
129
- str: A concise set of search results describing the guest and their relation.
130
- """
131
- try:
132
- from ddgs import DDGS
133
- q = f"gala guest {query} relation biography"
134
- bodies = []
135
- for r in DDGS().text(q, max_results=5):
136
- b = r.get("body") or ""
137
- if b:
138
- bodies.append(b.strip())
139
- if bodies:
140
- return (" ".join(bodies[:2]))[:600]
141
- return "No guest info found."
142
- except Exception:
143
- return "No guest info found."
144
-
145
- # Note: LLM agent disabled to avoid runtime errors when a proper LLM adapter is not configured.
146
-
147
- @tool
148
- def reverse_text(input_text: str) -> str:
149
- """Reverse the input text.
150
-
151
- Useful for tasks that present reversed sentences and expect the opposite word or normal reading.
152
-
153
- Args:
154
- input_text (str): The text to reverse.
155
-
156
- Returns:
157
- str: The reversed text.
158
- """
159
- return input_text[::-1]
160
-
161
- @tool
162
- def botany_vegetables_only(list_text: str) -> str:
163
- """Extract botanically correct vegetables from a grocery list.
164
-
165
- Parses a comma-separated list in natural language and returns only items that are vegetables under botanical definitions.
166
-
167
- Args:
168
- list_text (str): The text containing the grocery list (comma-separated), possibly embedded within a sentence.
169
-
170
- Returns:
171
- str: Alphabetized, comma-separated list of vegetables with botanical fruits excluded.
172
- """
173
- import re
174
- # Identify list items by splitting on commas, normalizing whitespace and case
175
- items = [re.sub(r"\s+", " ", x.strip()).lower() for x in list_text.split(",")]
176
-
177
- # Known botanical vegetables in the provided list
178
- veg_set = {
179
- "broccoli",
180
- "celery",
181
- "lettuce",
182
- "sweet potatoes",
183
  }
184
-
185
- # Items that are botanical fruits or non-vegetables to exclude
186
- exclude = {
187
- "bell pepper",
188
- "plums",
189
- "green beans",
190
- "zucchini",
191
- "corn",
192
- "rice",
193
- "peanuts",
194
- "acorns",
195
- "fresh basil",
196
- "whole allspice",
197
- "whole bean coffee",
198
- "milk",
199
- "eggs",
200
- "flour",
201
- "oreos",
202
  }
 
203
 
204
- # Normalize certain variants
205
- normalized = []
206
- for it in items:
207
- # Handle possible variants like singular forms
208
- if it == "sweet potato":
209
- it = "sweet potatoes"
210
- normalized.append(it)
211
-
212
- selected = sorted([x for x in normalized if x in veg_set and x not in exclude])
213
- return ", ".join(selected) if selected else ""
214
-
215
- @tool
216
- def youtube_species_count(url: str) -> str:
217
- """Extract the highest number of bird species on camera from a YouTube video.
218
-
219
- Attempts to retrieve the transcript and searches for lines mentioning 'species' and numbers.
220
-
221
- Args:
222
- url (str): Full YouTube watch URL (e.g., https://www.youtube.com/watch?v=... ).
223
-
224
- Returns:
225
- str: The highest number found as a string, or an empty string if not determinable.
226
- """
227
- try:
228
- import re
229
- from urllib.parse import urlparse, parse_qs
230
- from youtube_transcript_api import YouTubeTranscriptApi
231
-
232
- parsed = urlparse(url)
233
- vid = parse_qs(parsed.query).get('v', [''])[0]
234
- if not vid:
235
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
- max_num = None
238
  try:
239
- # Try preferred English transcript
240
- transcripts = YouTubeTranscriptApi.get_transcript(vid, languages=['en'])
241
- except Exception:
242
- # Fall back: iterate available transcripts
243
- lister = YouTubeTranscriptApi.list_transcripts(vid)
244
- transcripts = None
245
- for tr in lister:
246
  try:
247
- transcripts = tr.fetch()
248
  break
249
- except Exception:
250
  continue
251
- if transcripts is None:
252
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- for entry in transcripts:
255
- text = entry.get('text', '')
256
- if not text:
257
- continue
258
- if 'species' in text.lower():
259
- for m in re.findall(r"\b(\d+)\b", text):
260
- n = int(m)
261
- max_num = n if (max_num is None or n > max_num) else max_num
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- return str(max_num) if max_num is not None else ""
264
- except Exception:
265
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import pandas as pd
3
+ import os
4
+ import tempfile
5
+ import requests
6
+ from urllib.parse import urlparse
7
+ import json
8
+ import re
9
+ from datetime import datetime, timedelta
10
+
11
+ class ReverseTextTool(Tool):
12
+ name = "reverse_text"
13
+ description = "Reverses the text in a string."
14
+ inputs = {
15
+ "text": {
16
+ "type": "string",
17
+ "description": "The text to reverse."
18
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
+ output_type = "string"
21
+
22
+ def forward(self, text: str) -> str:
23
+ return text[::-1]
24
+
25
+ class ExtractTextFromImageTool(Tool):
26
+ name = "extract_text_from_image"
27
+ description = "Extracts text from an image file using OCR."
28
+ inputs = {
29
+ "image_path": {
30
+ "type": "string",
31
+ "description": "Path to the image file."
32
+ }
 
 
 
 
 
33
  }
34
+ output_type = "string"
35
 
36
+ def forward(self, image_path: str) -> str:
37
+ try:
38
+ # Try to import pytesseract
39
+ import pytesseract
40
+ from PIL import Image
41
+
42
+ # Open the image
43
+ image = Image.open(image_path)
44
+
45
+ # Try different configurations for better results
46
+ configs = [
47
+ '--psm 6', # Assume a single uniform block of text
48
+ '--psm 3', # Automatic page segmentation, but no OSD
49
+ '--psm 1', # Automatic page segmentation with OSD
50
+ ]
51
+
52
+ results = []
53
+ for config in configs:
54
+ try:
55
+ text = pytesseract.image_to_string(image, config=config)
56
+ if text.strip():
57
+ results.append(text)
58
+ except Exception:
59
+ continue
60
+
61
+ if results:
62
+ # Return the longest result, which is likely the most complete
63
+ return f"Extracted text from image:\n\n{max(results, key=len)}"
64
+ else:
65
+ return "No text could be extracted from the image."
66
+ except ImportError:
67
+ return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
68
+ except Exception as e:
69
+ return f"Error extracting text from image: {str(e)}"
70
+
71
class AnalyzeCSVTool(Tool):
    """Tool that loads a CSV file and returns a textual summary of it."""

    name = "analyze_csv_file"
    description = "Analyzes a CSV file and provides information about its contents."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the CSV file."
        },
        "query": {
            "type": "string",
            "description": "Optional query about the data.",
            "default": "",
            "nullable": True
        }
    }
    output_type = "string"

    def forward(self, file_path: str, query: str = "") -> str:
        """Read *file_path* as CSV and describe it.

        Several common encodings are probed until one decodes cleanly.
        When *query* is non-empty, count questions and questions that
        mention a column name are answered; otherwise general numeric
        and categorical statistics are emitted. Any failure is reported
        as an error string rather than raised.
        """
        try:
            frame = None
            # Probe encodings in order; stop at the first that decodes.
            for candidate in ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']:
                try:
                    frame = pd.read_csv(file_path, encoding=candidate)
                except UnicodeDecodeError:
                    continue
                break
            if frame is None:
                return "Error: Could not read the CSV file with any of the attempted encodings."

            pieces = [
                f"CSV file has {len(frame)} rows and {len(frame.columns)} columns.\n",
                f"Columns: {', '.join(frame.columns)}\n\n",
            ]

            if query:
                lowered = query.lower()
                if "count" in lowered:
                    pieces.append(f"Row count: {len(frame)}\n")

                # Describe every column whose name appears in the query.
                for col in frame.columns:
                    if col.lower() not in lowered:
                        continue
                    pieces.append(f"\nColumn '{col}' information:\n")
                    series = frame[col]
                    if pd.api.types.is_numeric_dtype(series):
                        pieces.append(f"Min: {series.min()}\n")
                        pieces.append(f"Max: {series.max()}\n")
                        pieces.append(f"Mean: {series.mean()}\n")
                        pieces.append(f"Median: {series.median()}\n")
                    else:
                        # Categorical column: cardinality plus top values.
                        value_counts = series.value_counts().head(10)
                        pieces.append(f"Unique values: {series.nunique()}\n")
                        pieces.append(f"Top values:\n{value_counts.to_string()}\n")
            else:
                # No query: overall statistics for numeric columns ...
                numeric_cols = frame.select_dtypes(include=['number']).columns
                if len(numeric_cols) > 0:
                    pieces.append("Numeric columns statistics:\n")
                    pieces.append(frame[numeric_cols].describe().to_string())
                    pieces.append("\n\n")

                # ... and unique-value counts for categorical columns.
                cat_cols = frame.select_dtypes(exclude=['number']).columns
                if len(cat_cols) > 0:
                    pieces.append("Categorical columns:\n")
                    for col in cat_cols[:5]:  # Limit to first 5 columns
                        pieces.append(f"- {col}: {frame[col].nunique()} unique values\n")

            return "".join(pieces)
        except Exception as e:
            return f"Error analyzing CSV file: {str(e)}"
143
+
144
class AnalyzeExcelTool(Tool):
    """Tool that loads an Excel workbook and summarizes one of its sheets."""

    name = "analyze_excel_file"
    description = "Analyzes an Excel file and provides information about its contents."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the Excel file."
        },
        "query": {
            "type": "string",
            "description": "Optional query about the data.",
            "default": "",
            "nullable": True
        },
        "sheet_name": {
            "type": "string",
            "description": "Name of the sheet to analyze (defaults to first sheet).",
            "default": None,
            "nullable": True
        }
    }
    output_type = "string"

    def forward(self, file_path: str, query: str = "", sheet_name: str = None) -> str:
        """Summarize one sheet of the workbook at *file_path*.

        Lists all sheet names, then analyzes *sheet_name* (or the first
        sheet when omitted). When *query* is non-empty, count questions
        and per-column questions are answered; otherwise general numeric
        and categorical statistics are emitted. Failures are returned as
        error strings rather than raised.
        """
        try:
            # Open the workbook in a context manager so the underlying
            # file handle is always released (the previous version
            # leaked it), and parse the sheet from the already-open
            # workbook instead of reopening the file with pd.read_excel.
            with pd.ExcelFile(file_path) as excel_file:
                sheet_names = excel_file.sheet_names

                # Info about all sheets
                result = f"Excel file contains {len(sheet_names)} sheets: {', '.join(sheet_names)}\n\n"

                # If sheet name is specified, use it; otherwise use first sheet
                if sheet_name is None:
                    sheet_name = sheet_names[0]
                elif sheet_name not in sheet_names:
                    return f"Error: Sheet '{sheet_name}' not found. Available sheets: {', '.join(sheet_names)}"

                # Read the specified sheet
                df = excel_file.parse(sheet_name=sheet_name)

            # Basic information
            result += f"Sheet '{sheet_name}' has {len(df)} rows and {len(df.columns)} columns.\n"
            result += f"Columns: {', '.join(df.columns)}\n\n"

            # Handle query similar to CSV tool
            if query:
                if "count" in query.lower():
                    result += f"Row count: {len(df)}\n"

                # Look for column-specific queries
                for col in df.columns:
                    if col.lower() in query.lower():
                        result += f"\nColumn '{col}' information:\n"
                        if pd.api.types.is_numeric_dtype(df[col]):
                            result += f"Min: {df[col].min()}\n"
                            result += f"Max: {df[col].max()}\n"
                            result += f"Mean: {df[col].mean()}\n"
                            result += f"Median: {df[col].median()}\n"
                        else:
                            # For categorical data
                            value_counts = df[col].value_counts().head(10)
                            result += f"Unique values: {df[col].nunique()}\n"
                            result += f"Top values:\n{value_counts.to_string()}\n"
            else:
                # For numeric columns
                numeric_cols = df.select_dtypes(include=['number']).columns
                if len(numeric_cols) > 0:
                    result += "Numeric columns statistics:\n"
                    result += df[numeric_cols].describe().to_string()
                    result += "\n\n"

                # For categorical columns, show counts of unique values
                cat_cols = df.select_dtypes(exclude=['number']).columns
                if len(cat_cols) > 0:
                    result += "Categorical columns:\n"
                    for col in cat_cols[:5]:  # Limit to first 5 columns
                        result += f"- {col}: {df[col].nunique()} unique values\n"

            return result
        except Exception as e:
            return f"Error analyzing Excel file: {str(e)}"
226
+
227
class DateCalculatorTool(Tool):
    """Tool for simple natural-language date calculations."""

    name = "date_calculator"
    description = "Performs date calculations like adding days, formatting dates, etc."
    inputs = {
        "query": {
            "type": "string",
            "description": "The date calculation to perform (e.g., 'What day is 10 days from today?', 'Format 2023-05-15 as MM/DD/YYYY')"
        }
    }
    output_type = "string"

    # Date formats accepted when parsing a date found inside the query.
    _DATE_FORMATS = ('%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%B %d, %Y')

    @staticmethod
    def _parse_date(date_text: str):
        """Parse *date_text* into a datetime; return None when no format matches.

        'today' / 'now' resolve to the current local datetime.
        """
        if date_text.lower() in ['today', 'now']:
            return datetime.now()
        for fmt in DateCalculatorTool._DATE_FORMATS:
            try:
                return datetime.strptime(date_text, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def _add_months(base, months: int):
        """Return *base* shifted forward by *months* calendar months.

        The day is clamped to the last day of the target month, so e.g.
        Jan 31 + 1 month yields Feb 28/29 instead of raising ValueError
        (the previous replace()-based code crashed on such overflow).
        """
        import calendar
        total = base.month - 1 + months
        year = base.year + total // 12
        month = total % 12 + 1
        day = min(base.day, calendar.monthrange(year, month)[1])
        return base.replace(year=year, month=month, day=day)

    def forward(self, query: str) -> str:
        """Answer *query*: current date/time, date arithmetic, or reformatting.

        Unrecognized queries get a fixed "couldn't understand" message;
        unexpected failures are returned as an error string, not raised.
        """
        try:
            # Current date / time queries.
            if re.search(r'(today|now|current date|current time)', query, re.IGNORECASE):
                now = datetime.now()
                if 'time' in query.lower():
                    return f"Current date and time: {now.strftime('%Y-%m-%d %H:%M:%S')}"
                else:
                    return f"Today's date: {now.strftime('%Y-%m-%d')}"

            # "N days/weeks/months/years from DATE" queries.
            add_match = re.search(r'(what|when).+?(\d+)\s+(day|days|week|weeks|month|months|year|years)\s+(from|after)\s+(.+)', query, re.IGNORECASE)
            if add_match:
                amount = int(add_match.group(2))
                unit = add_match.group(3).lower()
                date_text = add_match.group(5).strip()

                base_date = self._parse_date(date_text)
                if base_date is None:
                    return f"Could not parse date: {date_text}"

                # Calculate new date; month/year shifts clamp the day.
                if 'day' in unit:
                    new_date = base_date + timedelta(days=amount)
                elif 'week' in unit:
                    new_date = base_date + timedelta(weeks=amount)
                elif 'month' in unit:
                    new_date = self._add_months(base_date, amount)
                else:  # 'year' — Feb 29 clamps to Feb 28 in non-leap years
                    new_date = self._add_months(base_date, amount * 12)

                return f"Date {amount} {unit} from {base_date.strftime('%Y-%m-%d')} is {new_date.strftime('%Y-%m-%d')}"

            # "format DATE as SPEC" queries.
            format_match = re.search(r'format\s+(.+?)\s+as\s+(.+)', query, re.IGNORECASE)
            if format_match:
                date_text = format_match.group(1).strip()
                format_spec = format_match.group(2).strip()

                date_obj = self._parse_date(date_text)
                if date_obj is None:
                    return f"Could not parse date: {date_text}"

                # Translate tokens like YYYY/MM/DD into strftime directives.
                # Longer tokens first so YYYY is consumed before YY.
                format_mapping = {
                    'YYYY': '%Y',
                    'YY': '%y',
                    'MM': '%m',
                    'DD': '%d',
                    'HH': '%H',
                    'mm': '%M',
                    'ss': '%S'
                }
                strftime_format = format_spec
                for key, value in format_mapping.items():
                    strftime_format = strftime_format.replace(key, value)

                return f"Formatted date: {date_obj.strftime(strftime_format)}"

            return "I couldn't understand the date calculation query."
        except Exception as e:
            return f"Error performing date calculation: {str(e)}"
332
+
333
class DownloadFileTool(Tool):
    """Tool that downloads a URL into the system temp directory."""

    name = "download_file"
    description = "Downloads a file from a URL and saves it locally."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL to download from."
        },
        "filename": {
            "type": "string",
            "description": "Optional filename to save as (default: derived from URL).",
            "default": None,
            "nullable": True
        }
    }
    output_type = "string"

    def forward(self, url: str, filename: str = None) -> str:
        """Download *url* to the temp directory and report the saved path.

        When *filename* is omitted it is derived from the URL path, with
        a random fallback name if the path has no basename. Failures are
        returned as an error string rather than raised.
        """
        try:
            # Parse URL to get filename if not provided
            if not filename:
                path = urlparse(url).path
                filename = os.path.basename(path)
                if not filename:
                    # Generate a random name if we couldn't extract one
                    import uuid
                    filename = f"downloaded_{uuid.uuid4().hex[:8]}"

            # Destination inside the system temp directory
            temp_dir = tempfile.gettempdir()
            filepath = os.path.join(temp_dir, filename)

            # Stream the download with a timeout so an unresponsive
            # server cannot hang the agent forever, and close the
            # connection deterministically (the previous version had
            # no timeout and never closed the response).
            with requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            return f"File downloaded to {filepath}. You can now analyze this file."
        except Exception as e:
            return f"Error downloading file: {str(e)}"