ErdemTheFixer committed on
Commit
2144dc0
·
verified ·
1 Parent(s): 9a907a1

Create agent_tools.py

Browse files
Files changed (1) hide show
  1. agent_tools.py +353 -0
agent_tools.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import re
4
+ import sys
5
+ from typing import List, Callable, Any
6
+
7
+ import openai
8
+ import pandas as pd
9
+ import requests
10
+ from dotenv import load_dotenv
11
+ from google import genai
12
+ from google.genai import types
13
+ from langchain_community.document_loaders import WebBaseLoader, ImageCaptionLoader, WikipediaLoader, ArxivLoader
14
+ from langchain_community.tools import DuckDuckGoSearchResults
15
+ from langchain_core.tools import tool
16
+ from langchain_text_splitters import CharacterTextSplitter
17
+
# Base URL of the scoring service. The file-based tools in this module build
# their download URLs as f"{DEFAULT_API_URL}/files/{task_id}".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Populate os.environ from a local .env file (if any) at import time; the
# tools below read GOOGLE_API_KEY and OPENAI_API_KEY through os.getenv.
load_dotenv()
21
+
22
+
@tool(description="Multiply two integers and return the result")
def multiply(a: int, b: int) -> int:
    # Product of the two operands.
    product = a * b
    return product
26
+
27
+
@tool(description="Add two integers and return the result")
def add(a: int, b: int) -> int:
    # Sum of the two operands.
    total = a + b
    return total
31
+
32
+
@tool(description="Subtract the second integer from the first and return the result")
def subtract(a: int, b: int) -> int:
    # Difference a - b (first operand minus second).
    difference = a - b
    return difference
36
+
37
+
@tool(
    description="Divide the first integer by the second and return the result; raises an error if the second integer is zero")
def divide(a: int, b: int) -> float:
    # True division (always a float). A zero divisor is rejected with
    # ValueError instead of letting ZeroDivisionError surface.
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")
44
+
45
+
@tool(description="Return the remainder of dividing the first integer by the second")
def modulus(a: int, b: int) -> int:
    # Remainder of a / b using Python's % operator; unlike divide(), a zero
    # divisor is not intercepted here and raises ZeroDivisionError.
    remainder = a % b
    return remainder
49
+
50
+
@tool(description="""
Searches for a Wikipedia articles using the provided query and returns the content of the corresponding Wikipedia pages.
Args:
query (str): The search term to look up on Wikipedia.
Returns:
str: The text content of the Wikipedia articles related to the query.
""")
def wiki_search(query: str) -> str:
    # Look up at most two Wikipedia pages for `query` and return them as
    # <Document ...>...</Document> sections separated by "---" rules.
    print("wiki_search called with:", query)
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()

    # Give the agent an explicit signal instead of an empty string when the
    # search comes back empty.
    if not search_docs:
        return f"No Wikipedia results found for: {query}"

    # The opening tag is a regular (not self-closing) tag so that it pairs
    # with the closing </Document>. Metadata is read with .get() so a page
    # lacking a key cannot abort the whole search.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
67
+
68
+
@tool(description="""
Fetches raw HTML content of a web page.
Args:
url: the webpage url
Returns:
str: The combined raw text content of the webpage
""")
def visit_webpage(url: str) -> str:
    # Download `url` (5 second timeout) and return the first 5000 characters
    # of the response body. Any failure is reported as an "[ERROR ...]" string
    # rather than raised, so the calling agent can react to it.
    try:
        response = requests.get(url, timeout=5)
        # Treat 4xx/5xx answers as failures; otherwise the server's error page
        # would be handed back as if it were the requested content.
        response.raise_for_status()
        return response.text[:5000]
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"
82
+
83
+
@tool(description="""
Performs a web search using the given query, downloads the content of two relevant web pages,
and returns their combined content as a raw string.
This is useful when the task requires analysis of web page content, such as retrieving poems,
changelogs, or other textual resources.
Args:
query (str): The search query.
Returns:
str: The combined raw text content of the two retrieved web pages.
""")
def duckduck_websearch(query: str) -> str:
    # Ask DuckDuckGo for two results and keep only their URLs.
    hits = DuckDuckGoSearchResults(output_format="list", num_results=2).invoke({"query": query})
    links = [hit["link"] for hit in hits]

    # Fetch the pages; each one contributes at most its first 15000 characters.
    pages = WebBaseLoader(web_paths=links).load()
    merged = "\n\n".join(page.page_content[:15000] for page in pages)

    # Squeeze runs of 3+ newlines down to one blank line, then runs of 6+
    # spaces/tabs down to a single space, trimming the ends around each step.
    squeezed = re.sub(r'\n{3,}', '\n\n', merged).strip()
    squeezed = re.sub(r'[ \t]{6,}', ' ', squeezed)
    return squeezed.strip()
111
+
112
+
@tool(description="""
Splits text into chunks using LangChain's CharacterTextSplitter.
Args:
text: A string of text to split.
Returns:
List[str]: a list of split text
""")
def text_splitter(text: str) -> List[str]:
    # Chunking is delegated to CharacterTextSplitter, configured with a
    # 450-character chunk size and a 10-character overlap.
    chunker = CharacterTextSplitter(chunk_size=450, chunk_overlap=10)
    chunks = chunker.split_text(text)
    return chunks
123
+
124
+
@tool(description="""
First download the file, then read its content
Args:
task_id: the task_id
Returns:
str: the file content
""")
def read_file(task_id: str) -> str:
    # Download the attachment for `task_id` from the scoring service and
    # return it as text. Failures are returned as an "Error ..." string, in
    # line with the other file tools in this module.
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    try:
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Without this check an HTTP error page would be returned as if it
        # were the file's content.
        r.raise_for_status()
    except requests.RequestException as e:
        return f"Error reading file: {e}"

    # Decode in memory instead of round-tripping through a shared "temp" file
    # in the working directory: nothing is left behind, and decoding no longer
    # depends on the platform's default encoding. Undecodable bytes are
    # replaced rather than raising.
    text = r.content.decode("utf-8", errors="replace")
    # Text-mode file reads normalise line endings; keep doing so.
    return text.replace("\r\n", "\n").replace("\r", "\n")
139
+
140
+
@tool(description="""
First download the excel file, then summarize its content
Args:
task_id: the task_id
Returns:
str: row/column counts, column names and summary statistics of the excel file
""")
def excel_read(task_id: str) -> str:
    # Download the Excel attachment for `task_id`, load its first sheet with
    # pandas and return a textual summary (shape, column names, describe()).
    # Any failure is returned as an "Error analyzing Excel file: ..." string.
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Fail early on HTTP errors instead of parsing an error page.
        r.raise_for_status()

        # Parse straight from memory; no temp.xlsx is left in the working dir.
        df = pd.read_excel(io.BytesIO(r.content))

        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        # Column labels are not guaranteed to be strings (e.g. numeric or
        # datetime headers), so convert before joining.
        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
167
+
168
+
@tool(description="""
First download the csv file, then summarize its content
Args:
task_id: the task_id
Returns:
str: row/column counts, column names and summary statistics of the csv file
""")
def csv_read(task_id: str) -> str:
    # Download the CSV attachment for `task_id`, load it with pandas and
    # return a textual summary (shape, column names, describe()).
    # Any failure is returned as an "Error analyzing CSV file: ..." string.
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Fail early on HTTP errors instead of parsing an error page.
        r.raise_for_status()

        # Parse straight from memory; no temp.csv is left in the working dir.
        df = pd.read_csv(io.BytesIO(r.content))

        # The message previously said "Excel file" here, which mislabelled
        # the data for the agent reading the result.
        result = (
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        # Column labels may be non-string; convert before joining.
        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
195
+
196
+
@tool(description="""
Understand the content of the provided image
Args:
task_id: the task_id of the image file
Returns:
str: the image caption
""")
def image_caption(task_id: str) -> str:
    # Build the attachment URL for `task_id` and let ImageCaptionLoader
    # produce a caption for it; the caption text of the first returned
    # document is the result.
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    captions = ImageCaptionLoader(images=[file_url]).load()

    # Guard against an empty result so the tool answers with a message
    # instead of failing with IndexError.
    if not captions:
        return "No caption could be generated for the image."
    return captions[0].page_content
209
+
210
+
@tool(description="""
Analyzes a YouTube video from the provided URL and returns an answer
to the given question based on the analysis results.
Args:
youtube_url (str): The URL of the YouTube video, in the format
"https://www.youtube.com/...".
question (str): A question related to the content of the video.
Returns:
str: An answer to the question based on the video's content.
""")
def youtube_search(youtube_url: str, question: str) -> str:
    # The Gemini client is created per call with the key from GOOGLE_API_KEY.
    gemini = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

    # One request carrying two parts: the video (referenced by URI) followed
    # by the question text.
    video_part = types.Part(file_data=types.FileData(file_uri=youtube_url))
    question_part = types.Part(text=question)
    prompt = types.Content(parts=[video_part, question_part])

    answer = gemini.models.generate_content(
        model='models/gemini-2.5-flash',
        contents=prompt,
    )
    return answer.text
235
+
236
+
@tool(description=
      """Search Arxiv for a query and return maximum 3 result.
Args:
query: The search query.""")
def arvix_search(query: str) -> str:
    # Fetch up to three arXiv papers for `query` and return the first 1000
    # characters of each as <Document ...>...</Document> sections separated
    # by "---" rules.
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    sections = []
    for doc in search_docs:
        meta = doc.metadata
        # ArxivLoader's default metadata has Published/Title/Authors/Summary
        # and no "source" key, so indexing metadata["source"] raised KeyError
        # on every hit. Fall back to the entry id (present only when extended
        # metadata is loaded) and then to the paper title.
        source = meta.get("source") or meta.get("entry_id") or meta.get("Title", "")
        # The opening tag is a regular (not self-closing) tag so that it
        # pairs with the closing </Document>.
        sections.append(
            f'<Document source="{source}" page="{meta.get("page", "")}">\n{doc.page_content[:1000]}\n</Document>'
        )
    return "\n\n---\n\n".join(sections)
249
+
250
+
@tool(description="""
First download the mp3 file, then listen to it

Args:
task_id: the task_id

Returns:
str: the content of mp3 file
""")
def whisper_transcribe_api(task_id: str) -> str:
    # Download the audio attachment for `task_id`, send it to OpenAI's
    # "whisper-1" transcription endpoint and return the transcript text.
    # Any failure is returned as an "Error transcribing audio: ..." string.
    openai.api_key = os.getenv("OPENAI_API_KEY")
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    temp_path = 'temp.mp3'

    try:
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Do not upload an HTTP error page to the transcription API.
        r.raise_for_status()
        with open(temp_path, "wb") as fp:
            fp.write(r.content)
        with open(temp_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1"
            )
        return transcript.text
    except Exception as e:
        return f"Error transcribing audio: {e}"
    finally:
        # Best-effort cleanup so downloaded audio does not pile up in the
        # working directory; a failed delete must not mask the result.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError:
            pass
277
+
278
+
@tool(description="""
Execute Python code from a file identified by task_id and file_name.
Returns the numeric result if defined, otherwise stdout.
""")
def run_python_file(task_id: str, file_name: str) -> str:
    # Download the Python attachment for `task_id`, execute it in a fresh
    # namespace with stdout captured, and return (in order of preference):
    #   1. str(result) if the script defined a global named `result`,
    #   2. whatever the script printed,
    #   3. an error message if it raised, or "No output produced.".
    #
    # `file_name` is used only as the filename label shown in tracebacks.
    # The code is never written to disk: using a caller-supplied name as a
    # real path allowed overwriting, and afterwards deleting, an unrelated
    # existing file (the cleanup even ran when the download had failed).
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        r = requests.get(file_url, timeout=15, allow_redirects=True)
    except Exception as e:
        return f"❌ Error executing Python file: {e}"
    if r.status_code != 200:
        return f"❌ Failed to download file: {r.status_code}"

    code = r.content.decode("utf-8", errors="replace")
    label = os.path.basename(file_name) or "<downloaded>"

    buffer = io.StringIO()
    ns = {"__builtins__": __builtins__, "__name__": "__main__"}
    error = None

    old_stdout = sys.stdout
    sys.stdout = buffer
    try:
        # SECURITY: this executes downloaded code with full interpreter
        # privileges and no sandbox. Only point it at a trusted file service.
        exec(compile(code, label, "exec"), ns, ns)
    except SystemExit:
        # A script that calls sys.exit() has simply finished; it must not
        # terminate the agent process.
        pass
    except Exception as e:
        # Remember the failure, but still prefer a computed result or partial
        # output below.
        error = e
    finally:
        sys.stdout = old_stdout

    if "result" in ns:
        return str(ns["result"])
    output = buffer.getvalue().strip()
    if output:
        return output
    if error is not None:
        return f"❌ Error executing Python file: {error}"
    return "No output produced."
332
+
333
+
334
+
# Every tool this module exposes, in the order it is offered to the agent.
TOOLS: List[Callable[..., Any]] = [
    # Arithmetic
    multiply,
    add,
    subtract,
    divide,
    modulus,
    # Search and web access
    duckduck_websearch,
    arvix_search,
    wiki_search,
    visit_webpage,
    youtube_search,
    # Text utilities
    text_splitter,
    # Task attachments (downloaded by task_id)
    read_file,
    excel_read,
    csv_read,
    image_caption,
    whisper_transcribe_api,
    run_python_file,
]