sampsong committed on
Commit
d74a4bd
·
1 Parent(s): 535fa9a

add tool to read file and run python code

Browse files
Files changed (3) hide show
  1. Agents/agent.py +6 -2
  2. Tools/tools.py +132 -1
  3. requirements.txt +2 -1
Agents/agent.py CHANGED
@@ -19,7 +19,7 @@ from langchain_core.messages import (
19
  convert_to_messages,
20
  )
21
  from pydantic import BaseModel
22
- from Tools.tools import webSearch, youtubeVideoTranscript, arxivSearch, wikiSearch,add,multiply,divide,substract,modulus,power,count_substring
23
  from langchain_core.messages import SystemMessage, HumanMessage
24
  from dotenv import load_dotenv
25
  from supabase.client import Client, create_client
@@ -127,7 +127,11 @@ tools = [
127
  modulus,
128
  power,
129
  count_substring,
130
- youtubeVideoTranscript
 
 
 
 
131
  ]
132
 
133
  def tools_condition1(
 
19
  convert_to_messages,
20
  )
21
  from pydantic import BaseModel
22
+ from Tools.tools import webSearch, youtubeVideoTranscript, arxivSearch, wikiSearch,add,multiply,divide,substract,modulus,power,count_substring,run_python_code_from_url,read_excel_from_url,transcribe_audio,read_image_file
23
  from langchain_core.messages import SystemMessage, HumanMessage
24
  from dotenv import load_dotenv
25
  from supabase.client import Client, create_client
 
127
  modulus,
128
  power,
129
  count_substring,
130
+ youtubeVideoTranscript,
131
+ run_python_code_from_url,
132
+ read_excel_from_url,
133
+ transcribe_audio,
134
+ read_image_file,
135
  ]
136
 
137
  def tools_condition1(
Tools/tools.py CHANGED
@@ -8,6 +8,17 @@ from langchain.tools.retriever import create_retriever_tool
8
  from langchain_community.document_loaders import YoutubeLoader
9
  from langchain_community.document_loaders.youtube import TranscriptFormat
10
  from langchain_tavily import TavilySearch
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  @tool
13
  def add(a: int, b:int) -> int:
@@ -152,5 +163,125 @@ def count_substring(substring:str, text:str) -> int:
152
  """
153
  return text.count(substring)
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
-
 
 
 
 
 
 
 
8
  from langchain_community.document_loaders import YoutubeLoader
9
  from langchain_community.document_loaders.youtube import TranscriptFormat
10
  from langchain_tavily import TavilySearch
11
+ import base64
12
+ from groq import Groq
13
+ import os
14
+ import pandas as pd
15
+ import requests
16
+ from io import BytesIO
17
+ import sys
18
+ import io
19
+ import traceback
20
+
21
+ gaiaValidationURL = os.getenv("GaiaValidationURL")
22
 
23
  @tool
24
  def add(a: int, b:int) -> int:
 
163
  """
164
  return text.count(substring)
165
 
166
@tool
def read_image_file(file_path: str) -> dict:
    """
    Reads an image file and returns a dict containing base64-encoded image data.
    This can be passed to a Groq vision-enabled LLM message.

    Args:
        file_path (str): Path to the image file. It is lower-cased and appended
            to the module-level ``gaiaValidationURL`` prefix before being opened.

    Returns:
        dict: {"type": "image_url", "image_url": {"url": "data:image/<ext>;base64,<data>"}}
            on success, or {"error": "<message>"} if the extension is not one of
            jpg/jpeg/png/webp or the file cannot be read.
    """
    try:
        formattedFilePath = gaiaValidationURL + file_path.lower()
        print(f"image path {formattedFilePath}")

        # Only the suffix after the last dot is the extension. Stripping leading
        # dots from the whole path (the previous approach) left the full path in
        # `ext`, so every file was rejected as unsupported.
        ext = os.path.splitext(formattedFilePath)[1].lstrip(".")
        if ext not in ("jpg", "jpeg", "png", "webp"):
            return {"error": f"Unsupported image format: {ext}"}

        # NOTE(review): the prefix is named like a URL, but the file is opened
        # from the local filesystem here - confirm which one is intended.
        with open(formattedFilePath, "rb") as f:
            b64_data = base64.b64encode(f.read()).decode("utf-8")

        # "image/jpg" is not a registered MIME type; JPEG data is "image/jpeg".
        mime_subtype = "jpeg" if ext == "jpg" else ext
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/{mime_subtype};base64,{b64_data}"
            }
        }
    except Exception as e:
        # Tools report failures as data so the calling agent can react to them.
        return {"error": str(e)}
197
+
198
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Transcribes an audio file (mp3, wav or m4a) using Groq Whisper model.

    Args:
        file_path (str): Path to the audio file. It is lower-cased and appended
            to the module-level ``gaiaValidationURL`` prefix before being opened.

    Returns:
        str: Transcribed text from the audio, or a message starting with
            "Error transcribing audio:" if the format is unsupported or the
            transcription fails.
    """
    try:
        formattedFilePath = gaiaValidationURL + file_path.lower()
        print(f"audio path {formattedFilePath}")

        # Only the suffix after the last dot is the extension. Stripping leading
        # dots from the whole path (the previous approach) left the full path in
        # `ext`, so every file was rejected as unsupported.
        ext = os.path.splitext(formattedFilePath)[1].lstrip(".")
        if ext not in ("mp3", "wav", "m4a"):
            # Return a string (not a dict) to honour the declared return type
            # and match the error branch below.
            return f"Error transcribing audio: Unsupported audio format: {ext}"

        client = Groq(api_key=os.getenv("GROQ_API_KEY"))

        # NOTE(review): the prefix is named like a URL, but the file is opened
        # from the local filesystem here - confirm which one is intended.
        with open(formattedFilePath, "rb") as f:
            transcription = client.audio.transcriptions.create(
                model="whisper-large-v3",
                file=f
            )

        return transcription.text
    except Exception as e:
        # Tools report failures as text so the calling agent can react to them.
        return f"Error transcribing audio: {str(e)}"
228
+
229
@tool
def read_excel_from_url(url: str, as_json: bool = False) -> str:
    """
    Downloads an Excel file from a URL and returns its contents as text or JSON.

    Args:
        url (str): URL suffix of the Excel file; it is appended to the
            module-level ``gaiaValidationURL`` prefix to form the request URL.
        as_json (bool): If True, return JSON string; otherwise plain text.

    Returns:
        str: Contents of the Excel file as text or JSON, or a message starting
            with "Error reading Excel file from URL:" if anything fails.
    """
    try:
        formattedURL = gaiaValidationURL + url
        # Log before the request so the URL is visible even when the request fails.
        print(f"excel url {formattedURL}")

        # Without a timeout, requests waits indefinitely on an unresponsive
        # server, which would stall the whole agent run.
        response = requests.get(formattedURL, timeout=30)
        response.raise_for_status()

        df = pd.read_excel(BytesIO(response.content))

        if as_json:
            return df.to_json(orient="records")
        return df.to_string(index=False)
    except Exception as e:
        # Tools report failures as text so the calling agent can react to them.
        return f"Error reading Excel file from URL: {str(e)}"
254
+
255
@tool
def run_python_code_from_url(url: str) -> str:
    """
    Downloads Python code from a URL, executes it, and returns the output or errors.

    Args:
        url (str): URL suffix of the Python code to execute; it is appended to
            the module-level ``gaiaValidationURL`` prefix to form the request URL.

    Returns:
        str: Captured standard output, a fixed success message when the code
            printed nothing, or "Error executing code:" followed by the
            traceback when the download or the execution raises.
    """
    from contextlib import redirect_stdout

    try:
        formattedURL = gaiaValidationURL + url
        print(f"pythonurl : {formattedURL}")

        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(formattedURL, timeout=30)
        response.raise_for_status()
        code = response.text

        # SECURITY: this runs downloaded code in-process with full privileges.
        # Only point it at trusted sources, or move execution into a sandbox.
        captured = io.StringIO()
        # The context manager restores sys.stdout however the block exits, and
        # nothing is swapped before the download, so a failed request can no
        # longer hit an unassigned "old stdout" variable in the error handler.
        with redirect_stdout(captured):
            # Provide __name__ so scripts guarded by
            # `if __name__ == "__main__":` actually run their entry point.
            exec(code, {"__name__": "__main__"})

        output = captured.getvalue()
        if not output.strip():
            output = "Code executed successfully with no output."
        return output
    except Exception:
        return "Error executing code:\n" + traceback.format_exc()
requirements.txt CHANGED
@@ -21,4 +21,5 @@ pytesseract
21
  matplotlib
22
  langfuse
23
  typing
24
- pydantic
 
 
21
  matplotlib
22
  langfuse
23
  typing
24
+ pydantic
25
+ groq