akrstova committed on
Commit
bb4ec09
·
1 Parent(s): 32aa30b

Add more tools

Browse files
Files changed (5) hide show
  1. agent.py +5 -2
  2. pyproject.toml +1 -0
  3. requirements.txt +2 -1
  4. tools/file_tools.py +114 -2
  5. uv.lock +15 -0
agent.py CHANGED
@@ -14,7 +14,7 @@ from supabase.client import Client, create_client
14
  from tools.math_tools import add, subtract, multiply, divide, modulus, power, sqrt
15
  from tools.search_tools import search_wikipedia, web_search, arxiv_search
16
  from tools.image_video_tools import query_image
17
- from tools.file_tools import analyze_excel_file, execute_python_code
18
 
19
  system_prompt = Path("system_prompt.txt").read_text()
20
 
@@ -43,7 +43,10 @@ def build_graph():
43
  max_retries=2,
44
  google_api_key=os.getenv("GOOGLE_API_KEY") # Get API key from environment variable
45
  )
46
- tools = [add, subtract, multiply, divide, modulus, power, sqrt, web_search, arxiv_search, search_wikipedia, query_image, analyze_excel_file, execute_python_code]
 
 
 
47
 
48
  llm_with_tools = llm.bind_tools(tools)
49
 
 
14
  from tools.math_tools import add, subtract, multiply, divide, modulus, power, sqrt
15
  from tools.search_tools import search_wikipedia, web_search, arxiv_search
16
  from tools.image_video_tools import query_image
17
+ from tools.file_tools import analyze_excel_file, execute_python_code, analyze_csv_file, save_and_read_file, download_file_from_url, extract_text_from_image
18
 
19
  system_prompt = Path("system_prompt.txt").read_text()
20
 
 
43
  max_retries=2,
44
  google_api_key=os.getenv("GOOGLE_API_KEY") # Get API key from environment variable
45
  )
46
+ tools = [add, subtract, multiply, divide, modulus, power, sqrt,
47
+ web_search, arxiv_search, search_wikipedia,
48
+ query_image,
49
+ analyze_excel_file, execute_python_code, analyze_csv_file, save_and_read_file, download_file_from_url, extract_text_from_image]
50
 
51
  llm_with_tools = llm.bind_tools(tools)
52
 
pyproject.toml CHANGED
@@ -15,6 +15,7 @@ dependencies = [
15
  "langgraph>=0.4.3",
16
  "openai-whisper>=20240930",
17
  "pandas>=2.2.3",
 
18
  "requests>=2.32.3",
19
  "supabase>=2.15.1",
20
  "torch==2.2.2",
 
15
  "langgraph>=0.4.3",
16
  "openai-whisper>=20240930",
17
  "pandas>=2.2.3",
18
+ "pytesseract>=0.3.13",
19
  "requests>=2.32.3",
20
  "supabase>=2.15.1",
21
  "torch==2.2.2",
requirements.txt CHANGED
@@ -18,4 +18,5 @@ pymupdf
18
  wikipedia
19
  pgvector
20
  python-dotenv
21
- openai-whisper
 
 
18
  wikipedia
19
  pgvector
20
  python-dotenv
21
+ openai-whisper
22
+ pytesseract
tools/file_tools.py CHANGED
@@ -1,20 +1,29 @@
1
  import io
2
  import os
 
 
 
 
3
  import pandas as pd
4
  import contextlib
5
  from langchain_core.tools import tool
6
  from langchain_google_genai import ChatGoogleGenerativeAI
 
7
  import whisper
 
 
 
8
 
9
  # Load Whisper model once
10
  whisper_model = whisper.load_model("base") # or "small", "medium", "large"
11
 
12
  @tool
13
- def analyze_excel_file(file_path: str) -> str:
14
  """
15
- Provides summary statistics for an Excel file.
16
  Args:
17
  file_path (str): the path to the Excel file.
 
18
  """
19
  try:
20
  # Read the Excel file
@@ -36,6 +45,7 @@ def analyze_excel_file(file_path: str) -> str:
36
  return f"Error analyzing Excel file: {str(e)}"
37
 
38
 
 
39
  @tool
40
  def process_mp3_file(file_path: str, query: str) -> str:
41
  """
@@ -97,3 +107,105 @@ def execute_python_code(code: str) -> str:
97
  return "Code executed successfully, but produced no output."
98
  except Exception as e:
99
  return f"Error executing code: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import io
2
  import os
3
+ import tempfile
4
+ from typing import Optional
5
+ from urllib.parse import urlparse
6
+ import uuid
7
  import pandas as pd
8
  import contextlib
9
  from langchain_core.tools import tool
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
+ import requests
12
  import whisper
13
+ from PIL import Image
14
+ import pytesseract
15
+
16
 
17
  # Load Whisper model once
18
  whisper_model = whisper.load_model("base") # or "small", "medium", "large"
19
 
20
  @tool
21
+ def analyze_excel_file(file_path: str, query: str) -> str:
22
  """
23
+ Analyze an Excel file using pandas and answer a question about it.
24
  Args:
25
  file_path (str): the path to the Excel file.
26
+ query (str): Question about the data
27
  """
28
  try:
29
  # Read the Excel file
 
45
  return f"Error analyzing Excel file: {str(e)}"
46
 
47
 
48
+
49
  @tool
50
  def process_mp3_file(file_path: str, query: str) -> str:
51
  """
 
107
  return "Code executed successfully, but produced no output."
108
  except Exception as e:
109
  return f"Error executing code: {e}"
110
+
111
+
112
+
@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file in the system temporary directory and return the path.
    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. Only its final path
            component is used, so the file always stays inside the temporary
            directory. If not provided, a randomly named file is created.
    """
    temp_dir = tempfile.gettempdir()

    # Keep only the last path component: an absolute path or a "../" prefix
    # would otherwise make os.path.join() point outside the temp directory.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name in ("", ".", ".."):
        # No usable name: let tempfile pick a unique one. Writing through the
        # returned handle (instead of reopening the path) ensures the handle
        # is closed; delete=False keeps the file on disk afterwards.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", delete=False, dir=temp_dir
        ) as temp_file:
            temp_file.write(content)
            filepath = temp_file.name
    else:
        filepath = os.path.join(temp_dir, safe_name)
        # Explicit UTF-8 so the result does not depend on the host locale.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
132
+
133
+
@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it to a temporary location.
    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. Only its final path
            component is used. If not provided, the name is taken from the
            URL path, falling back to a random "downloaded_<hex>" name.
    Returns a message containing the saved path, or an error message if the
    download or the write fails.
    """
    unusable_names = ("", ".", "..")
    try:
        # Strip any directory part so the file cannot land outside the
        # temporary directory (absolute paths, "../" segments).
        name = os.path.basename(filename) if filename else ""
        if name in unusable_names:
            # Fall back to the last segment of the URL path.
            name = os.path.basename(urlparse(url).path)
        if name in unusable_names:
            name = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), name)

        # requests has no default timeout, so without one a stalled server
        # would block the agent forever. The context manager releases the
        # streamed connection even if writing fails midway.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            # Stream to disk in chunks to avoid holding the whole file in memory.
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error downloading file: {str(e)}"
167
+
168
+
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using the OCR library pytesseract.
    Args:
        image_path (str): the path to the image file.
    Returns the extracted text prefixed with a short header, or an error
    message if the image cannot be opened or OCR fails.
    """
    try:
        # Image.open keeps the underlying file open; the context manager
        # closes it once OCR has finished instead of leaking the handle.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        return f"Extracted text from image:\n\n{text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error extracting text from image: {str(e)}"
186
+
187
+
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Load a CSV file with pandas and return its shape, column names and summary statistics.
    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data. It is accepted for interface
            compatibility but is not used to select the analysis; the same
            summary is returned for every query.
    """
    try:
        df = pd.read_csv(file_path)

        # str() each label so the join cannot fail on non-string column labels.
        column_names = ", ".join(map(str, df.columns))

        report_parts = [
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(df.describe()),
        ]
        return "".join(report_parts)

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing CSV file: {str(e)}"
uv.lock CHANGED
@@ -370,6 +370,7 @@ dependencies = [
370
  { name = "langgraph" },
371
  { name = "openai-whisper" },
372
  { name = "pandas" },
 
373
  { name = "requests" },
374
  { name = "supabase" },
375
  { name = "torch" },
@@ -388,6 +389,7 @@ requires-dist = [
388
  { name = "langgraph", specifier = ">=0.4.3" },
389
  { name = "openai-whisper", specifier = ">=20240930" },
390
  { name = "pandas", specifier = ">=2.2.3" },
 
391
  { name = "requests", specifier = ">=2.32.3" },
392
  { name = "supabase", specifier = ">=2.15.1" },
393
  { name = "torch", specifier = "==2.2.2" },
@@ -1782,6 +1784,19 @@ wheels = [
1782
  { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 },
1783
  ]
1784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1785
  [[package]]
1786
  name = "pytest"
1787
  version = "8.3.5"
 
370
  { name = "langgraph" },
371
  { name = "openai-whisper" },
372
  { name = "pandas" },
373
+ { name = "pytesseract" },
374
  { name = "requests" },
375
  { name = "supabase" },
376
  { name = "torch" },
 
389
  { name = "langgraph", specifier = ">=0.4.3" },
390
  { name = "openai-whisper", specifier = ">=20240930" },
391
  { name = "pandas", specifier = ">=2.2.3" },
392
+ { name = "pytesseract", specifier = ">=0.3.13" },
393
  { name = "requests", specifier = ">=2.32.3" },
394
  { name = "supabase", specifier = ">=2.15.1" },
395
  { name = "torch", specifier = "==2.2.2" },
 
1784
  { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 },
1785
  ]
1786
 
1787
+ [[package]]
1788
+ name = "pytesseract"
1789
+ version = "0.3.13"
1790
+ source = { registry = "https://pypi.org/simple" }
1791
+ dependencies = [
1792
+ { name = "packaging" },
1793
+ { name = "pillow" },
1794
+ ]
1795
+ sdist = { url = "https://files.pythonhosted.org/packages/9f/a6/7d679b83c285974a7cb94d739b461fa7e7a9b17a3abfd7bf6cbc5c2394b0/pytesseract-0.3.13.tar.gz", hash = "sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9", size = 17689 }
1796
+ wheels = [
1797
+ { url = "https://files.pythonhosted.org/packages/7a/33/8312d7ce74670c9d39a532b2c246a853861120486be9443eebf048043637/pytesseract-0.3.13-py3-none-any.whl", hash = "sha256:7a99c6c2ac598360693d83a416e36e0b33a67638bb9d77fdcac094a3589d4b34", size = 14705 },
1798
+ ]
1799
+
1800
  [[package]]
1801
  name = "pytest"
1802
  version = "8.3.5"