Prasanthkumar committed on
Commit
f64893d
·
verified ·
1 Parent(s): 2dd54e3

Upload 4 files

Browse files
tools/Web_Search_tools.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from supabase.client import Client, create_client
3
+ from langchain_core.tools import tool
4
+ from langchain_community.tools.tavily_search import TavilySearchResults
5
+ from langchain_community.document_loaders import WikipediaLoader
6
+ from langchain_community.document_loaders import ArxivLoader
7
+ from langchain_huggingface import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import SupabaseVectorStore
9
+ from langchain.tools.retriever import create_retriever_tool
10
+
11
+
12
+ @tool
13
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return at most 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "wiki_results". Its value is one string in
        which each loaded page is rendered as a <Document> block and the
        blocks are separated by "---" lines.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Use .get() for "source" as well as "page" so that a document without
    # that metadata key does not abort the whole search with a KeyError.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
21
+
22
+ @tool
23
def web_search(query: str) -> dict:
    """Search Tavily for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "web_results". Its value is one string in
        which each hit is rendered as a <Document> block and the blocks are
        separated by "---" lines. If the search tool returns a plain string
        (for example an error description), that string is passed through.
    """
    # invoke() takes the tool input as its first positional argument; passing
    # it as ``query=...`` leaves that required argument unset.
    results = TavilySearchResults(max_results=3).invoke(query)

    if isinstance(results, str):
        return {"web_results": results}

    # Tavily hits are plain dicts ("url", "content"), not Document objects,
    # so they have no .metadata / .page_content attributes.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{hit.get("url", "")}" page=""/>\n{hit.get("content", "")}\n</Document>'
        for hit in results
    )
    return {"web_results": formatted_search_docs}
31
+
32
+ @tool
33
def arxiv_search(query: str) -> dict:
    """Search arXiv for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "arxiv_results". Its value is one string in
        which each paper is rendered as a <Document> block holding the first
        1000 characters of its text; blocks are separated by "---" lines.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    def _source(doc):
        # The loader's metadata is not guaranteed to contain "source"; fall
        # back to other identifying fields instead of raising KeyError.
        meta = doc.metadata
        return meta.get("source") or meta.get("entry_id") or meta.get("Title", "")

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{_source(doc)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    return {"arxiv_results": formatted_search_docs}
41
+
42
+ @tool
43
def similar_question_search(question: str) -> dict:
    """Search the vector database for similar questions and return the top 3 matches.

    Args:
        question: the question human provided.

    Returns:
        A dict with the single key "similar_questions". Its value is one
        string in which each match is rendered as a <Document> block holding
        the first 1000 characters of its content; blocks are separated by
        "---" lines.
    """
    # NOTE(review): ``vector_store`` is not defined anywhere in the visible
    # part of this module; it must exist at module level (presumably a
    # SupabaseVectorStore, given the imports) before this tool is called.
    matched_docs = vector_store.similarity_search(question, k=3)
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in matched_docs
    )
    return {"similar_questions": formatted_search_docs}
tools/calculator.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ import cmath
3
+ import math
4
+
5
+ @tool
6
def add(a: int, b: int) -> int:
    """
    Return the sum of two numbers.
    Args:
        a (integer): the first addend
        b (integer): the second addend
    """
    total = a + b
    return total
14
+
15
+ @tool
16
def sub(a: int, b: int) -> int:
    """
    Return the difference of two numbers (a minus b).
    Args:
        a (integer): the number to subtract from
        b (integer): the number to subtract
    """
    difference = a - b
    return difference
24
+
25
+ @tool
26
def mul(a: int, b: int) -> int:
    """
    Return the product of two numbers.
    Args:
        a (integer): the first factor
        b (integer): the second factor
    """
    product = a * b
    return product
34
+
35
+ @tool
36
def div(a: int, b: int) -> float:
    """
    Divide a by b using true division and return the result as a float.
    Args:
        a (integer): the dividend
        b (integer): the divisor; a zero divisor raises ZeroDivisionError
    """
    quotient = a / b
    return quotient
44
+
45
+ @tool
46
def floor_div(a: int, b: int) -> int:
    """
    Divide a by b using floor division and return the integer result.
    Args:
        a (integer): the dividend
        b (integer): the divisor; a zero divisor raises ZeroDivisionError
    """
    quotient = a // b
    return quotient
54
+
55
+ @tool
56
def square(a: int) -> int:
    """
    Return the number multiplied by itself.
    Args:
        a (integer): the number to square
    """
    squared = a * a
    return squared
63
+
64
+ @tool
65
def mod(a: int, b: int) -> int:
    """
    Return the remainder of a divided by b (Python ``%`` semantics).
    Args:
        a (integer): the dividend
        b (integer): the divisor; a zero divisor raises ZeroDivisionError
    """
    remainder = a % b
    return remainder
73
+
74
+ @tool
75
def pow(a: int, b: int) -> int:
    """
    Raise a to the power b.
    Args:
        a (integer): the base
        b (integer): the exponent
    """
    raised = a ** b
    return raised
83
+
84
+ @tool
85
def square_root(a: int):
    """
    Return the square root of the number.

    Non-negative input yields a float; negative input yields a complex
    number computed with ``cmath.sqrt``.
    Args:
        a (integer): the number
    """
    if a >= 0:
        return a ** 0.5
    return cmath.sqrt(a)
95
+
96
+ @tool
97
def absolute(a: int) -> int:
    """
    Return the absolute value of the number.
    Args:
        a (integer): the number
    """
    if a >= 0:
        return a
    return -a
104
+
105
+ @tool
106
def gcd(a: int, b: int) -> int:
    """
    Return the greatest common divisor of two numbers (Euclid's algorithm).
    Args:
        a (integer): the first number
        b (integer): the second number
    """
    # Iterative on purpose: once this function is wrapped by @tool, the
    # module-level name ``gcd`` refers to the tool object rather than this
    # plain function, so recursing through that name does not work. The loop
    # yields the same values as the recursive form (gcd(a, 0) == a).
    while b != 0:
        a, b = b, a % b
    return a
116
+
117
+ @tool
118
def lcm(a: int, b: int) -> int:
    """
    Return the least common multiple of two numbers (always non-negative).
    Args:
        a (integer): the first number
        b (integer): the second number
    """
    # Use the builtin abs() and math.gcd instead of the sibling ``absolute``
    # and ``gcd`` names: those are wrapped by @tool in this module and are
    # therefore not plain callables taking two integers.
    if a == 0 or b == 0:
        # The LCM involving zero is zero; this also avoids dividing by
        # gcd(0, 0) == 0.
        return 0
    return abs(a * b) // math.gcd(a, b)
126
+
127
+ @tool
128
def factorial(a: int) -> int:
    """
    Return the factorial of a number. Values of 1 or less yield 1.
    Args:
        a (integer): the number
    """
    if a <= 1:
        return 1
    # Delegate to math.factorial rather than recursing: the @tool decorator
    # rebinds the module-level name ``factorial`` to a tool object, and a
    # recursive version would also hit the recursion limit for large inputs.
    return math.factorial(a)
tools/code_interpreter_tools.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========================== #
2
+ # 📦 Imports and Setup
3
+ # ========================== #
4
+ import os
5
+ import io
6
+ import sys
7
+ import uuid
8
+ import base64
9
+ import traceback
10
+ import contextlib
11
+ import tempfile
12
+ import subprocess
13
+ import sqlite3
14
+ import logging
15
+ from typing import Dict, Any
16
+ import numpy as np
17
+ import pandas as pd
18
+ import matplotlib.pyplot as plt
19
+ from PIL import Image
20
+ from langchain_core.tools import tool
21
+
22
+ # ========================== #
23
+ # 📋 Logging Setup
24
+ # ========================== #
25
def setup_logger(log_file="execution.log"):
    """Return the "CodeInterpreter" logger, attaching a file handler on first use.

    Args:
        log_file: path of the log file the handler writes to.

    Returns:
        The logger, set to INFO level. If it already has handlers, no new
        handler is added, so repeated calls do not duplicate log lines.
    """
    code_logger = logging.getLogger("CodeInterpreter")
    code_logger.setLevel(logging.INFO)
    if code_logger.handlers:
        return code_logger

    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    code_logger.addHandler(file_handler)
    return code_logger
34
+
35
# Module-level logger used by the interpreter below. Created at import time
# with the default log file, so importing this module opens "execution.log".
logger = setup_logger()
36
+
37
+ # =================================================================== #
38
+ # Code interpreter tools for Python, Java, C, SQL (SQLite) and Bash
39
+ # =================================================================== #
40
+
41
class Code_Interpreter:
    """Run snippets of Python, Java, C, SQL (SQLite) or Bash and collect their output.

    Every handler returns a dict with the same keys: ``execution_id``,
    ``status`` ("success" or "error"), ``stdout``, ``stderr``, ``result``,
    ``plots`` and ``dataframes``.

    NOTE(review): Python code is run in-process with ``exec`` and Bash code is
    run through the shell. The substring blacklist applied to Python code is
    not a sandbox; only feed this class code you are prepared to run.
    """

    # Python code containing any of these substrings is rejected before exec.
    _BLOCKED_PYTHON_SNIPPETS = ("os.remove", "shutil.rmtree", "open('/etc", "__import__")

    def __init__(
        self,
        allowed_modules=None,
        max_execution_time=30,
        working_directory=None,
    ):
        """Configure the interpreter.

        Args:
            allowed_modules: list of module names; a default list is used when
                omitted. It is stored on the instance but no method of this
                class consults it.
            max_execution_time: timeout in seconds handed to every subprocess
                call (Java, C, Bash). It is not applied to in-process Python.
            working_directory: directory under which per-execution folders for
                plots are created; defaults to the current working directory.
        """
        self.allowed_modules = allowed_modules or [
            "numpy", "pandas", "matplotlib", "scipy", "sklearn", "math", "random", "statistics",
            "datetime", "collections", "itertools", "functools", "operator", "re", "json", "sympy",
            "networkx", "nltk", "PIL", "pytesseract", "cmath", "uuid", "tempfile", "requests", "urllib"
        ]

        self.max_execution_time = max_execution_time

        self.working_directory = working_directory or os.getcwd()
        os.makedirs(self.working_directory, exist_ok=True)

        # Namespace shared by all Python executions on this instance, so
        # variables defined by one snippet are visible to the next.
        self.globals = {"__builtins__": __builtins__, "np": np, "pd": pd, "plt": plt, "Image": Image}
        self.temp_sqlite_db = os.path.join(tempfile.gettempdir(), "code_exec.db")

    @staticmethod
    def _new_result(execution_id: str, status: str = "error", stdout: str = "", stderr: str = "") -> dict:
        """Build a result dict with the common shape used by every handler."""
        return {
            "execution_id": execution_id,
            "status": status,
            "stdout": stdout,
            "stderr": stderr,
            "result": None,
            "plots": [],
            "dataframes": [],
        }

    def execute_code(self, code: str, language: str = "python") -> Dict[str, Any]:
        """Dispatch execution to the appropriate language handler.

        Args:
            code: source text to run.
            language: one of "python", "java", "c", "sql", "bash"
                (case-insensitive).

        Returns:
            The handler's result dict. For an unsupported language, rejected
            Python code, or an exception raised by a handler, an "error"
            result whose ``stderr`` explains the problem.
        """
        lang = language.strip().lower()

        execution_id = str(uuid.uuid4())
        logger.info(f"[{execution_id}] Executing code in language: {lang}")

        handlers = {
            "python": self._execute_python,
            "java": self._execute_java,
            "c": self._execute_c,
            "sql": self._execute_sql,
            "bash": self._execute_bash,
        }
        result = self._new_result(execution_id)

        handler = handlers.get(lang)
        if handler is None:
            # Report the problem instead of returning an error with no text.
            result["stderr"] = (
                f"Unsupported language: {language}. "
                f"Supported languages: {', '.join(sorted(handlers))}"
            )
            logger.error(f"[{execution_id}] {result['stderr']}")
            return result

        try:
            if lang == "python" and any(x in code for x in self._BLOCKED_PYTHON_SNIPPETS):
                raise ValueError("Unsafe code detected.")
            return handler(code, execution_id)
        except Exception as e:
            result["stderr"] = str(e)
            logger.error(f"[{execution_id}] Execution error: {e}", exc_info=True)

        return result

    def _execute_python(self, code: str, execution_id: str) -> dict:
        """Run Python code in-process, capturing stdout/stderr, plots and DataFrames.

        Open matplotlib figures are saved as PNG files under
        ``<working_directory>/<execution_id>/`` and returned base64-encoded.
        Non-empty DataFrames found in the shared namespace are summarised.
        ``max_execution_time`` is not enforced here.
        """
        output_buffer = io.StringIO()
        error_buffer = io.StringIO()
        result = self._new_result(execution_id)

        try:
            exec_dir = os.path.join(self.working_directory, execution_id)
            os.makedirs(exec_dir, exist_ok=True)
            plt.switch_backend('Agg')

            with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(error_buffer):
                exec(code, self.globals)

            # Capture plots
            for i, fig_num in enumerate(plt.get_fignums()):
                fig = plt.figure(fig_num)
                img_path = os.path.join(exec_dir, f"plot_{i}.png")
                fig.savefig(img_path)
                with open(img_path, "rb") as img_file:
                    img_data = base64.b64encode(img_file.read()).decode('utf-8')
                result["plots"].append({"figure_number": fig_num, "data": img_data})

            # Capture dataframes (iterate over a snapshot of the namespace)
            for var_name, var_value in list(self.globals.items()):
                if isinstance(var_value, pd.DataFrame) and len(var_value) > 0:
                    result["dataframes"].append({
                        "name": var_name,
                        "head": var_value.head().to_dict(),
                        "shape": var_value.shape,
                        "dtypes": str(var_value.dtypes)
                    })

            result["status"] = "success"
            result["stdout"] = output_buffer.getvalue()
            # Text written to stderr by successful code (e.g. warnings).
            result["stderr"] = error_buffer.getvalue()
            logger.info(f"[{execution_id}] Python code executed successfully.")

        except Exception as e:
            result["status"] = "error"
            # Keep whatever was printed before the failure.
            result["stdout"] = output_buffer.getvalue()
            result["stderr"] = error_buffer.getvalue() + "\n" + traceback.format_exc()
            logger.error(f"[{execution_id}] Python execution failed: {e}", exc_info=True)

        finally:
            # Close figures so they neither accumulate in memory nor show up
            # again in the plots of a later execution.
            plt.close("all")

        return result

    def _run_process(self, command, execution_id: str, shell: bool = False) -> dict:
        """Run a subprocess with the configured timeout and wrap its output in a result dict."""
        try:
            completed = subprocess.run(
                command,
                shell=shell,
                capture_output=True,
                text=True,
                timeout=self.max_execution_time,
            )
        except subprocess.TimeoutExpired:
            return self._new_result(execution_id, stderr="Execution timed out.")
        except OSError as e:
            # Typically the compiler or runtime executable is not installed.
            return self._new_result(execution_id, stderr=str(e))

        status = "success" if completed.returncode == 0 else "error"
        return self._new_result(execution_id, status, completed.stdout, completed.stderr)

    def _execute_java(self, code: str, execution_id: str) -> dict:
        """Compile the code as ``Main.java`` with javac and run class ``Main``."""
        # The temporary directory is removed when the block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            source_path = os.path.join(temp_dir, "Main.java")
            with open(source_path, "w") as f:
                f.write(code)

            compiled = self._run_process(["javac", source_path], execution_id)
            if compiled["status"] != "success":
                return compiled

            return self._run_process(["java", "-cp", temp_dir, "Main"], execution_id)

    def _execute_c(self, code: str, execution_id: str) -> dict:
        """Compile the code with gcc and run the resulting binary."""
        # The temporary directory is removed when the block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            source_path = os.path.join(temp_dir, "program.c")
            binary_path = os.path.join(temp_dir, "program")
            with open(source_path, "w") as f:
                f.write(code)

            compiled = self._run_process(["gcc", source_path, "-o", binary_path], execution_id)
            if compiled["status"] != "success":
                return compiled

            return self._run_process([binary_path], execution_id)

    def _execute_sql(self, code: str, execution_id: str) -> dict:
        """Run one SQL statement against the SQLite database at ``self.temp_sqlite_db``.

        When the statement produces rows, a summary of them is returned under
        ``dataframes`` with the name "query_result". Changes are committed.
        """
        result = self._new_result(execution_id)
        try:
            # closing() guarantees the connection is released, and cannot hit
            # an unbound name when connect() itself fails.
            with contextlib.closing(sqlite3.connect(self.temp_sqlite_db)) as conn:
                cur = conn.cursor()
                cur.execute(code)
                # description is set for any statement that yields rows, not
                # only for text that literally starts with "select".
                if cur.description is not None:
                    columns = [desc[0] for desc in cur.description]
                    rows = cur.fetchall()
                    df = pd.DataFrame(rows, columns=columns)
                    result["dataframes"].append({
                        "name": "query_result",
                        "head": df.head().to_dict(),
                        "shape": df.shape,
                        "dtypes": str(df.dtypes)
                    })
                conn.commit()
            result["status"] = "success"
            result["stdout"] = "Query executed successfully."
        except Exception as e:
            result["stderr"] = str(e)
            logger.error(f"[{execution_id}] SQL execution failed: {e}", exc_info=True)
        return result

    def _execute_bash(self, code: str, execution_id: str) -> dict:
        """Run the code through the system shell."""
        # shell=True is intentional: ``code`` is a shell script, not an argv list.
        return self._run_process(code, execution_id, shell=True)
299
+
300
+ # ================================== #
301
+ # LangChain tool
302
+ # ================================== #
303
+
304
# Single shared interpreter instance, created at import time with the
# default settings of Code_Interpreter.
interpreter = Code_Interpreter()
305
+
306
+ @tool
307
def execute_code_multilang(code: str, language: str = "python") -> str:
    """
    Execute code in multiple languages (Python, Bash, SQL, C, Java) and return results.
    Args:
        code (str): the source code to execute
        language (str): the language of the code
    Returns:
        A Markdown-formatted report: a success or failure line followed by
        the captured standard output, standard error, DataFrame previews and
        plot count, as applicable.
    """
    # Use the module-level ``interpreter`` instance defined just above this
    # tool; the previously referenced ``interpreter_instance`` is not defined.
    result = interpreter.execute_code(code, language)
    response = []

    if result["status"] == "success":
        response.append(f"✅ Code executed successfully in **{language.upper()}**")

        if result.get("stdout"):
            response.append("\n**Standard Output:**\n```\n" + result["stdout"].strip() + "\n```")

        if result.get("stderr"):
            response.append("\n**Standard Error (if any):**\n```\n" + result["stderr"].strip() + "\n```")

        # Each entry holds the head of a DataFrame as a dict; rebuild it only
        # to get a readable tabular preview.
        for df in result.get("dataframes") or []:
            preview = pd.DataFrame(df["head"])
            response.append(f"\n**DataFrame `{df['name']}` (Shape: {df['shape']})**\n```\n{preview}\n```")

        if result.get("plots"):
            response.append(f"\n🖼️ {len(result['plots'])} plot(s) generated (encoded)")

    else:
        response.append(f"❌ Code execution failed in **{language.upper()}**")
        if result.get("stderr"):
            response.append("\n**Error Log:**\n```\n" + result["stderr"].strip() + "\n```")

    return "\n".join(response)
tools/document_parser.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import requests
4
+ import tempfile
5
+ from PIL import Image
6
+ import pytesseract
7
+ import pandas as pd
8
+ from urllib.parse import urlparse
9
+ from langchain_core.tools import tool
10
+ from typing import Optional
11
+ import logging
12
+ import pandasql as psql
13
+
14
+ # ------------------- 🔧 Logger Setup -------------------
15
+ def setup_logger():
16
+ logger = logging.getLogger("FileToolLogger")
17
+ logger.setLevel(logging.INFO)
18
+ if not logger.handlers:
19
+ handler = logging.StreamHandler()
20
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
21
+ handler.setFormatter(formatter)
22
+ logger.addHandler(handler)
23
+ return logger
24
+
25
# Module-level logger for the file tools, created at import time.
logger = setup_logger()
26
+
27
+ # ------------------- 📄 Save Content to File -------------------
28
+ @tool
29
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file in the system temp directory and return a message with its path.
    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. Only its final path
            component is used. If not provided, a randomly named file is created.
    """
    temp_dir = tempfile.gettempdir()

    # Keep only the last path component so a name such as "../x" or an
    # absolute path cannot place the file outside the temp directory.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name:
        filepath = os.path.join(temp_dir, safe_name)
    else:
        # mkstemp returns an open descriptor; close it right away so the
        # handle is not leaked, then reopen by path below.
        fd, filepath = tempfile.mkstemp(dir=temp_dir)
        os.close(fd)

    # Explicit encoding so non-ASCII content does not depend on the locale.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
47
+
48
+ # ------------------- 🌐 Download File from URL -------------------
49
+
50
+ @tool
51
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it in the system temp directory.
    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. Only its final path
            component is used. If not provided, the name is taken from the
            URL path, or a random name is generated when the URL has none.
    Returns:
        A message containing the saved path, or a message starting with
        "Error downloading file:" if anything fails.
    """
    try:
        # Parse URL to get filename if not provided
        if not filename:
            filename = os.path.basename(urlparse(url).path)

        # Keep only the last path component so the file cannot be written
        # outside the temp directory; fall back to a random name if empty.
        filename = os.path.basename(filename) if filename else ""
        if not filename:
            filename = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), filename)

        # A timeout keeps an unresponsive server from blocking the tool
        # indefinitely; the context manager releases the connection.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            # Stream to disk in chunks so large files are not held in memory.
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error downloading file: {str(e)}"
82
+
83
+ @tool
84
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using the OCR library pytesseract.
    Args:
        image_path (str): the path to the image file.
    Returns:
        The extracted text prefixed with a short header, or a message
        starting with "Error extracting text from image:" if anything fails.
    """
    try:
        # Open the image in a context manager so the file handle is closed
        # once OCR has finished.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
        return f"Extracted text from image:\n\n{text}"

    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error extracting text from image: {str(e)}"
101
+
102
+ @tool
103
def analyze_csv_file(file_path: str, query: Optional[str] = None) -> str:
    """
    Load a CSV file with pandas and either summarise it or run a SQL query on it.
    Args:
        file_path (str): the path to the CSV file.
        query (str, optional): a SQL query executed with pandasql, in which
            the loaded data is available as the table ``df``. When omitted, a
            ``describe`` summary of all columns is returned instead.
    Returns:
        A text report, "Invalid or missing csv file." for a bad path, or a
        message starting with "Error analyzing CSV file:" on failure.
    """
    # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
    if not os.path.isfile(file_path) or not file_path.lower().endswith(".csv"):
        return "Invalid or missing csv file."

    try:
        df = pd.read_csv(file_path)
        # Column labels are converted to str so join() cannot fail on them.
        column_names = ', '.join(map(str, df.columns))
        result = [f"CSV loaded with shape: {df.shape}", f" Columns: {column_names}"]

        if query:
            result.append(f"\n Query: {query}")
            result_df = psql.sqldf(query, {"df": df})
            result.append("Query Result:\n" + result_df.to_string(index=False))
        else:
            result.append("\nSummary:\n" + str(df.describe(include='all')))

        return "\n".join(result)

    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error analyzing CSV file: {str(e)}"
130
+
131
+ @tool
132
def analyze_excel_file(file_path: str, query: Optional[str] = None) -> str:
    """
    Load an Excel file with pandas and either summarise it or run a SQL query on it.
    Args:
        file_path (str): the path to the xls or xlsx file.
        query (str, optional): a SQL query executed with pandasql, in which
            the loaded data is available as the table ``df``. When omitted, a
            ``describe`` summary of all columns is returned instead.
    Returns:
        A text report, "Invalid or missing Excel file." for a bad path, or a
        message starting with "Error analyzing Excel file:" on failure.
    """
    # Compare the extension case-insensitively so "BOOK.XLSX" is accepted too.
    if not os.path.isfile(file_path) or not file_path.lower().endswith((".xls", ".xlsx")):
        return "Invalid or missing Excel file."

    try:
        df = pd.read_excel(file_path)
        # Excel headers can be numbers or dates; convert them to str so
        # join() does not raise TypeError.
        column_names = ', '.join(map(str, df.columns))
        result = [f"Excel loaded with shape: {df.shape}", f" Columns: {column_names}"]

        if query:
            result.append(f"\n Query: {query}")
            result_df = psql.sqldf(query, {"df": df})
            result.append("Query Result:\n" + result_df.to_string(index=False))
        else:
            result.append("\nSummary:\n" + str(df.describe(include='all')))

        return "\n".join(result)

    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error analyzing Excel file: {str(e)}"
160
+