antonioschiro committed on
Commit
df5ab56
·
verified ·
1 Parent(s): 943121e

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +228 -7
tools.py CHANGED
@@ -1,14 +1,78 @@
 
1
  from langchain_core.tools import tool
2
- from langchain_community.tools import DuckDuckGoSearchRun
3
- #from langchain_tavily import TavilySearch
 
 
 
 
 
 
 
 
 
 
4
  import os
5
- #from dotenv import load_dotenv
6
- #load_dotenv()
 
 
7
 
 
8
  os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  @tool
11
- def websearch(query: str) -> str:
12
  """
13
  Perform a web search using DuckDuckGo.
14
 
@@ -20,8 +84,165 @@ def websearch(query: str) -> str:
20
  If an exception occurs, returns a fallback string indicating no results were found.
21
  """
22
  search_engine = DuckDuckGoSearchRun()
23
- try:
24
  response = search_engine.invoke(query)
25
  return response
26
  except:
27
- return f"No results found on the web for this query: {query}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextlib
2
  from langchain_core.tools import tool
3
+ from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
4
+ from langchain_community.utilities import WikipediaAPIWrapper
5
+ from langchain_tavily import TavilySearch
6
+ from langchain_chroma import Chroma
7
+ from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, JSONLoader
8
+ from langchain_community.document_loaders.image import UnstructuredImageLoader
9
+ from langchain_community.document_loaders.youtube import YoutubeLoader, TranscriptFormat
10
+ #from langchain_unstructured import UnstructuredLoader
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ from langchain_huggingface import HuggingFaceEmbeddings
13
+ from transformers import pipeline
14
+ import asyncio
15
  import os
16
+ import io
17
+ from dotenv import load_dotenv
18
+ #from PIL import Image
19
+ #from io import StringIO
20
 
21
load_dotenv()
# os.getenv returns None for a missing variable, and assigning None into
# os.environ raises TypeError at import time. Only re-export keys that are
# actually defined so the module can still be imported without them.
for _env_key in ("TAVILY_API_KEY", "UNSTRUCTURED_API_KEY"):
    _env_value = os.getenv(_env_key)
    if _env_value is not None:
        os.environ[_env_key] = _env_value
24
 
25
+ # Retriever
26
@tool
def retriever(query: str, file_path: str) -> str:
    """
    Retrieve relevant information from a text, PDF, CSV, JSON or image file using semantic search.

    The loader is chosen from the file extension (case-insensitive); any
    extension other than .pdf, .csv, .json, .png, .jpeg or .jpg is read with
    TextLoader. The loaded documents are split into 100-character chunks,
    embedded with "all-MiniLM-L6-v2", and the single closest chunk is returned.

    Args:
        query (str): The search query string.
        file_path (str): Path to the file to be searched.

    Returns:
        str: The most relevant text chunk from the file based on the query,
        or "No results found." if loading, indexing or retrieval fails.
    """
    import uuid

    try:
        lowered_path = file_path.lower()
        if lowered_path.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif lowered_path.endswith(".csv"):
            loader = CSVLoader(file_path)
        elif lowered_path.endswith(".json"):
            # JSONLoader requires a jq schema; "." selects the whole document,
            # and text_content=False allows non-string (dict/list) content.
            loader = JSONLoader(file_path, jq_schema=".", text_content=False)
        elif lowered_path.endswith((".png", ".jpeg", ".jpg")):
            loader = UnstructuredImageLoader(file_path)
        else:
            loader = TextLoader(file_path)
        # Load data into document objects
        docs = loader.load()
        # Chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=100,
            chunk_overlap=20,
            length_function=len,
        )
        chunks = text_splitter.split_documents(docs)
        # Define embeddings and load them into vectorstore
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        # A unique collection per call keeps chunks from previously searched
        # files out of this query's results (the default collection name is
        # shared by every call in the same process).
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            collection_name=f"retriever-{uuid.uuid4().hex}",
        )
        try:
            doc_retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
            doc_result = doc_retriever.invoke(query)
        finally:
            # Drop the temporary collection so repeated calls do not pile up.
            vectorstore.delete_collection()
        return "\n\n".join(doc.page_content for doc in doc_result)
    except Exception:
        # Best-effort tool: any failure is reported to the agent as "no results".
        return "No results found."
72
+
73
+ # Websearch tools
74
  @tool
75
+ def web_search(query: str) -> str:
76
  """
77
  Perform a web search using DuckDuckGo.
78
 
 
84
  If an exception occurs, returns a fallback string indicating no results were found.
85
  """
86
  search_engine = DuckDuckGoSearchRun()
87
+ try:
88
  response = search_engine.invoke(query)
89
  return response
90
  except:
91
+ return f"No results found on the web for this query: {query}."
92
+
93
@tool
def wiki_search(query: str) -> str:
    """
    Look up a query on Wikipedia and return the tool's response.

    Args:
        query (str): The text to search Wikipedia for.

    Returns:
        str: Whatever WikipediaQueryRun returns for the query.
    """
    # Build the Wikipedia tool with a default API wrapper and run it directly.
    return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run(query)
107
+
108
@tool
def youtube_analysis(yt_url: str) -> str:
    """
    Load a YouTube video's transcript in 30-second chunks, with video info requested.

    Args:
        yt_url (str): The URL of the YouTube video.

    Returns:
        str: The repr of every loaded document, separated by blank lines.
    """
    transcript_loader = YoutubeLoader.from_youtube_url(
        yt_url,
        add_video_info=True,
        transcript_format=TranscriptFormat.CHUNKS,
        chunk_size_seconds=30,
    )
    documents = transcript_loader.load()
    return "\n\n".join(repr(document) for document in documents)
127
+
128
+ # Calculator tools
129
@tool
def add_numbers(a: int|float, b:int|float)-> int|float:
    """
    Return the sum of two numbers.

    Args:
        a (int | float): First addend.
        b (int | float): Second addend.

    Returns:
        int | float: a plus b.
    """
    total = a + b
    return total
142
@tool
def subtract_numbers(a: int|float, b:int|float)-> int|float:
    """
    Return the difference of two numbers.

    Args:
        a (int | float): The minuend (value subtracted from).
        b (int | float): The subtrahend (value taken away).

    Returns:
        int | float: a minus b.
    """
    difference = a - b
    return difference
155
+
156
@tool
def multiply_numbers(a: int|float, b:int|float)-> int|float:
    """
    Return the product of two numbers.

    Args:
        a (int | float): First factor.
        b (int | float): Second factor.

    Returns:
        int | float: a times b.
    """
    product = a * b
    return product
169
+
170
@tool
def divide_numbers(a: int|float, b:int|float)-> float|None:
    """
    Return the quotient of two numbers.

    Args:
        a (int | float): The numerator.
        b (int | float): The denominator.

    Returns:
        float | None: a divided by b, or None when b is zero.
    """
    # Guard the zero denominator up front instead of catching ZeroDivisionError.
    if b == 0:
        return None
    return a / b
187
+
188
@tool
def modulus_numbers(a: int|float, b:int|float)-> int|float|None:
    """
    Compute the modulus of two numbers.

    Args:
        a (int | float): The dividend.
        b (int | float): The divisor.

    Returns:
        int | float | None: The remainder after dividing a by b.
        Returns None if b is zero, matching divide_numbers.
    """
    try:
        return a % b
    except ZeroDivisionError:
        # A zero divisor has no remainder; report it the same way division does.
        return None
201
+
202
+ # Image recognition
203
+
204
# Lazily created DETR pipeline, shared by all detect_objects calls.
_object_detector = None


def _get_object_detector():
    """Return the shared object-detection pipeline, building it on first use."""
    global _object_detector
    if _object_detector is None:
        # Loading the model is expensive, so do it once per process.
        _object_detector = pipeline("object-detection", model="facebook/detr-resnet-50")
    return _object_detector


@tool
def detect_objects(image_path: str) -> str:
    """
    Detects objects in an image and returns a list with labels and confidence scores.

    Args:
        image_path (str): Path to the input image file.

    Returns:
        str: One line per detected object in the form
        "label (score=..., box=...)", with the score rounded to three
        decimals, or "No objects detected." when the model finds nothing.
    """
    results = _get_object_detector()(image_path)
    output = [
        f"{r['label']} (score={round(r['score'], 3)}, box={r['box']})"
        for r in results
    ]
    # An empty string gives the calling agent nothing to act on.
    return "\n".join(output) or "No objects detected."
225
+
226
+ # Code execution
227
@tool
def run_python(code: str) -> str:
    """
    Executes Python code and returns its result or captured stdout.

    WARNING: the code is run with exec() inside the current process and with
    full builtins. It is NOT sandboxed; only pass code you trust.

    Args:
        code (str): The Python code to execute.

    Returns:
        str: str(_result) if the code assigns a top-level variable named
        "_result"; otherwise the stripped captured stdout, or
        "Code executed successfully." when nothing was printed. If the code
        raises an Exception, returns "Execution error: <message>".
    """
    stdout = io.StringIO()
    # One dict serves as both globals and locals. With separate dicts the code
    # runs like a class body, so functions and comprehensions defined by the
    # snippet cannot see its other top-level names (NameError on helpers,
    # recursion, or module-level variables).
    namespace = {}
    try:
        with contextlib.redirect_stdout(stdout):
            exec(code, namespace)
        # An explicit "_result" variable takes precedence over printed output.
        if "_result" in namespace:
            return str(namespace["_result"])
        return stdout.getvalue().strip() or "Code executed successfully."
    except Exception as e:
        return f"Execution error: {e}"