CTPC committed on
Commit
153f125
·
verified ·
1 Parent(s): d15130d

Create tools.py

Browse files
Files changed (1) hide show
  1. tools.py +220 -0
tools.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from langchain_community.tools import DuckDuckGoSearchRun, TavilySearchResults
3
+ from langchain_core.tools import tool
4
+ from langchain.schema import HumanMessage, AIMessage, SystemMessage
5
+ from langchain_openai import AzureChatOpenAI
6
+ from azure.identity import EnvironmentCredential
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+ import base64
9
+
10
+ #LLMs
11
def get_access_token():
    """
    Fetch a bearer token for the Azure Cognitive Services scope.

    A fresh `EnvironmentCredential` is created on every call and asked for a
    token scoped to ``https://cognitiveservices.azure.com/.default``.

    Returns:
        The raw token string (the ``token`` attribute of the access token
        object returned by the credential).
    """
    scope = "https://cognitiveservices.azure.com/.default"
    return EnvironmentCredential().get_token(scope).token
15
+
16
# Azure OpenAI chat model (GPT-4o deployment) used by the image tools.
#
# Authentication goes through Microsoft Entra ID, so the bearer token is
# handed over via `azure_ad_token_provider` rather than `api_key`:
#   * `api_key` is meant for a static resource key, not an Entra ID token;
#   * a token fetched once at import time expires, whereas the provider
#     callable is invoked by the client whenever it needs a token.
llm = AzureChatOpenAI(
    model_name="gpt-4o",
    azure_ad_token_provider=get_access_token,
    azure_endpoint="https://cog-sandbox-dev-eastus2-001.openai.azure.com/",
    api_version="2024-08-01-preview",
)

# Gemini chat model used by the audio transcription tool.
google_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite")
24
+
25
+ #IMAGE_TOOLS
26
@tool
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: Path to a local image file.

    Returns:
        The text the model extracted from the image, with surrounding
        whitespace stripped. An empty string is returned (and the error is
        printed) if the file cannot be read or the model call fails.
    """
    # Function-scope import so this tool does not depend on a file-level import.
    import mimetypes

    try:
        # Read the image and encode it as base64 for embedding in a data URL.
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's actual media type. Fall back to
        # PNG when the extension is unknown or is not an image type.
        guessed_type = mimetypes.guess_type(img_path)[0]
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = "image/png"

        # One human message carrying the instruction plus the inline image.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        response = llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the problem and hand back an empty result
        # instead of propagating the exception to the caller.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
79
+
80
@tool
def describe_image(img_path: str) -> str:
    """
    Takes an image file path or URL and returns a detailed description of the image.

    Args:
        img_path (str): Local file path or http(s) URL to the image.

    Returns:
        str: A detailed description of the image content. An empty string is
        returned (and the error is printed) if the image cannot be read or
        the model call fails.
    """
    # Function-scope import so this tool does not depend on a file-level import.
    import mimetypes

    try:
        if img_path.startswith(("http://", "https://")):
            # Remote image: pass the URL through unchanged instead of trying
            # to open it as a local file.
            image_url = img_path
        else:
            # Local image: read it and embed it as a base64 data URL.
            with open(img_path, "rb") as image_file:
                image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

            # Label the payload with the file's actual media type. Fall back
            # to PNG when the extension is unknown or is not an image type.
            guessed_type = mimetypes.guess_type(img_path)[0]
            if guessed_type and guessed_type.startswith("image/"):
                mime_type = guessed_type
            else:
                mime_type = "image/png"
            image_url = f"data:{mime_type};base64,{image_base64}"

        # One human message carrying the instruction plus the image reference.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Provide a detailed description from this image. "
                            "Return descriptive text only."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url},
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        response = llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the problem and hand back an empty result
        # instead of propagating the exception to the caller.
        error_msg = f"Error describing image: {str(e)}"
        print(error_msg)
        return ""
133
+
134
+ #AUDIO_TOOLS
135
@tool
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe audio from a file using a multimodal model.

    Args:
        audio_path: Path to a local audio file. Files ending in ``.mp3``
            (case-insensitive) are declared as MP3; everything else is
            declared as WAV.

    Returns:
        The transcribed text with surrounding whitespace stripped. An empty
        string is returned (and the error is printed) if the file cannot be
        read or the model call fails.
    """
    try:
        # Read the audio and encode it as base64 for the request payload.
        with open(audio_path, "rb") as audio_file:
            audio_base64 = base64.b64encode(audio_file.read()).decode()

        # Declare the real container format instead of always claiming WAV.
        audio_format = "mp3" if audio_path.lower().endswith(".mp3") else "wav"

        # One human message carrying the instruction plus the inline audio.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Transcribe the following audio input:"
                        ),
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": audio_base64,
                            "format": audio_format,
                        },
                    },
                ]
            )
        ]

        # Call the audio-capable model.
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the problem and hand back an empty result
        # instead of propagating the exception to the caller.
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
187
+
188
+ #WEB_SEARCH_TOOL
189
@tool
def web_search(query: str) -> str:
    """Perform a web search and return the top 5 results."""
    # `search_depth` was previously misspelled, so the requested "basic"
    # depth never reached the tool. `max_results=5` makes the limit promised
    # in the docstring explicit.
    # NOTE(review): the Tavily tool appears to return its results as a
    # structured object rather than a plain string; the value is passed
    # through unchanged here - confirm what callers expect.
    search_tool = TavilySearchResults(search_depth="basic", max_results=5)
    return search_tool.invoke(query)
196
+
197
+ #FILE_PARSE_TOOL
198
@tool
def read_file(file_path: str) -> str:
    """
    Reads a text based file and returns its content as a string.

    Supported extensions (matched case-insensitively): .txt, .py, .csv, .xlsx.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The content of the file. Plain-text files (.txt, .py) are
        returned verbatim; .csv and .xlsx files are loaded with pandas and
        rendered via ``DataFrame.to_string()``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    # Compare against a lowercased copy so "DATA.CSV" is treated like "data.csv".
    lowered = file_path.lower()

    if lowered.endswith((".txt", ".py")):
        # Explicit encoding so the result does not depend on the platform locale.
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    if lowered.endswith(".csv"):
        return pd.read_csv(file_path).to_string()
    if lowered.endswith(".xlsx"):
        return pd.read_excel(file_path).to_string()

    raise ValueError(
        "Unsupported file format. Only .txt, .py, .csv, and .xlsx are supported."
    )