ameglei-external commited on
Commit
7ae8320
·
verified ·
1 Parent(s): 67bb955

Add new tools

Browse files
Files changed (1) hide show
  1. app.py +82 -1
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
 
2
  from contextlib import suppress
 
3
  from pprint import pprint
4
  from typing import TypedDict, List, Dict, Any, Optional, Tuple
5
  from typing_extensions import Annotated
@@ -7,6 +9,9 @@ from typing_extensions import Annotated
7
  import gradio as gr
8
  import requests
9
  import inspect
 
 
 
10
  import pandas as pd
11
 
12
  from duckduckgo_search import DDGS
@@ -33,7 +38,10 @@ class BasicAgent:
33
  self.tools = [
34
  BasicAgent.search_tool,
35
  BasicAgent.find_local_files_tool,
36
- BasicAgent.read_text_file_tool
 
 
 
37
  ]
38
 
39
  # Chat model with tool support
@@ -157,6 +165,79 @@ class BasicAgent:
157
  print(f"\nCalling read text file tool for", file_name)
158
  with open(file_name, 'r') as f:
159
  return f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
 
162
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
1
  import os
2
+ import tempfile
3
  from contextlib import suppress
4
+ from io import BytesIO
5
  from pprint import pprint
6
  from typing import TypedDict, List, Dict, Any, Optional, Tuple
7
  from typing_extensions import Annotated
 
9
  import gradio as gr
10
  import requests
11
  import inspect
12
+ from PIL import Image
13
+ from pydub import AudioSegment
14
+ import whisper
15
  import pandas as pd
16
 
17
  from duckduckgo_search import DDGS
 
38
  self.tools = [
39
  BasicAgent.search_tool,
40
  BasicAgent.find_local_files_tool,
41
+ BasicAgent.read_text_file_tool,
42
+ BasicAgent.vision_tool,
43
+ BasicAgent.audio_qa_tool,
44
+ BasicAgent.excel_tool
45
  ]
46
 
47
  # Chat model with tool support
 
165
  print(f"\nCalling read text file tool for", file_name)
166
  with open(file_name, 'r') as f:
167
  return f.read()
168
+
169
+ @staticmethod
170
+ @tool(
171
+ description="Analyze an image file and answer a follow-up question about its content."
172
+ )
173
+ def vision_tool(path: str, question: str) -> str:
174
+ """
175
+ Args:
176
+ path: Path to a local image file.
177
+ question: What you want to know (e.g. 'How many people are in this photo?').
178
+ Returns:
179
+ The LLM’s answer based on the image content.
180
+ """
181
+ # Load & save as bytes so the vision model can consume it
182
+ img = Image.open(path)
183
+ img_bytes = BytesIO()
184
+ img.save(img_bytes, format=img.format)
185
+ img_bytes.seek(0)
186
+
187
+ vision = ChatOpenAI(model="gpt-4o-vision", temperature=0)
188
+ result = vision.analyze_image(img_bytes, question)
189
+ return result
190
+
191
+ @staticmethod
192
+ @tool(
193
+ description="Transcribe an audio file with Whisper and answer a question about its content."
194
+ )
195
+ def audio_qa_tool(path: str, question: str, max_chars: int = 2048) -> str:
196
+ """
197
+ Args:
198
+ path: Local filesystem path to an audio file (mp3, wav, etc.).
199
+ question: What to ask about the audio content.
200
+ max_chars: Maximum length of the returned answer.
201
+ Returns:
202
+ The LLM’s answer, based on the transcript (truncated if necessary).
203
+ """
204
+ if not os.path.exists(path):
205
+ return f"Error: file not found at {path}"
206
+
207
+ audio = AudioSegment.from_file(path)
208
+ tmp_path = os.path.join(tempfile.gettempdir(), "tmp_audio.wav")
209
+ audio.export(tmp_path, format="wav")
210
+
211
+ model = whisper.load_model("base")
212
+ result = model.transcribe(tmp_path)
213
+ transcript = result.get("text", "")
214
+
215
+ prompt = f"""Here is the transcript of an audio file:
216
+ {transcript}
217
+
218
+ Question: {question}
219
+
220
+ Please answer briefly based on this transcript, and give only the answer."""
221
+
222
+ response = self.model(completion_kwargs={"max_tokens": 200})(prompt)
223
+ answer = response.choices[0].text.strip()
224
+
225
+ return answer[:max_chars]
226
+
227
+
228
+ @staticmethod
229
+ @tool(
230
+ description="Load an Excel file and returns it's text representation."
231
+ )
232
+ def excel_tool(path: str) -> str:
233
+ """
234
+ Args:
235
+ path: Path to the .xlsx file.
236
+ Returns:
237
+ The string form of the content.
238
+ """
239
+ df = pd.read_excel(path)
240
+ return str(df.to_csv(index=False))
241
 
242
 
243
  def run_and_submit_all( profile: gr.OAuthProfile | None):