giulia-fontanella committed on
Commit
2d91d8b
·
verified ·
1 Parent(s): 88aac3f

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +55 -0
tools.py CHANGED
@@ -155,7 +155,62 @@ class DescribeImage:
155
  error_msg = f"Error describing image: {str(e)}"
156
  print(error_msg)
157
  return ""
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  @tool
161
  def wiki_search(query: str) -> str:
 
155
  error_msg = f"Error describing image: {str(e)}"
156
  print(error_msg)
157
  return ""
158
+
159
 
160
class TranscribeAudio:
    """Callable that transcribes an MP3 file with an audio-capable chat model.

    Any failure (unreadable file, model error, unexpected response) is
    printed and reported to the caller as an empty string rather than an
    exception.
    """

    def __init__(self, audio_llm: Runnable):
        """
        Store the model used for transcription.

        Args:
            audio_llm: A LangChain Runnable chat model that accepts audio
                inputs (e.g. an audio-enabled GPT-4o variant).
        """
        self.audio_llm = audio_llm

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten a chat message's ``content`` into a single plain string.

        Message content is either a plain string or a list of parts, where
        each part is a string or a dict carrying its text under ``"text"``.
        Parts without text are ignored.
        """
        if isinstance(content, str):
            return content

        pieces = []
        for part in content or []:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)

    def __call__(self, audio_path: str) -> str:
        """
        Transcribe an MP3 file.

        Args:
            audio_path: Path to the MP3 audio file.

        Returns:
            The transcribed text with surrounding whitespace removed, or an
            empty string if reading the file or calling the model fails.
        """
        try:
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            # NOTE(review): AudioFile and the {"type": "audio"} content part
            # must match what the configured model integration accepts --
            # confirm against the provider's multimodal input schema.
            audio_data = AudioFile(
                mime_type="audio/mpeg",  # MP3 MIME type
                data=audio_bytes,
            )

            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Transcribe the speech from this audio file. "
                                "Return only the transcribed text, with no extra commentary."
                            ),
                        },
                        {
                            "type": "audio",
                            "audio": audio_data,
                        },
                    ]
                )
            ]

            response = self.audio_llm.invoke(message)
            # The content may be a list of parts instead of a str; calling
            # .strip() on it directly would raise and discard the transcript.
            return self._content_to_text(response.content).strip()

        except Exception as e:
            # Deliberate best-effort: report the problem and return "".
            error_msg = f"Error transcribing audio: {str(e)}"
            print(error_msg)
            return ""
214
 
215
  @tool
216
  def wiki_search(query: str) -> str: