Update tools.py
Browse files
tools.py
CHANGED
|
@@ -155,7 +155,62 @@ class DescribeImage:
|
|
| 155 |
error_msg = f"Error describing image: {str(e)}"
|
| 156 |
print(error_msg)
|
| 157 |
return ""
|
|
|
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
@tool
|
| 161 |
def wiki_search(query: str) -> str:
|
|
|
|
| 155 |
error_msg = f"Error describing image: {str(e)}"
|
| 156 |
print(error_msg)
|
| 157 |
return ""
|
| 158 |
+
|
| 159 |
|
| 160 |
+
class TranscribeAudio:
    """Callable that asks an audio-capable chat model to transcribe an audio file.

    The file is read from disk, wrapped in an ``AudioFile`` tagged as MP3
    (``audio/mpeg``), and sent to the model together with a short
    transcription instruction.  Any failure is printed and reported to the
    caller as an empty string.
    """

    def __init__(self, audio_llm: Runnable):
        """
        Initialize with a LangChain-compatible vision+audio GPT-4o model.

        Args:
            audio_llm: A LangChain Runnable for GPT-4o (must support audio inputs).
        """
        self.audio_llm = audio_llm

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten message content (a string or a list of blocks) into plain text."""
        if isinstance(content, str):
            return content

        # Message content may also arrive as a list mixing bare strings and
        # {"type": "text", "text": ...} dicts; keep only the textual parts.
        pieces = []
        for block in content or []:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                pieces.append(str(block.get("text") or ""))
        return "".join(pieces)

    def __call__(self, audio_path: str) -> str:
        """
        Transcribe an MP3 file.

        Args:
            audio_path: Path to the MP3 audio file.

        Returns:
            Transcribed text as a string, stripped of surrounding whitespace.
            An empty string is returned if reading the file or calling the
            model fails; the error is printed rather than raised.
        """
        try:
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            # NOTE(review): AudioFile is not defined in the visible part of the
            # file -- confirm it is imported/declared elsewhere in tools.py.
            audio_data = AudioFile(
                mime_type="audio/mpeg",  # MP3 MIME type
                data=audio_bytes,
            )

            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Transcribe the speech from this audio file. "
                                "Return only the transcribed text, with no extra commentary."
                            ),
                        },
                        {
                            "type": "audio",
                            "audio": audio_data,
                        },
                    ]
                )
            ]

            response = self.audio_llm.invoke(message)
            # Calling .strip() directly on list-shaped content would raise and
            # be swallowed below, discarding a valid transcription.
            return self._content_to_text(response.content).strip()

        except Exception as e:
            # Best-effort tool: report the problem and signal failure with "".
            error_msg = f"Error transcribing audio: {str(e)}"
            print(error_msg)
            return ""
|
| 214 |
|
| 215 |
@tool
|
| 216 |
def wiki_search(query: str) -> str:
|