unit4_test / tools /audio_inspector_tool.py
Vladyslav Khaitov
Add new YouTube tools, change audio tool to audio transcriber, improve system prompt
4933f00
import base64
from dotenv import load_dotenv
load_dotenv(override=True)
from smolagents import Tool
from smolagents.models import Model, ChatMessage
class AudioTranscriberTool(Tool):
    """Tool that asks a chat model to transcribe a local audio file.

    The file is read from disk, base64-encoded and sent to the model as an
    ``input_audio`` content part next to a text instruction. The ``content``
    of the model's reply is returned as the transcription.
    """

    name = "transcribe_audio"
    description = """A tool that transcribes audio files to text. Use this tool when you need to convert speech or audio content into written text.
This tool handles various audio formats and provides accurate transcriptions of audio content.
"""
    inputs = {
        "audio_path": {
            "description": "The path to the audio file to transcribe. This should be a local path to downloaded audio.",
            "type": "string",
        },
    }
    output_type = "string"

    def __init__(self, model: Model):
        """Store the model used for transcription.

        Args:
            model: Model that is called with a list of ``ChatMessage``
                objects; the ``content`` of its reply is used as the
                transcript.
        """
        super().__init__()
        self.model = model

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Local path to the audio file. The audio format sent
                to the model is taken from the file extension.

        Returns:
            The transcription text produced by the model.

        Raises:
            TypeError: If ``audio_path`` is not a string.
            ValueError: If ``audio_path`` has no file extension, so the
                audio format cannot be determined.
            OSError: If the file cannot be opened or read.
            RuntimeError: If the model call fails; the original error is
                chained as ``__cause__``.
        """
        import os

        if not isinstance(audio_path, str):
            raise TypeError("You should provide the `audio_path` string argument to this tool!")

        # splitext only looks at the final path component, so a dot in a
        # directory name (e.g. "clips.v2/sample") is not mistaken for an
        # extension, and an extension-less path yields "" instead of the
        # whole path.
        audio_format = os.path.splitext(audio_path)[1].lstrip(".").lower()
        if not audio_format:
            raise ValueError(
                f"Cannot determine the audio format: '{audio_path}' has no file extension."
            )

        with open(audio_path, "rb") as audio_file:
            base64_audio = base64.b64encode(audio_file.read()).decode("utf-8")

        messages = [
            ChatMessage(
                role="user",
                content=[
                    {
                        "type": "text",
                        "text": "Please transcribe this audio file accurately. Provide only the transcribed text without any additional commentary or formatting.",
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": base64_audio,
                            "format": audio_format,
                        },
                    },
                ],
            )
        ]

        # Keep the try body to the one call that is expected to fail.
        try:
            output = self.model(messages).content
        except Exception as e:
            raise RuntimeError("Transcription failed: " + str(e)) from e

        if isinstance(output, list):
            # The reply content may be a list of dicts. Return the text those
            # parts carry rather than the repr of the list.
            # NOTE(review): assumes text parts expose a "text" key — confirm
            # against the model backend in use.
            text_parts = [
                part["text"]
                for part in output
                if isinstance(part, dict) and isinstance(part.get("text"), str)
            ]
            # Fall back to the previous behaviour when no text part is found.
            output = "\n".join(text_parts) if text_parts else str(output)

        return str(output)