Spaces:
Sleeping
Sleeping
Vladyslav Khaitov
Add new YouTube tools, change audio tool to audio transcriber, improve system prompt
4933f00 | import base64 | |
| from dotenv import load_dotenv | |
| load_dotenv(override=True) | |
| from smolagents import Tool | |
| from smolagents.models import Model, ChatMessage | |
class AudioTranscriberTool(Tool):
    """Tool that transcribes a local audio file by sending it to a chat model.

    The file is read from disk, base64-encoded and attached as an
    ``input_audio`` content part of a single user message. The text of the
    model's reply is returned as the transcription.
    """

    name = "transcribe_audio"
    description = """A tool that transcribes audio files to text. Use this tool when you need to convert speech or audio content into written text.
    This tool handles various audio formats and provides accurate transcriptions of audio content.
    """
    inputs = {
        "audio_path": {
            "description": "The path to the audio file to transcribe. This should be a local path to downloaded audio.",
            "type": "string",
        },
    }
    output_type = "string"

    def __init__(self, model: Model):
        """Store the model that will be called to perform the transcription.

        Args:
            model: Callable model; it is invoked with a list of chat messages
                and its reply's ``content`` attribute is used as the result.
        """
        super().__init__()
        self.model = model

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path`` and return the text.

        Args:
            audio_path: Local path to the audio file. Its extension (without
                the dot, lower-cased) is sent to the model as the audio format.

        Returns:
            The transcription as a string. When the model replies with a list
            of content parts, the ``text`` of each part is joined with newlines.

        Raises:
            Exception: If ``audio_path`` is not a string, if it has no file
                extension, or if the model call fails (the original error is
                chained as ``__cause__``).
            OSError: If the file cannot be opened or read.
        """
        # Function-scope import keeps this tool self-contained.
        import os

        if not isinstance(audio_path, str):
            raise Exception("You should provide the `audio_path` string argument to this tool!")

        with open(audio_path, "rb") as audio_file:
            base64_audio = base64.b64encode(audio_file.read()).decode("utf-8")

        # Use the real extension of the file name rather than "whatever follows
        # the last dot in the path": a dotted directory name must not leak into
        # the format, and ".MP3" should be reported as "mp3".
        audio_format = os.path.splitext(audio_path)[1].lstrip(".").lower()
        if not audio_format:
            raise Exception(
                "Could not determine the audio format: `audio_path` has no file extension."
            )

        messages = [
            ChatMessage(
                role="user",
                content=[
                    {
                        "type": "text",
                        "text": "Please transcribe this audio file accurately. Provide only the transcribed text without any additional commentary or formatting.",
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": base64_audio,
                            "format": audio_format,
                        },
                    },
                ],
            )
        ]

        try:
            output = self.model(messages).content
        except Exception as e:
            # Keep the original error attached so the traceback stays useful.
            raise Exception("Transcription failed: " + str(e)) from e

        if isinstance(output, list):
            # Content may be a list of parts (dicts); return their text instead
            # of the Python repr of the list. Parts without a string "text"
            # field are stringified so nothing is silently dropped.
            output = "\n".join(
                part["text"]
                if isinstance(part, dict) and isinstance(part.get("text"), str)
                else str(part)
                for part in output
            )
        return str(output)