| import os | |
| import whisper | |
| from smolagents import Tool | |
class OpenAISpeechToTextTool(Tool):
    """Tool that transcribes an audio file to text with OpenAI's Whisper model.

    The Whisper model is loaded lazily on the first successful path check and
    then reused by later calls on the same instance, so the checkpoint is not
    reloaded for every transcription.

    Failures are reported as strings starting with ``"Error"`` instead of
    being raised, so ``forward`` always returns a string.
    """

    name = "transcribe_audio"
    description = "Transcribes audio to text and returns the text"
    inputs = {
        "audio_path": {"type": "string", "description": "Path to the audio file"},
    }
    output_type = "string"

    # Checkpoint name handed to ``whisper.load_model``; subclasses may override.
    model_name = "small"

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at *audio_path*.

        Args:
            audio_path: Path to the audio file.

        Returns:
            The transcribed text, or an error message if the file does not
            exist or if loading the model / transcribing raises.
        """
        try:
            # Validate the path before touching the model so that a bad path
            # does not pay the cost of loading the checkpoint.
            if not os.path.exists(audio_path):
                return f"Error: Audio file not found at {audio_path}"

            # Load once per instance and reuse on subsequent calls.
            model = getattr(self, "_model", None)
            if model is None:
                model = whisper.load_model(self.model_name)
                self._model = model

            result = model.transcribe(audio_path)
            return result["text"]
        except Exception as e:
            # Deliberately broad: every failure is surfaced to the caller as
            # a string rather than as an exception.
            return f"Error transcribing audio: {str(e)}"