| import speech_recognition as sr |
| import os |
| from pydub import AudioSegment |
| from smolagents import tool |
|
|
|
|
| @tool |
| def transcribe_audio(mp3_path: str) -> str: |
| """ |
| Transcribes text from an MP3 audio file using speech recognition. |
| |
| Args: |
| mp3_path (str): Path to the MP3 file to be transcribed. |
| |
| Returns: |
| str: The transcribed text from the audio file. |
| |
| Raises: |
| FileNotFoundError: If the MP3 file does not exist at the specified path. |
| ValueError: If the file is not a valid MP3 file or audio cannot be processed. |
| Exception: For other unexpected errors during transcription. |
| |
| Example: |
| >>> text = transcribe_audio("sample.mp3") |
| >>> print(text) |
| "Hello, this is a sample audio." |
| """ |
| |
| if not os.path.exists(mp3_path): |
| raise FileNotFoundError(f"The file {mp3_path} does not exist.") |
|
|
| |
| recognizer = sr.Recognizer() |
|
|
| try: |
| |
| audio = AudioSegment.from_mp3(mp3_path) |
| wav_path = mp3_path.replace(".mp3", ".wav") |
| audio.export(wav_path, format="wav") |
|
|
| |
| with sr.AudioFile(wav_path) as source: |
| |
| recognizer.adjust_for_ambient_noise(source) |
| |
| audio_data = recognizer.record(source) |
|
|
| |
| os.remove(wav_path) |
|
|
| |
| text = recognizer.recognize_google(audio_data) |
| return text |
|
|
| except sr.UnknownValueError: |
| raise ValueError("Could not understand the audio.") |
| except sr.RequestError as e: |
| raise ValueError(f"Could not process audio; {e}") |
| except Exception as e: |
| raise Exception(f"An error occurred during transcription: {e}") |
|
|