Spaces:
Sleeping
Sleeping
| """Docstring""" | |
| import os | |
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| from pydub.silence import split_on_silence | |
| from modules.console_colors import ULTRASINGER_HEAD | |
| # todo: Code from here: https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python | |
| def print_text(wav_file): | |
| """Docstring""" | |
| # English speech! | |
| recognizer = sr.Recognizer() | |
| # open the file | |
| with sr.AudioFile(wav_file) as source: | |
| # listen for the data (load audio to memory) | |
| audio_data = recognizer.record(source) | |
| # recognize (convert from speech to text) | |
| text = recognizer.recognize_google(audio_data) | |
| print(text) | |
| def get_large_audio_transcription(wav_file): | |
| """ | |
| Splitting the large audio file into chunks | |
| and apply speech recognition on each of these chunks | |
| """ | |
| # open the audio file using pydub | |
| sound = AudioSegment.from_wav(wav_file) | |
| # split audio sound where silence is 700 miliseconds or more and get chunks | |
| chunks = split_on_silence( | |
| sound, | |
| # experiment with this value for your target audio file | |
| min_silence_len=500, | |
| # adjust this per requirement | |
| silence_thresh=sound.dBFS - 14, | |
| # keep the silence for 1 second, adjustable as well | |
| keep_silence=500, | |
| ) | |
| folder_name = "audio-chunks" | |
| # create a directory to store the audio chunks | |
| if not os.path.isdir(folder_name): | |
| os.mkdir(folder_name) | |
| whole_text = "" | |
| recognizer = sr.Recognizer() | |
| # process each chunk | |
| for i, audio_chunk in enumerate(chunks, start=1): | |
| # export audio chunk and save it in | |
| # the `folder_name` directory. | |
| chunk_filename = os.path.join(folder_name, f"chunk{i}.wav") | |
| audio_chunk.export(chunk_filename, format="wav") | |
| # recognize the chunk | |
| with sr.AudioFile(chunk_filename) as source: | |
| audio_listened = recognizer.record(source) | |
| # try converting it to text | |
| try: | |
| text = recognizer.recognize_google(audio_listened) | |
| except sr.UnknownValueError as error: | |
| print("Error:", str(error)) | |
| else: | |
| text = f"{text.capitalize()}. " | |
| print(chunk_filename, ":", text) | |
| whole_text += text | |
| # return the text for all chunks detected | |
| return whole_text | |
| def transcribe_audio(audio_file): | |
| """Docstring""" | |
| recognizer = sr.Recognizer() | |
| with sr.AudioFile(audio_file) as source: | |
| audio = recognizer.record(source) | |
| try: | |
| transcript = recognizer.recognize_google(audio, show_all=True) | |
| start_time = transcript["result"][0]["alternative"][0]["words"][0][ | |
| "startTime" | |
| ] | |
| end_time = transcript["result"][0]["alternative"][0]["words"][-1][ | |
| "endTime" | |
| ] | |
| return ( | |
| transcript["result"][0]["alternative"][0]["transcript"], | |
| start_time, | |
| end_time, | |
| ) | |
| except sr.UnknownValueError: | |
| print(f"{ULTRASINGER_HEAD} Could not understand audio") | |
| except sr.RequestError as error: | |
| print(f"{ULTRASINGER_HEAD} Error with recognizing service; {error}") | |
| class SpeechToText: | |
| """Docstring""" | |