Spaces:
Runtime error
Runtime error
import os
import shlex
import subprocess
import sys
import urllib.request
import wave
from shlex import quote

import gradio as gr
import numpy as np
from deepspeech import Model
# File names of the DeepSpeech 0.9.3 acoustic model and external scorer. Each
# name is used both as the local path and as the final component of the
# download URL.
model_file_path = "deepspeech-0.9.3-models.pbmm"
lm_file_path = "deepspeech-0.9.3-models.scorer"
url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"


def _download_if_missing(file_name):
    """Fetch ``url + file_name`` into the working directory unless it exists.

    The data is written to a temporary ``.part`` name and renamed only after
    the transfer completes, so an interrupted download is never mistaken for
    a finished file on the next start.
    """
    if os.path.exists(file_name):
        return
    partial_path = file_name + ".part"
    urllib.request.urlretrieve(url + file_name, filename=partial_path)
    os.replace(partial_path, file_name)


_download_if_missing(model_file_path)
_download_if_missing(lm_file_path)

# Decoder settings applied to the model below.
beam_width = 100
lm_alpha = 0.93
lm_beta = 1.18

# Load the acoustic model once at import time and attach the external scorer.
model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
def convert_samplerate(audio_path, desired_sample_rate):
    """Convert an audio file with SoX and return it as raw 16-bit mono PCM.

    Args:
        audio_path: Path of the audio file passed to ``sox`` as its input.
        desired_sample_rate: Output sample rate in Hz.

    Returns:
        A ``(desired_sample_rate, samples)`` tuple, where ``samples`` is a
        ``numpy.int16`` array decoded from the bytes SoX wrote to stdout.

    Raises:
        RuntimeError: If ``sox`` exits with a non-zero status.
        OSError: If the ``sox`` process cannot be started.
    """
    # Build argv directly instead of formatting a string and splitting it
    # again: the path is passed through verbatim, whatever characters it has.
    sox_cmd = [
        'sox', audio_path,
        '--type', 'raw',
        '--bits', '16',
        '--channels', '1',
        '--rate', str(desired_sample_rate),
        '--encoding', 'signed-integer',
        '--endian', 'little',
        '--compression', '0.0',
        '--no-dither',
        '-',  # write the converted audio to stdout
    ]
    try:
        output = subprocess.check_output(sox_cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # stderr is captured as bytes; decode it so the message is readable
        # rather than a b'...' repr.
        sox_stderr = (e.stderr or b'').decode(errors='replace').strip()
        raise RuntimeError('SoX returned non-zero status: {}'.format(sox_stderr)) from e
    except OSError as e:
        raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(desired_sample_rate, e.strerror)) from e
    return desired_sample_rate, np.frombuffer(output, np.int16)
def transcribe(audio_file):
    """Transcribe a WAV file with the module-level DeepSpeech ``model``.

    The file is read directly when it already matches what the decoding below
    expects (the model's sample rate, one channel, 16-bit samples). Otherwise
    it is converted with SoX via ``convert_samplerate``.

    Args:
        audio_file: Path (or file object) accepted by ``wave.open``.

    Returns:
        The text returned by ``model.stt`` for the audio.

    Raises:
        wave.Error: If the file is not a WAV file ``wave`` can read.
        RuntimeError, OSError: Propagated from ``convert_samplerate``.
    """
    desired_sample_rate = model.sampleRate()

    # The context manager closes the file even if reading or converting raises.
    with wave.open(audio_file, 'rb') as fin:
        fs_orig = fin.getframerate()
        wrong_rate = fs_orig != desired_sample_rate
        # The direct path below decodes frames as int16 and feeds them to the
        # model unchanged, so stereo or non-16-bit files must go through SoX,
        # which emits 16-bit single-channel data.
        wrong_layout = fin.getnchannels() != 1 or fin.getsampwidth() != 2

        if wrong_rate or wrong_layout:
            if wrong_rate:
                print('Warning: original sample rate ({}) is different than {}hz. Resampling might produce erratic speech recognition.'.format(fs_orig, desired_sample_rate), file=sys.stderr)
            _, audio = convert_samplerate(audio_file, desired_sample_rate)
        else:
            audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    return model.stt(audio)
# Gradio UI: a single uploaded audio file goes in (handed to `transcribe` as a
# file path) and the transcript comes out in a text box. A streaming
# microphone input with state was an earlier alternative and is not used.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(label="Upload Audio File", source="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcript"),
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()