Spaces:
Build error
Build error
# -*- coding: utf-8 -*-
# Module setup for the speech-to-text app: picks the compute device, loads the
# pretrained wav2vec2 acoustic model, and builds a lexicon-based CTC beam-search
# decoder that uses the downloaded 'librispeech-4-gram' files.
import IPython
import matplotlib.pyplot as plt
import torch
import torchaudio
from torchaudio.models.decoder import ctc_decoder, download_pretrained_files
from torchaudio.utils import download_asset

import audio_support_functions as myFunc

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Pipeline bundle: provides the model and the sample rate read by theaudio().
ctc_preTrained_object = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = ctc_preTrained_object.get_model().to(device)

# Lexicon, token list and language model consumed by the decoder below.
# They are handed over as attributes of `files`; no file handle is opened here.
files = download_pretrained_files('librispeech-4-gram')

beam_search_decoder = ctc_decoder(
    lexicon=files.lexicon,
    tokens=files.tokens,
    lm=files.lm,
    nbest=3,
    beam_size=3,
)
def theaudio(x):
    """Transcribe an audio file with the wav2vec2 model and CTC beam search.

    Args:
        x: Path of the audio file to transcribe. ``None`` (no audio
            submitted) is accepted and yields an empty transcript.

    Returns:
        The words of the top beam-search hypothesis joined by single spaces,
        or ``""`` when no audio was given or the decoder produced no
        hypothesis.
    """
    # Nothing to load when the form is submitted without a recording/upload.
    if x is None:
        return ""

    waveform, sample_rate = torchaudio.load(x)

    # The loaded tensor is laid out as (channels, time). Passed unchanged, a
    # stereo file would be handled as two separate utterances and only the
    # first one would be returned below, so average the channels into one.
    if waveform.dim() == 2 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    waveform = waveform.to(device)

    # Bring the audio to the sample rate declared by the pretrained bundle.
    target_rate = ctc_preTrained_object.sample_rate
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    with torch.inference_mode():
        emissions, _ = model(waveform)

    # Decoding is done on a CPU copy of the model output.
    hypotheses = beam_search_decoder(emissions.to('cpu'))
    if not hypotheses or not hypotheses[0]:
        return ""

    return " ".join(hypotheses[0][0].words).strip()
import gradio as gr
import librosa

# Web front end: a single audio input (delivered to theaudio as a file path)
# and a plain-text output holding the transcript.
APP_TITLE = "Aud2Text Using CTC"
APP_DESCRIPTION = "Upload an audio file or record one and the AI will transcribe it for you!"

audio_input = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=theaudio,
    inputs=audio_input,
    outputs="text",
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Start serving the interface.
iface.launch()