Spaces:

Obai33
/

AudioToText

Build error

App Files Files Community

Obai33 committed on Aug 30, 2024

Commit

f8b4e77

verified ·

1 Parent(s): 6c4dd7d

Upload app.py

Browse files

Files changed (1) hide show

app.py +71 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+# -*- coding: utf-8 -*-
+"""app.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1CuRN-kiD-QDBFlev8vWpV3rVkjiWlaeP
+"""
+import torch
+import torchaudio
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(device)
+import IPython
+import matplotlib.pyplot as plt
+from torchaudio.utils import download_asset
+ctc_preTrained_object = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
+model = ctc_preTrained_object.get_model().to(device)
+!pip install flashlight-text
+from torchaudio.models.decoder import download_pretrained_files
+files = download_pretrained_files('librispeech-4-gram')
+f = open(files.tokens, 'r')
+from torchaudio.models.decoder import ctc_decoder
+beam_search_decoder = ctc_decoder(
+    lexicon = files.lexicon,
+    tokens = files.tokens,
+    lm = files.lm,
+    nbest = 3,
+    beam_size = 3
+)
+import audio_support_functions as myFunc
+def theaudio(x):
+    waveform, sample_rate = torchaudio.load(x)
+    waveform = waveform.to(device)
+    #myFunc.play_audio(waveform.cpu(), sample_rate)
+    waveform = waveform if sample_rate == ctc_preTrained_object.sample_rate else torchaudio.functional.resample(waveform, sample_rate, ctc_preTrained_object.sample_rate)
+    with torch.inference_mode():
+        pred_tokens, _ = model(waveform)
+    #print(pred_tokens.size())
+    pred_tokens = pred_tokens.to('cpu')
+    beam_search_result = beam_search_decoder(pred_tokens)
+    beam_search_transcript = " ".join(beam_search_result[0][0].words).strip()
+    return beam_search_transcript
+import gradio as gr
+import librosa
+iface = gr.Interface(
+    fn=theaudio,
+    inputs=gr.Audio(type="filepath"),
+    outputs="text",
+    title="Audio Input Example",
+    description="Upload an audio file or record one to see its duration."
+)
+iface.launch()