Spaces:

reach-vb
/

asr-pyctcdecode

Runtime error

App Files Files Community

Vaibhav Srivastav committed on Jan 12, 2022

Commit

8d69919

1 Parent(s): f6bce7b

for the love of god please work

Browse files

Files changed (2) hide show

app.py +11 -10
test.wav +0 -0

app.py CHANGED Viewed

@@ -7,9 +7,9 @@ from transformers import AutoProcessor, AutoModelForCTC
 nltk.download("punkt")
-model_name = "facebook/wav2vec2-base-960h"
-processor = AutoProcessor.from_pretrained(model_name)
-model = AutoModelForCTC.from_pretrained(model_name)
 def load_and_fix_data(input_file):
   #read the file
@@ -26,7 +26,8 @@ def fix_transcription_casing(input_sentence):
   sentences = nltk.sent_tokenize(input_sentence)
   return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
-def predict_and_ctc_decode(input_file):
   speech = load_and_fix_data(input_file)
   input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
@@ -40,7 +41,8 @@ def predict_and_ctc_decode(input_file):
   return transcribed_text
-def predict_and_greedy_decode(input_file):
   speech = load_and_fix_data(input_file)
   input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
@@ -54,14 +56,13 @@ def predict_and_greedy_decode(input_file):
   return transcribed_text
 def return_all_predictions(input_file, model_name):
-  print(model_name)
-  return predict_and_ctc_decode(input_file), predict_and_greedy_decode(input_file)
 gr.Interface(return_all_predictions,
              inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record/ Drop audio"), gr.inputs.Dropdown(["facebook/wav2vec2-base-960h", "facebook/hubert-large-ls960-ft"], label="Model Name")],
-             outputs = [gr.outputs.Textbox(label="Beam CTC Decoding"), gr.outputs.Textbox(label="Greedy Decoding")],
-             title="ASR using Wav2Vec 2.0 & pyctcdecode",
-             description = "Extending HF ASR models with pyctcdecode decoder",
              layout = "horizontal",
              examples = [["test1.wav", "facebook/wav2vec2-base-960h"], ["test2.wav", "facebook/hubert-large-ls960-ft"]], theme="huggingface").launch()

 nltk.download("punkt")
+def return_processor_and_model(model_name):
+    return AutoProcessor.from_pretrained(model_name), AutoModelForCTC.from_pretrained(model_name)
 def load_and_fix_data(input_file):
   #read the file
   sentences = nltk.sent_tokenize(input_sentence)
   return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
+def predict_and_ctc_decode(input_file, model_name):
+  processor, model = return_processor_and_model(model_name)
   speech = load_and_fix_data(input_file)
   input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
   return transcribed_text
+def predict_and_greedy_decode(input_file, model_name):
+  processor, model = return_processor_and_model(model_name)
   speech = load_and_fix_data(input_file)
   input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
   return transcribed_text
 def return_all_predictions(input_file, model_name):
+  return predict_and_ctc_decode(input_file, model_name), predict_and_greedy_decode(input_file, model_name)
 gr.Interface(return_all_predictions,
              inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record/ Drop audio"), gr.inputs.Dropdown(["facebook/wav2vec2-base-960h", "facebook/hubert-large-ls960-ft"], label="Model Name")],
+             outputs = [gr.outputs.Textbox(label="Beam CTC decoding"), gr.outputs.Textbox(label="Greedy decoding")],
+             title="ASR using Wav2Vec2/ Hubert & pyctcdecode",
+             description = "Comparing Wav2Vec2 & Hubert with Greedy vs Beam Search decoding",
              layout = "horizontal",
              examples = [["test1.wav", "facebook/wav2vec2-base-960h"], ["test2.wav", "facebook/hubert-large-ls960-ft"]], theme="huggingface").launch()

test.wav DELETED Viewed

Binary file (165 kB)