osanseviero
/

pyctcdecode_asr

Automatic Speech Recognition

Model card Files Files and versions

Omar Sanseviero committed on Aug 6, 2021

Commit

5913155

·

1 Parent(s): 46d31bb

Update pipeline.py

Files changed (1) hide show

pipeline.py +5 -3

pipeline.py CHANGED Viewed

@@ -9,9 +9,9 @@ class PreTrainedPipeline():
         """
         Initialize model
         """
-        processor = Wav2Vec2Processor.from_pretrained(path)
-        model = Wav2Vec2ForCTC.from_pretrained(path)
-        vocab_list = list(processor.tokenizer.get_vocab().keys())
         # convert ctc blank character representation
         vocab_list[0] = ""
@@ -39,6 +39,8 @@ class PreTrainedPipeline():
             A :obj:`dict`: the returned object should look like {"text": "XXX"}, containing
             the text detected from the input audio.
         """
         return {
             "text": self.decoder.decode(logits)
         }

         """
         Initialize model
         """
+        self.processor = Wav2Vec2Processor.from_pretrained(path)
+        self.model = Wav2Vec2ForCTC.from_pretrained(path)
+        vocab_list = list(self.processor.tokenizer.get_vocab().keys())
         # convert ctc blank character representation
         vocab_list[0] = ""
             A :obj:`dict`: the returned object should look like {"text": "XXX"}, containing
             the text detected from the input audio.
         """
+        input_values = self.processor(arr, return_tensors="pt", sampling_rate=self.sampling_rate).input_values  # Batch size 1
+        logits = self.model(input_values).logits.cpu().detach().numpy()[0]
         return {
             "text": self.decoder.decode(logits)
         }