Update app.py
Browse files
app.py
CHANGED
|
@@ -3,8 +3,8 @@ import soundfile as sf
|
|
| 3 |
from scipy import signal
|
| 4 |
import numpy as np
|
| 5 |
import torch, torchaudio
|
| 6 |
-
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
| 7 |
-
from faster_whisper import WhisperModel
|
| 8 |
|
| 9 |
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
| 10 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
|
@@ -22,9 +22,10 @@ pipe_is = pipeline(model=MODEL_IS)
|
|
| 22 |
pipe_fo = pipeline(model=MODEL_FO)
|
| 23 |
|
| 24 |
|
| 25 |
-
wdevice = "cuda" if torch.cuda.is_available() else "cpu"
|
| 26 |
-
whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def readwav(a_f):
|
|
@@ -46,12 +47,17 @@ def recc(audio_file,model,processor):
|
|
| 46 |
xcp = processor.batch_decode(pred_ids)
|
| 47 |
return xcp[0]
|
| 48 |
|
| 49 |
-
def whrecc(audio_file, lang, wmodel):
    """Transcribe an audio file with a faster-whisper model.

    Args:
        audio_file: path to the audio file; passed straight to the model,
            which does its own decoding/resampling.
        lang: ISO language code for transcription (e.g. "is").
        wmodel: a faster_whisper.WhisperModel instance.

    Returns:
        The full transcription as a single space-joined string.
    """
    # NOTE: the original also called readwav(audio_file) here, but the
    # result was never used — transcribe() consumes the file path itself.
    # no_repeat_ngram_size=5 suppresses the model looping on repeated phrases.
    xcps, info = wmodel.transcribe(audio=audio_file, language=lang, no_repeat_ngram_size=5)
    # BUG FIX: the comprehension previously read `xtp.text` while binding
    # each segment to `xcp`, raising NameError on the first segment.
    txts = [xcp.text for xcp in xcps]
    return ' '.join(txts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def recis(audio_file):
|
|
@@ -68,7 +74,7 @@ def recfo(audio_file):
|
|
| 68 |
|
| 69 |
|
| 70 |
def recwhis(audio_file):
    """Transcribe *audio_file* in Icelandic using the module-level
    faster-whisper model (``whm_is``)."""
    # Delegate to the shared whisper transcription helper.
    return whrecc(audio_file, "is", whm_is)
|
| 73 |
|
| 74 |
def pick_asrc(au_src):
|
|
|
|
| 3 |
from scipy import signal
|
| 4 |
import numpy as np
|
| 5 |
import torch, torchaudio
|
| 6 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, WhisperForConditionalGeneration, WhisperProcessor
|
| 7 |
+
#from faster_whisper import WhisperModel
|
| 8 |
|
| 9 |
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
| 10 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
|
|
|
| 22 |
pipe_fo = pipeline(model=MODEL_FO)
|
| 23 |
|
| 24 |
|
| 25 |
+
#wdevice = "cuda" if torch.cuda.is_available() else "cpu"
|
| 26 |
+
#whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
|
| 27 |
+
whisperprocessor = WhisperProcessor.from_pretrained(MODEL_WHIS)
|
| 28 |
+
whispermodel = WhisperForConditionalGeneration.from_pretrained(MODEL_WHIS)
|
| 29 |
|
| 30 |
|
| 31 |
def readwav(a_f):
|
|
|
|
| 47 |
xcp = processor.batch_decode(pred_ids)
|
| 48 |
return xcp[0]
|
| 49 |
|
| 50 |
+
def whrecc(audio_file, whisperprocessor, whispermodel):
    """Transcribe Icelandic speech with a Hugging Face Whisper model.

    Args:
        audio_file: path to the input audio file.
        whisperprocessor: a transformers ``WhisperProcessor`` (feature
            extractor + tokenizer).
        whispermodel: a ``WhisperForConditionalGeneration`` instance.

    Returns:
        The decoded transcription (str) of the single input utterance.
    """
    # readwav() presumably returns 16 kHz mono float samples matching the
    # sampling_rate passed below — TODO confirm against its definition.
    wav = readwav(audio_file)
    input_features = whisperprocessor(wav, sampling_rate=16000, return_tensors="pt").input_features
    # BUG FIX: `language_id='is'` was previously passed to batch_decode(),
    # which has no such parameter — it was silently ignored. Language must
    # be forced through the decoder prompt ids given to generate().
    forced_ids = whisperprocessor.get_decoder_prompt_ids(language="icelandic", task="transcribe")
    predicted_ids = whispermodel.generate(input_features, forced_decoder_ids=forced_ids)
    dec = whisperprocessor.batch_decode(predicted_ids, skip_special_tokens=True)
    return dec[0]
|
| 61 |
|
| 62 |
|
| 63 |
def recis(audio_file):
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
def recwhis(audio_file):
    """Transcribe *audio_file* with the module-level Whisper
    processor/model pair."""
    # Hand off to the shared Whisper helper; it returns the decoded text.
    transcript = whrecc(audio_file, whisperprocessor, whispermodel)
    return transcript
|
| 79 |
|
| 80 |
def pick_asrc(au_src):
|