Update app.py
Browse files
app.py
CHANGED
|
@@ -3,8 +3,8 @@ import soundfile as sf
|
|
| 3 |
from scipy import signal
|
| 4 |
import numpy as np
|
| 5 |
import torch, torchaudio
|
| 6 |
-
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
| 7 |
-
from faster_whisper import WhisperModel
|
| 8 |
|
| 9 |
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
| 10 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
|
@@ -22,9 +22,10 @@ pipe_is = pipeline(model=MODEL_IS)
|
|
| 22 |
pipe_fo = pipeline(model=MODEL_FO)
|
| 23 |
|
| 24 |
|
| 25 |
-
wdevice = "cuda" if torch.cuda.is_available() else "cpu"
|
| 26 |
-
whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def readwav(a_f):
|
|
@@ -46,12 +47,17 @@ def recc(audio_file,model,processor):
|
|
| 46 |
xcp = processor.batch_decode(pred_ids)
|
| 47 |
return xcp[0]
|
| 48 |
|
| 49 |
-
def whrecc(audio_file, lang, wmodel):
    """Transcribe an audio file with a faster-whisper model.

    Args:
        audio_file: path to the audio file; passed straight to the model,
            which does its own decoding/resampling.
        lang: ISO language code for transcription (e.g. "is").
        wmodel: a faster_whisper.WhisperModel instance.

    Returns:
        The full transcription as a single space-joined string.
    """
    # NOTE: the original also called readwav(audio_file) here, but the
    # result was never used — transcribe() consumes the file path itself.
    # no_repeat_ngram_size=5 suppresses the model looping on repeated phrases.
    xcps, info = wmodel.transcribe(audio=audio_file, language=lang, no_repeat_ngram_size=5)
    # BUG FIX: the comprehension previously read `xtp.text` while binding
    # each segment to `xcp`, raising NameError on the first segment.
    txts = [xcp.text for xcp in xcps]
    return ' '.join(txts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def recis(audio_file):
|
|
@@ -68,7 +74,7 @@ def recfo(audio_file):
|
|
| 68 |
|
| 69 |
|
| 70 |
def recwhis(audio_file):
    """Transcribe *audio_file* in Icelandic using the module-level
    faster-whisper model (``whm_is``)."""
    # Delegate to the shared whisper transcription helper.
    return whrecc(audio_file, "is", whm_is)
|
| 73 |
|
| 74 |
def pick_asrc(au_src):
|
|
|
|
| 3 |
from scipy import signal
|
| 4 |
import numpy as np
|
| 5 |
import torch, torchaudio
|
| 6 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, WhisperForConditionalGeneration, WhisperProcessor
|
| 7 |
+
#from faster_whisper import WhisperModel
|
| 8 |
|
| 9 |
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
| 10 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
|
|
|
| 22 |
pipe_fo = pipeline(model=MODEL_FO)
|
| 23 |
|
| 24 |
|
| 25 |
+
#wdevice = "cuda" if torch.cuda.is_available() else "cpu"
|
| 26 |
+
#whm_is = WhisperModel(model_size_or_path=MODEL_WHIS, device=wdevice)
|
| 27 |
+
whisperprocessor = WhisperProcessor.from_pretrained(MODEL_WHIS)
|
| 28 |
+
whispermodel = WhisperForConditionalGeneration.from_pretrained(MODEL_WHIS)
|
| 29 |
|
| 30 |
|
| 31 |
def readwav(a_f):
|
|
|
|
| 47 |
xcp = processor.batch_decode(pred_ids)
|
| 48 |
return xcp[0]
|
| 49 |
|
| 50 |
+
def whrecc(audio_file, whisperprocessor, whispermodel):
    """Transcribe Icelandic speech with a Hugging Face Whisper model.

    Args:
        audio_file: path to the input audio file.
        whisperprocessor: a transformers ``WhisperProcessor`` (feature
            extractor + tokenizer).
        whispermodel: a ``WhisperForConditionalGeneration`` instance.

    Returns:
        The decoded transcription (str) of the single input utterance.
    """
    # readwav() presumably returns 16 kHz mono float samples matching the
    # sampling_rate passed below — TODO confirm against its definition.
    wav = readwav(audio_file)
    input_features = whisperprocessor(wav, sampling_rate=16000, return_tensors="pt").input_features
    # BUG FIX: `language_id='is'` was previously passed to batch_decode(),
    # which has no such parameter — it was silently ignored. Language must
    # be forced through the decoder prompt ids given to generate().
    forced_ids = whisperprocessor.get_decoder_prompt_ids(language="icelandic", task="transcribe")
    predicted_ids = whispermodel.generate(input_features, forced_decoder_ids=forced_ids)
    dec = whisperprocessor.batch_decode(predicted_ids, skip_special_tokens=True)
    return dec[0]
|
| 61 |
|
| 62 |
|
| 63 |
def recis(audio_file):
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
def recwhis(audio_file):
    """Transcribe *audio_file* with the module-level Whisper
    processor/model pair."""
    # Hand off to the shared Whisper helper; it returns the decoded text.
    transcript = whrecc(audio_file, whisperprocessor, whispermodel)
    return transcript
|
| 79 |
|
| 80 |
def pick_asrc(au_src):
|