File size: 1,005 Bytes
cc36729 7e3c986 a6299ef 6a7cb13 7e3c986 cc36729 6a7cb13 7e3c986 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | import base64, tempfile, os, torch
from transformers import pipeline
from functions.utils import getAudioDuration
# from huggingface_hub import login
# Checkpoint handed to the speech-recognition pipeline built below.
MODEL_NAME = "facebook/mms-1b-all"

# Use the GPU when torch reports CUDA as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Module-level ASR pipeline, constructed once at import time.
# "mos" is forwarded to the model as its target language via model_kwargs.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    model_kwargs={"target_lang": "mos"},
    device=device,
)

# Disabled alternative setup: log in to the Hugging Face Hub with HF_TOKEN
# and build the pipeline from a different checkpoint.
# login(token=os.environ["HF_TOKEN"])
# MODEL_NAME = "burkimbia/BIA-WHISPER-LARGE-SACHI_V3"
# pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME)
def mooreSTT(audioBase64: str) -> dict:
    """Transcribe a base64-encoded audio payload with the module-level pipeline.

    The decoded bytes are written to a temporary file with a ``.wav`` suffix
    so that both ``pipe`` and ``getAudioDuration`` can be given a filesystem
    path. The temporary file is always removed before this function returns
    or raises, including when writing the bytes or transcribing fails.

    Args:
        audioBase64: Base64-encoded audio content.

    Returns:
        A dict with three keys: ``'text'`` (the ``"text"`` entry of the
        pipeline result), ``'language'`` (always ``'mos'``) and
        ``'duration'`` (whatever ``getAudioDuration`` returns for the file;
        presumably seconds -- confirm against ``functions.utils``).

    Raises:
        binascii.Error: If ``audioBase64`` is not correctly padded base64.
    """
    audioBytes = base64.b64decode(audioBase64)

    # delete=False: the file has to survive being closed so it can be
    # reopened by path; removing it is handled by the finally clause below.
    tempFile = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tempAudioPath = tempFile.name
    try:
        # Writing happens inside the try so a failed write (e.g. full disk)
        # cannot leave an orphaned temp file behind.
        with tempFile:
            tempFile.write(audioBytes)
        # The handle is closed here, so the data is flushed before the file
        # is read back by path.
        result = pipe(tempAudioPath)
        text = result["text"]
        duration = getAudioDuration(tempAudioPath)
    finally:
        os.remove(tempAudioPath)
    return {'text': text, 'language': 'mos', 'duration': duration}
|