File size: 1,005 Bytes
cc36729
7e3c986
a6299ef
6a7cb13
7e3c986
 
cc36729
 
 
6a7cb13
 
 
7e3c986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import base64, tempfile, os, torch
from transformers import pipeline
from functions.utils import getAudioDuration
# from huggingface_hub import login

# Hugging Face checkpoint loaded by the speech-recognition pipeline below.
MODEL_NAME = "facebook/mms-1b-all"

# Use the GPU when torch reports CUDA support, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Shared ASR pipeline, built once when this module is imported.
# "target_lang" is handed to the model loader through model_kwargs.
# NOTE(review): the MMS documentation pairs target_lang with
# ignore_mismatched_sizes=True -- confirm whether it is needed here.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    model_kwargs={"target_lang": "mos"},
    device=device,
)

# Alternative setup, currently disabled:
# login(token=os.environ["HF_TOKEN"])
# MODEL_NAME = "burkimbia/BIA-WHISPER-LARGE-SACHI_V3"
# pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME)


def mooreSTT(audioBase64: str) -> dict:
    """Transcribe base64-encoded audio with the module-level ASR pipeline.

    The decoded bytes are written to a temporary ``.wav``-suffixed file,
    because both ``pipe`` and ``getAudioDuration`` are called with a file
    path. The file is removed before this function exits, whether the
    transcription succeeds or raises.

    Args:
        audioBase64: Base64 text of the audio payload.

    Returns:
        A dict with the keys ``'text'`` (the ``"text"`` entry of the
        pipeline output), ``'language'`` (always ``'mos'``) and
        ``'duration'`` (the value returned by ``getAudioDuration`` for the
        file -- presumably seconds; confirm against ``functions.utils``).

    Raises:
        binascii.Error: If ``audioBase64`` is not valid base64 (for
            example, incorrect padding).
    """
    audioBytes = base64.b64decode(audioBase64)

    tempAudioPath = None
    try:
        # delete=False keeps the file on disk after the handle is closed so
        # it can be reopened by path; removal is handled in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tempFile:
            # Record the path before writing so a failed write (e.g. a full
            # disk) still gets cleaned up instead of leaking the file.
            tempAudioPath = tempFile.name
            tempFile.write(audioBytes)

        result = pipe(tempAudioPath)
        text = result["text"]
        duration = getAudioDuration(tempAudioPath)
    finally:
        # None means the temporary file was never created.
        if tempAudioPath is not None:
            os.remove(tempAudioPath)

    return {'text': text, 'language': 'mos', 'duration': duration}