AI-API / language /moore /mos_stt.py
Anicet
update: minor corrections
cc36729
Raw
History Blame Contribute Delete
1.01 kB
import base64, tempfile, os, torch
from transformers import pipeline
from functions.utils import getAudioDuration
# from huggingface_hub import login
# Checkpoint used for speech recognition; the target language is chosen
# through ``model_kwargs`` when the pipeline is built below.
MODEL_NAME = "facebook/mms-1b-all"

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Shared ASR pipeline, constructed once when this module is imported.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    model_kwargs={"target_lang": "mos"},
    device=device,
)

# Alternative configuration kept for reference (disabled; presumably needs
# a Hugging Face login to download the checkpoint -- TODO confirm):
# login(token=os.environ["HF_TOKEN"])
# MODEL_NAME = "burkimbia/BIA-WHISPER-LARGE-SACHI_V3"
# pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME)
def mooreSTT(audioBase64: str) -> dict:
    """Transcribe base64-encoded audio with the module-level ASR pipeline.

    The payload is decoded, written to a temporary ``.wav`` file, handed to
    ``pipe`` for transcription and to ``getAudioDuration`` for its length.
    The temporary file is removed before returning, whether or not any of
    those steps succeeded.

    Args:
        audioBase64: Base64-encoded bytes of the audio file.

    Returns:
        A dict with the keys ``'text'`` (the ``"text"`` field of the
        pipeline result), ``'language'`` (always ``'mos'``) and
        ``'duration'`` (the value returned by ``getAudioDuration``;
        presumably seconds -- TODO confirm against that helper).

    Raises:
        ValueError: If the payload is malformed base64 (``binascii.Error``
            is a ``ValueError`` subclass) or decodes to zero bytes.
    """
    audio_bytes = base64.b64decode(audioBase64)
    if not audio_bytes:
        # Fail fast with a clear message instead of writing an empty file
        # and sending it through the model.
        raise ValueError("audioBase64 decoded to an empty audio payload")

    # delete=False: the file must outlive the handle, because it is closed
    # before ``pipe`` and ``getAudioDuration`` reopen it by path.
    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    temp_audio_path = temp_file.name
    try:
        # The write sits inside the try so that a failed write cannot leave
        # an orphaned file behind.
        with temp_file:
            temp_file.write(audio_bytes)

        result = pipe(temp_audio_path)
        text = result["text"]
        duration = getAudioDuration(temp_audio_path)
    finally:
        os.remove(temp_audio_path)

    return {'text': text, 'language': 'mos', 'duration': duration}