import base64
import os
import tempfile

import torch
from transformers import pipeline

from functions.utils import getAudioDuration

# from huggingface_hub import login

MODEL_NAME = "facebook/mms-1b-all"

# Run on the GPU when one is visible to torch, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The ASR pipeline is built once at import time and shared by every call to
# mooreSTT; "mos" is the target language code passed to the model.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    model_kwargs={"target_lang": "mos"},
    device=device,
)

# Alternative (disabled) configuration, kept for reference:
# login(token=os.environ["HF_TOKEN"])
# MODEL_NAME = "burkimbia/BIA-WHISPER-LARGE-SACHI_V3"
# pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME)


def mooreSTT(audioBase64: str) -> dict:
    """Transcribe base64-encoded audio with the module-level ASR pipeline.

    The decoded bytes are written to a temporary ``.wav`` file, which is
    passed by path to ``pipe`` and to ``getAudioDuration`` and is always
    removed afterwards.

    Args:
        audioBase64: Base64-encoded audio content. The bytes are written
            verbatim to a file with a ``.wav`` suffix (presumably WAV data;
            no format check is done here).

    Returns:
        A dict with the keys ``'text'`` (the ``"text"`` field of the pipeline
        result), ``'language'`` (always ``'mos'``) and ``'duration'`` (the
        value returned by ``getAudioDuration``; units are defined by that
        helper).

    Raises:
        binascii.Error: If ``audioBase64`` is not valid base64 (raised by
            ``base64.b64decode``).
        Exception: Anything raised by the pipeline or ``getAudioDuration``
            propagates after the temporary file has been removed.
    """
    audioBytes = base64.b64decode(audioBase64)

    # delete=False: the file must outlive the handle so it can be reopened
    # by path below; removal is handled explicitly in the finally clause.
    tempFile = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tempAudioPath = tempFile.name
    try:
        # Write inside the try so a failed write cannot leak the temp file.
        # The handle is closed before the path is handed to the pipeline.
        with tempFile:
            tempFile.write(audioBytes)

        result = pipe(tempAudioPath)
        text = result["text"]
        duration = getAudioDuration(tempAudioPath)
    finally:
        os.remove(tempAudioPath)

    return {'text': text, 'language': 'mos', 'duration': duration}