| import base64, tempfile, os, torch | |
| from transformers import pipeline | |
| from functions.utils import getAudioDuration | |
| # from huggingface_hub import login | |
# Hugging Face model id handed to the ASR pipeline below.
MODEL_NAME = "facebook/mms-1b-all"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The pipeline is built once at import time and reused by every request.
# "target_lang" selects the language adapter; per the Transformers MMS
# documentation, "ignore_mismatched_sizes" has to accompany it so the CTC head
# can be resized to the vocabulary of the selected language.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    model_kwargs={"target_lang": "mos", "ignore_mismatched_sizes": True},
    device=device,
)
| # login(token=os.environ["HF_TOKEN"]) | |
| # MODEL_NAME = "burkimbia/BIA-WHISPER-LARGE-SACHI_V3" | |
| # pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME) | |
def mooreSTT(audioBase64: str) -> dict:
    """Transcribe base64-encoded audio with the module-level ASR pipeline.

    The decoded bytes are written to a temporary file with a ``.wav`` suffix
    because both the pipeline and ``getAudioDuration`` are called with a file
    path. The temporary file is removed before this function returns or
    raises.

    Args:
        audioBase64: Base64-encoded audio content.

    Returns:
        A dict with three keys: ``'text'`` (the ``"text"`` entry of the
        pipeline result), ``'language'`` (always ``'mos'``) and ``'duration'``
        (whatever ``getAudioDuration`` returns for the file; its unit is
        defined by that helper).

    Raises:
        binascii.Error: If ``audioBase64`` is incorrectly padded.
    """
    audioBytes = base64.b64decode(audioBase64)

    # delete=False: the file has to survive closing the handle so that it can
    # be reopened by path further down.
    tempFile = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tempAudioPath = tempFile.name
    try:
        # Writing happens inside the try so that a failed write (e.g. a full
        # disk) still reaches the cleanup in `finally` instead of leaking the
        # file. The handle is closed before the path is handed to readers.
        with tempFile:
            tempFile.write(audioBytes)
        result = pipe(tempAudioPath)
        text = result["text"]
        duration = getAudioDuration(tempAudioPath)
    finally:
        os.remove(tempAudioPath)
    return {'text': text, 'language': 'mos', 'duration': duration}