Spaces:

Kora3
/

AI-API

Sleeping

File size: 1,005 Bytes

cc36729
7e3c986
a6299ef
6a7cb13
7e3c986
 
cc36729
 
 
6a7cb13
 
 
7e3c986

import base64, tempfile, os, torch
from transformers import pipeline
from functions.utils import getAudioDuration
# from huggingface_hub import login

# Hugging Face checkpoint used for speech recognition.
MODEL_NAME = "facebook/mms-1b-all"

# Run on the GPU when torch reports CUDA support, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Module-level ASR pipeline, built once at import time. The target language
# code "mos" is forwarded to the model through model_kwargs.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    model_kwargs={"target_lang": "mos"},
    device=device,
)

# Disabled alternative setup, kept for reference:
# login(token=os.environ["HF_TOKEN"])
# MODEL_NAME = "burkimbia/BIA-WHISPER-LARGE-SACHI_V3"
# pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME)


def mooreSTT(audioBase64: str) -> dict:
    """Transcribe base64-encoded audio with the module-level ASR pipeline.

    The payload is decoded, written to a temporary file, passed to ``pipe``
    by path, and measured with ``getAudioDuration``. The temporary file is
    removed before returning, whether or not any step failed.

    Args:
        audioBase64: Base64 text of the audio file. Decoding uses the
            default non-strict mode, so characters outside the base64
            alphabet are ignored rather than rejected.

    Returns:
        A dict with keys ``'text'`` (the ``"text"`` entry of the pipeline
        result), ``'language'`` (always ``'mos'``) and ``'duration'`` (the
        value returned by ``getAudioDuration``; presumably seconds -- TODO
        confirm against functions.utils).

    Raises:
        binascii.Error: If the base64 input is incorrectly padded.
        Exception: Anything raised by the pipeline or ``getAudioDuration``
            propagates to the caller after cleanup.
    """
    audioBytes = base64.b64decode(audioBase64)

    # delete=False: the file has to outlive the handle so it can be reopened
    # by path below. The suffix is always ".wav"; the payload's real
    # container format is not checked here.
    tempFile = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tempAudioPath = tempFile.name

    try:
        # Writing happens inside the try so that a failed write (e.g. disk
        # full) still removes the file instead of leaking it.
        with tempFile:
            tempFile.write(audioBytes)

        result = pipe(tempAudioPath)
        text = result["text"]
        duration = getAudioDuration(tempAudioPath)
    finally:
        os.remove(tempAudioPath)

    return {'text': text, 'language': 'mos', 'duration': duration}