File size: 798 Bytes
0fea237
 
 
02c9b64
0fea237
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from huggingface_hub import InferenceClient
from os import getenv, path, unlink
from utils import save_audio_to_temp_file, get_model_sample_rate

def automatic_speech_recognition(client: InferenceClient, audio: tuple[int, bytes]) -> str:
    """Transcribe audio to text through the given inference client.

    The audio is written to a temporary file by ``save_audio_to_temp_file``,
    that file is passed to ``client.automatic_speech_recognition``, and the
    file is removed afterwards whether or not the transcription succeeded.

    The model is read from the ``AUDIO_TRANSCRIPTION_MODEL`` environment
    variable. If the variable is unset, ``getenv`` yields ``None`` and that
    value is forwarded as-is to the helper and to the client.

    Args:
        client: Inference client used to run the speech-recognition request.
        audio: Audio payload, forwarded unchanged to
            ``save_audio_to_temp_file``.
            NOTE(review): presumably ``(sample_rate, data)`` -- confirm
            against the helper in ``utils``.

    Returns:
        The ``"text"`` entry of the client's result.

    Raises:
        Exception: Anything raised by the ``utils`` helpers or by the client
            propagates to the caller; only filesystem errors during
            temporary-file clean-up are suppressed.
    """
    temp_file_path = None
    try:
        model_id = getenv("AUDIO_TRANSCRIPTION_MODEL")
        # NOTE(review): the model's sample rate (not the one carried in
        # ``audio``) is handed to the helper -- presumably so the helper can
        # resample; confirm against ``utils``.
        sample_rate = get_model_sample_rate(model_id)
        temp_file_path = save_audio_to_temp_file(sample_rate, audio)
        result = client.automatic_speech_recognition(temp_file_path, model=model_id)
        return result["text"]
    finally:
        # ``temp_file_path`` is still None when a step before the file was
        # created failed, in which case there is nothing to remove.
        if temp_file_path:
            # Best-effort clean-up: delete directly instead of checking for
            # existence first (the file could vanish in between), and ignore
            # only filesystem errors (including "file not found") so a failed
            # delete never masks the transcription result or the original
            # exception.
            try:
                unlink(temp_file_path)
            except OSError:
                pass