# Transcribe the audio file "thai.wav" with a Hugging Face
# automatic-speech-recognition pipeline and print the recognized text.
from transformers import pipeline

# Build the ASR pipeline once; "stt-thai" is the model identifier handed to
# transformers (presumably a Hub id or a local directory -- confirm).
# NOTE(review): return_timestamps=True is kept from the original even though
# only the "text" field is read below -- presumably it is there to allow
# long-form audio; confirm before removing.
pipe = pipeline(
    "automatic-speech-recognition",
    model="stt-thai",
    return_timestamps=True,
)

# Ask the model to transcribe (task="transcribe") with the language set to
# "th", then keep only the "text" entry of the pipeline's output.
result = pipe(
    "thai.wav",
    generate_kwargs={"task": "transcribe", "language": "th"},
)["text"]

print(result)