"""Transcribe an audio file to text and print the result.

Usage:
    python <this_script> AUDIO_FILE

The first command-line argument is handed to a Hugging Face
``transformers`` automatic-speech-recognition pipeline and the recognized
text is written to standard output.
"""
import sys

# Model identifier passed to ``transformers.pipeline``.
model = "wav2vec-stt"


def transcribe(audio_file, model_name=model):
    """Return the text recognized in *audio_file*.

    Args:
        audio_file: Input forwarded unchanged to the speech-recognition
            pipeline (the script passes the first command-line argument).
        model_name: Model identifier given to ``transformers.pipeline``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.
    """
    # Imported here rather than at module level so that a usage error is
    # reported immediately, before the cost of importing transformers and
    # loading the model is paid.
    from transformers import pipeline

    pipe = pipeline("automatic-speech-recognition", model=model_name)
    return pipe(audio_file)["text"]


def main(argv=None):
    """Run the command-line entry point.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        ``0`` after printing the transcription, or ``2`` when the audio
        file argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Previously a missing argument surfaced as a bare IndexError, and
        # only after the model had already been loaded.
        prog = argv[0] if argv else "transcribe"
        print(f"usage: {prog} AUDIO_FILE", file=sys.stderr)
        return 2

    # Arguments beyond the first are ignored, as in the original script.
    print(transcribe(argv[1]))
    return 0


if __name__ == "__main__":
    sys.exit(main())