File size: 1,932 Bytes
ca02ffa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import argparse
import os
from whisper import Whisper
import time
def get_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            calling ``get_args()`` behaves as before. Passing an explicit
            list allows programmatic use.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``wav``,
        ``model_type``, ``model_path``, ``language``, ``task`` and
        ``print_rtf``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing
            or a value is not among the allowed choices.
    """
    parser = argparse.ArgumentParser(
        prog="whisper",
        description="Run Whisper on input audio file"
    )
    parser.add_argument("--wav", "-w", type=str, required=True, help="Input audio file")
    # NOTE(review): the help text mentions only tiny/base/small while
    # `choices` also accepts large, large-v3 and turbo -- confirm which of
    # the two is up to date.
    parser.add_argument(
        "--model_type", "-t",
        type=str,
        choices=["tiny", "base", "small", "large", "large-v3", "turbo"],
        required=True,
        help="model type, only support tiny, base and small currently",
    )
    parser.add_argument(
        "--model_path", "-p",
        type=str,
        required=False,
        default="../models/models-ax650",
        help="model path for *.axmodel, tokens.txt, positional_embedding.bin",
    )
    parser.add_argument(
        "--language", "-l",
        type=str,
        required=False,
        default="zh",
        help="Target language, support en, zh, ja, and others. See languages.py for more options.",
    )
    parser.add_argument("--task", type=str, required=False, choices=["translate", "transcribe"], default="transcribe")
    parser.add_argument("--print_rtf", action="store_true", help="Print Real-Time Factor")
    # parse_args(None) reads sys.argv[1:], preserving the original behavior.
    return parser.parse_args(argv)
def print_args(args):
    """Print the main run options, one ``name: value`` line each.

    Args:
        args: Object exposing the attributes ``wav``, ``model_type``,
            ``model_path``, ``language`` and ``task`` (for example the
            namespace returned by argparse).
    """
    # Tuple order defines the output order.
    shown_fields = ("wav", "model_type", "model_path", "language", "task")
    for field_name in shown_fields:
        value = getattr(args, field_name)
        print(f"{field_name}: {value}")
def main():
    """Run Whisper on the audio file given on the command line.

    Parses the command-line options, echoes them, checks that the input
    file exists, builds the ``Whisper`` model, prints the value returned by
    ``model.run`` and, when ``--print_rtf`` is set, prints the real-time
    factor (processing time divided by audio duration).

    Raises:
        FileNotFoundError: If the path given with ``--wav`` does not exist.
    """
    args = get_args()
    print_args(args)

    # Validate the input path with an explicit exception: an ``assert``
    # would be stripped when Python runs with -O.
    wav_path = args.wav
    if not os.path.exists(wav_path):
        raise FileNotFoundError(f"{wav_path} NOT exist")

    model = Whisper(args.model_type, args.model_path, args.language, args.task)

    # The header below is Chinese for "Prediction result:".
    print("\n预测结果:")

    # perf_counter is monotonic, so the interval is not affected by system
    # clock adjustments; only the inference call itself is timed.
    start = time.perf_counter()
    result = model.run(wav_path)
    end = time.perf_counter()
    print(result)

    if args.print_rtf:
        # Imported lazily so librosa is only needed when RTF is requested.
        import librosa

        samples, sr = librosa.load(wav_path, sr=16000)
        duration = len(samples) / sr
        process_time = end - start
        if duration > 0:
            print(f"RTF: {process_time / duration}")
        else:
            # Empty audio: avoid ZeroDivisionError.
            print("RTF: undefined (audio duration is 0)")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|