"""Benchmark FunASR speech recognition with the PyTorch and ONNX runtimes.

Each ``run_*`` function loads the ASR / VAD / punctuation models found under
``model_dir``, transcribes a set of audio files, and writes one CSV row per
file containing the file name, the inference time in seconds and the text.
"""

from pathlib import Path
import csv
import time

from funasr import AutoModel
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad

model_dir = Path("/Users/jeqin/work/code/Translator/moyoyo_asr_models")

# Sub-directory names of the individual models inside ``model_dir``.
# They are relative on purpose: a leading "/" would make pathlib treat the
# segment as absolute and silently drop ``model_dir`` when joining.
_ASR_MODEL_NAME = 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
_VAD_MODEL_NAME = 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
_PUNC_MODEL_NAME = 'punc_ct-transformer_cn-en-common-vocab471067-large'

# Header row shared by every CSV report written by this script.
_CSV_HEADER = ["file_name", "inference_time", "inference_result"]


def save_csv(file_path, rows):
    """Write ``rows`` to ``file_path`` as UTF-8 encoded CSV.

    Args:
        file_path: Destination path (string or ``Path``); overwritten if it
            already exists.
        rows: Iterable of rows, each row an iterable of cell values.
    """
    # newline="" lets the csv writer control line endings itself; without it
    # an extra blank line appears after every row on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def export_onnx():
    """Export the ASR, VAD and punctuation models to quantized ONNX.

    Prints whatever ``model.export`` returns for each model.
    """
    for model_name in (_ASR_MODEL_NAME, _VAD_MODEL_NAME, _PUNC_MODEL_NAME):
        # Pass a plain string path, matching how run_funasr() loads models.
        model = AutoModel(model=(model_dir / model_name).as_posix())
        output = model.export(type="onnx", quantize=True, disable_update=True)
        print(output)


def run_funasr():
    """Transcribe the test clips with the PyTorch pipeline and save a report.

    Results are written to ``run_funasr.csv`` in the current directory.
    A file whose inference raises is reported on stdout and left out of the
    CSV instead of aborting the whole run.
    """
    asr_model_path = model_dir / _ASR_MODEL_NAME
    vad_model_path = model_dir / _VAD_MODEL_NAME
    punc_model_path = model_dir / _PUNC_MODEL_NAME

    load_start = time.perf_counter()
    model = AutoModel(
        model=asr_model_path.as_posix(),
        vad_model=vad_model_path.as_posix(),
        punc_model=punc_model_path.as_posix(),
        log_level="ERROR",
        disable_update=True,
    )
    print("load model: ", time.perf_counter() - load_start)

    audios = Path("/test_data/audio_clips")
    rows = [list(_CSV_HEADER)]
    for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
        print(audio)
        start = time.perf_counter()
        try:
            result = model.generate(input=str(audio), disable_pbar=True, hotword="")
        except Exception as e:
            # Best effort: report the failure and move on. Falling through
            # would read an undefined (or stale, from the previous file)
            # ``result``.
            print(audio)
            print(e)
            continue
        elapsed = time.perf_counter() - start
        print("inference time:", elapsed)
        text = result[0]["text"]
        print("inference result", text)
        rows.append([f"{audio.parent.name}/{audio.name}", elapsed, text])
    save_csv("run_funasr.csv", rows)


def run_onnx(quant=True):
    """Transcribe the test clips with the ONNX pipeline and save a report.

    Args:
        quant: Passed as ``quantize`` to each ONNX model wrapper. Also selects
            the report name: ``run_quant.csv`` when true, ``run_onnx.csv``
            otherwise.
    """
    asr_model_path = model_dir / _ASR_MODEL_NAME
    vad_model_path = model_dir / _VAD_MODEL_NAME
    punc_model_path = model_dir / _PUNC_MODEL_NAME

    load_start = time.perf_counter()
    vad_model = Fsmn_vad(vad_model_path, quantize=quant)
    asr_model = SeacoParaformer(asr_model_path, quantize=quant)
    punc_model = CT_Transformer(punc_model_path, quantize=quant)
    print("load model: ", time.perf_counter() - load_start)

    audios = Path("/Users/moyoyo/code/tests/audios")
    rows = [list(_CSV_HEADER)]
    for audio in sorted(audios.glob("*s/*.wav")):
        start = time.perf_counter()
        # The VAD output is not consumed by the later stages; the call is
        # kept so the measured time covers VAD + ASR + punctuation.
        vad_model(str(audio))
        asr_res = asr_model(str(audio), hotwords="")
        asr_text = asr_res[0]["preds"]
        text = punc_model(asr_text)[0]
        elapsed = time.perf_counter() - start
        print(text)
        print("inference time:", elapsed)
        rows.append([f"{audio.parent.name}/{audio.name}", elapsed, text])

    file_name = "run_quant.csv" if quant else "run_onnx.csv"
    save_csv(file_name, rows)


if __name__ == '__main__':
    run_funasr()