from pathlib import Path
import time
import csv

import numpy as np
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad

from lib.utils import Timer, read_audio

# Default directory that holds the exported ASR / punctuation model folders.
MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")


class FunAsrOnnx:
    """Speech-to-text pipeline built on the funasr_onnx runtime.

    Runs a SeacoParaformer ASR model followed by a CT_Transformer
    punctuation model. Both models are loaded once in the constructor and
    stored as ``self.asr_model`` and ``self.punc_model``.
    """

    def __init__(self, model_dir=MODEL_DIR, quantize=True):
        """Load the ASR and punctuation models and run one warm-up pass.

        Args:
            model_dir: Directory containing the model sub-folders. A ``str``
                or ``Path`` is accepted.
            quantize: Forwarded unchanged to both model constructors.
        """
        # Accept plain strings too; the `/` joins below require a Path.
        model_dir = Path(model_dir)
        asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
        punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'
        # NOTE: VAD is currently disabled. To re-enable it, load Fsmn_vad from
        # model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'.

        with Timer("load FunASR"):
            self.asr_model = SeacoParaformer(asr_model_path, quantize=quantize)
            self.punc_model = CT_Transformer(punc_model_path, quantize=quantize)
        self._warm_up()

    def _warm_up(self):
        """Run the ASR model once on random noise; the output is discarded."""
        # 16000 float32 samples, i.e. one second of fake audio at 16 kHz.
        fake_audio = np.random.randn(16000).astype(np.float32)
        self.asr_model(fake_audio, hotwords="")

    def transcribe(self, audio: np.ndarray, hotwords: str = ""):
        """Recognize speech in ``audio`` and add punctuation.

        Args:
            audio: Waveform passed straight to the ASR model.
                NOTE(review): presumably 16 kHz mono float32, matching the
                warm-up input -- confirm against ``read_audio``.
            hotwords: Hotword string forwarded to the ASR model. Defaults to
                the empty string, which matches the previous behavior.

        Returns:
            A ``(text, duration)`` tuple: the punctuated transcript (``""``
            when nothing was recognized) and ``t.duration`` as reported by
            the ``Timer`` that wraps recognition and punctuation.
        """
        with Timer("FunASR inference") as t:
            asr_res = self.asr_model(audio, hotwords=hotwords)
            # Guard against an empty result list so indexing cannot fail.
            asr_text = asr_res[0]["preds"] if asr_res else ""
            if asr_text:
                text = self.punc_model(asr_text)[0]
            else:
                # Nothing recognized: skip the punctuation model rather than
                # feeding it an empty input.
                text = ""
        return text, t.duration


if __name__ == '__main__':
    funasr = FunAsrOnnx()
    audio = read_audio(Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav"))
    text, time_cost = funasr.transcribe(audio)
    print(text)
    print(time_cost)