File size: 1,658 Bytes
1e495f3 b295d06 1e495f3 b295d06 1e495f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
from pathlib import Path
import time
import csv
import numpy as np
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad
from lib.utils import Timer, read_audio
# Default root directory that holds the FunASR model sub-directories.
# NOTE: this is an absolute path on one developer machine; pass `model_dir`
# to FunAsrOnnx explicitly when running anywhere else.
MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
class FunAsrOnnx:
    """Speech-to-text pipeline built on the FunASR ONNX runtime.

    Audio is first decoded by a ``SeacoParaformer`` recognizer, then the
    recognized text is passed through a ``CT_Transformer`` punctuation model.
    """

    def __init__(self, model_dir=MODEL_DIR, quantize: bool = True):
        """Load both models and run one warm-up inference.

        Args:
            model_dir: Directory containing the two model sub-directories.
                May be a ``pathlib.Path`` or a plain string path.
            quantize: Forwarded unchanged as ``quantize=`` to both model
                constructors.
        """
        # Accept plain strings as well: ``str / str`` would raise TypeError.
        model_dir = Path(model_dir)
        asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
        punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'
        # The Fsmn_vad model ('speech_fsmn_vad_zh-cn-16k-common-pytorch') is
        # currently not loaded; audio goes straight to the recognizer.

        # NOTE(review): assumes only the two model loads belong to the timed
        # region and the warm-up runs after it -- confirm.
        with Timer("load FunASR"):
            self.asr_model = SeacoParaformer(asr_model_path, quantize=quantize)
            self.punc_model = CT_Transformer(punc_model_path, quantize=quantize)
        self._warm_up()

    def _warm_up(self) -> None:
        """Run the recognizer once on synthetic audio.

        Only the ASR model is exercised; the punctuation model is not.
        Presumably this moves first-call initialization cost out of the
        first real ``transcribe`` call.
        """
        # One second of fake audio at 16 kHz: 16000 float32 noise samples.
        fake_audio = np.random.randn(16000).astype(np.float32)
        self.asr_model(fake_audio, hotwords="")

    def transcribe(self, audio: np.ndarray):
        """Recognize speech in ``audio`` and punctuate the result.

        Args:
            audio: Waveform samples handed directly to the recognizer.
                Presumably 16 kHz float32 like the warm-up input -- confirm
                against ``read_audio``.

        Returns:
            A ``(text, duration)`` tuple: ``text`` is the first element of
            the punctuation model's result, ``duration`` is the ``duration``
            attribute of the ``Timer`` wrapping both model calls.
        """
        # NOTE(review): assumes Timer fills in ``duration`` when the ``with``
        # block exits, so it is read only after the block -- confirm in lib.utils.
        with Timer("FunASR inference") as t:
            asr_res = self.asr_model(audio, hotwords="")
            # Only the first recognition result is used.
            asr_text = asr_res[0]["preds"]
            result = self.punc_model(asr_text)
        return result[0], t.duration
if __name__ == '__main__':
    # Manual smoke test: transcribe one local recording, then print the
    # recognized text followed by the measured inference time.
    sample_path = Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav")
    recognizer = FunAsrOnnx()
    waveform = read_audio(sample_path)
    transcript, elapsed = recognizer.transcribe(waveform)
    for item in (transcript, elapsed):
        print(item)
|