|
|
from pathlib import Path |
|
|
import time |
|
|
import csv |
|
|
import numpy as np |
|
|
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad |
|
|
|
|
|
from lib.utils import Timer, read_audio |
|
|
|
|
|
# Root directory that contains the FunASR model folders; FunAsrOnnx uses it
# as the default value of its ``model_dir`` argument.
# NOTE(review): this is an absolute path on one developer machine - callers
# running elsewhere need to pass ``model_dir`` explicitly.
MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
|
|
|
|
|
class FunAsrOnnx:
    """Speech-to-text pipeline built from two ``funasr_onnx`` models.

    A ``SeacoParaformer`` model produces the raw transcript and a
    ``CT_Transformer`` model adds punctuation to it. Both models are loaded
    (and the ASR model is warmed up) when the instance is constructed.
    """

    def __init__(self, model_dir=MODEL_DIR, quantize=True):
        """Load the ASR and punctuation models found under *model_dir*.

        Args:
            model_dir: Directory containing the two model folders. May be a
                ``pathlib.Path`` or a plain string path.
            quantize: Forwarded unchanged to both model constructors.
        """
        # Accept str as well as Path: the "/" joins below need a Path.
        model_dir = Path(model_dir)
        asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
        punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

        # NOTE(review): the warm-up run is counted inside the "load FunASR"
        # timing - confirm that is the intended measurement.
        with Timer("load FunASR"):
            self.asr_model = SeacoParaformer(asr_model_path, quantize=quantize)
            self.punc_model = CT_Transformer(punc_model_path, quantize=quantize)
            self._warm_up()

    def _warm_up(self):
        """Run the ASR model once on random noise and discard the result.

        Presumably this moves one-time start-up cost out of the first real
        ``transcribe`` call. Only the ASR model is exercised here.
        """
        # 16000 float32 samples; the model folder name says "16k", so this
        # is presumably one second of audio - TODO confirm.
        fake_audio = np.random.randn(16000).astype(np.float32)
        self.asr_model(fake_audio, hotwords="")

    def transcribe(self, audio: np.ndarray):
        """Transcribe *audio* and return the punctuated text with its timing.

        Args:
            audio: Waveform passed straight to the ASR model (no hotwords
                are supplied).

        Returns:
            A ``(text, duration)`` tuple. ``text`` is the first element of
            the punctuation model's output, or ``""`` when the ASR model
            produced nothing. ``duration`` is the ``duration`` attribute of
            the ``Timer`` that wrapped the inference.
        """
        with Timer("FunASR inference") as t:
            asr_res = self.asr_model(audio, hotwords="")
            # The recogniser may return no segments (e.g. for silence);
            # indexing [0] on an empty result would raise IndexError.
            asr_text = asr_res[0]["preds"] if asr_res else ""
            if asr_text:
                text = self.punc_model(asr_text)[0]
            else:
                # Nothing to punctuate - skip the second model entirely.
                text = ""
        # Read the duration only after the timer context has closed
        # (assumes Timer finalises ``duration`` on exit).
        return text, t.duration
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: load the models, transcribe one sample recording,
    # then print the transcript followed by the inference time.
    sample_wav = Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav")
    recognizer = FunAsrOnnx()
    transcript, elapsed = recognizer.transcribe(read_audio(sample_wav))
    print(transcript, elapsed, sep="\n")
|
|
|
|
|
|