|
|
from pathlib import Path |
|
|
import csv |
|
|
import time |
|
|
from funasr import AutoModel |
|
|
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad |
|
|
|
|
|
|
|
|
# Root directory holding the downloaded FunASR model folders (ASR, VAD, punctuation).
# NOTE(review): machine-specific absolute path — consider an env var or CLI arg.
model_dir = Path("/Users/jeqin/work/code/Translator/moyoyo_asr_models")
|
|
|
|
|
def save_csv(file_path, rows):
    """Write *rows* to *file_path* as UTF-8 CSV and log the destination.

    Args:
        file_path: Destination path (str or Path) of the CSV file.
        rows: Iterable of row iterables, forwarded to ``csv.writer.writerows``.
    """
    # newline="" is required when handing a file to the csv module; without
    # it, Windows emits an extra blank line between every record.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")
|
|
|
|
|
def export_onnx():
    """Export the ASR, VAD and punctuation models under *model_dir* to ONNX.

    Each model is loaded with FunASR's ``AutoModel`` and exported with
    quantization enabled; the export output (artifact path) is printed.
    """
    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    # BUG FIX: the originals were joined as model_dir / '/speech_...' with a
    # leading slash — pathlib drops the left operand when the right side is
    # absolute, so these resolved from the filesystem root, not model_dir.
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

    # Export each model in turn. quantize=True also emits the int8 variant.
    # Paths are passed as POSIX strings for consistency with run_funasr().
    for model_path in (asr_model_path, vad_model_path, punc_model_path):
        model = AutoModel(model=model_path.as_posix())
        output = model.export(type="onnx", quantize=True, disable_update=True)
        print(output)
|
|
|
|
|
def run_funasr():
    """Benchmark the PyTorch FunASR pipeline (ASR + VAD + punctuation).

    Loads the three models, runs inference over the matched audio files,
    prints per-file timings, and saves results to ``run_funasr.csv``.
    """
    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

    t0 = time.time()
    model = AutoModel(
        model=asr_model_path.as_posix(),
        vad_model=vad_model_path.as_posix(),
        punc_model=punc_model_path.as_posix(),
        log_level="ERROR",
        disable_update=True,
    )
    t1 = time.time()
    print("load model: ", t1 - t0)

    # NOTE(review): hard-coded to a single test clip — looks like a debugging
    # leftover; widen the glob (e.g. "*.mp3") to benchmark the full set.
    audios = Path("/test_data/audio_clips")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
        print(audio)
        t1 = time.time()
        try:
            result = model.generate(input=str(audio), disable_pbar=True,
                                    hotword="")
        except Exception as e:
            print(audio)
            print(e)
            # BUG FIX: the original fell through after the exception and then
            # raised NameError on `result`, masking the real error. Skip the
            # failed file and keep benchmarking the rest.
            continue
        t2 = time.time()
        t = t2 - t1
        print("inference time:", t)
        text = result[0]["text"]
        print("inference result", text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])

    save_csv("run_funasr.csv", rows)
|
|
|
|
|
def run_onnx(quant=True):
    """Benchmark the ONNX FunASR pipeline (VAD, ASR, punctuation).

    Args:
        quant: When True, load the int8-quantized ONNX models and write
            results to ``run_quant.csv``; otherwise ``run_onnx.csv``.
    """
    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

    t0 = time.time()
    # Paths passed as strings for consistency with run_funasr().
    vad_model = Fsmn_vad(vad_model_path.as_posix(), quantize=quant)
    asr_model = SeacoParaformer(asr_model_path.as_posix(), quantize=quant)
    punc_model = CT_Transformer(punc_model_path.as_posix(), quantize=quant)
    t1 = time.time()
    print("load model: ", t1 - t0)

    audios = Path("/Users/moyoyo/code/tests/audios")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*s/*.wav")):
        t1 = time.time()

        # NOTE(review): the VAD output is never consumed — presumably run only
        # to include its cost in the timing. Confirm, or feed its segments to
        # the ASR model.
        vad_res = vad_model(str(audio))

        asr_res = asr_model(str(audio), hotwords="")
        asr_text = asr_res[0]["preds"]

        result = punc_model(asr_text)
        text = result[0]
        t4 = time.time()

        print(text)
        t = t4 - t1
        print("inference time:", t)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])

    file_name = "run_quant.csv" if quant else "run_onnx.csv"
    save_csv(file_name, rows)
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: runs only the PyTorch benchmark. Switch manually to
    # export_onnx() or run_onnx() for the other workflows.
    run_funasr()