File size: 1,646 Bytes
e4406a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d1d87d
e4406a3
8a3bc32
e4406a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from funasr import AutoModel
from pathlib import Path
import time
import csv

def save_csv(file_path, rows):
    """Write *rows* to *file_path* as a UTF-8 encoded CSV file.

    Args:
        file_path: Destination path (``str`` or ``Path``). Missing parent
            directories are created before the file is opened.
        rows: Iterable of rows, each an iterable of cell values, handed to
            ``csv.writer.writerows`` unchanged.
    """
    # Make sure the target directory exists so the write cannot fail with
    # FileNotFoundError just because the output folder was never created.
    Path(file_path).parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" end up with blank lines between rows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(rows)
    print(f"write csv to {file_path}")

def main():
    """Benchmark FunASR transcription over the test clips and save a CSV report.

    Loads the ASR model together with its VAD and punctuation models from a
    local model directory, runs ``model.generate`` on every file matched by
    ``*mix/*`` under the clips directory, prints the per-file inference time
    and text, and writes one row per successfully transcribed file to
    ``csv/funasr.csv`` via ``save_csv``.

    A clip whose inference raises is reported on stdout and left out of the
    CSV; the remaining clips are still processed.
    """
    model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")

    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

    output_file = "csv/funasr.csv"
    # Create the output directory before any slow work so that a long run is
    # not lost at the final write because the folder is missing.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # perf_counter is monotonic, so elapsed times are immune to clock changes.
    load_start = time.perf_counter()
    model = AutoModel(
        model=asr_model_path.as_posix(),
        vad_model=vad_model_path.as_posix(),
        punc_model=punc_model_path.as_posix(),
        log_level="ERROR",
        disable_update=True
    )
    print("load model: ", time.perf_counter() - load_start)

    audios = Path("/test_data/audio_clips/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*mix/*")):
        print(audio)
        start = time.perf_counter()
        try:
            result = model.generate(input=str(audio), disable_pbar=True,
                                    hotword="")
        except Exception as e:
            # Deliberately broad: one unreadable clip must not abort the whole
            # benchmark. Skip it so no undefined or stale result is recorded
            # under this file's name.
            print(f"inference failed for {audio}: {e}")
            continue
        elapsed = time.perf_counter() - start
        print("inference time:", elapsed)
        # Guard against an empty result list or a missing "text" entry
        # (presumably possible when no speech is detected - TODO confirm).
        text = result[0].get("text", "") if result else ""
        print("inference result", text)
        rows.append([f"{audio.parent.name}/{audio.name}", elapsed, text])
    save_csv(output_file, rows)



# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()