yujuanqin commited on
Commit
e4406a3
·
1 Parent(s): b27f71f

update scripts and test_data

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. environment.py +1 -1
  2. scripts/audios.txt +70 -0
  3. scripts/compare_whisper.py +92 -0
  4. scripts/export_onnx.py +18 -0
  5. scripts/infer_finetuned_whisper.py +157 -0
  6. scripts/run_funasr.py +50 -0
  7. scripts/run_funasr_c.py +39 -0
  8. scripts/run_kokoro.py +54 -0
  9. scripts/run_kokoro_sample.py +65 -0
  10. scripts/run_quant.py +51 -0
  11. scripts/run_whisper.py +39 -20
  12. scripts/split_audio.py +35 -0
  13. temp.py +4 -0
  14. tests/test_accuracy_and_delay.py +2 -2
  15. tests/test_data/test_audios.zip +3 -0
  16. tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav +3 -0
  17. tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav +3 -0
  18. tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav +3 -0
  19. tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav +3 -0
  20. tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-0.wav +3 -0
  21. tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-10.wav +3 -0
  22. tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-20.wav +3 -0
  23. tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-30.wav +3 -0
  24. tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-0.wav +3 -0
  25. tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-10.wav +3 -0
  26. tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-20.wav +3 -0
  27. tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-30.wav +3 -0
  28. tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-0.wav +3 -0
  29. tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-10.wav +3 -0
  30. tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-20.wav +3 -0
  31. tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-30.wav +3 -0
  32. tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-0.wav +3 -0
  33. tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-10.wav +3 -0
  34. tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-20.wav +3 -0
  35. tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-30.wav +3 -0
  36. tests/test_data/test_audios/10s-mix/qiaodan-part1-0.wav +3 -0
  37. tests/test_data/test_audios/10s-mix/qiaodan-part1-10.wav +3 -0
  38. tests/test_data/test_audios/10s-mix/qiaodan-part1-20.wav +3 -0
  39. tests/test_data/test_audios/10s-mix/qiaodan-part1-30.wav +3 -0
  40. tests/test_data/test_audios/10s-mix/qiaodan-part2-0.wav +3 -0
  41. tests/test_data/test_audios/10s-mix/qiaodan-part2-10.wav +3 -0
  42. tests/test_data/test_audios/10s-mix/qiaodan-part2-20.wav +3 -0
  43. tests/test_data/test_audios/10s-mix/qiaodan-part2-30.wav +3 -0
  44. tests/test_data/test_audios/10s-mix/randomforest-part1-0.wav +3 -0
  45. tests/test_data/test_audios/10s-mix/randomforest-part1-10.wav +3 -0
  46. tests/test_data/test_audios/10s-mix/randomforest-part1-20.wav +3 -0
  47. tests/test_data/test_audios/10s-mix/randomforest-part1-30.wav +3 -0
  48. tests/test_data/test_audios/10s-mix/zhanghuailong-part1-0.wav +3 -0
  49. tests/test_data/test_audios/10s-mix/zhanghuailong-part1-10.wav +3 -0
  50. tests/test_data/test_audios/10s-mix/zhanghuailong-part1-20.wav +3 -0
environment.py CHANGED
@@ -26,4 +26,4 @@ class RunType(Enum):
26
  code = 0
27
  electron = 1
28
  dev = 2
29
- RUN_TYPE = RunType.dev # electron or web
 
26
  code = 0
27
  electron = 1
28
  dev = 2
29
+ RUN_TYPE = RunType.electron # electron or web
scripts/audios.txt ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-0.wav
2
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-5.wav
3
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-10.wav
4
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-15.wav
5
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav
6
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav
7
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav
8
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav
9
+ /Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/English-chaos-part2.wav
10
+
11
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-0.wav
12
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-5.wav
13
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-10.wav
14
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-15.wav
15
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-0.wav
16
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-10.wav
17
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-20.wav
18
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-30.wav
19
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/es-1.wav
20
+
21
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-0.wav
22
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-5.wav
23
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-10.wav
24
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-15.wav
25
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-0.wav
26
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-10.wav
27
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-20.wav
28
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-30.wav
29
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/fr-1.wav
30
+
31
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-0.wav
32
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-5.wav
33
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-10.wav
34
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-15.wav
35
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-0.wav
36
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-10.wav
37
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-20.wav
38
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-30.wav
39
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/hi-2.wav
40
+
41
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-0.wav
42
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-5.wav
43
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-10.wav
44
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-15.wav
45
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-0.wav
46
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-10.wav
47
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-20.wav
48
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-30.wav
49
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/it-1.wav
50
+
51
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-0.wav
52
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-5.wav
53
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-10.wav
54
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-15.wav
55
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-0.wav
56
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-10.wav
57
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-20.wav
58
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-30.wav
59
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/ja-1.wav
60
+
61
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-0.wav
62
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-5.wav
63
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-10.wav
64
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-15.wav
65
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-0.wav
66
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-10.wav
67
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-20.wav
68
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-30.wav
69
+ /Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/pt-1.wav
70
+
scripts/compare_whisper.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pywhispercpp.model import Model
from pathlib import Path
import time
import csv

from silero_vad.utils_vad import languages


def save_csv(file_path, rows):
    """Write rows to file_path as a UTF-8 CSV file."""
    # newline="" is required by the csv module, otherwise blank lines are
    # inserted between rows on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def _load_model(models_dir, model_name):
    """Load a whisper.cpp model with the shared decoding settings, printing load time."""
    t0 = time.time()
    model = Model(
        model=model_name,
        models_dir=models_dir,
        print_realtime=False,
        print_progress=False,
        print_timestamps=False,
        translate=False,
        # beam_search=1,
        temperature=0.,
        no_context=True
    )
    print("load model time: ", time.time() - t0)
    return model


def run_audios_after_vad(models_dir, audio_dir, model_name):
    """Transcribe per-language VAD-segmented audios and compare against the
    reference transcripts stored next to them as .txt files."""
    model = _load_model(models_dir, model_name)
    rows = [["lang", "file_name", "inference_time", "python_res", "intel_res"]]
    for lang in ["es", "fr", "hi", "it", "ja", "pt"]:
        print("*" * 10, lang, "*" * 10)
        for audio in sorted((audio_dir / lang).glob("*.wav")):
            print("Audio name:", audio.name)
            t1 = time.time()
            output = model.transcribe(str(audio), language=lang)
            t = time.time() - t1
            print("Inference time:", t)
            text = " ".join(seg.text for seg in output)
            print("Text from Python:", text)
            # Reference transcript is best-effort: a missing .txt must not
            # abort the whole comparison run.
            try:
                intel_text = audio.with_suffix(".txt").read_text(encoding="utf-8").strip()
            except Exception as e:
                intel_text = ""
                print(f"Error reading Intel text for {audio.name}: {e}")
            print("Text from Intel :", intel_text)
            rows.append([lang, audio.name, t, text, intel_text])
    save_csv("csv/compare_whisper_intel.csv", rows)


def run_long_audios(models_dir, audios_list, model_name):
    """Transcribe every audio path listed (one per line) in audios_list.

    Blank lines in the list produce empty CSV rows, preserving the grouping
    of the input file in the output.
    """
    model = _load_model(models_dir, model_name)
    rows = [["file_name", "inference_time", "res_text"]]
    for audio in audios_list.read_text().splitlines():
        if not audio:
            rows.append([])  # keep the blank-line grouping in the CSV
            continue
        # Language is encoded as the first dash-separated token of the file
        # name (e.g. "fr-1-0.wav"); anything unrecognised is treated as English.
        lang = Path(audio).name.split('-')[0]
        if lang not in ["es", "fr", "hi", "it", "ja", "pt"]:
            lang = "en"
        print(f"Audio file: {audio}, lang: {lang}")
        t1 = time.time()
        output = model.transcribe(str(audio), language=lang)
        t = time.time() - t1
        print("Inference time:", t)
        text = " ".join(seg.text for seg in output)
        print("Text:", text)
        rows.append([audio, t, text])
    save_csv("csv/compare_whisper.csv", rows)


if __name__ == '__main__':
    models_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
    # model_name = "large-v3-turbo-q5_0"
    model_name = "large-v3-turbo-q8_0"
    # model_name = "small-q8_0"
    # audios_after_vad = Path("/Users/jeqin/work/test/test_yoyotranslator/audios_after_vad/audio2-with-noise")
    audios_list = Path("/Users/jeqin/work/code/TestTranslator/scripts/audios.txt")
    # run_audios_after_vad(models_dir, audios_after_vad, model_name)
    run_long_audios(models_dir, audios_list, model_name)
scripts/export_onnx.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from funasr import AutoModel

# Root folder containing the three FunASR model checkpoints to export.
model_dir = "/Users/moyoyo/code/Translator/moyoyo_asr_models"

# Export the ASR, VAD and punctuation models to quantized ONNX.  The three
# exports are identical except for the checkpoint path, so loop instead of
# repeating the stanza three times.
for checkpoint in (
    'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',  # ASR
    'speech_fsmn_vad_zh-cn-16k-common-pytorch',                                  # VAD
    'punc_ct-transformer_cn-en-common-vocab471067-large',                        # punctuation
):
    model = AutoModel(model=model_dir + '/' + checkpoint)
    # export() returns the directory the ONNX files were written to.
    exported_dir = model.export(type="onnx", quantize=True, disable_update=True)
    print(exported_dir)
scripts/infer_finetuned_whisper.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import os
import time
from pathlib import Path
import csv

import numpy as np
import torch
import librosa
from transformers import WhisperForConditionalGeneration, WhisperProcessor


def save_csv(file_path, rows):
    """Write rows to file_path as a UTF-8 CSV file."""
    # newline="" is required by the csv module, otherwise blank lines are
    # inserted between rows on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def load_audio(audio_path: str, sr: int = 16000):
    """Load an audio file as mono float32 samples resampled to `sr` Hz."""
    audio, _ = librosa.load(audio_path, sr=sr, mono=True)
    return audio


def transcribe_file(
    audio_path: str,
    model,
    processor,
    language: str = "Chinese",
    task: str = "transcribe",
    timestamps: bool = False,
    max_new_tokens: int = 255,
):
    """Transcribe one audio file with a HF Whisper model and return the text.

    NOTE(review): `language` and `task` are accepted but not used in this body —
    the processor/model are configured at load time instead; confirm whether
    they were meant to be forwarded to generate().
    """
    # Prepare input features (16 kHz mono log-mel).
    audio = load_audio(audio_path, sr=16000)
    inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

    # Move features onto the same device as the model.
    device = next(model.parameters()).device
    input_features = inputs["input_features"].to(device)

    # Generate; autocast only takes effect when running on CUDA.
    with torch.inference_mode(), torch.autocast(device_type="cuda", enabled=(device.type == "cuda")):
        generated_ids = model.generate(
            input_features=input_features,
            max_new_tokens=max_new_tokens,
            return_timestamps=timestamps,  # only some versions support this; ignored otherwise
        )

    # Decode token ids back to text.
    text = processor.tokenizer.batch_decode(generated_ids.cpu().numpy(), skip_special_tokens=True)
    return text[0]


def main():
    """CLI entry point: transcribe a single file or a directory of audio files."""
    parser = argparse.ArgumentParser("Simple Whisper Inference")
    parser.add_argument("--model_path", type=str, default="whisper-large-v3-turbo-finetune",
                        help="本地合并模型路径或HF模型名")
    parser.add_argument("--input", type=str, required=True,
                        help="音频文件路径,或目录(将批量处理其中的音频)")
    parser.add_argument("--language", type=str, default="Chinese",
                        help="语言(如 Chinese / English / zh / en)")
    parser.add_argument("--task", type=str, default="transcribe", choices=["transcribe", "translate"],
                        help="任务:转写或翻译")
    parser.add_argument("--timestamps", action="store_true", help="是否返回时间戳(若模型与版本支持)")
    parser.add_argument("--local_files_only", action="store_true", help="仅本地加载,不联网")
    parser.add_argument("--batch_exts", type=str, default=".wav,.mp3,.flac,.m4a",
                        help="当 --input 是目录时,处理这些后缀的文件,逗号分隔")
    args = parser.parse_args()

    # Load processor & model.
    processor = WhisperProcessor.from_pretrained(
        args.model_path,
        language=args.language,
        task=args.task,
        no_timestamps=not args.timestamps,
        local_files_only=args.local_files_only,
    )
    model = WhisperForConditionalGeneration.from_pretrained(
        args.model_path,
        device_map="auto",
        local_files_only=args.local_files_only,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )

    model.generation_config.language = args.language.lower()
    model.generation_config.forced_decoder_ids = None
    model.eval()

    path = Path(args.input)
    if path.is_file():
        text = transcribe_file(
            str(path), model, processor,
            language=args.language, task=args.task, timestamps=args.timestamps
        )
        print(f"{path.name} -> {text}")
    else:
        # Batch-process every matching audio file under the directory.
        exts = {e.strip().lower() for e in args.batch_exts.split(",")}
        files = [p for p in path.rglob("*") if p.suffix.lower() in exts]
        if not files:
            print("目录中未找到可处理的音频文件。")
            return
        for p in sorted(files):
            # One bad file must not abort the batch.
            try:
                t0 = time.time()
                text = transcribe_file(
                    str(p), model, processor,
                    language=args.language, task=args.task, timestamps=args.timestamps
                )
                t1 = time.time()
                print(f"{p.name} -> {text}; time cost: {t1-t0}")
            except Exception as e:
                print(f"{p.name} -> 失败: {e}")


def run():
    """Hard-coded benchmark of the fine-tuned checkpoint on the English test audios."""
    model_path = "/Users/jeqin/Downloads/whisper-large-v3-turbo-finetune-0901"
    lang = "en"
    t0 = time.time()
    processor = WhisperProcessor.from_pretrained(
        model_path,
        language=lang,
        task="transcribe",
        no_timestamps=True,
        local_files_only=True,
    )
    model = WhisperForConditionalGeneration.from_pretrained(
        model_path,
        device_map="mps",
        local_files_only=True,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )

    model.generation_config.language = lang.lower()
    model.generation_config.forced_decoder_ids = None
    model.eval()

    print("load model time: ", time.time() - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*en-ac1-16k/*.wav")):  # *s/randomforest*.wav
        try:
            t0 = time.time()
            text = transcribe_file(
                str(audio), model, processor
            )
            t = time.time() - t0
            print(f"{audio.name} -> {text}; time cost: {t}")
            rows.append([f"{audio.parent.name}/{audio.name}", t, text])
        except Exception as e:
            print(f"{audio.name} -> 失败: {e}")
    save_csv("csv/fine-tune_whisper-0901.csv", rows)


if __name__ == "__main__":
    # main()
    run()
scripts/run_funasr.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from funasr import AutoModel
from pathlib import Path
import time
import csv


def save_csv(file_path, rows):
    """Write rows to file_path as a UTF-8 CSV file."""
    # newline="" is required by the csv module, otherwise blank lines are
    # inserted between rows on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def main():
    """Run the FunASR ASR+VAD+punctuation pipeline over the Chinese test audios
    and record per-file inference time and text in a CSV."""
    model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")

    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'
    t0 = time.time()
    model = AutoModel(
        model=asr_model_path.as_posix(),
        vad_model=vad_model_path.as_posix(),
        punc_model=punc_model_path.as_posix(),
        log_level="ERROR",
        disable_update=True
    )
    t1 = time.time()
    print("load model: ", t1 - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*ac1-16k/Chinese*")):
        print(audio)
        t1 = time.time()
        try:
            result = model.generate(input=str(audio), disable_pbar=True,
                                    hotword="")
        except Exception as e:
            print(audio)
            print(e)
            # BUG FIX: without this `continue`, `result` is unbound below and
            # the whole run dies with a NameError on the first failed file.
            continue
        t2 = time.time()
        t = t2 - t1
        print("inference time:", t)
        text = result[0]["text"]
        print("inference result", text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    save_csv("csv/funasr.csv", rows)


if __name__ == '__main__':
    main()
scripts/run_funasr_c.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
import sys
import time
from pathlib import Path
import csv

# Add the paths of the compiled funasr_py extension module (built out of tree).
sys.path.append('/Users/jeqin/work/code/funasr_wrapper/build')
sys.path.append('/Users/jeqin/work/code/funasr_wrapper/build/src')
import funasr_py


def save_csv(file_path, rows):
    """Write rows to file_path as a UTF-8 CSV file."""
    # newline="" is required by the csv module, otherwise blank lines are
    # inserted between rows on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def main():
    """Benchmark the C++ FunASR wrapper on the Chinese test audios and save a CSV."""
    t0 = time.time()
    config_file = "/Users/jeqin/work/code/funasr_wrapper/testpy/config.json"
    asr = funasr_py.FunasrEasy(config_file)
    # Initialize the model.
    asr.init()
    t1 = time.time()
    print("Initializing model: ", t1 - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*s-ac1/Chinese*")):
        print(audio)
        t1 = time.time()
        result = asr.infer(str(audio))
        text = asr.get_text(result)
        # The result is a handle owned by the C++ side; it must be freed explicitly.
        asr.free_result(result)
        t = time.time() - t1
        print("inference time:", t)
        print(text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    save_csv("csv/funasr_c.csv", rows)


if __name__ == '__main__':
    main()
scripts/run_kokoro.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path
from time import time
import os

import soundfile as sf
from misaki import zh
import onnxruntime

from kokoro_onnx import Kokoro

# providers = onnxruntime.get_available_providers()
# print(f"Available onnx runtime providers: {providers}")


def create_session(model_path):
    """Create an ONNX Runtime inference session for the Kokoro model.

    See list of providers https://github.com/microsoft/onnxruntime/issues/22101#issuecomment-2357667377
    """
    # NOTE(review): only the *second* available provider is kept — this looks
    # like a deliberate provider-selection experiment; confirm before changing.
    providers = onnxruntime.get_available_providers()[1:2]
    print(f"Available onnx runtime providers: {providers}")

    # See session options https://onnxruntime.ai/docs/performance/tune-performance/threading.html#thread-management
    sess_options = onnxruntime.SessionOptions()
    # NOTE(review): the thread count is printed but the intra_op_num_threads
    # assignment below is commented out, so the session uses ORT defaults.
    cpu_count = os.cpu_count() // 2
    print(f"Setting threads to CPU cores count: {cpu_count}")
    # sess_options.intra_op_num_threads = cpu_count
    return onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )


model_folder = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro")
model_path = str(model_folder / "kokoro-quant.onnx")
voice_model_path = str(model_folder / "voices-v1.0.bin")
vocab_config = str(model_folder / "zh_config.json")

texts = [
    "千里之行,始于足下。",
    "我想听你唱首歌",
    "窗前明月光,疑是地上霜。举头望明月,低头思故乡。"
]
voice = "zf_xiaoyi"
session = create_session(model_path)
model = Kokoro.from_session(session, voice_model_path, vocab_config=vocab_config)
g2p = zh.ZHG2P()
# Repeat the batch several times so warm-up versus steady-state timings are visible.
for _round in range(5):
    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)
        start = time()
        samples, sample_rate = model.create(phonemes, voice=voice, speed=1.0, is_phonemes=True)
        time_cost = time() - start
        print(f"time cost: {time_cost} for text: {text}")
        sf.write(f"audio_{index}.wav", samples, sample_rate)
        print(f"Created audio_{index}.wav")
scripts/run_kokoro_sample.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from time import time
import soundfile as sf
from misaki import en, espeak, zh
from kokoro_onnx import Kokoro


def run_en():
    """Synthesize a few English sentences with Kokoro, timing each create() call."""
    # Misaki G2P with an espeak-ng fallback for out-of-vocabulary words.
    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    models = "/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/"
    # Kokoro
    kokoro = Kokoro(f"{models}kokoro-v1.0.onnx", f"{models}voices-v1.0.bin")

    texts = [
        "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
        "For example, the geology and terrain along the railway line.",
        " When choosing solid-state drives, we sometimes see reviews or videos discussing whether a particular solid-state drive has a caching scheme or an uncaching scheme in the performance testing section."
    ]
    for index, text in enumerate(texts):
        # Phonemize first, then synthesize from phonemes.
        phonemes, _ = g2p(text)

        started = time()
        samples, sample_rate = kokoro.create(phonemes, "af_heart", is_phonemes=True)
        time_cost = time() - started
        print(f"time cost: {time_cost} for text: {text}")

        # Save one wav per input sentence.
        sf.write(f"audio{index}.wav", samples, sample_rate)
        print(f"Created audio{index}.wav")


def run_zh():
    """Synthesize a few Chinese sentences with the quantized Kokoro model."""
    # fallback = espeak.EspeakFallback(british=False)
    g2p = zh.ZHG2P()

    models = "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro"
    # Kokoro
    kokoro = Kokoro(f"{models}/kokoro-quant.onnx", f"{models}/voices-v1.0.bin", vocab_config=f"{models}/zh_config.json")

    texts = [
        "千里之行,始于足下。",
        "我想听你唱首歌",
        "窗前明月光,疑是地上霜。举头望明月,低头思故乡。"
    ]
    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        started = time()
        samples, sample_rate = kokoro.create(phonemes, "zf_xiaoyi", is_phonemes=True, speed=1.0)
        time_cost = time() - started
        print(f"time cost: {time_cost} for text: {text}")

        # Save one wav per input sentence.
        sf.write(f"audio{index}.wav", samples, sample_rate)
        print(f"Created audio{index}.wav")


if __name__ == '__main__':
    run_zh()
scripts/run_quant.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path
import time
import csv
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad


def save_csv(file_path, rows):
    """Write rows to file_path as a UTF-8 CSV file."""
    # newline="" is required by the csv module, otherwise blank lines are
    # inserted between rows on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def main():
    """Benchmark the (optionally quantized) ONNX VAD -> ASR -> punctuation
    pipeline on the randomforest test audios and save per-stage timings."""
    model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")

    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'
    t0 = time.time()
    # Flip to False to benchmark the non-quantized ONNX models instead.
    quantize = True
    vad_model = Fsmn_vad(vad_model_path, quantize=quantize)
    asr_model = SeacoParaformer(asr_model_path, quantize=quantize)
    punc_model = CT_Transformer(punc_model_path, quantize=quantize)
    t1 = time.time()
    print("load model time:", t1 - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*s/randomforest*.wav")):
        t1 = time.time()
        # VAD output itself is unused here; the call is kept for its timing.
        vad_res = vad_model(str(audio))
        t2 = time.time()
        print("vad time:", t2 - t1)
        asr_res = asr_model(str(audio), hotwords="")
        asr_text = asr_res[0]["preds"]
        t3 = time.time()
        print("asr time:", t3 - t2)
        print("asr text:", asr_text)
        # Punctuation restoration on the raw ASR text.
        result = punc_model(asr_text)
        text = result[0]
        t4 = time.time()
        print("punc time:", t4 - t3)
        print("punc text:", text)
        t = t4 - t1
        print("inference:", t)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    # BUG FIX: the non-quantized CSV previously went to the current directory
    # ("run_onnx.csv") instead of the csv/ folder used by every other script.
    file_name = "csv/quant.csv" if quantize else "csv/run_onnx.csv"
    save_csv(file_name, rows)


if __name__ == '__main__':
    main()
scripts/run_whisper.py CHANGED
@@ -1,26 +1,45 @@
1
  from pywhispercpp.model import Model
2
  from pathlib import Path
3
  import time
 
4
 
5
  from silero_vad.utils_vad import languages
6
 
7
- models_dir = Path("/Users/jeqin/work/code/Translator/moyoyo_asr_models")
8
- whisper_model = 'large-v3-turbo-q5_0'
9
- model = Model(
10
- model=whisper_model,
11
- models_dir=models_dir,
12
- print_realtime=False,
13
- print_progress=False,
14
- print_timestamps=False,
15
- translate=False,
16
- # beam_search=1,
17
- temperature=0.,
18
- no_context=True
19
- )
20
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios")
21
- for audio in sorted(audios.glob("English*")):
22
- print(audio)
23
- t1 = time.time()
24
- output = model.transcribe(str(audio), language="en")
25
- print("inference time:", time.time()-t1)
26
- print(" ".join([a.text for a in output]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pywhispercpp.model import Model
from pathlib import Path
import time
import csv

from silero_vad.utils_vad import languages


def save_csv(file_path, rows):
    """Write rows to file_path as a UTF-8 CSV file."""
    # newline="" is required by the csv module, otherwise blank lines are
    # inserted between rows on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def main():
    """Benchmark whisper.cpp on the mixed-language test audios, printing
    per-file inference time and transcription."""
    models_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
    whisper_model = 'large-v3-turbo-q5_0'
    t0 = time.time()
    model = Model(
        model=whisper_model,
        models_dir=models_dir,
        print_realtime=False,
        print_progress=False,
        print_timestamps=False,
        translate=False,
        # beam_search=1,
        temperature=0.,
        no_context=True
    )
    print("load model time: ", time.time() - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*-mix/randomforest*.wav")):
        print(audio)
        t1 = time.time()
        # Language forced to Chinese; an initial_prompt experiment is kept
        # commented out: initial_prompt="这是一段中文的会议内容。"
        output = model.transcribe(str(audio), language="zh")
        t = time.time() - t1
        print("inference time:", t)
        text = " ".join(seg.text for seg in output)
        print(text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    # save_csv("csv/whisper.csv", rows)


if __name__ == '__main__':
    main()
scripts/split_audio.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import subprocess
3
+ from subprocess import CompletedProcess
4
+
5
+
6
def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
    """Run *command* through the shell and return the CompletedProcess.

    Args:
        command: shell command line to execute.
        check: raise CalledProcessError on a non-zero exit status.
        capture_output: capture combined stdout+stderr as text and echo it.

    Returns:
        The CompletedProcess; ``stdout`` is a str only when capture_output
        is True, otherwise it is None.
    """
    print(command)
    if capture_output:
        ret = subprocess.run(command, shell=True, check=check,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             universal_newlines=True)
        # Only captured runs have text output; previously this printed the
        # literal "None" for non-captured runs.
        print(ret.stdout)
    else:
        ret = subprocess.run(command, shell=True, check=check)
    return ret
15
+
16
+
17
# Split each source recording into 5 s and 10 s clips, resampled to
# mono 16 kHz (the input format the ASR models expect), using ffmpeg.
current = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios")
audios_5s = current / "5s"
audios_10s = current / "10s"
# mkdir(exist_ok=True) already tolerates existing dirs; no pre-check needed.
audios_5s.mkdir(parents=True, exist_ok=True)
audios_10s.mkdir(parents=True, exist_ok=True)
for f in sorted(current.glob("randomforest*.wav")):
    print(f.name)
    for offset in [0, 5, 10, 15]:
        new_name = f"{f.stem}-{offset}.wav"
        # Paths are quoted so filenames containing spaces survive shell=True.
        # -ac 1 -ar 16000: downmix to mono and resample to 16 kHz.
        command = (f'ffmpeg -i "{f}" -ss 00:00:{offset:02d} -ac 1 -ar 16000 '
                   f'-t 00:00:05 "{audios_5s / new_name}"')
        cmd(command)
    for offset in [0, 10, 20, 30]:
        new_name = f"{f.stem}-{offset}.wav"
        command = (f'ffmpeg -i "{f}" -ss 00:00:{offset:02d} -ac 1 -ar 16000 '
                   f'-t 00:00:10 "{audios_10s / new_name}"')
        cmd(command)
temp.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Dump a Jinja2 chat template (Qwen-style, with <tools>/<tool_call> support
# and <think> reasoning-content handling) to stdout for manual inspection.
# NOTE(review): the template text below must stay byte-identical — it is
# runtime data, not code.
text ="""
{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if message.content is string %}\n        {%- set content = message.content %}\n    {%- else %}\n        {%- set content = '' %}\n    {%- endif %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is string %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in content %}\n                {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n                {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}
"""
print(text)
tests/test_accuracy_and_delay.py CHANGED
@@ -14,7 +14,7 @@ def test_accuracy_and_delay_zh2en(app, log_file, page: TranslatorPage,
14
  audio:Path,):
15
  page.start_zh2en()
16
  translation_lang = "zh2en"
17
- time.sleep(2)
18
  audio_length = get_length(audio)
19
  play_audio(audio)
20
  web_records = page.get_current_node_text(duration=audio_length)
@@ -38,7 +38,7 @@ def test_accuracy_and_delay_en2zh(app, log_file, page: TranslatorPage,
38
  audio:Path):
39
  page.start_en2zh()
40
  translation_lang = "en2zh"
41
- time.sleep(2)
42
  audio_length = get_length(audio)
43
  play_audio(audio)
44
  web_records = page.get_current_node_text(duration=audio_length)
 
14
  audio:Path,):
15
  page.start_zh2en()
16
  translation_lang = "zh2en"
17
+ time.sleep(3)
18
  audio_length = get_length(audio)
19
  play_audio(audio)
20
  web_records = page.get_current_node_text(duration=audio_length)
 
38
  audio:Path):
39
  page.start_en2zh()
40
  translation_lang = "en2zh"
41
+ time.sleep(3)
42
  audio_length = get_length(audio)
43
  play_audio(audio)
44
  web_records = page.get_current_node_text(duration=audio_length)
tests/test_data/test_audios.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3832d0c066ab144e2cda7e37df5144922dbceb0ae2605134eada3c866b0d43
3
+ size 83025760
tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:111f098aa42c139e19c795fc65b14d3b1435a29d75d208592c59e98f5e43144a
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836b29596a0c1609aa91d6d48bc3fd7c73ebda89656744d5ba5691168bebc8a7
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2f7a0f2e768846ad361672b402bb243c30c22631286908f58a8ffb9d4361ad
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef83616e756e449f0307c93b97b8d260bc4c68e213dc878e9f0ca4a46e2a69b7
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbf8ee215f2e447dff00e5d3cfee257a2945f1689c5af2fd995729f02315802d
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e27e550cde48277d1239cb9d9ee40749c520c2f4d5824bfb2fb46b29a8db2fc8
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69bedb0d844819919c44f5280aeb8ce20d3eee30099565bfff926aa883702a3c
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:606f1b5ad35f4edeaf274fd7e54c1b32cf22e905feee795c207de9d837f9031a
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171b9e830af34320b9049564390b36305fd98168a82a45d3bb93f24acb2ede29
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7077bd545e60a791a4f5bef34e6d52a0c580ea7f3cb767bc2808442836347ec5
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85b17f66cc09d5cbdd55f17e52bc376db9d7c3668a1613b74cc60c146a8b2aa
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b25e9278c64ba162139f4785a0419435112653d3c7bd66bb7d6a35e7c20bd12b
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2661796edf3667289ebb1772c3a3fb3d120ae7fb2e96c08899a5261b817fef49
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100272f678ee41dc71c35e1b705bfbf3aef69650562539a1390f87a8ec21a926
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5110fd6774eab81a40c6c11fe5b08cb941588aa3fec0a00aa6bc951907750dec
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace6cb64e830ab31892ea4ca072051b5993bbe393ac63bb887157b6c6808bbf6
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b6d1ba99abc344ec4d31a3a6e34af5ab81dda139512f306bba94c5c52b71edc
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e5aff409e3b83ae387dd6fd9c06f131116191161e2a844ba69febf1e8cbf3f
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfea77a95b17e3dace6dd5504bb5d618597619e1e64ecc91e52748e354331170
3
+ size 320078
tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0111f1d6d65692dce0b3200de36af5fa27b086401e462123baddc11f45fc8ef6
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part1-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:464420cd14b00b08d4240fb5ecc19b2aa053ebbf534cb0aadb8f80f7bf0da668
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part1-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dbd856cf2a9543f3c133483708a6354a9ee06718b496bce03c29a981a56f8fc
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part1-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59c1d0e112e1fafe9ec26a02d786451c890636f83f1dc63dc36fdcec4c9526e4
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part1-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de45323f5741aac05143b6b995568c8e10e0bad5eadde2df9d67e0770ecc57b8
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part2-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:648770939935801a613bc36917df034811ae70012c72d399582b54ff54d5cae0
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part2-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b3f117ccbb2bc9bab9355a3174b3fe9b39a6786ad37c0c20d577b643031aff
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part2-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c918e58777b7d72307089963690a72ee1e67cf733f328fa93a385c204e0e4b02
3
+ size 320078
tests/test_data/test_audios/10s-mix/qiaodan-part2-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85891cc64cb79a385d986915dfcfa4fbc3e0ddf12870c7beafcc76c1e012bf78
3
+ size 320078
tests/test_data/test_audios/10s-mix/randomforest-part1-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802125c360c22476bad1aaabb6c5210d21460ed69884f21a3cb318ea00377345
3
+ size 320078
tests/test_data/test_audios/10s-mix/randomforest-part1-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1605d02574c5a50a91bda5c397079eb6f503311012ed9f15e9b9f90ee7c5f30e
3
+ size 320078
tests/test_data/test_audios/10s-mix/randomforest-part1-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:777b2c46d021c3c10fd0eafce8d6a17bddfc0867119d4537daaff945cf839462
3
+ size 320078
tests/test_data/test_audios/10s-mix/randomforest-part1-30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9da19f35aab393949a408855ea7970f55ae50f13ceaa0f87e576cb1270cc019
3
+ size 320078
tests/test_data/test_audios/10s-mix/zhanghuailong-part1-0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6799ca07ab7c8db1ebb97244bc89022b281464dc864de0267b52192db8a3e107
3
+ size 320078
tests/test_data/test_audios/10s-mix/zhanghuailong-part1-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8709c003dd26f01d0a140ed3c31e0dd84801984b7a50a36b0239e6b9590538f0
3
+ size 320078
tests/test_data/test_audios/10s-mix/zhanghuailong-part1-20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c15a860a7c6994862244e49cb1fd2e19ac633e1b2c2abddedb10bed480968ed0
3
+ size 320078