update scripts and test_data
This view is limited to 50 files because the commit contains too many changes.
- environment.py +1 -1
- scripts/audios.txt +70 -0
- scripts/compare_whisper.py +92 -0
- scripts/export_onnx.py +18 -0
- scripts/infer_finetuned_whisper.py +157 -0
- scripts/run_funasr.py +50 -0
- scripts/run_funasr_c.py +39 -0
- scripts/run_kokoro.py +54 -0
- scripts/run_kokoro_sample.py +65 -0
- scripts/run_quant.py +51 -0
- scripts/run_whisper.py +39 -20
- scripts/split_audio.py +35 -0
- temp.py +4 -0
- tests/test_accuracy_and_delay.py +2 -2
- tests/test_data/test_audios.zip +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-0.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-10.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-20.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-30.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-0.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-10.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-20.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-30.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-0.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-10.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-20.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-30.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-0.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-10.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-20.wav +3 -0
- tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-30.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part1-0.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part1-10.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part1-20.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part1-30.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part2-0.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part2-10.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part2-20.wav +3 -0
- tests/test_data/test_audios/10s-mix/qiaodan-part2-30.wav +3 -0
- tests/test_data/test_audios/10s-mix/randomforest-part1-0.wav +3 -0
- tests/test_data/test_audios/10s-mix/randomforest-part1-10.wav +3 -0
- tests/test_data/test_audios/10s-mix/randomforest-part1-20.wav +3 -0
- tests/test_data/test_audios/10s-mix/randomforest-part1-30.wav +3 -0
- tests/test_data/test_audios/10s-mix/zhanghuailong-part1-0.wav +3 -0
- tests/test_data/test_audios/10s-mix/zhanghuailong-part1-10.wav +3 -0
- tests/test_data/test_audios/10s-mix/zhanghuailong-part1-20.wav +3 -0

environment.py
CHANGED
@@ -26,4 +26,4 @@ class RunType(Enum):
     code = 0
     electron = 1
     dev = 2
-RUN_TYPE = RunType.
+RUN_TYPE = RunType.electron  # electron or web

scripts/audios.txt
ADDED
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-0.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-5.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-10.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-15.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav
/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/English-chaos-part2.wav

/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-5.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-15.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-20.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/es-1-30.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/es-1.wav

/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-5.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/fr-1-15.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-20.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/fr-1-30.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/fr-1.wav

/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-5.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/hi-2-15.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-20.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/hi-2-30.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/hi-2.wav

/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-5.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/it-1-15.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-20.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/it-1-30.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/it-1.wav

/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-5.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/ja-1-15.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-20.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/ja-1-30.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/ja-1.wav

/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-5.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/pt-1-15.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-0.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-10.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-20.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/10s/pt-1-30.wav
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/pt-1.wav
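
scripts/audios.txt groups clips by source audio: the 5 s and 10 s segments plus the full-length file for one language, then a blank line before the next language. A minimal sketch (assuming the list is read from scripts/audios.txt) of parsing those blank-line-separated groups; run_long_audios in scripts/compare_whisper.py below consumes the same list line by line:

from pathlib import Path

# split the list into blank-line-separated groups, one group per source audio/language
audios_list = Path("scripts/audios.txt")
groups, current = [], []
for line in audios_list.read_text().splitlines():
    if line.strip():
        current.append(line.strip())
    elif current:
        groups.append(current)
        current = []
if current:
    groups.append(current)

for group in groups:
    # the last entry in each group is the full-length source file
    print(f"{Path(group[-1]).name}: {len(group)} clips")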

scripts/compare_whisper.py
ADDED
from pywhispercpp.model import Model
from pathlib import Path
import time
import csv

from silero_vad.utils_vad import languages


def save_csv(file_path, rows):
    with open(file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")

def run_audios_after_vad(models_dir, audio_dir, model_name):
    t0 = time.time()
    model = Model(
        model=model_name,
        models_dir=models_dir,
        print_realtime=False,
        print_progress=False,
        print_timestamps=False,
        translate=False,
        # beam_search=1,
        temperature=0.,
        no_context=True
    )
    print("load model time: ", time.time()-t0)
    rows = [["lang", "file_name", "inference_time", "python_res", "intel_res"]]
    for lang in ["es", "fr", "hi", "it", "ja", "pt"]:
        print("*" * 10, lang, "*" * 10)
        for audio in sorted(list((audio_dir/lang).glob("*.wav"))):
            print("Audio name:", audio.name)
            t1 = time.time()
            output = model.transcribe(str(audio), language=lang)
            t = time.time() - t1
            print("Inference time:", t)
            # print(output)
            text = " ".join([a.text for a in output])
            print("Text from Python:", text)
            try:
                with open(audio.with_suffix(".txt"), encoding="utf-8") as f:
                    intel_text = f.read().strip()
            except Exception as e:
                intel_text = ""
                print(f"Error reading Intel text for {audio.name}: {e}")
            print("Text from Intel :", intel_text)
            rows.append([lang, audio.name, t, text, intel_text])
    save_csv("csv/compare_whisper_intel.csv", rows)

def run_long_audios(models_dir, audios_list, model_name):
    t0 = time.time()
    model = Model(
        model=model_name,
        models_dir=models_dir,
        print_realtime=False,
        print_progress=False,
        print_timestamps=False,
        translate=False,
        # beam_search=1,
        temperature=0.,
        no_context=True
    )
    print("load model time: ", time.time() - t0)
    rows = [["file_name", "inference_time", "res_text"]]
    audios = audios_list.read_text().splitlines()
    for audio in audios:
        if not audio:
            rows.append([])
            continue
        lang = Path(audio).name.split('-')[0]
        if lang not in ["es", "fr", "hi", "it", "ja", "pt"]:
            lang = "en"
        print(f"Audio file: {audio}, lang: {lang}")
        t1 = time.time()
        output = model.transcribe(str(audio), language=lang)
        t = time.time() - t1
        print("Inference time:", t)
        # print(output)
        text = " ".join([a.text for a in output])
        print("Text:", text)
        rows.append([audio, t, text])
    save_csv("csv/compare_whisper.csv", rows)

if __name__ == '__main__':
    models_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
    # model_name = "large-v3-turbo-q5_0"
    model_name = "large-v3-turbo-q8_0"
    # model_name = "small-q8_0"
    # audios_after_vad = Path("/Users/jeqin/work/test/test_yoyotranslator/audios_after_vad/audio2-with-noise")
    audios_list = Path("/Users/jeqin/work/code/TestTranslator/scripts/audios.txt")
    # run_audios_after_vad(models_dir, audios_after_vad, model_name)
    run_long_audios(models_dir, audios_list, model_name)
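
Note that run_audios_after_vad expects a reference transcript next to each clip with the same stem (it opens audio.with_suffix(".txt") and records it as the Intel result). A minimal sketch, with hypothetical paths, of laying out one clip/reference pair:

from pathlib import Path

# hypothetical layout: audios_after_vad/<lang>/<clip>.wav plus a sibling .txt reference
clip = Path("audios_after_vad/audio2-with-noise/es/clip-0.wav")
clip.parent.mkdir(parents=True, exist_ok=True)
clip.with_suffix(".txt").write_text("reference transcript for clip-0", encoding="utf-8")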

scripts/export_onnx.py
ADDED
from funasr import AutoModel

model_dir = "/Users/moyoyo/code/Translator/moyoyo_asr_models"
asr_model_path = model_dir + '/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
vad_model_path = model_dir + '/speech_fsmn_vad_zh-cn-16k-common-pytorch'
punc_model_path = model_dir + '/punc_ct-transformer_cn-en-common-vocab471067-large'

model = AutoModel(model=asr_model_path)
model_dir = model.export(type="onnx", quantize=True, disable_update=True)
print(model_dir)

model = AutoModel(model=vad_model_path)
model_dir = model.export(type="onnx", quantize=True, disable_update=True)
print(model_dir)

model = AutoModel(model=punc_model_path)
model_dir = model.export(type="onnx", quantize=True, disable_update=True)
print(model_dir)

scripts/infer_finetuned_whisper.py
ADDED
import argparse
import os
import time
from pathlib import Path
import csv

import numpy as np
import torch
import librosa
from transformers import WhisperForConditionalGeneration, WhisperProcessor

def save_csv(file_path, rows):
    with open(file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")


def load_audio(audio_path: str, sr: int = 16000):
    # load the audio as 16 kHz mono float32 numpy
    audio, _ = librosa.load(audio_path, sr=sr, mono=True)
    return audio


def transcribe_file(
    audio_path: str,
    model,
    processor,
    language: str = "Chinese",
    task: str = "transcribe",
    timestamps: bool = False,
    max_new_tokens: int = 255,
):
    # prepare input features
    audio = load_audio(audio_path, sr=16000)
    inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

    # move to the model's device
    device = next(model.parameters()).device
    input_features = inputs["input_features"].to(device)

    # generate
    with torch.inference_mode(), torch.autocast(device_type="cuda", enabled=(device.type == "cuda")):
        generated_ids = model.generate(
            input_features=input_features,
            max_new_tokens=max_new_tokens,
            return_timestamps=timestamps,  # only supported by some versions; ignored otherwise
        )

    # decode
    text = processor.tokenizer.batch_decode(generated_ids.cpu().numpy(), skip_special_tokens=True)
    return text[0]


def main():
    parser = argparse.ArgumentParser("Simple Whisper Inference")
    parser.add_argument("--model_path", type=str, default="whisper-large-v3-turbo-finetune",
                        help="local merged model path or HF model name")
    parser.add_argument("--input", type=str, required=True,
                        help="audio file path, or a directory (all audio files inside are processed)")
    parser.add_argument("--language", type=str, default="Chinese",
                        help="language (e.g. Chinese / English / zh / en)")
    parser.add_argument("--task", type=str, default="transcribe", choices=["transcribe", "translate"],
                        help="task: transcribe or translate")
    parser.add_argument("--timestamps", action="store_true", help="return timestamps (if the model and version support it)")
    parser.add_argument("--local_files_only", action="store_true", help="load local files only, no network access")
    parser.add_argument("--batch_exts", type=str, default=".wav,.mp3,.flac,.m4a",
                        help="when --input is a directory, process files with these comma-separated suffixes")
    args = parser.parse_args()

    # load processor & model
    processor = WhisperProcessor.from_pretrained(
        args.model_path,
        language=args.language,
        task=args.task,
        no_timestamps=not args.timestamps,
        local_files_only=args.local_files_only,
    )
    model = WhisperForConditionalGeneration.from_pretrained(
        args.model_path,
        device_map="auto",
        local_files_only=args.local_files_only,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )

    model.generation_config.language = args.language.lower()
    model.generation_config.forced_decoder_ids = None
    model.eval()

    path = Path(args.input)
    if path.is_file():
        text = transcribe_file(
            str(path), model, processor,
            language=args.language, task=args.task, timestamps=args.timestamps
        )
        print(f"{path.name} -> {text}")
    else:
        # batch over a directory
        exts = {e.strip().lower() for e in args.batch_exts.split(",")}
        files = [p for p in path.rglob("*") if p.suffix.lower() in exts]
        if not files:
            print("No processable audio files found in the directory.")
            return
        for p in sorted(files):
            try:
                t0 = time.time()
                text = transcribe_file(
                    str(p), model, processor,
                    language=args.language, task=args.task, timestamps=args.timestamps
                )
                t1 = time.time()
                print(f"{p.name} -> {text}; time cost: {t1-t0}")
            except Exception as e:
                print(f"{p.name} -> failed: {e}")

def run():
    model_path = "/Users/jeqin/Downloads/whisper-large-v3-turbo-finetune-0901"
    lang = "en"
    t0 = time.time()
    processor = WhisperProcessor.from_pretrained(
        model_path,
        language=lang,
        task="transcribe",
        no_timestamps=True,
        local_files_only=True,
    )
    model = WhisperForConditionalGeneration.from_pretrained(
        model_path,
        device_map="mps",
        local_files_only=True,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )

    model.generation_config.language = lang.lower()
    model.generation_config.forced_decoder_ids = None
    model.eval()

    print("load model time: ", time.time() - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*en-ac1-16k/*.wav")):  # *s/randomforest*.wav
        try:
            t0 = time.time()
            text = transcribe_file(
                str(audio), model, processor
            )
            t = time.time()-t0
            print(f"{audio.name} -> {text}; time cost: {t}")
            rows.append([f"{audio.parent.name}/{audio.name}", t, text])
        except Exception as e:
            print(f"{audio.name} -> failed: {e}")
    save_csv("csv/fine-tune_whisper-0901.csv", rows)

if __name__ == "__main__":
    # main()
    run()
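
As committed, the __main__ block calls run() with hard-coded local paths, while the argparse entry point main() remains available. A minimal sketch, with hypothetical checkpoint and input paths, of driving main() from Python:

import subprocess

# hypothetical paths; --input may be a single audio file or a directory
subprocess.run([
    "python", "scripts/infer_finetuned_whisper.py",
    "--model_path", "whisper-large-v3-turbo-finetune",
    "--input", "tests/test_data/test_audios/10s-en-ac1-16k",
    "--language", "English",
    "--local_files_only",
], check=True)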

scripts/run_funasr.py
ADDED
from funasr import AutoModel
from pathlib import Path
import time
import csv

def save_csv(file_path, rows):
    with open(file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")

def main():
    model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")

    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'
    t0 = time.time()
    model = AutoModel(
        model=asr_model_path.as_posix(),
        vad_model=vad_model_path.as_posix(),
        punc_model=punc_model_path.as_posix(),
        log_level="ERROR",
        disable_update=True
    )
    t1 = time.time()
    print("load model: ", t1 - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*ac1-16k/Chinese*")):
        print(audio)
        t1 = time.time()
        try:
            result = model.generate(input=str(audio), disable_pbar=True,
                                    hotword="")
        except Exception as e:
            print(audio)
            print(e)
            continue  # skip files that fail, otherwise result below would be undefined
        t2 = time.time()
        t = t2-t1
        print("inference time:", t)
        text = result[0]["text"]
        print("inference result", text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    save_csv(f"csv/funasr.csv", rows)


if __name__ == '__main__':
    main()

scripts/run_funasr_c.py
ADDED
import sys
import time
from pathlib import Path
import csv

sys.path.append('/Users/jeqin/work/code/funasr_wrapper/build')  # add the compiled module path
sys.path.append('/Users/jeqin/work/code/funasr_wrapper/build/src')  # add the compiled module path
import funasr_py

def save_csv(file_path, rows):
    with open(file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")

def main():
    t0 = time.time()
    config_file = "/Users/jeqin/work/code/funasr_wrapper/testpy/config.json"
    asr = funasr_py.FunasrEasy(config_file)
    # initialize the model
    asr.init()
    t1 = time.time()
    print("Initializing model: ", t1-t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*s-ac1/Chinese*")):
        print(audio)
        t1 = time.time()
        result = asr.infer(str(audio))
        text = asr.get_text(result)
        asr.free_result(result)
        t = time.time() - t1
        print("inference time:", t)
        print(text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    save_csv("csv/funasr_c.csv", rows)

if __name__ == '__main__':
    main()

scripts/run_kokoro.py
ADDED
from pathlib import Path
from time import time
import os

import soundfile as sf
from misaki import zh
import onnxruntime

from kokoro_onnx import Kokoro

# providers = onnxruntime.get_available_providers()
# print(f"Available onnx runtime providers: {providers}")

def create_session(model_path):
    # See list of providers https://github.com/microsoft/onnxruntime/issues/22101#issuecomment-2357667377
    providers = onnxruntime.get_available_providers()
    providers = providers[1:2]
    print(f"Available onnx runtime providers: {providers}")

    # See session options https://onnxruntime.ai/docs/performance/tune-performance/threading.html#thread-management
    sess_options = onnxruntime.SessionOptions()
    cpu_count = os.cpu_count() // 2
    print(f"Setting threads to CPU cores count: {cpu_count}")
    # sess_options.intra_op_num_threads = cpu_count
    session = onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )
    return session

model_folder = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro")
model_path = str(model_folder/"kokoro-quant.onnx")
voice_model_path = str(model_folder/"voices-v1.0.bin")
vocab_config = str(model_folder/"zh_config.json")

texts = [
    "千里之行,始于足下。",
    "我想听你唱首歌",
    "窗前明月光,疑是地上霜。举头望明月,低头思故乡。"
]
voice = "zf_xiaoyi"
session = create_session(model_path)
model = Kokoro.from_session(session, voice_model_path, vocab_config=vocab_config)
g2p = zh.ZHG2P()
for i in range(5):
    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)
        start = time()
        samples, sample_rate = model.create(phonemes, voice=voice, speed=1.0, is_phonemes=True)
        end = time()
        time_cost = end - start
        print(f"time cost: {time_cost} for text: {text}")
        sf.write(f"audio_{index}.wav", samples, sample_rate)
        print(f"Created audio_{index}.wav")
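
create_session() above keeps providers[1:2], i.e. whichever execution provider happens to be listed second by onnxruntime on the current machine. A sketch of pinning the provider explicitly instead (CPUExecutionProvider is always available; the model path mirrors the one used above):

import onnxruntime

model_path = "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro/kokoro-quant.onnx"
sess_options = onnxruntime.SessionOptions()
session = onnxruntime.InferenceSession(
    model_path,
    providers=["CPUExecutionProvider"],  # pin a known provider rather than relying on list order
    sess_options=sess_options,
)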

scripts/run_kokoro_sample.py
ADDED
from time import time
import soundfile as sf
from misaki import en, espeak, zh
from kokoro_onnx import Kokoro


def run_en():
    # Misaki G2P with espeak-ng fallback
    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    models = "/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/"
    # Kokoro
    kokoro = Kokoro(f"{models}kokoro-v1.0.onnx", f"{models}voices-v1.0.bin")

    texts = [
        "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
        "For example, the geology and terrain along the railway line.",
        " When choosing solid-state drives, we sometimes see reviews or videos discussing whether a particular solid-state drive has a caching scheme or an uncaching scheme in the performance testing section."
    ]
    for index, text in enumerate(texts):
        # Phonemize
        # text = "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models."
        phonemes, _ = g2p(text)

        # Create
        start = time()
        samples, sample_rate = kokoro.create(phonemes, "af_heart", is_phonemes=True)
        end = time()
        time_cost = end - start
        print(f"time cost: {time_cost} for text: {text}")
        # Save
        sf.write(f"audio{index}.wav", samples, sample_rate)
        print(f"Created audio{index}.wav")

def run_zh():
    # Misaki G2P with espeak-ng fallback
    # fallback = espeak.EspeakFallback(british=False)
    g2p = zh.ZHG2P()

    models = "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro"
    # Kokoro
    kokoro = Kokoro(f"{models}/kokoro-quant.onnx", f"{models}/voices-v1.0.bin", vocab_config=f"{models}/zh_config.json")

    texts = [
        "千里之行,始于足下。",
        "我想听你唱首歌",
        "窗前明月光,疑是地上霜。举头望明月,低头思故乡。"
    ]
    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        # Create
        start = time()
        samples, sample_rate = kokoro.create(phonemes, "zf_xiaoyi", is_phonemes=True, speed=1.0)
        end = time()
        time_cost = end - start
        print(f"time cost: {time_cost} for text: {text}")
        # Save
        sf.write(f"audio{index}.wav", samples, sample_rate)
        print(f"Created audio{index}.wav")

if __name__ == '__main__':
    run_zh()

scripts/run_quant.py
ADDED
from pathlib import Path
import time
import csv
from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad

def save_csv(file_path, rows):
    with open(file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")

def main():
    model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")

    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'
    t0 = time.time()
    quantize = True
    vad_model = Fsmn_vad(vad_model_path, quantize=quantize)
    asr_model = SeacoParaformer(asr_model_path, quantize=quantize)
    punc_model = CT_Transformer(punc_model_path, quantize=quantize)
    t1 = time.time()
    print("load model time:", t1 - t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*s/randomforest*.wav")):
        t1 = time.time()
        vad_res = vad_model(str(audio))
        t2 = time.time()
        print("vad time:", t2-t1)
        asr_res = asr_model(str(audio), hotwords="")
        asr_text = asr_res[0]["preds"]
        t3 = time.time()
        print("asr time:", t3-t2)
        print("asr text:", asr_text)
        result = punc_model(asr_text)
        text = result[0]
        t4 = time.time()
        print("punc time:", t4-t3)
        print("punc text:", text)
        # print(text)
        # vad_res = vad_model(str(audio))
        # t5 = time.time()
        # print("vad time:", t5 - t4)
        t = t4-t1
        print("inference:", t)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    file_name = "csv/quant.csv" if quantize else "run_onnx.csv"
    save_csv(file_name, rows)

if __name__ == '__main__':
    main()

scripts/run_whisper.py
CHANGED
@@ -1,26 +1,45 @@
from pywhispercpp.model import Model
from pathlib import Path
import time
import csv

from silero_vad.utils_vad import languages


def save_csv(file_path, rows):
    with open(file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")

def main():
    models_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
    whisper_model = 'large-v3-turbo-q5_0'
    t0 = time.time()
    model = Model(
        model=whisper_model,
        models_dir=models_dir,
        print_realtime=False,
        print_progress=False,
        print_timestamps=False,
        translate=False,
        # beam_search=1,
        temperature=0.,
        no_context=True
    )
    print("load model time: ", time.time()-t0)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*-mix/randomforest*.wav")):
        print(audio)
        t1 = time.time()
        output = model.transcribe(str(audio), language="zh")  # initial_prompt="这是一段中文的会议内容。"
        t = time.time() - t1
        print("inference time:", t)
        text = " ".join([a.text for a in output])
        print(text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    # save_csv("csv/whisper.csv", rows)

if __name__ == '__main__':
    main()

scripts/split_audio.py
ADDED
from pathlib import Path
import subprocess
from subprocess import CompletedProcess


def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
    print(command)
    if capture_output:
        ret = subprocess.run(command, shell=True, check=check, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             universal_newlines=True)
    else:
        ret = subprocess.run(command, shell=True, check=check)
    print(ret.stdout)
    return ret


current = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios")
audios_5s = current/"5s"
audios_10s = current/"10s"
if not audios_5s.exists():
    audios_5s.mkdir(parents=True, exist_ok=True)
if not audios_10s.exists():
    audios_10s.mkdir(parents=True, exist_ok=True)
for f in sorted(current.glob("randomforest*.wav")):
    file_name = f.name
    print(file_name)
    for i in [0, 5, 10, 15]:
        new_name = f"{f.stem}-{i}.wav"
        # -ac 1 -ar 16000
        command = f"ffmpeg -i {f} -ss 00:00:{str(i).zfill(2)} -ac 1 -ar 16000 -t 00:00:05 {audios_5s/new_name}"
        cmd(command)
    for i in [0, 10, 20, 30]:
        new_name = f"{f.stem}-{i}.wav"
        command = f"ffmpeg -i {f} -ss 00:00:{str(i).zfill(2)} -ac 1 -ar 16000 -t 00:00:10 {audios_10s/new_name}"
        cmd(command)
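
The cmd() helper above interpolates paths into a single shell string, which breaks on file names containing spaces; a sketch of the same ffmpeg cut built with list arguments and no shell:

import subprocess
from pathlib import Path

def cut(src: Path, dst: Path, start_s: int, dur_s: int) -> None:
    # same flags as above: seek, downmix to mono, resample to 16 kHz, fixed duration
    subprocess.run([
        "ffmpeg", "-i", str(src),
        "-ss", f"00:00:{start_s:02d}",
        "-ac", "1", "-ar", "16000",
        "-t", f"00:00:{dur_s:02d}",
        str(dst),
    ], check=True)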

temp.py
ADDED
text = """
{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}
"""
print(text)

tests/test_accuracy_and_delay.py
CHANGED
@@ -14,7 +14,7 @@ def test_accuracy_and_delay_zh2en(app, log_file, page: TranslatorPage,
                                    audio:Path,):
     page.start_zh2en()
     translation_lang = "zh2en"
-    time.sleep(
+    time.sleep(3)
     audio_length = get_length(audio)
     play_audio(audio)
     web_records = page.get_current_node_text(duration=audio_length)
@@ -38,7 +38,7 @@ def test_accuracy_and_delay_en2zh(app, log_file, page: TranslatorPage,
                                    audio:Path):
     page.start_en2zh()
     translation_lang = "en2zh"
-    time.sleep(
+    time.sleep(3)
     audio_length = get_length(audio)
     play_audio(audio)
     web_records = page.get_current_node_text(duration=audio_length)

tests/test_data/test_audios.zip
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:4b3832d0c066ab144e2cda7e37df5144922dbceb0ae2605134eada3c866b0d43
size 83025760

tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:111f098aa42c139e19c795fc65b14d3b1435a29d75d208592c59e98f5e43144a
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:836b29596a0c1609aa91d6d48bc3fd7c73ebda89656744d5ba5691168bebc8a7
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:8a2f7a0f2e768846ad361672b402bb243c30c22631286908f58a8ffb9d4361ad
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:ef83616e756e449f0307c93b97b8d260bc4c68e213dc878e9f0ca4a46e2a69b7
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:dbf8ee215f2e447dff00e5d3cfee257a2945f1689c5af2fd995729f02315802d
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:e27e550cde48277d1239cb9d9ee40749c520c2f4d5824bfb2fb46b29a8db2fc8
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:69bedb0d844819919c44f5280aeb8ce20d3eee30099565bfff926aa883702a3c
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-computer_sicence-part1-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:606f1b5ad35f4edeaf274fd7e54c1b32cf22e905feee795c207de9d837f9031a
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:171b9e830af34320b9049564390b36305fd98168a82a45d3bb93f24acb2ede29
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7077bd545e60a791a4f5bef34e6d52a0c580ea7f3cb767bc2808442836347ec5
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:a85b17f66cc09d5cbdd55f17e52bc376db9d7c3668a1613b74cc60c146a8b2aa
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-generative_ai-part1-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b25e9278c64ba162139f4785a0419435112653d3c7bd66bb7d6a35e7c20bd12b
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:2661796edf3667289ebb1772c3a3fb3d120ae7fb2e96c08899a5261b817fef49
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:100272f678ee41dc71c35e1b705bfbf3aef69650562539a1390f87a8ec21a926
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:5110fd6774eab81a40c6c11fe5b08cb941588aa3fec0a00aa6bc951907750dec
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-internet-part20-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:ace6cb64e830ab31892ea4ca072051b5993bbe393ac63bb887157b6c6808bbf6
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:2b6d1ba99abc344ec4d31a3a6e34af5ab81dda139512f306bba94c5c52b71edc
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:48e5aff409e3b83ae387dd6fd9c06f131116191161e2a844ba69febf1e8cbf3f
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:bfea77a95b17e3dace6dd5504bb5d618597619e1e64ecc91e52748e354331170
size 320078

tests/test_data/test_audios/10s-en-ac1-16k/English-legalsystem-part1-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:0111f1d6d65692dce0b3200de36af5fa27b086401e462123baddc11f45fc8ef6
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part1-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:464420cd14b00b08d4240fb5ecc19b2aa053ebbf534cb0aadb8f80f7bf0da668
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part1-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:5dbd856cf2a9543f3c133483708a6354a9ee06718b496bce03c29a981a56f8fc
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part1-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:59c1d0e112e1fafe9ec26a02d786451c890636f83f1dc63dc36fdcec4c9526e4
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part1-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:de45323f5741aac05143b6b995568c8e10e0bad5eadde2df9d67e0770ecc57b8
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part2-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:648770939935801a613bc36917df034811ae70012c72d399582b54ff54d5cae0
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part2-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:f9b3f117ccbb2bc9bab9355a3174b3fe9b39a6786ad37c0c20d577b643031aff
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part2-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:c918e58777b7d72307089963690a72ee1e67cf733f328fa93a385c204e0e4b02
size 320078

tests/test_data/test_audios/10s-mix/qiaodan-part2-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:85891cc64cb79a385d986915dfcfa4fbc3e0ddf12870c7beafcc76c1e012bf78
size 320078

tests/test_data/test_audios/10s-mix/randomforest-part1-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:802125c360c22476bad1aaabb6c5210d21460ed69884f21a3cb318ea00377345
size 320078

tests/test_data/test_audios/10s-mix/randomforest-part1-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:1605d02574c5a50a91bda5c397079eb6f503311012ed9f15e9b9f90ee7c5f30e
size 320078

tests/test_data/test_audios/10s-mix/randomforest-part1-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:777b2c46d021c3c10fd0eafce8d6a17bddfc0867119d4537daaff945cf839462
size 320078

tests/test_data/test_audios/10s-mix/randomforest-part1-30.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:d9da19f35aab393949a408855ea7970f55ae50f13ceaa0f87e576cb1270cc019
size 320078

tests/test_data/test_audios/10s-mix/zhanghuailong-part1-0.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:6799ca07ab7c8db1ebb97244bc89022b281464dc864de0267b52192db8a3e107
size 320078

tests/test_data/test_audios/10s-mix/zhanghuailong-part1-10.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:8709c003dd26f01d0a140ed3c31e0dd84801984b7a50a36b0239e6b9590538f0
size 320078

tests/test_data/test_audios/10s-mix/zhanghuailong-part1-20.wav
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:c15a860a7c6994862244e49cb1fd2e19ac633e1b2c2abddedb10bed480968ed0
size 320078