support test_models on Intel
Browse files- environment.py +1 -1
- lib/models/intel/__init__.py +15 -0
- lib/models/intel/funasr.py +29 -0
- lib/models/intel/kokoro.py +111 -0
- lib/models/intel/llm.py +30 -0
- lib/models/intel/whisper.py +30 -0
- lib/models/mac/__init__.py +0 -0
- lib/models/{funasr.py → mac/funasr.py} +2 -2
- lib/models/{kokoro.py → mac/kokoro.py} +0 -2
- lib/models/{llm.py → mac/llm.py} +0 -0
- lib/models/{whisper.py → mac/whisper.py} +0 -0
- tests/test_models/test_funasr.py +7 -5
- tests/test_models/test_llm.py +6 -5
- tests/test_models/test_tts.py +10 -9
- tests/test_models/test_whisper.py +5 -4
environment.py
CHANGED
|
@@ -3,7 +3,7 @@ from enum import Enum
|
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
-
PROJECT_DIR = Path(
|
| 7 |
APP_PATH = Path("/Applications/YoYo Translator.app/Contents/MacOS/YoYo Translator")
|
| 8 |
APP_LOG = Path('/tmp/translator.log')
|
| 9 |
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
+
PROJECT_DIR = Path(__file__).parent
|
| 7 |
APP_PATH = Path("/Applications/YoYo Translator.app/Contents/MacOS/YoYo Translator")
|
| 8 |
APP_LOG = Path('/tmp/translator.log')
|
| 9 |
|
lib/models/intel/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
BINARY_DIR = Path(r"D:\yujuan\yoyo-translator-win\resources\bin")
|
| 6 |
+
|
| 7 |
+
def load_s2ts_lib():
|
| 8 |
+
sys.path.append(f"{BINARY_DIR}")
|
| 9 |
+
dll_path = os.environ.get("PATH")
|
| 10 |
+
new_dll_path = dll_path + f";{BINARY_DIR}"
|
| 11 |
+
os.environ["PATH"] = new_dll_path
|
| 12 |
+
os.environ["ONEDNN_MAX_CPU_ISA"] = "AVX2_VNNI"
|
| 13 |
+
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
| 14 |
+
|
| 15 |
+
load_s2ts_lib()
|
lib/models/intel/funasr.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
import numpy as np
from lib.utils import Timer
from s2ts import S2TS
from s2ts import TaskExecInfo as CTaskExecInfo

# FunASR model bundle shipped with the Windows build.
MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\funasr")


class FunAsrOv:
    """Chinese speech recognition backed by the native s2ts FunASR engine."""

    def __init__(self, model_dir=MODEL_DIR):
        """Start the engine, report load status, then run one warm-up pass."""
        with Timer("load FunASR"):
            self.instance = S2TS()
            ret = self.instance.start_fun_asr(f'{model_dir}/model_files', f'{model_dir}/punc', f"{model_dir}/hotword.bin")
            print(f"model load {'success' if ret else 'failed'}")
            self._warm_up()

    def _warm_up(self):
        # One throw-away inference on random noise so the first real request
        # does not pay the engine's lazy-initialisation cost.
        self.transcribe(np.random.randn(16000).astype(np.float32))

    def transcribe(self, audio: np.ndarray):
        """Recognise *audio* (float32 samples); return (text, seconds_taken)."""
        request = CTaskExecInfo()
        request.audio_data = audio.tolist()
        request.audio_language = "zh"
        with Timer("FunASR inference") as timer:
            self.instance.put_asr(request)
            result: CTaskExecInfo = self.instance.get_asr(0)
        text = "".join(word.text for word in result.words)
        return text, timer.duration
|
| 28 |
+
|
| 29 |
+
|
lib/models/intel/kokoro.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from kokoro_onnx import Kokoro
|
| 3 |
+
from misaki import espeak, ja, en, zh
|
| 4 |
+
from misaki.espeak import EspeakG2P
|
| 5 |
+
import re
|
| 6 |
+
from functools import lru_cache
|
| 7 |
+
from loguru import logger
|
| 8 |
+
import onnxruntime
|
| 9 |
+
import os
|
| 10 |
+
from lib.utils import Timer, write_audio
|
| 11 |
+
|
| 12 |
+
providers = onnxruntime.get_available_providers()
|
| 13 |
+
print(f"Available onnx runtime providers: {providers}")
|
| 14 |
+
MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\kokoro")
|
| 15 |
+
|
| 16 |
+
def create_session(model_path):
    """Build an ONNX Runtime InferenceSession for the Kokoro model.

    Uses every provider available on this machine and caps intra-op
    threading at half the logical cores so TTS does not starve the rest
    of the pipeline.
    """
    # See list of providers https://github.com/microsoft/onnxruntime/issues/22101#issuecomment-2357667377
    providers = onnxruntime.get_available_providers()
    print(f"Available onnx runtime providers: {providers}")

    # See session options https://onnxruntime.ai/docs/performance/tune-performance/threading.html#thread-management
    sess_options = onnxruntime.SessionOptions()
    # os.cpu_count() may return None, and `// 2` would yield 0 threads on a
    # single-core machine; clamp to at least one worker thread.
    cpu_count = max(1, (os.cpu_count() or 2) // 2)
    print(f"Setting threads to CPU cores count: {cpu_count}")
    sess_options.intra_op_num_threads = cpu_count
    session = onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )
    return session
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class KokoroTTS:
    """Kokoro ONNX text-to-speech with per-language G2P front ends."""

    # Default Kokoro voice for each supported language code.
    language_voice_mapping = {
        "JP": "jf_alpha",
        "JA": "jf_alpha",
        "ZH": "zf_xiaoyi",
        "EN": "af_heart",
        "FR": "ff_siwis",
        "IT": "im_nicola",
        "HI": "hf_alpha",
        "PT": "im_nicola",
        "ES": "im_nicola"
    }

    # Languages served by a plain espeak G2P front end:
    # language code -> (espeak voice id, warm-up sentence).
    # Replaces five copy-pasted elif arms that differed only in these values.
    _espeak_languages = {
        "HI": ("hi", "हेलो"),
        "IT": ("it", "Ciao"),
        "PT": ("pt-br", "Olá"),
        "ES": ("es", "Hola"),
        "FR": ("fr-fr", "Bonjour"),
    }

    def __init__(self, model_path: str, voice_model_path: str, vocab_config=None, gcp=None, voice=None):
        """Load the ONNX model and voice bank; remember the G2P and voice."""
        self._session = create_session(model_path)
        self.model = Kokoro.from_session(self._session, voice_model_path, vocab_config=vocab_config)
        self.g2p = gcp
        self.voice = voice

    @classmethod
    def from_language(cls, language: str, model_dir: Path=MODEL_DIR):
        """Build a TTS engine for *language*, warming it up where configured.

        Raises ValueError when no voice is configured for *language*.
        """
        model_path: str = str(model_dir/"kokoro-quant.onnx")
        voice_model_path: str = str(model_dir/"voices-v1.0.bin")
        lang = language.upper()
        voice = cls.language_voice_mapping.get(lang)
        logger.info(f"[TTS] language: {language}")
        if not voice:
            raise ValueError(f"Unsupported language: {language}, voice: {voice}")
        if lang == "ZH":
            tts = cls(model_path, voice_model_path, vocab_config=model_dir / "zh_config.json", gcp=zh.ZHG2P(),
                      voice=voice)
            tts.generate("你好")
        elif lang in ['JP', 'JA']:
            # NOTE(review): Japanese intentionally gets no warm-up call in the
            # original — presumably JAG2P start-up is cheap; confirm.
            tts = cls(model_path, voice_model_path, vocab_config=model_dir / "ja_config.json", gcp=ja.JAG2P(),
                      voice=voice)
        elif lang == 'EN':
            fallback = espeak.EspeakFallback(british=False)
            tts = cls(model_path, voice_model_path, gcp=en.G2P(trf=False, british=False, fallback=fallback),
                      voice=voice)
            tts.generate("hello")
        elif lang in cls._espeak_languages:
            espeak_code, warm_up_text = cls._espeak_languages[lang]
            tts = cls(model_path, voice_model_path, gcp=EspeakG2P(language=espeak_code), voice=voice)
            tts.generate(warm_up_text)
        else:
            # Unreachable while language_voice_mapping only lists the codes
            # handled above, but kept as a safe fallback.
            tts = cls(model_path, voice_model_path, gcp=EspeakG2P(language.lower()), voice=voice)
        return tts

    def generate(self, text, speed=1.2):
        """Synthesise *text*; return (samples, sample_rate, seconds_taken)."""
        with Timer("tts inference") as t:
            phonemes, _ = self.g2p(text)
            samples, sample_rate = self.model.create(phonemes, self.voice, is_phonemes=True, speed=speed)
        return samples, sample_rate, t.duration

    async def stream(self, text, speed=1.2):
        """Yield (samples, sample_rate) chunks as synthesis progresses."""
        phonemes, _ = self.g2p(text)
        stream = self.model.create_stream(phonemes, self.voice, is_phonemes=True, speed=speed)
        async for samples, sample_rate in stream:
            yield samples, sample_rate
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
@lru_cache
def get_model(language):
    """Return (and memoise) one KokoroTTS instance per language."""
    # Was: KokoroTTS.from_language(language=language,
    #                              model_dir_path=resource_path('models/kokoro'))
    # `resource_path` is not defined in this module and `model_dir_path` is not
    # a parameter of from_language (it is `model_dir`), so the original raised
    # NameError on first use.  Use this module's MODEL_DIR instead.
    return KokoroTTS.from_language(language=language, model_dir=MODEL_DIR)
|
lib/models/intel/llm.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
import numpy as np
from lib.utils import Timer
from s2ts import S2TS
from s2ts import TaskExecInfo as CTaskExecInfo

# Quantised Qwen3 OpenVINO translation model shipped with the Windows build.
MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\llm\Qwen3-1.7B-int8-ov")


class QwenOv:
    """Text translation backed by the native s2ts Qwen/OpenVINO engine."""

    def __init__(self, model_dir=MODEL_DIR):
        """Start the engine, report load status, then run one warm-up pass."""
        with Timer("load LLM"):
            self.instance = S2TS()
            ret = self.instance.start_translate_genai(str(model_dir))
            print(f"model load {'success' if ret else 'failed'}")
            self._warm_up()

    def _warm_up(self):
        # Prime the engine so the first real translation does not pay the
        # native lazy-initialisation cost.
        self.translate("How are you?", "en", "zh")

    def translate(self, prompt, src_lang, dst_lang):
        """Translate *prompt* from *src_lang* to *dst_lang*; return (text, seconds)."""
        request = CTaskExecInfo()
        request.transcribe_content = prompt
        request.audio_language = src_lang
        request.translate_language = dst_lang

        with Timer("LLM inference") as timer:
            self.instance.put_llm(request)
            result: CTaskExecInfo = self.instance.get_llm(0)
        return result.translate_content, timer.duration
|
| 29 |
+
|
| 30 |
+
|
lib/models/intel/whisper.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
import numpy as np
from lib.utils import Timer
from s2ts import S2TS
from s2ts import TaskExecInfo as CTaskExecInfo

# int8 Whisper large-v3-turbo OpenVINO model shipped with the Windows build.
MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8")


class WhisperOv:
    """Multilingual speech recognition via the native s2ts Whisper engine."""

    def __init__(self, model_dir=MODEL_DIR):
        """Start the engine, report load status, then run one warm-up pass."""
        with Timer("load Whisper"):
            self.instance = S2TS()
            ret = self.instance.start_asr_genai("en", str(model_dir), False, "")
            print(f"model load {'success' if ret else 'failed'}")
            self._warm_up()

    def _warm_up(self):
        # One throw-away inference on random noise so the first real request
        # does not pay the engine's lazy-initialisation cost.
        self.transcribe(np.random.randn(16000).astype(np.float32), "en")

    def transcribe(self, audio: np.ndarray, language):
        """Recognise *audio* (float32 samples) in *language*; return (text, seconds)."""
        request = CTaskExecInfo()
        request.audio_data = audio.tolist()
        request.audio_language = language

        with Timer("Whisper inference") as timer:
            self.instance.put_asr(request)
            result: CTaskExecInfo = self.instance.get_asr(0)
        return "".join(word.text for word in result.words), timer.duration
|
| 29 |
+
|
| 30 |
+
|
lib/models/mac/__init__.py
ADDED
|
File without changes
|
lib/models/{funasr.py → mac/funasr.py}
RENAMED
|
@@ -8,7 +8,7 @@ from lib.utils import Timer, read_audio
|
|
| 8 |
|
| 9 |
MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
|
| 10 |
|
| 11 |
-
class
|
| 12 |
def __init__(self, model_dir=MODEL_DIR, quantize=True):
|
| 13 |
asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
|
| 14 |
# vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
|
|
@@ -34,7 +34,7 @@ class FunASR:
|
|
| 34 |
return text, t.duration
|
| 35 |
|
| 36 |
if __name__ == '__main__':
|
| 37 |
-
funasr =
|
| 38 |
audio = read_audio(Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav"))
|
| 39 |
text, time_cost =funasr.transcribe(audio)
|
| 40 |
print(text)
|
|
|
|
| 8 |
|
| 9 |
MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
|
| 10 |
|
| 11 |
+
class FunAsrOnnx:
|
| 12 |
def __init__(self, model_dir=MODEL_DIR, quantize=True):
|
| 13 |
asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
|
| 14 |
# vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
|
|
|
|
| 34 |
return text, t.duration
|
| 35 |
|
| 36 |
if __name__ == '__main__':
    # Manual smoke test: transcribe one known recording and print the result.
    asr = FunAsrOnnx()
    sample = read_audio(Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav"))
    transcript, elapsed = asr.transcribe(sample)
    print(transcript)
|
lib/models/{kokoro.py → mac/kokoro.py}
RENAMED
|
@@ -3,9 +3,7 @@ from pathlib import Path
|
|
| 3 |
from kokoro_onnx import Kokoro
|
| 4 |
from misaki import espeak, en, zh
|
| 5 |
from misaki.espeak import EspeakG2P
|
| 6 |
-
from functools import lru_cache
|
| 7 |
from logging import getLogger
|
| 8 |
-
import librosa
|
| 9 |
import onnxruntime
|
| 10 |
|
| 11 |
from lib.utils import Timer, write_audio
|
|
|
|
| 3 |
from kokoro_onnx import Kokoro
|
| 4 |
from misaki import espeak, en, zh
|
| 5 |
from misaki.espeak import EspeakG2P
|
|
|
|
| 6 |
from logging import getLogger
|
|
|
|
| 7 |
import onnxruntime
|
| 8 |
|
| 9 |
from lib.utils import Timer, write_audio
|
lib/models/{llm.py → mac/llm.py}
RENAMED
|
File without changes
|
lib/models/{whisper.py → mac/whisper.py}
RENAMED
|
File without changes
|
tests/test_models/test_funasr.py
CHANGED
|
@@ -1,17 +1,19 @@
|
|
| 1 |
import pytest
|
| 2 |
-
from lib.models.funasr import FunASR
|
| 3 |
from lib.utils import read_audio, save_csv
|
| 4 |
from test_data.audios import read_emilia
|
| 5 |
from environment import REPORTS_DIR
|
| 6 |
|
| 7 |
@pytest.fixture(scope="module")
|
| 8 |
-
def asr(get_platform)
|
| 9 |
if get_platform == "apple":
|
| 10 |
-
|
|
|
|
| 11 |
elif get_platform == "intel":
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
#TODO: 测试CER
|
| 16 |
report = []
|
| 17 |
for audio_file, text, duration in read_emilia(count_limit=100):
|
|
|
|
| 1 |
import pytest
|
|
|
|
| 2 |
from lib.utils import read_audio, save_csv
|
| 3 |
from test_data.audios import read_emilia
|
| 4 |
from environment import REPORTS_DIR
|
| 5 |
|
| 6 |
@pytest.fixture(scope="module")
def asr(get_platform):
    """Module-scoped ASR engine matching the platform under test."""
    if get_platform == "apple":
        from lib.models.mac.funasr import FunAsrOnnx
        return FunAsrOnnx()
    elif get_platform == "intel":
        from lib.models.intel.funasr import FunAsrOv
        return FunAsrOv()
    # Was: silent fall-through returning None, which surfaces later as an
    # opaque AttributeError inside the tests.  Fail loudly at setup instead.
    raise RuntimeError(f"unsupported platform: {get_platform!r}")
|
| 14 |
|
| 15 |
+
|
| 16 |
+
def test_inference(asr):
|
| 17 |
#TODO: 测试CER
|
| 18 |
report = []
|
| 19 |
for audio_file, text, duration in read_emilia(count_limit=100):
|
tests/test_models/test_llm.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
import pytest
|
| 2 |
-
from lib.models.llm import QwenTranslator
|
| 3 |
from test_data.texts import read_translation
|
| 4 |
from lib.utils import save_csv
|
| 5 |
from environment import REPORTS_DIR
|
| 6 |
|
| 7 |
@pytest.fixture(scope="module")
|
| 8 |
-
def llm(get_platform)
|
| 9 |
if get_platform == "apple":
|
|
|
|
| 10 |
return QwenTranslator()
|
| 11 |
elif get_platform == "intel":
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
def test_llm_zh(llm
|
| 15 |
report = []
|
| 16 |
for src in read_translation("zh"):
|
| 17 |
dst, time_cost = llm.translate(src, src_lang="zh", dst_lang="en")
|
|
@@ -20,7 +21,7 @@ def test_llm_zh(llm: QwenTranslator):
|
|
| 20 |
report.append([src, dst, time_cost])
|
| 21 |
save_csv(REPORTS_DIR/"translation_zh.csv", ["src", "dst", "time"], report)
|
| 22 |
|
| 23 |
-
def test_llm_en(llm
|
| 24 |
report = []
|
| 25 |
for src in read_translation("en"):
|
| 26 |
dst, time_cost = llm.translate(src, src_lang="en", dst_lang="zh")
|
|
|
|
| 1 |
import pytest
|
|
|
|
| 2 |
from test_data.texts import read_translation
|
| 3 |
from lib.utils import save_csv
|
| 4 |
from environment import REPORTS_DIR
|
| 5 |
|
| 6 |
@pytest.fixture(scope="module")
def llm(get_platform):
    """Module-scoped translator matching the platform under test."""
    if get_platform == "apple":
        from lib.models.mac.llm import QwenTranslator
        return QwenTranslator()
    elif get_platform == "intel":
        from lib.models.intel.llm import QwenOv
        return QwenOv()
    # Was: silent fall-through returning None, which surfaces later as an
    # opaque AttributeError inside the tests.  Fail loudly at setup instead.
    raise RuntimeError(f"unsupported platform: {get_platform!r}")
|
| 14 |
|
| 15 |
+
def test_llm_zh(llm):
|
| 16 |
report = []
|
| 17 |
for src in read_translation("zh"):
|
| 18 |
dst, time_cost = llm.translate(src, src_lang="zh", dst_lang="en")
|
|
|
|
| 21 |
report.append([src, dst, time_cost])
|
| 22 |
save_csv(REPORTS_DIR/"translation_zh.csv", ["src", "dst", "time"], report)
|
| 23 |
|
| 24 |
+
def test_llm_en(llm):
|
| 25 |
report = []
|
| 26 |
for src in read_translation("en"):
|
| 27 |
dst, time_cost = llm.translate(src, src_lang="en", dst_lang="zh")
|
tests/test_models/test_tts.py
CHANGED
|
@@ -1,20 +1,21 @@
|
|
| 1 |
import pytest
|
| 2 |
-
from lib.models.kokoro import KokoroTTS
|
| 3 |
from test_data.texts import read_translation
|
| 4 |
from lib.utils import save_csv
|
| 5 |
from environment import REPORTS_DIR
|
| 6 |
|
| 7 |
|
| 8 |
@pytest.fixture(scope="module")
|
| 9 |
-
def
|
| 10 |
if get_platform == "apple":
|
| 11 |
-
|
|
|
|
| 12 |
elif get_platform == "intel":
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
-
def test_tts_zh():
|
| 17 |
-
tts =
|
| 18 |
report = []
|
| 19 |
for text in read_translation("zh"):
|
| 20 |
samples, sr, time_cost = tts.generate(text)
|
|
@@ -22,10 +23,10 @@ def test_tts_zh():
|
|
| 22 |
save_csv(REPORTS_DIR / "tts_zh.csv", ["text", "time"], report)
|
| 23 |
|
| 24 |
|
| 25 |
-
def test_tts_en():
|
| 26 |
-
tts =
|
| 27 |
report = []
|
| 28 |
for text in read_translation("en"):
|
| 29 |
-
samples, sr, time_cost = tts.generate(text)
|
| 30 |
report.append([text, time_cost])
|
| 31 |
save_csv(REPORTS_DIR / "tts_en.csv", ["text", "time"], report)
|
|
|
|
| 1 |
import pytest
|
|
|
|
| 2 |
from test_data.texts import read_translation
|
| 3 |
from lib.utils import save_csv
|
| 4 |
from environment import REPORTS_DIR
|
| 5 |
|
| 6 |
|
| 7 |
@pytest.fixture(scope="module")
def tts(get_platform):
    """Platform-specific KokoroTTS class (tests instantiate via from_language)."""
    if get_platform == "apple":
        from lib.models.mac.kokoro import KokoroTTS
        return KokoroTTS
    elif get_platform == "intel":
        from lib.models.intel.kokoro import KokoroTTS
        return KokoroTTS
    # Was: silent fall-through returning None, which surfaces later as an
    # opaque AttributeError inside the tests.  Fail loudly at setup instead.
    raise RuntimeError(f"unsupported platform: {get_platform!r}")
|
| 15 |
|
| 16 |
|
| 17 |
+
def test_tts_zh(tts):
|
| 18 |
+
tts = tts.from_language("zh")
|
| 19 |
report = []
|
| 20 |
for text in read_translation("zh"):
|
| 21 |
samples, sr, time_cost = tts.generate(text)
|
|
|
|
| 23 |
save_csv(REPORTS_DIR / "tts_zh.csv", ["text", "time"], report)
|
| 24 |
|
| 25 |
|
| 26 |
+
def test_tts_en(tts):
    """English synthesis smoke test; records per-sentence latency to CSV."""
    engine = tts.from_language("en")
    rows = []
    for sentence in read_translation("en"):
        samples, sr, elapsed = engine.generate(sentence, speed=1.4)
        rows.append([sentence, elapsed])
    save_csv(REPORTS_DIR / "tts_en.csv", ["text", "time"], rows)
|
tests/test_models/test_whisper.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
import pytest
|
| 2 |
-
from lib.models.whisper import WhisperCPP
|
| 3 |
from lib.utils import read_audio, save_csv
|
| 4 |
from test_data.audios import read_emilia
|
| 5 |
from environment import REPORTS_DIR
|
| 6 |
|
| 7 |
@pytest.fixture(scope="module")
|
| 8 |
-
def whisper(get_platform)
|
| 9 |
if get_platform == "apple":
|
|
|
|
| 10 |
return WhisperCPP()
|
| 11 |
elif get_platform == "intel":
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
def test_inference(whisper
|
| 15 |
#TODO: 测试CER
|
| 16 |
report = []
|
| 17 |
for audio_file, text, duration in read_emilia(count_limit=100):
|
|
|
|
| 1 |
import pytest
|
|
|
|
| 2 |
from lib.utils import read_audio, save_csv
|
| 3 |
from test_data.audios import read_emilia
|
| 4 |
from environment import REPORTS_DIR
|
| 5 |
|
| 6 |
@pytest.fixture(scope="module")
def whisper(get_platform):
    """Module-scoped Whisper engine matching the platform under test."""
    if get_platform == "apple":
        from lib.models.mac.whisper import WhisperCPP
        return WhisperCPP()
    elif get_platform == "intel":
        from lib.models.intel.whisper import WhisperOv
        return WhisperOv()
    # Was: silent fall-through returning None, which surfaces later as an
    # opaque AttributeError inside the tests.  Fail loudly at setup instead.
    raise RuntimeError(f"unsupported platform: {get_platform!r}")
|
| 14 |
|
| 15 |
+
def test_inference(whisper):
|
| 16 |
#TODO: 测试CER
|
| 17 |
report = []
|
| 18 |
for audio_file, text, duration in read_emilia(count_limit=100):
|