yujuanqin committed on
Commit
b295d06
·
1 Parent(s): fe108d8

support test_models on Intel

Browse files
environment.py CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
3
 
4
 
5
 
6
- PROJECT_DIR = Path("/Users/jeqin/work/code/TestTranslator")
7
  APP_PATH = Path("/Applications/YoYo Translator.app/Contents/MacOS/YoYo Translator")
8
  APP_LOG = Path('/tmp/translator.log')
9
 
 
3
 
4
 
5
 
6
+ PROJECT_DIR = Path(__file__).parent
7
  APP_PATH = Path("/Applications/YoYo Translator.app/Contents/MacOS/YoYo Translator")
8
  APP_LOG = Path('/tmp/translator.log')
9
 
lib/models/intel/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
from pathlib import Path

# Directory holding the s2ts native binary and the DLLs it links against.
# NOTE(review): machine-specific hard-coded path — consider an env-var override.
BINARY_DIR = Path(r"D:\yujuan\yoyo-translator-win\resources\bin")


def load_s2ts_lib():
    """Make the s2ts native library importable and loadable.

    Adds BINARY_DIR to ``sys.path`` (so the Python wrapper module can be
    imported) and to the ``PATH`` environment variable (so Windows can
    resolve its DLL dependencies), then sets runtime workarounds.
    """
    binary_dir = str(BINARY_DIR)
    if binary_dir not in sys.path:
        sys.path.append(binary_dir)
    # os.environ.get("PATH") can be None in a stripped environment; default
    # to "" so the concatenation below cannot raise TypeError.
    current_path = os.environ.get("PATH", "")
    # Avoid growing PATH with duplicates when called more than once.
    if binary_dir not in current_path.split(";"):
        os.environ["PATH"] = current_path + f";{binary_dir}"
    # Cap the instruction set oneDNN may use on the target Intel CPUs.
    os.environ["ONEDNN_MAX_CPU_ISA"] = "AVX2_VNNI"
    # Tolerate duplicate OpenMP runtimes (common when mixing native libs).
    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


load_s2ts_lib()
lib/models/intel/funasr.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import numpy as np
3
+ from lib.utils import Timer
4
+ from s2ts import S2TS
5
+ from s2ts import TaskExecInfo as CTaskExecInfo
6
+
7
+ MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\funasr")
8
+
9
class FunAsrOv:
    """Chinese ASR wrapper around the native s2ts FunASR backend."""

    def __init__(self, model_dir=MODEL_DIR):
        # Model loading is timed so startup cost shows up in the logs.
        with Timer("load FunASR"):
            self.instance = S2TS()
            ok = self.instance.start_fun_asr(
                f'{model_dir}/model_files',
                f'{model_dir}/punc',
                f"{model_dir}/hotword.bin",
            )
            print(f"model load {'success' if ok else 'failed'}")
        self._warm_up()

    def _warm_up(self):
        # Run one throwaway inference on random audio (16000 float32
        # samples) so the first real call does not pay first-use cost.
        dummy = np.random.randn(16000).astype(np.float32)
        self.transcribe(dummy)

    def transcribe(self, audio: np.ndarray):
        """Transcribe mono float32 audio; returns ``(text, seconds_elapsed)``."""
        request = CTaskExecInfo()
        request.audio_data = audio.tolist()
        request.audio_language = "zh"
        with Timer("FunASR inference") as t:
            self.instance.put_asr(request)
            result: CTaskExecInfo = self.instance.get_asr(0)
        text = "".join(word.text for word in result.words)
        return text, t.duration
28
+
29
+
lib/models/intel/kokoro.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from kokoro_onnx import Kokoro
3
+ from misaki import espeak, ja, en, zh
4
+ from misaki.espeak import EspeakG2P
5
+ import re
6
+ from functools import lru_cache
7
+ from loguru import logger
8
+ import onnxruntime
9
+ import os
10
+ from lib.utils import Timer, write_audio
11
+
12
+ providers = onnxruntime.get_available_providers()
13
+ print(f"Available onnx runtime providers: {providers}")
14
+ MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\kokoro")
15
+
16
def create_session(model_path):
    """Build an ONNX Runtime inference session for *model_path*.

    Registers every provider onnxruntime reports as available and pins
    the intra-op thread pool to half the logical CPU count.
    """
    # See list of providers https://github.com/microsoft/onnxruntime/issues/22101#issuecomment-2357667377
    providers = onnxruntime.get_available_providers()
    print(f"Available onnx runtime providers: {providers}")

    # See session options https://onnxruntime.ai/docs/performance/tune-performance/threading.html#thread-management
    sess_options = onnxruntime.SessionOptions()
    # os.cpu_count() may return None, and 1 // 2 would be 0; clamp to >= 1
    # so we always request at least one intra-op thread explicitly.
    cpu_count = max(1, (os.cpu_count() or 2) // 2)
    print(f"Setting threads to CPU cores count: {cpu_count}")
    sess_options.intra_op_num_threads = cpu_count
    session = onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )
    return session
30
+
31
+
32
class KokoroTTS:
    """Text-to-speech built on kokoro-onnx with per-language G2P front ends."""

    # Maps upper-cased language codes to the kokoro voice used for them.
    language_voice_mapping = {
        "JP": "jf_alpha",
        "JA": "jf_alpha",
        "ZH": "zf_xiaoyi",
        "EN": "af_heart",
        "FR": "ff_siwis",
        "IT": "im_nicola",
        "HI": "hf_alpha",
        "PT": "im_nicola",
        "ES": "im_nicola"
    }

    def __init__(self, model_path: str, voice_model_path: str, vocab_config=None, gcp=None, voice=None):
        self._session = create_session(model_path)
        self.model = Kokoro.from_session(self._session, voice_model_path, vocab_config=vocab_config)
        self.g2p = gcp
        self.voice = voice

    @classmethod
    def from_language(cls, language: str, model_dir: Path = MODEL_DIR):
        """Build a TTS instance for *language*, warming it up where applicable."""
        model_path: str = str(model_dir / "kokoro-quant.onnx")
        voice_model_path: str = str(model_dir / "voices-v1.0.bin")
        lang = language.upper()
        voice = cls.language_voice_mapping.get(lang)
        logger.info(f"[TTS] language: {language}")
        if not voice:
            raise ValueError(f"Unsupported language: {language}, voice: {voice}")
        # Languages served by a plain espeak G2P:
        # upper-case code -> (espeak language code, warm-up text).
        espeak_langs = {
            "HI": ("hi", "हेलो"),
            "IT": ("it", "Ciao"),
            "PT": ("pt-br", "Olá"),
            "ES": ("es", "Hola"),
            "FR": ("fr-fr", "Bonjour"),
        }
        if lang == "ZH":
            tts = cls(model_path, voice_model_path, vocab_config=model_dir / "zh_config.json",
                      gcp=zh.ZHG2P(), voice=voice)
            tts.generate("你好")
        elif lang in ("JP", "JA"):
            # Japanese gets no warm-up call (matches original behavior).
            tts = cls(model_path, voice_model_path, vocab_config=model_dir / "ja_config.json",
                      gcp=ja.JAG2P(), voice=voice)
        elif lang == "EN":
            fallback = espeak.EspeakFallback(british=False)
            tts = cls(model_path, voice_model_path,
                      gcp=en.G2P(trf=False, british=False, fallback=fallback), voice=voice)
            tts.generate("hello")
        elif lang in espeak_langs:
            espeak_code, warmup_text = espeak_langs[lang]
            tts = cls(model_path, voice_model_path, gcp=EspeakG2P(language=espeak_code), voice=voice)
            tts.generate(warmup_text)
        else:
            # Fallback for any mapped language not special-cased above.
            tts = cls(model_path, voice_model_path, gcp=EspeakG2P(language.lower()), voice=voice)
        return tts

    def generate(self, text, speed=1.2):
        """Synthesize *text*; returns ``(samples, sample_rate, seconds_elapsed)``."""
        with Timer("tts inference") as t:
            phonemes, _ = self.g2p(text)
            samples, sample_rate = self.model.create(phonemes, self.voice, is_phonemes=True, speed=speed)

        return samples, sample_rate, t.duration

    async def stream(self, text, speed=1.2):
        """Asynchronously yield ``(samples, sample_rate)`` chunks for *text*."""
        phonemes, _ = self.g2p(text)
        chunk_stream = self.model.create_stream(phonemes, self.voice, is_phonemes=True, speed=speed)
        async for samples, sample_rate in chunk_stream:
            yield samples, sample_rate
107
+
108
+
109
@lru_cache
def get_model(language):
    """Return a cached KokoroTTS for *language* (one instance per language).

    Bug fix: the previous call passed ``model_dir_path=`` (the parameter is
    named ``model_dir``) and used ``resource_path``, which is not defined in
    this module — either alone raised at call time. Use the module-level
    MODEL_DIR constant instead.
    """
    return KokoroTTS.from_language(language=language, model_dir=MODEL_DIR)
lib/models/intel/llm.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import numpy as np
3
+ from lib.utils import Timer
4
+ from s2ts import S2TS
5
+ from s2ts import TaskExecInfo as CTaskExecInfo
6
+
7
+ MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\llm\Qwen3-1.7B-int8-ov")
8
+
9
class QwenOv:
    """Translation wrapper around the native s2ts Qwen GenAI backend."""

    def __init__(self, model_dir=MODEL_DIR):
        # Model loading is timed so startup cost shows up in the logs.
        with Timer("load LLM"):
            self.instance = S2TS()
            ok = self.instance.start_translate_genai(str(model_dir))
            print(f"model load {'success' if ok else 'failed'}")
        self._warm_up()

    def _warm_up(self):
        # One throwaway translation so the first real call is not slow.
        self.translate("How are you?", "en", "zh")

    def translate(self, prompt, src_lang, dst_lang):
        """Translate *prompt* from *src_lang* to *dst_lang*.

        Returns ``(translated_text, seconds_elapsed)``.
        """
        request = CTaskExecInfo()
        request.transcribe_content = prompt
        request.audio_language = src_lang
        request.translate_language = dst_lang

        with Timer("LLM inference") as t:
            self.instance.put_llm(request)
            result: CTaskExecInfo = self.instance.get_llm(0)
        return result.translate_content, t.duration
29
+
30
+
lib/models/intel/whisper.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import numpy as np
3
+ from lib.utils import Timer
4
+ from s2ts import S2TS
5
+ from s2ts import TaskExecInfo as CTaskExecInfo
6
+
7
+ MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8")
8
+
9
class WhisperOv:
    """Whisper ASR wrapper around the native s2ts GenAI backend."""

    def __init__(self, model_dir=MODEL_DIR):
        # Model loading is timed so startup cost shows up in the logs.
        with Timer("load Whisper"):
            self.instance = S2TS()
            ok = self.instance.start_asr_genai("en", str(model_dir), False, "")
            print(f"model load {'success' if ok else 'failed'}")
        self._warm_up()

    def _warm_up(self):
        # Run one throwaway inference on random audio (16000 float32
        # samples) so the first real call does not pay first-use cost.
        dummy = np.random.randn(16000).astype(np.float32)
        self.transcribe(dummy, "en")

    def transcribe(self, audio: np.ndarray, language):
        """Transcribe mono float32 audio in *language*.

        Returns ``(text, seconds_elapsed)``.
        """
        request = CTaskExecInfo()
        request.audio_data = audio.tolist()
        request.audio_language = language

        with Timer("Whisper inference") as t:
            self.instance.put_asr(request)
            result: CTaskExecInfo = self.instance.get_asr(0)
        return "".join(word.text for word in result.words), t.duration
29
+
30
+
lib/models/mac/__init__.py ADDED
File without changes
lib/models/{funasr.py → mac/funasr.py} RENAMED
@@ -8,7 +8,7 @@ from lib.utils import Timer, read_audio
8
 
9
  MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
10
 
11
- class FunASR:
12
  def __init__(self, model_dir=MODEL_DIR, quantize=True):
13
  asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
14
  # vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
@@ -34,7 +34,7 @@ class FunASR:
34
  return text, t.duration
35
 
36
  if __name__ == '__main__':
37
- funasr = FunASR()
38
  audio = read_audio(Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav"))
39
  text, time_cost =funasr.transcribe(audio)
40
  print(text)
 
8
 
9
  MODEL_DIR = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
10
 
11
+ class FunAsrOnnx:
12
  def __init__(self, model_dir=MODEL_DIR, quantize=True):
13
  asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
14
  # vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
 
34
  return text, t.duration
35
 
36
  if __name__ == '__main__':
37
+ funasr = FunAsrOnnx()
38
  audio = read_audio(Path("/Users/jeqin/work/code/TestTranslator/test_data/recordings/1.wav"))
39
  text, time_cost =funasr.transcribe(audio)
40
  print(text)
lib/models/{kokoro.py → mac/kokoro.py} RENAMED
@@ -3,9 +3,7 @@ from pathlib import Path
3
  from kokoro_onnx import Kokoro
4
  from misaki import espeak, en, zh
5
  from misaki.espeak import EspeakG2P
6
- from functools import lru_cache
7
  from logging import getLogger
8
- import librosa
9
  import onnxruntime
10
 
11
  from lib.utils import Timer, write_audio
 
3
  from kokoro_onnx import Kokoro
4
  from misaki import espeak, en, zh
5
  from misaki.espeak import EspeakG2P
 
6
  from logging import getLogger
 
7
  import onnxruntime
8
 
9
  from lib.utils import Timer, write_audio
lib/models/{llm.py → mac/llm.py} RENAMED
File without changes
lib/models/{whisper.py → mac/whisper.py} RENAMED
File without changes
tests/test_models/test_funasr.py CHANGED
@@ -1,17 +1,19 @@
1
  import pytest
2
- from lib.models.funasr import FunASR
3
  from lib.utils import read_audio, save_csv
4
  from test_data.audios import read_emilia
5
  from environment import REPORTS_DIR
6
 
7
  @pytest.fixture(scope="module")
8
- def asr(get_platform)-> FunASR:
9
  if get_platform == "apple":
10
- return FunASR()
 
11
  elif get_platform == "intel":
12
- pass
 
13
 
14
- def test_inference(asr: FunASR):
 
15
  #TODO: 测试CER
16
  report = []
17
  for audio_file, text, duration in read_emilia(count_limit=100):
 
1
  import pytest
 
2
  from lib.utils import read_audio, save_csv
3
  from test_data.audios import read_emilia
4
  from environment import REPORTS_DIR
5
 
6
  @pytest.fixture(scope="module")
7
+ def asr(get_platform):
8
  if get_platform == "apple":
9
+ from lib.models.mac.funasr import FunAsrOnnx
10
+ return FunAsrOnnx()
11
  elif get_platform == "intel":
12
+ from lib.models.intel.funasr import FunAsrOv
13
+ return FunAsrOv()
14
 
15
+
16
+ def test_inference(asr):
17
  #TODO: 测试CER
18
  report = []
19
  for audio_file, text, duration in read_emilia(count_limit=100):
tests/test_models/test_llm.py CHANGED
@@ -1,17 +1,18 @@
1
  import pytest
2
- from lib.models.llm import QwenTranslator
3
  from test_data.texts import read_translation
4
  from lib.utils import save_csv
5
  from environment import REPORTS_DIR
6
 
7
  @pytest.fixture(scope="module")
8
- def llm(get_platform)-> QwenTranslator:
9
  if get_platform == "apple":
 
10
  return QwenTranslator()
11
  elif get_platform == "intel":
12
- pass
 
13
 
14
- def test_llm_zh(llm: QwenTranslator):
15
  report = []
16
  for src in read_translation("zh"):
17
  dst, time_cost = llm.translate(src, src_lang="zh", dst_lang="en")
@@ -20,7 +21,7 @@ def test_llm_zh(llm: QwenTranslator):
20
  report.append([src, dst, time_cost])
21
  save_csv(REPORTS_DIR/"translation_zh.csv", ["src", "dst", "time"], report)
22
 
23
- def test_llm_en(llm: QwenTranslator):
24
  report = []
25
  for src in read_translation("en"):
26
  dst, time_cost = llm.translate(src, src_lang="en", dst_lang="zh")
 
1
  import pytest
 
2
  from test_data.texts import read_translation
3
  from lib.utils import save_csv
4
  from environment import REPORTS_DIR
5
 
6
  @pytest.fixture(scope="module")
7
+ def llm(get_platform):
8
  if get_platform == "apple":
9
+ from lib.models.mac.llm import QwenTranslator
10
  return QwenTranslator()
11
  elif get_platform == "intel":
12
+ from lib.models.intel.llm import QwenOv
13
+ return QwenOv()
14
 
15
+ def test_llm_zh(llm):
16
  report = []
17
  for src in read_translation("zh"):
18
  dst, time_cost = llm.translate(src, src_lang="zh", dst_lang="en")
 
21
  report.append([src, dst, time_cost])
22
  save_csv(REPORTS_DIR/"translation_zh.csv", ["src", "dst", "time"], report)
23
 
24
+ def test_llm_en(llm):
25
  report = []
26
  for src in read_translation("en"):
27
  dst, time_cost = llm.translate(src, src_lang="en", dst_lang="zh")
tests/test_models/test_tts.py CHANGED
@@ -1,20 +1,21 @@
1
  import pytest
2
- from lib.models.kokoro import KokoroTTS
3
  from test_data.texts import read_translation
4
  from lib.utils import save_csv
5
  from environment import REPORTS_DIR
6
 
7
 
8
  @pytest.fixture(scope="module")
9
- def llm(get_platform) -> KokoroTTS:
10
  if get_platform == "apple":
11
- pass
 
12
  elif get_platform == "intel":
13
- pass
 
14
 
15
 
16
- def test_tts_zh():
17
- tts = KokoroTTS.from_language("zh")
18
  report = []
19
  for text in read_translation("zh"):
20
  samples, sr, time_cost = tts.generate(text)
@@ -22,10 +23,10 @@ def test_tts_zh():
22
  save_csv(REPORTS_DIR / "tts_zh.csv", ["text", "time"], report)
23
 
24
 
25
- def test_tts_en():
26
- tts = KokoroTTS.from_language("en")
27
  report = []
28
  for text in read_translation("en"):
29
- samples, sr, time_cost = tts.generate(text)
30
  report.append([text, time_cost])
31
  save_csv(REPORTS_DIR / "tts_en.csv", ["text", "time"], report)
 
1
  import pytest
 
2
  from test_data.texts import read_translation
3
  from lib.utils import save_csv
4
  from environment import REPORTS_DIR
5
 
6
 
7
  @pytest.fixture(scope="module")
8
+ def tts(get_platform):
9
  if get_platform == "apple":
10
+ from lib.models.mac.kokoro import KokoroTTS
11
+ return KokoroTTS
12
  elif get_platform == "intel":
13
+ from lib.models.intel.kokoro import KokoroTTS
14
+ return KokoroTTS
15
 
16
 
17
+ def test_tts_zh(tts):
18
+ tts = tts.from_language("zh")
19
  report = []
20
  for text in read_translation("zh"):
21
  samples, sr, time_cost = tts.generate(text)
 
23
  save_csv(REPORTS_DIR / "tts_zh.csv", ["text", "time"], report)
24
 
25
 
26
+ def test_tts_en(tts):
27
+ tts = tts.from_language("en")
28
  report = []
29
  for text in read_translation("en"):
30
+ samples, sr, time_cost = tts.generate(text, speed=1.4)
31
  report.append([text, time_cost])
32
  save_csv(REPORTS_DIR / "tts_en.csv", ["text", "time"], report)
tests/test_models/test_whisper.py CHANGED
@@ -1,17 +1,18 @@
1
  import pytest
2
- from lib.models.whisper import WhisperCPP
3
  from lib.utils import read_audio, save_csv
4
  from test_data.audios import read_emilia
5
  from environment import REPORTS_DIR
6
 
7
  @pytest.fixture(scope="module")
8
- def whisper(get_platform)-> WhisperCPP:
9
  if get_platform == "apple":
 
10
  return WhisperCPP()
11
  elif get_platform == "intel":
12
- pass
 
13
 
14
- def test_inference(whisper: WhisperCPP):
15
  #TODO: 测试CER
16
  report = []
17
  for audio_file, text, duration in read_emilia(count_limit=100):
 
1
  import pytest
 
2
  from lib.utils import read_audio, save_csv
3
  from test_data.audios import read_emilia
4
  from environment import REPORTS_DIR
5
 
6
  @pytest.fixture(scope="module")
7
+ def whisper(get_platform):
8
  if get_platform == "apple":
9
+ from lib.models.mac.whisper import WhisperCPP
10
  return WhisperCPP()
11
  elif get_platform == "intel":
12
+ from lib.models.intel.whisper import WhisperOv
13
+ return WhisperOv()
14
 
15
+ def test_inference(whisper):
16
  #TODO: 测试CER
17
  report = []
18
  for audio_file, text, duration in read_emilia(count_limit=100):