liumaolin committed on
Commit
ef0d09e
·
1 Parent(s): 025ca3f

Refactor TTS architecture: implement runtime interface, TTS manager, universal registry, and factory pattern to support multiple engines.

Browse files
src/VoiceDialogue/main.py CHANGED
@@ -7,7 +7,7 @@ from config.paths import load_third_party
7
 
8
  load_third_party()
9
 
10
- from models.voice_model import tts_config_registry, TTSConfigType
11
  from services.audio.aec_audio_capture import EchoCancellingAudioCapture
12
  from services.audio.audio_answer import TTSAudioGenerator
13
  from services.audio.audio_player import AudioStreamPlayer
@@ -73,7 +73,7 @@ def launch_system(
73
  audio_generator_worker = TTSAudioGenerator(
74
  processed_answer_queue=generated_answer_queue,
75
  tts_generated_audio_queue=tts_generated_audio_queue,
76
- voice_role=tts_speaker_config
77
  )
78
  audio_generator_worker.start()
79
  threads.append(audio_generator_worker)
 
7
 
8
  load_third_party()
9
 
10
+ from services.audio.audio_generator.models import tts_config_registry, TTSConfigType
11
  from services.audio.aec_audio_capture import EchoCancellingAudioCapture
12
  from services.audio.audio_answer import TTSAudioGenerator
13
  from services.audio.audio_player import AudioStreamPlayer
 
73
  audio_generator_worker = TTSAudioGenerator(
74
  processed_answer_queue=generated_answer_queue,
75
  tts_generated_audio_queue=tts_generated_audio_queue,
76
+ tts_config=tts_speaker_config
77
  )
78
  audio_generator_worker.start()
79
  threads.append(audio_generator_worker)
src/VoiceDialogue/models/voice_model/__init__.py DELETED
@@ -1,19 +0,0 @@
1
- from .base import TTSConfigType, VoiceModelStatus, tts_config_registry
2
- from .moyoyo_configs import get_moyoyo_configs
3
- from .moyoyo_tts import MoYoYoTTSConfig, MoYoYoTTSInference
4
-
5
- # 注册MoYoYo TTS
6
- moyoyo_inference = MoYoYoTTSInference()
7
- tts_config_registry.register_inference_engine(TTSConfigType.MOYOYO, moyoyo_inference)
8
-
9
- # 注册所有MoYoYo配置
10
- for config in get_moyoyo_configs():
11
- tts_config_registry.register_config(config)
12
-
13
- __all__ = [
14
- 'TTSConfigType',
15
- 'VoiceModelStatus',
16
- 'tts_config_registry',
17
- 'MoYoYoTTSConfig',
18
- 'MoYoYoTTSInference',
19
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/VoiceDialogue/services/audio/audio_answer.py CHANGED
@@ -1,56 +1,28 @@
1
  import time
2
- import typing
3
  from multiprocessing import Queue
4
  from queue import Empty
5
 
6
- from config.paths import load_third_party
7
-
8
- load_third_party()
9
-
10
- from moyoyo_tts import TTSModule, TTS_Config
11
-
12
  from models.voice_task import VoiceTask
13
  from services.core.base import BaseThread
14
  from services.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
15
- from models.voice_model import MoYoYoTTSConfig
16
 
17
 
18
  class TTSAudioGenerator(BaseThread):
19
  """TTS 音频生成器 - 负责将文本转换为音频"""
20
 
21
  def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
22
- processed_answer_queue, tts_generated_audio_queue, voice_role: MoYoYoTTSConfig):
23
  super().__init__(group, target, name, args, kwargs, daemon=daemon)
24
  self.processed_answer_queue: Queue = processed_answer_queue
25
  self.tts_generated_audio_queue: Queue = tts_generated_audio_queue
26
 
27
- self._device = "cpu" # mps slower 11.66(cpu) vs 39.42(mps)
28
- self._tts_config = voice_role
29
- self.tts_module: typing.Optional[TTSModule] = None
30
-
31
- def setup_tts_config(self, voice_role: MoYoYoTTSConfig):
32
- tts_config = TTS_Config(voice_role.get_runtime_config())
33
- return tts_config
34
-
35
- def warmup(self, warmup_steps=1):
36
- print('[INFO:] Warming up TTS engine...')
37
- warmup_texts = ['Warming up TTS engine.', '预热文字转音频引擎。']
38
- for _ in range(warmup_steps):
39
- for warmup_text in warmup_texts:
40
- self.tts_module.generate_audio(warmup_text)
41
- print('[INFO:] Warm up TTS engine finished.')
42
 
43
  def run(self):
44
 
45
- tts_config = self.setup_tts_config(self._tts_config)
46
-
47
- self.tts_module = TTSModule(tts_config)
48
- self.tts_module.setup_inference_params(
49
- ref_audio=self._tts_config.reference_audio_path,
50
- parallel_infer=False,
51
- **self._tts_config.inference_parameters.model_dump()
52
- )
53
- self.warmup()
54
 
55
  self.is_ready = True
56
 
@@ -80,7 +52,7 @@ class TTSAudioGenerator(BaseThread):
80
  continue
81
 
82
  voice_task.tts_start_time = time.time()
83
- tts_generated_sentence_audio = self.tts_module.generate_audio(voice_task.answer_sentence)
84
  voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
85
  voice_task.tts_end_time = time.time()
86
  # print(f'生成音频:{voice_task.answer_sentence}')
 
1
  import time
 
2
  from multiprocessing import Queue
3
  from queue import Empty
4
 
 
 
 
 
 
 
5
  from models.voice_task import VoiceTask
6
  from services.core.base import BaseThread
7
  from services.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
8
+ from .audio_generator import tts_manager, BaseTTSConfig
9
 
10
 
11
  class TTSAudioGenerator(BaseThread):
12
  """TTS 音频生成器 - 负责将文本转换为音频"""
13
 
14
  def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
15
+ processed_answer_queue, tts_generated_audio_queue, tts_config: BaseTTSConfig):
16
  super().__init__(group, target, name, args, kwargs, daemon=daemon)
17
  self.processed_answer_queue: Queue = processed_answer_queue
18
  self.tts_generated_audio_queue: Queue = tts_generated_audio_queue
19
 
20
+ self.tts_instance = tts_manager.create_tts(tts_config)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def run(self):
23
 
24
+ self.tts_instance.setup()
25
+ self.tts_instance.warmup()
 
 
 
 
 
 
 
26
 
27
  self.is_ready = True
28
 
 
52
  continue
53
 
54
  voice_task.tts_start_time = time.time()
55
+ tts_generated_sentence_audio = self.tts_instance.synthesize(voice_task.answer_sentence)
56
  voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
57
  voice_task.tts_end_time = time.time()
58
  # print(f'生成音频:{voice_task.answer_sentence}')
src/VoiceDialogue/services/audio/audio_generator/__init__.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Audio Generator Module
3
+
4
+ 提供文本转语音(TTS)功能的完整解决方案,包括:
5
+ - TTS管理器和注册系统
6
+ - 多种TTS引擎支持
7
+ - 配置管理
8
+ - 运行时接口
9
+ """
10
+
11
+ from .models import (
12
+ TTSConfigType,
13
+ VoiceModelStatus,
14
+ tts_config_registry,
15
+ BaseTTSConfig
16
+ )
17
+ from .runtime import (
18
+ TTSInterface,
19
+ TTSFactory
20
+ )
21
+ from .tts_manager import (
22
+ TTSManager,
23
+ TTSRegistryTables,
24
+ tts_manager,
25
+ tts_tables,
26
+ register_all_tts
27
+ )
28
+
29
+ __version__ = "1.0.0"
30
+
31
+ __all__ = [
32
+ # 管理器和注册表
33
+ 'TTSManager',
34
+ 'TTSRegistryTables',
35
+ 'tts_manager',
36
+ 'tts_tables',
37
+ 'register_all_tts',
38
+
39
+ # 配置模型
40
+ 'TTSConfigType',
41
+ 'VoiceModelStatus',
42
+ 'tts_config_registry',
43
+ 'BaseTTSConfig',
44
+
45
+ # 运行时接口
46
+ 'TTSInterface',
47
+ 'TTSFactory',
48
+ ]
49
+
50
+ # 模块初始化时自动注册所有TTS实现
51
+ # register_all_tts() 已在 tts_manager 模块中自动调用
src/VoiceDialogue/services/audio/audio_generator/configs/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configs Module
3
+
4
+ TTS配置模块,包含:
5
+ - 各种TTS引擎的预配置
6
+ - 配置加载函数
7
+ """
8
+
9
+ # 导入配置加载函数
10
+ try:
11
+ from .moyoyo import get_moyoyo_configs
12
+
13
+ __all__ = [
14
+ 'get_moyoyo_configs',
15
+ ]
16
+
17
+ # 配置获取函数映射
18
+ CONFIG_GETTERS = {
19
+ 'moyoyo': get_moyoyo_configs,
20
+ }
21
+
22
+ except ImportError as e:
23
+ import logging
24
+
25
+ logging.warning(f"Failed to import some config modules: {e}")
26
+ __all__ = []
27
+ CONFIG_GETTERS = {}
28
+
29
+
30
def get_all_configs():
    """Return every available TTS config as one flat list.

    Each getter stored in ``CONFIG_GETTERS`` is called in turn and its result
    is appended to the output. A getter that raises is logged and skipped, so
    the remaining getters still contribute their configs.
    """
    collected = []
    for loader in CONFIG_GETTERS.values():
        try:
            collected.extend(loader())
        except Exception as e:
            import logging
            logging.error(f"Failed to load configs from {loader.__name__}: {e}")
    return collected
41
+
42
+
43
def get_configs_by_type(tts_type: str):
    """Return the configs produced by the getter registered for ``tts_type``.

    An empty list is returned when no getter is registered under that name,
    or when the getter raises (the failure is logged).
    """
    if tts_type not in CONFIG_GETTERS:
        return []

    loader = CONFIG_GETTERS[tts_type]
    try:
        return loader()
    except Exception as e:
        import logging
        logging.error(f"Failed to load configs for {tts_type}: {e}")
    return []
src/VoiceDialogue/{models/voice_model/moyoyo_configs.py → services/audio/audio_generator/configs/moyoyo.py} RENAMED
@@ -1,4 +1,4 @@
1
- from .moyoyo_tts import MoYoYoTTSConfig
2
 
3
  # 基础预训练模型文件映射
4
  BASE_PRETRAINED_FILES = {
 
1
+ from services.audio.audio_generator.models.moyoyo import MoYoYoTTSConfig
2
 
3
  # 基础预训练模型文件映射
4
  BASE_PRETRAINED_FILES = {
src/VoiceDialogue/services/audio/audio_generator/models/__init__.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Models Module
3
+
4
+ TTS模型定义模块,包含:
5
+ - 基础配置抽象类
6
+ - 各种TTS引擎的配置模型
7
+ - 全局配置注册表
8
+ """
9
+
10
+ from .base import (
11
+ TTSConfigType,
12
+ VoiceModelStatus,
13
+ BaseTTSConfig,
14
+ TTSConfigRegistry,
15
+ tts_config_registry
16
+ )
17
+
18
+ # 导入具体的配置模型
19
+ try:
20
+ from .moyoyo import MoYoYoTTSConfig
21
+
22
+ _moyoyo_available = True
23
+ except ImportError:
24
+ _moyoyo_available = False
25
+ import logging
26
+
27
+ logging.warning("MoYoYo TTS config not available")
28
+
29
+ # 动态构建导出列表
30
+ __all__ = [
31
+ 'TTSConfigType',
32
+ 'VoiceModelStatus',
33
+ 'BaseTTSConfig',
34
+ 'TTSConfigRegistry',
35
+ 'tts_config_registry',
36
+ ]
37
+
38
+ if _moyoyo_available:
39
+ __all__.append('MoYoYoTTSConfig')
40
+
41
+
42
+ # 自动注册所有可用的配置
43
def _auto_register_configs():
    """Register every bundled TTS config with the global registry.

    Only MoYoYo presets are handled here, and only when the MoYoYo config
    model could be imported. Any failure is logged instead of propagating.
    """
    try:
        if not _moyoyo_available:
            return
        # Imported lazily, at call time rather than at module load.
        from ..configs.moyoyo import get_moyoyo_configs
        for preset in get_moyoyo_configs():
            tts_config_registry.register_config(preset)
    except Exception as e:
        import logging
        logging.error(f"Failed to auto-register configs: {e}")
53
+
54
+
55
+ # 模块加载时自动注册配置
56
+ _auto_register_configs()
57
+
58
+
59
+ # 配置统计信息
60
def get_config_stats():
    """Summarise how many configs are registered, in total and per TTS type.

    Returns:
        dict: ``{'total_configs': int, 'configs_by_type': {type_value: int}}``
        with one entry per member of ``TTSConfigType``.
    """
    total = len(tts_config_registry.get_all_configs())
    per_type = {
        kind.value: len(tts_config_registry.get_configs_by_type(kind))
        for kind in TTSConfigType
    }
    return {
        'total_configs': total,
        'configs_by_type': per_type,
    }
src/VoiceDialogue/{models/voice_model → services/audio/audio_generator/models}/base.py RENAMED
@@ -52,36 +52,17 @@ class BaseTTSConfig(BaseModel, ABC):
52
  pass
53
 
54
 
55
- class BaseTTSInference(ABC):
56
- """TTS推理基类"""
57
-
58
- @abstractmethod
59
- def generate_speech(self, text: str, config: BaseTTSConfig, **kwargs) -> bytes:
60
- """生成语音"""
61
- pass
62
-
63
- @abstractmethod
64
- def is_supported_config(self, config: BaseTTSConfig) -> bool:
65
- """检查是否支持此配置"""
66
- pass
67
-
68
-
69
  class TTSConfigRegistry:
70
  """TTS注册表,管理所有TTS引擎和配置"""
71
 
72
  def __init__(self):
73
  self._configs: dict[str, BaseTTSConfig] = {}
74
- self._inference_engines: dict[TTSConfigType, BaseTTSInference] = {}
75
 
76
  def register_config(self, config: BaseTTSConfig):
77
  """注册TTS配置"""
78
  key = f"{config.tts_type.value}:{config.character_name}"
79
  self._configs[key] = config
80
 
81
- def register_inference_engine(self, tts_type: TTSConfigType, engine: BaseTTSInference):
82
- """注册TTS推理引擎"""
83
- self._inference_engines[tts_type] = engine
84
-
85
  def get_config(self, tts_type: TTSConfigType, character_name: str) -> BaseTTSConfig:
86
  """获取指定配置"""
87
  key = f"{tts_type.value}:{character_name}"
@@ -96,24 +77,6 @@ class TTSConfigRegistry:
96
  """获取所有配置"""
97
  return list(self._configs.values())
98
 
99
- def get_inference_engine(self, tts_type: TTSConfigType) -> BaseTTSInference:
100
- """获取推理引擎"""
101
- return self._inference_engines.get(tts_type)
102
-
103
- def generate_speech(self, tts_type: TTSConfigType, character_name: str,
104
- text: str, **kwargs) -> bytes:
105
- """生成语音的统一接口"""
106
- config = self.get_config(tts_type, character_name)
107
- engine = self.get_inference_engine(tts_type)
108
-
109
- if not config or not engine:
110
- raise ValueError(f"TTS配置或引擎不存在: {tts_type.value}:{character_name}")
111
-
112
- if not engine.is_supported_config(config):
113
- raise ValueError(f"推理引擎不支持此配置: {tts_type.value}:{character_name}")
114
-
115
- return engine.generate_speech(text, config, **kwargs)
116
-
117
 
118
  # 全局TTS注册表实例
119
  tts_config_registry = TTSConfigRegistry()
 
52
  pass
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  class TTSConfigRegistry:
56
  """TTS注册表,管理所有TTS引擎和配置"""
57
 
58
  def __init__(self):
59
  self._configs: dict[str, BaseTTSConfig] = {}
 
60
 
61
  def register_config(self, config: BaseTTSConfig):
62
  """注册TTS配置"""
63
  key = f"{config.tts_type.value}:{config.character_name}"
64
  self._configs[key] = config
65
 
 
 
 
 
66
  def get_config(self, tts_type: TTSConfigType, character_name: str) -> BaseTTSConfig:
67
  """获取指定配置"""
68
  key = f"{tts_type.value}:{character_name}"
 
77
  """获取所有配置"""
78
  return list(self._configs.values())
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # 全局TTS注册表实例
82
  tts_config_registry = TTSConfigRegistry()
src/VoiceDialogue/{models/voice_model/moyoyo_tts.py → services/audio/audio_generator/models/moyoyo.py} RENAMED
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
6
 
7
  from config.settings import settings
8
  from utils.download_utils import download_file_from_huggingface
9
- from .base import BaseTTSConfig, BaseTTSInference, TTSConfigType, VoiceModelStatus
10
 
11
 
12
  class InferenceParameters(BaseModel):
@@ -140,20 +140,3 @@ class MoYoYoTTSConfig(BaseTTSConfig):
140
  'bert_base_path': self.bert_model_path,
141
  }
142
  }
143
-
144
-
145
- class MoYoYoTTSInference(BaseTTSInference):
146
- """MoYoYo TTS推理引擎"""
147
-
148
- def generate_speech(self, text: str, config: BaseTTSConfig, **kwargs) -> bytes:
149
- """生成语音"""
150
- if not isinstance(config, MoYoYoTTSConfig):
151
- raise ValueError("配置类型不匹配,需要MoYoYoTTSConfig")
152
-
153
- # 这里实现MoYoYo TTS的具体推理逻辑
154
- # 暂时返回空字节,实际实现需要调用相应的TTS模型
155
- return b""
156
-
157
- def is_supported_config(self, config: BaseTTSConfig) -> bool:
158
- """检查是否支持此配置"""
159
- return isinstance(config, MoYoYoTTSConfig)
 
6
 
7
  from config.settings import settings
8
  from utils.download_utils import download_file_from_huggingface
9
+ from .base import BaseTTSConfig, TTSConfigType, VoiceModelStatus
10
 
11
 
12
  class InferenceParameters(BaseModel):
 
140
  'bert_base_path': self.bert_model_path,
141
  }
142
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/VoiceDialogue/services/audio/audio_generator/runtime/__init__.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Runtime Module
3
+
4
+ TTS运行时模块,包含:
5
+ - TTS抽象接口定义
6
+ - TTS工厂类
7
+ - 具体TTS实现
8
+ """
9
+
10
+ from .interface import TTSInterface, TTSFactory
11
+
12
+ # 导入所有TTS实现,确保注册装饰器被执行
13
+ try:
14
+ from .moyoyo import MoYoYoTTS
15
+
16
+ __all__ = [
17
+ 'TTSInterface',
18
+ 'TTSFactory',
19
+ 'MoYoYoTTS'
20
+ ]
21
+ except ImportError as e:
22
+ # 如果某些TTS实现无法导入,不影响整体功能
23
+ import logging
24
+
25
+ logging.warning(f"Failed to import some TTS implementations: {e}")
26
+ __all__ = [
27
+ 'TTSInterface',
28
+ 'TTSFactory'
29
+ ]
30
+
31
+ # 可用的TTS实现列表
32
+ AVAILABLE_TTS_IMPLEMENTATIONS = [impl for impl in __all__ if impl.endswith('TTS')]
src/VoiceDialogue/services/audio/audio_generator/runtime/interface.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Tuple
3
+
4
+ import numpy as np
5
+
6
+ from ..models.base import BaseTTSConfig
7
+
8
+
9
class TTSInterface(ABC):
    """Abstract interface that every TTS engine implementation derives from.

    Subclasses implement ``setup``, ``warmup`` and ``synthesize``. The base
    class only stores the config and tracks a readiness flag.
    """

    def __init__(self, config: BaseTTSConfig):
        self.config = config
        # Starts False; implementations set it through the ``is_ready`` setter.
        self._is_ready = False

    @abstractmethod
    def setup(self, **kwargs) -> None:
        """Initialise the TTS service.

        Args:
            **kwargs: Extra, implementation-specific initialisation options.
        """
        pass

    @abstractmethod
    def warmup(self, warmup_steps: int = 1) -> None:
        """Warm up the TTS engine.

        Args:
            warmup_steps: Number of warm-up rounds to run.
        """
        pass

    @abstractmethod
    def synthesize(self, text: str, **kwargs) -> Tuple[np.ndarray, int]:
        """Convert text to speech.

        Args:
            text: The text to synthesise.
            **kwargs: Extra, implementation-specific synthesis options.

        Returns:
            Tuple[np.ndarray, int]: ``(audio_data, sample_rate)``.
        """
        pass

    @property
    def is_ready(self) -> bool:
        """Whether the TTS service is ready to be used."""
        return self._is_ready

    @is_ready.setter
    def is_ready(self, value: bool):
        self._is_ready = value

    def get_config(self) -> BaseTTSConfig:
        """Return the config this instance was created with."""
        return self.config
67
+
68
+
69
class TTSFactory:
    """Factory that maps a provider name to a TTS implementation class."""

    # Class-level registry shared by all users of the factory:
    # provider name -> TTS implementation class.
    _registry = {}

    @classmethod
    def register(cls, provider_name: str, tts_class):
        """Register ``tts_class`` under ``provider_name`` (overwrites any existing entry)."""
        cls._registry[provider_name] = tts_class

    @classmethod
    def create(cls, config: BaseTTSConfig) -> TTSInterface:
        """Create a TTS instance for the given config.

        The provider name is read from ``config.provider.value`` when the
        config has a ``provider`` attribute. Otherwise it falls back to
        ``config.tts_type.value``, the attribute the config registry and the
        TTS manager use to identify the engine.

        Args:
            config: TTS config object.

        Returns:
            TTSInterface: A new instance of the registered class, built with ``config``.

        Raises:
            ValueError: If no class is registered for the provider.
        """
        # NOTE(review): BaseTTSConfig's fields are not visible from here;
        # confirm whether `provider` exists at all or `tts_type` is the only key.
        provider_enum = getattr(config, "provider", None)
        if provider_enum is None:
            provider_enum = config.tts_type
        provider = provider_enum.value

        if provider not in cls._registry:
            raise ValueError(f"不支持的TTS提供者: {provider}")

        tts_class = cls._registry[provider]
        return tts_class(config)

    @classmethod
    def list_providers(cls):
        """Return the names of all registered providers."""
        return list(cls._registry.keys())
src/VoiceDialogue/services/audio/audio_generator/runtime/moyoyo.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import typing
2
+ from typing import Tuple
3
+
4
+ import numpy as np
5
+
6
+ from config.paths import load_third_party
7
+ from .interface import TTSInterface
8
+ from ..models.moyoyo import MoYoYoTTSConfig
9
+ from ..tts_manager import tts_tables
10
+
11
+ load_third_party()
12
+
13
+ from moyoyo_tts import TTSModule, TTS_Config
14
+
15
+
16
@tts_tables.register("tts_classes", "moyoyo")
class MoYoYoTTS(TTSInterface):
    """MoYoYo TTS implementation, registered in ``tts_classes`` under ``"moyoyo"``."""

    def __init__(self, config: MoYoYoTTSConfig):
        super().__init__(config)
        # The engine is created in setup(), not here.
        self.tts_module: typing.Optional[TTSModule] = None

    def setup(self, **kwargs) -> None:
        """Build the TTS module from the config and mark the instance ready.

        Args:
            **kwargs: Accepted for interface compatibility; not used here.
        """
        tts_config = TTS_Config(self.config.get_runtime_config())
        self.tts_module = TTSModule(tts_config)
        self.tts_module.setup_inference_params(
            ref_audio=self.config.reference_audio_path,
            parallel_infer=False,
            **self.config.inference_parameters.model_dump()
        )
        self.is_ready = True

    def warmup(self, warmup_steps: int = 1) -> None:
        """Warm up the engine by synthesising two fixed sentences per step.

        Args:
            warmup_steps: Number of warm-up rounds.

        Raises:
            RuntimeError: If called before ``setup()``.
        """
        # tts_module is None until setup() runs; fail with the same explicit
        # error synthesize() uses instead of an AttributeError on None.
        if not self.is_ready:
            raise RuntimeError("TTS module is not ready. Please call setup() first.")

        print('[INFO:] Warming up TTS engine...')
        warmup_texts = ['Warming up TTS engine.', '预热文字转音频引擎。']
        for _ in range(warmup_steps):
            for warmup_text in warmup_texts:
                self.tts_module.generate_audio(warmup_text, warmup=True)
        print('[INFO:] Warm up TTS engine finished.')

    def synthesize(self, text: str, **kwargs) -> Tuple[np.ndarray, int]:
        """Synthesise speech for ``text``.

        Args:
            text: The text to synthesise.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Tuple[np.ndarray, int]: ``(audio_data, sample_rate)``.

        Raises:
            RuntimeError: If called before ``setup()``.
        """
        if not self.is_ready:
            raise RuntimeError("TTS module is not ready. Please call setup() first.")

        # Only the first item of the result is used; it is unpacked as
        # (sample_rate, audio_data) and returned in the interface's order.
        (sample_rate, audio_data), *_ = self.tts_module.generate_audio(text)
        return audio_data, sample_rate
src/VoiceDialogue/services/audio/audio_generator/tts_manager.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import inspect
3
+ from dataclasses import dataclass
4
+ import re
5
+ from typing import Dict, Type, Optional
6
+ from .runtime.interface import TTSInterface
7
+ from .models.base import BaseTTSConfig, TTSConfigType
8
+
9
+
10
@dataclass
class TTSRegistryTables:
    """Registry tables that hold the available TTS implementation classes."""

    # register name -> TTS implementation class; None becomes {} in __post_init__.
    tts_classes: Optional[Dict[str, Type[TTSInterface]]] = None

    def __post_init__(self):
        if self.tts_classes is None:
            self.tts_classes = {}

    @staticmethod
    def _describe_location(tts_class) -> str:
        """Return ``file:line`` for a class, or ``<unknown>`` if it has no retrievable source."""
        try:
            class_file = inspect.getfile(tts_class)
            class_line = inspect.getsourcelines(tts_class)[1]
        except (OSError, TypeError):
            # Built-in or dynamically created classes have no source file;
            # a diagnostic printout should not crash because of them.
            return "<unknown>"
        # Shorten the path so it starts at the project directory.
        pattern = r"^.+/VoiceDialogue/"
        class_file = re.sub(pattern, "VoiceDialogue/", class_file)
        return f"{class_file}:{class_line}"

    def print(self, key: Optional[str] = None) -> None:
        """Print the registered TTS classes as a text table.

        Args:
            key: When given, the ``tts_classes`` table is printed only if
                ``"tts_classes" in key``; ``None`` prints it unconditionally.
        """
        print("\nTTS Registry Tables: \n")
        headers = ["register name", "class name", "class location"]

        if self.tts_classes and (key is None or "tts_classes" in key):
            print("----------- ** tts_classes ** --------------")
            metas = [
                [register_key, tts_class.__name__, self._describe_location(tts_class)]
                for register_key, tts_class in self.tts_classes.items()
            ]

            metas.sort(key=lambda x: x[0])
            data = [headers] + metas
            # Width of each column = its longest cell, header included.
            col_widths = [max(len(str(item)) for item in col) for col in zip(*data)]

            for row in data:
                print(
                    "| "
                    + " | ".join(str(item).ljust(width) for item, width in zip(row, col_widths))
                    + " |"
                )
            print("\n")

    def register(self, register_table_key: str, key: str = None) -> callable:
        """Return a class decorator that stores the class in a registry table.

        Args:
            register_table_key: Attribute name of the table to register into;
                a new dict is created on this object if it does not exist.
            key: Name to register under; defaults to the class's ``__name__``.

        Returns:
            A decorator that registers the class and returns it unchanged.
        """

        def decorator(target_class):
            if not hasattr(self, register_table_key):
                setattr(self, register_table_key, {})
                logging.debug(f"New TTS registry table added: {register_table_key}")

            registry = getattr(self, register_table_key)
            registry_key = key if key is not None else target_class.__name__

            # Re-registration is allowed: the newer class replaces the older one.
            if registry_key in registry:
                logging.debug(
                    f"Key {registry_key} already exists in {register_table_key}, re-register"
                )

            registry[registry_key] = target_class
            logging.info(f"Registered TTS class: {registry_key} -> {target_class.__name__}")
            return target_class

        return decorator
74
+
75
+
76
+ # 全局TTS注册表实例
77
+ tts_tables = TTSRegistryTables()
78
+
79
+
80
class TTSManager:
    """Creates TTS instances from configs and caches them on request."""

    def __init__(self):
        # Cache used by get_or_create_tts, keyed by "<tts_type>:<character_name>".
        self._tts_instances: Dict[str, TTSInterface] = {}

    def create_tts(self, config: BaseTTSConfig) -> TTSInterface:
        """Build a new TTS instance for ``config``.

        Args:
            config: TTS config object; ``config.tts_type.value`` selects the class.

        Returns:
            TTSInterface: A new instance of the registered class.

        Raises:
            ValueError: If no class is registered for the config's TTS type.
        """
        type_name = config.tts_type.value
        registered = tts_tables.tts_classes

        if type_name in registered:
            implementation = registered[type_name]
            return implementation(config)

        raise ValueError(f"未注册的TTS类型: {type_name}. 可用类型: {list(tts_tables.tts_classes.keys())}")

    def get_or_create_tts(self, config: BaseTTSConfig) -> TTSInterface:
        """Return the cached instance for ``config``, creating it on first use.

        Instances are cached per ``tts_type`` / ``character_name`` pair.

        Args:
            config: TTS config object.

        Returns:
            TTSInterface: The cached (or newly created) instance.
        """
        cache_key = f"{config.tts_type.value}:{config.character_name}"

        if cache_key in self._tts_instances:
            return self._tts_instances[cache_key]

        created = self.create_tts(config)
        self._tts_instances[cache_key] = created
        return created

    def list_registered_tts(self) -> Dict[str, Type[TTSInterface]]:
        """Return a copy of the registered name -> class mapping."""
        return tts_tables.tts_classes.copy()

    def is_tts_registered(self, tts_type: str) -> bool:
        """Tell whether a class is registered under ``tts_type``."""
        return tts_type in tts_tables.tts_classes

    def print_registry(self):
        """Print the registry tables."""
        tts_tables.print()
135
+
136
+
137
+ # 全局TTS管理器实例
138
+ tts_manager = TTSManager()
139
+
140
+
141
def register_all_tts():
    """Import every TTS implementation module found in the ``runtime`` directory.

    Each ``*.py`` file next to this module under ``runtime/`` is imported,
    except ``__init__.py`` and ``interface.py``. Import failures are logged
    and do not stop the scan.

    Modules are imported relative to this module's own package
    (``__package__``) rather than a hard-coded dotted path, so they load
    under the same name the rest of the application imports this package by.
    """
    import importlib
    from pathlib import Path

    package_name = __package__
    if not package_name:
        # Without a parent package a relative import cannot be resolved.
        logging.warning("Cannot auto-register TTS modules: no parent package for relative import")
        return

    runtime_dir = Path(__file__).parent / "runtime"
    skipped_files = {"__init__.py", "interface.py"}

    # Sorted so the import order does not depend on directory listing order.
    for py_file in sorted(runtime_dir.glob("*.py")):
        if py_file.name in skipped_files:
            continue

        module_name = py_file.stem
        try:
            importlib.import_module(f".runtime.{module_name}", package=package_name)
            logging.info(f"Successfully imported TTS module: {module_name}")
        except ImportError as e:
            logging.warning(f"Failed to import TTS module {module_name}: {e}")
        except Exception as e:
            logging.error(f"Unexpected error importing TTS module {module_name}: {e}")
165
+
166
+
167
+ # 在模块导入时自动注册所有TTS
168
+ register_all_tts()
src/VoiceDialogue/services/audio/audio_player.py CHANGED
@@ -70,7 +70,8 @@ class AudioStreamPlayer(BaseThread):
70
 
71
  voice_state_manager.set_audio_playing(task_id)
72
  voice_state_manager.reset_task_id()
73
- self.playing_audio(voice_task.tts_generated_sentence_audio)
 
74
 
75
  if self.audio_playing_queue.empty():
76
  print(f'回答播放完了')
@@ -90,11 +91,9 @@ class AudioStreamPlayer(BaseThread):
90
 
91
  chat_history_cache[voice_task.session_id] = chat_history
92
 
93
- def playing_audio(self, tts_generated_audio):
94
- audio_data = tts_generated_audio[0][1]
95
- samplerate = tts_generated_audio[0][0]
96
  with tempfile.NamedTemporaryFile('w+b', suffix='.wav') as soundfile:
97
  # print(f'================soundfile : {soundfile.name}')
98
- sf.write(soundfile, audio_data, samplerate=samplerate, subtype='PCM_16', closefd=False)
99
  # print(soundfile.name)
100
  playsound(soundfile.name, block=True)
 
70
 
71
  voice_state_manager.set_audio_playing(task_id)
72
  voice_state_manager.reset_task_id()
73
+ audio_data, sample_rate = voice_task.tts_generated_sentence_audio
74
+ self.playing_audio(audio_data, sample_rate)
75
 
76
  if self.audio_playing_queue.empty():
77
  print(f'回答播放完了')
 
91
 
92
  chat_history_cache[voice_task.session_id] = chat_history
93
 
94
+ def playing_audio(self, audio_data, sample_rate=16000):
 
 
95
  with tempfile.NamedTemporaryFile('w+b', suffix='.wav') as soundfile:
96
  # print(f'================soundfile : {soundfile.name}')
97
+ sf.write(soundfile, audio_data, samplerate=sample_rate, subtype='PCM_16', closefd=False)
98
  # print(soundfile.name)
99
  playsound(soundfile.name, block=True)