liumaolin committed on
Commit
a28f7e3
·
1 Parent(s): a16e0e5

Extend TTS registry functionality and integrate default system configurations: implement prioritization logic, language preference handling, and fallback mechanisms in `TTSConfigRegistry`; refactor service factory and lifespan management to support dynamic TTS selection and initialization.

Browse files
src/VoiceDialogue/api/core/__init__.py CHANGED
@@ -1,13 +1,13 @@
1
  from .config import AppConfig, TTSConfigInitializer
2
  from .lifespan import lifespan, LifespanManager
3
- from .service_factories import ServiceFactories, get_service_definitions
4
  from .service_manager import ServiceManager, ServiceDefinition
5
 
6
  __all__ = [
7
  'ServiceManager',
8
  'ServiceDefinition',
9
  'ServiceFactories',
10
- 'get_service_definitions',
11
  'AppConfig',
12
  'TTSConfigInitializer',
13
  'lifespan',
 
1
  from .config import AppConfig, TTSConfigInitializer
2
  from .lifespan import lifespan, LifespanManager
3
+ from .service_factories import ServiceFactories, get_core_voice_service_definitions
4
  from .service_manager import ServiceManager, ServiceDefinition
5
 
6
  __all__ = [
7
  'ServiceManager',
8
  'ServiceDefinition',
9
  'ServiceFactories',
10
+ 'get_core_voice_service_definitions',
11
  'AppConfig',
12
  'TTSConfigInitializer',
13
  'lifespan',
src/VoiceDialogue/api/core/lifespan.py CHANGED
@@ -6,7 +6,7 @@ from fastapi import FastAPI
6
 
7
  from utils import get_system_language
8
  from .config import TTSConfigInitializer
9
- from .service_factories import get_service_definitions
10
  from .service_manager import ServiceManager
11
 
12
  logger = logging.getLogger(__name__)
@@ -34,7 +34,7 @@ class LifespanManager:
34
  self._update_app_state(tts_config)
35
 
36
  # 获取服务定义
37
- service_definitions = get_service_definitions(system_language)
38
 
39
  # 启动所有服务
40
  await self._start_all_services(service_definitions)
 
6
 
7
  from utils import get_system_language
8
  from .config import TTSConfigInitializer
9
+ from .service_factories import get_core_voice_service_definitions
10
  from .service_manager import ServiceManager
11
 
12
  logger = logging.getLogger(__name__)
 
34
  self._update_app_state(tts_config)
35
 
36
  # 获取服务定义
37
+ service_definitions = get_core_voice_service_definitions(system_language)
38
 
39
  # 启动所有服务
40
  await self._start_all_services(service_definitions)
src/VoiceDialogue/api/core/service_factories.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import Any
2
 
3
  from services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer
4
- from services.audio.audio_generator import BaseTTSConfig
5
  from services.core.constants import (
6
  transcribed_text_queue, text_input_queue, audio_output_queue,
7
  audio_frames_queue, user_voice_queue
@@ -47,12 +47,15 @@ class ServiceFactories:
47
  )
48
 
49
  @staticmethod
50
- def create_tts_audio_generator(tts_speaker_config: BaseTTSConfig) -> TTSAudioGenerator:
51
  """创建TTS音频生成服务"""
 
 
 
52
  return TTSAudioGenerator(
53
  text_input_queue=text_input_queue,
54
  audio_output_queue=audio_output_queue,
55
- tts_config=tts_speaker_config
56
  )
57
 
58
  @staticmethod
@@ -60,39 +63,31 @@ class ServiceFactories:
60
  """创建音频播放服务"""
61
  return AudioStreamPlayer(audio_playing_queue=audio_output_queue)
62
 
63
- @staticmethod
64
- def create_tts_config_loader() -> Any:
65
- """创建TTS配置加载器的虚拟服务"""
66
-
67
- class TTSConfigLoader:
68
- def __init__(self):
69
- self.is_ready = False
70
- self._running = False
71
-
72
- def start(self):
73
- self._running = True
74
- self.is_ready = True
75
-
76
- def stop(self):
77
- self._running = False
78
-
79
- def is_alive(self):
80
- return self._running
81
-
82
- return TTSConfigLoader()
83
-
84
-
85
- def get_service_definitions(system_language: str) -> list:
86
- """获取服务定义配置"""
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  return [
89
- ServiceDefinition(
90
- name="tts_config_loader",
91
- factory=ServiceFactories.create_tts_config_loader,
92
- required=False,
93
- startup_timeout=10
94
- ),
95
-
 
 
96
  ServiceDefinition(
97
  name="speech_monitor",
98
  factory=ServiceFactories.create_speech_monitor,
@@ -100,21 +95,45 @@ def get_service_definitions(system_language: str) -> list:
100
  health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
101
  ),
102
 
 
103
  ServiceDefinition(
104
  name="asr_worker",
105
  factory=lambda: ServiceFactories.create_asr_worker(system_language),
106
  dependencies=["speech_monitor"]
107
  ),
108
 
 
109
  ServiceDefinition(
110
  name="llm_generator",
111
  factory=ServiceFactories.create_llm_generator,
112
- dependencies=["asr_worker"]
 
 
 
 
 
 
 
 
 
113
  ),
114
 
 
115
  ServiceDefinition(
116
  name="audio_player",
117
  factory=ServiceFactories.create_audio_player,
118
- dependencies=["llm_generator"]
119
  )
120
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Any
2
 
3
  from services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer
4
+ from services.audio.audio_generator import BaseTTSConfig, tts_config_registry
5
  from services.core.constants import (
6
  transcribed_text_queue, text_input_queue, audio_output_queue,
7
  audio_frames_queue, user_voice_queue
 
47
  )
48
 
49
  @staticmethod
50
def create_tts_audio_generator(tts_config: BaseTTSConfig = None) -> TTSAudioGenerator:
    """Build the TTS audio generation service.

    Args:
        tts_config: TTS configuration to use. When ``None``, the configuration
            returned by ``tts_config_registry.get_default_config_for_system()``
            is used instead.

    Returns:
        TTSAudioGenerator: generator wired to ``text_input_queue`` as its input
        and ``audio_output_queue`` as its output.
    """
    # Only an explicit None triggers the registry fallback; any config object
    # supplied by the caller is passed through untouched.
    effective_config = (
        tts_config
        if tts_config is not None
        else tts_config_registry.get_default_config_for_system()
    )

    generator = TTSAudioGenerator(
        text_input_queue=text_input_queue,
        audio_output_queue=audio_output_queue,
        tts_config=effective_config,
    )
    return generator
60
 
61
  @staticmethod
 
63
  """创建音频播放服务"""
64
  return AudioStreamPlayer(audio_playing_queue=audio_output_queue)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ def get_core_voice_service_definitions(system_language: str, tts_config: BaseTTSConfig = None) -> list:
68
+ """
69
+ 获取核心语音对话服务定义配置
70
+
71
+ 这些服务构成完整的语音对话处理流水线:
72
+ 1. 音频捕获 -> 2. 语音监控 -> 3. 语音识别 -> 4. 文本生成 -> 5. 语音合成 -> 6. 音频播放
73
+
74
+ Args:
75
+ system_language: 系统默认语言
76
+ tts_config: TTS配置,如果为None则使用默认配置
77
+
78
+ Returns:
79
+ list: 服务定义列表
80
+ """
81
  return [
82
+ # # 音频捕获服务(最底层服务)
83
+ # ServiceDefinition(
84
+ # name="audio_capture",
85
+ # factory=ServiceFactories.create_audio_capture,
86
+ # dependencies=[],
87
+ # health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
88
+ # ),
89
+
90
+ # 语音状态监控服务
91
  ServiceDefinition(
92
  name="speech_monitor",
93
  factory=ServiceFactories.create_speech_monitor,
 
95
  health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
96
  ),
97
 
98
+ # ASR语音识别服务
99
  ServiceDefinition(
100
  name="asr_worker",
101
  factory=lambda: ServiceFactories.create_asr_worker(system_language),
102
  dependencies=["speech_monitor"]
103
  ),
104
 
105
+ # LLM文本生成服务
106
  ServiceDefinition(
107
  name="llm_generator",
108
  factory=ServiceFactories.create_llm_generator,
109
+ dependencies=["asr_worker"],
110
+ startup_timeout=60 # LLM服务启动较慢,增加超时时间
111
+ ),
112
+
113
+ # TTS音频生成服务
114
+ ServiceDefinition(
115
+ name="tts_audio_generator",
116
+ factory=lambda: ServiceFactories.create_tts_audio_generator(tts_config),
117
+ dependencies=["llm_generator"],
118
+ startup_timeout=45 # TTS模型加载较慢
119
  ),
120
 
121
+ # 音频播放服务(最终输出服务)
122
  ServiceDefinition(
123
  name="audio_player",
124
  factory=ServiceFactories.create_audio_player,
125
+ dependencies=["tts_audio_generator"]
126
  )
127
  ]
128
+
129
+
130
+ def get_service_health_checkers() -> dict:
131
+ """获取服务健康检查器映射"""
132
+ return {
133
+ "audio_capture": lambda service: hasattr(service, 'is_ready') and service.is_ready,
134
+ "speech_monitor": lambda service: hasattr(service, 'is_ready') and service.is_ready,
135
+ "asr_worker": lambda service: hasattr(service, 'is_ready') and service.is_ready,
136
+ "llm_generator": lambda service: hasattr(service, 'is_ready') and service.is_ready,
137
+ "tts_audio_generator": lambda service: hasattr(service, 'is_ready') and service.is_ready,
138
+ "audio_player": lambda service: hasattr(service, 'is_ready') and service.is_ready,
139
+ }
src/VoiceDialogue/services/audio/audio_generator/models/base.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import typing
2
  from abc import ABC, abstractmethod
3
  from enum import Enum
@@ -5,13 +6,13 @@ from pathlib import Path
5
 
6
  from pydantic import BaseModel
7
 
 
 
8
 
9
  class TTSConfigType(Enum):
10
  """TTS引擎类型枚举"""
11
  MOYOYO = 'moyoyo'
12
- EDGE_TTS = 'edge_tts'
13
- BARK = 'bark'
14
- # 可以添加更多TTS引擎
15
 
16
 
17
  class VoiceModelStatus(Enum):
@@ -57,6 +58,10 @@ class TTSConfigRegistry:
57
 
58
  def __init__(self):
59
  self._configs: dict[str, BaseTTSConfig] = {}
 
 
 
 
60
 
61
  def register_config(self, config: BaseTTSConfig):
62
  """注册TTS配置"""
@@ -77,6 +82,188 @@ class TTSConfigRegistry:
77
  """获取所有配置"""
78
  return list(self._configs.values())
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # 全局TTS注册表实例
82
  tts_config_registry = TTSConfigRegistry()
 
1
+ import logging
2
  import typing
3
  from abc import ABC, abstractmethod
4
  from enum import Enum
 
6
 
7
  from pydantic import BaseModel
8
 
9
+ logger = logging.getLogger(__name__)
10
+
11
 
12
class TTSConfigType(Enum):
    """Enumeration of the TTS engine types known to the registry."""
    # Each member's value is the lowercase engine identifier string.
    MOYOYO = 'moyoyo'
    KOKORO = 'kokoro'
 
 
16
 
17
 
18
  class VoiceModelStatus(Enum):
 
58
 
59
  def __init__(self):
60
  self._configs: dict[str, BaseTTSConfig] = {}
61
+ self._priority_order = {
62
+ TTSConfigType.KOKORO: 1,
63
+ TTSConfigType.MOYOYO: 2,
64
+ }
65
 
66
  def register_config(self, config: BaseTTSConfig):
67
  """注册TTS配置"""
 
82
  """获取所有配置"""
83
  return list(self._configs.values())
84
 
85
def get_default_config(
        self,
        user_language: typing.Optional[typing.Literal['zh', 'en']] = None
) -> typing.Optional[BaseTTSConfig]:
    """Pick the default TTS configuration from the registered ones.

    Selection steps:
    1. Resolve the language: use ``user_language`` if given, otherwise call
       ``utils.system.get_system_language()``; fall back to ``'zh'`` when that
       import or call fails.
    2. Prefer Chinese voices when the language is ``'zh'``, non-Chinese voices
       otherwise.
    3. If any configuration reports ``is_model_complete()``, choose among
       those only; otherwise choose among all registered configurations.
    4. The final choice is delegated to
       ``_select_config_by_priority_and_language``.

    Args:
        user_language: preferred language, ``'zh'`` or ``'en'``; ``None``
            triggers system-language detection.

    Returns:
        The selected configuration, or ``None`` when nothing is registered or
        an unexpected error occurs (the error is logged, not raised).
    """
    try:
        language = user_language

        # No explicit preference: detect the system language, falling back to
        # Chinese whenever detection is unavailable or fails.
        if language is None:
            try:
                from utils.system import get_system_language
                language = get_system_language()
                logger.info(f"自动检测到系统语言: {language}")
            except ImportError:
                logger.warning("无法导入系统语言检测模块,使用默认语言 'zh'")
                language = 'zh'
            except Exception as exc:
                logger.warning(f"系统语言检测失败: {exc},使用默认语言 'zh'")
                language = 'zh'

        registered = self.get_all_configs()
        if not registered:
            logger.warning("没有找到任何TTS配置")
            return None

        # Chinese systems prefer Chinese voices; everything else prefers
        # non-Chinese voices.
        wants_chinese = (language == 'zh')
        logger.info(f"用户语言: {language}, 语音偏好: {'中文语音' if wants_chinese else '非中文语音'}")

        downloaded = [candidate for candidate in registered if candidate.is_model_complete()]

        if not downloaded:
            # Nothing is fully downloaded: fall back to the best match among
            # every registered configuration.
            logger.warning("没有找到完整下载的TTS模型,选择优先级最高且语言匹配的配置")
            chosen = self._select_config_by_priority_and_language(registered, wants_chinese)
            logger.info(f"使用备选默认TTS配置: {chosen.tts_type.value}:{chosen.character_name} "
                        f"(语音类型: {'中文' if chosen.is_chinese_voice else '非中文'})")
            return chosen

        # At least one complete model exists: restrict the choice to those.
        chosen = self._select_config_by_priority_and_language(downloaded, wants_chinese)
        logger.info(
            f"选择已完整的默认TTS配置: {chosen.tts_type.value}:{chosen.character_name} "
            f"(语音类型: {'中文' if chosen.is_chinese_voice else '非中文'})")
        return chosen

    except Exception as exc:
        logger.error(f"获取默认TTS配置时发生错误: {exc}", exc_info=True)
        return None
148
+
149
def _select_config_by_priority_and_language(
        self,
        configs: list[BaseTTSConfig],
        prefer_chinese_voice: bool
) -> BaseTTSConfig:
    """Return the best configuration by engine priority and language match.

    Candidates are ranked by the tuple
    ``(engine priority, language mismatch, character_name)``, lowest first:

    * engine priority comes from ``self._priority_order`` (smaller number
      wins); engine types missing from that mapping rank last with 999;
    * language mismatch is 0 when ``config.is_chinese_voice`` equals
      ``prefer_chinese_voice`` and 1 otherwise;
    * ``character_name`` breaks any remaining tie.

    Because priority is the first tuple element, a higher-priority engine is
    chosen even if its voice does not match the preferred language; language
    only decides between configurations of equal engine priority.

    Args:
        configs: candidate configurations; must not be empty.
        prefer_chinese_voice: whether a Chinese voice is preferred.

    Returns:
        BaseTTSConfig: the lowest-ranked (best) candidate. On a complete tie
        the earliest candidate in ``configs`` is returned.

    Raises:
        ValueError: if ``configs`` is empty.
    """
    if not configs:
        raise ValueError("配置列表不能为空")

    def sort_key(config):
        priority = self._priority_order.get(config.tts_type, 999)
        language_penalty = 0 if prefer_chinese_voice == config.is_chinese_voice else 1
        return (priority, language_penalty, config.character_name)

    # Only the single best candidate is needed, so take the minimum directly
    # instead of sorting the whole list; min() returns the first minimal item,
    # matching what a stable sort followed by [0] would give.
    return min(configs, key=sort_key)
182
+
183
def get_recommended_configs(
        self,
        max_count: int = 3,
        user_language: typing.Optional[typing.Literal['zh', 'en']] = None
) -> list[BaseTTSConfig]:
    """Return up to ``max_count`` recommended TTS configurations, best first.

    If at least one configuration reports ``is_model_complete()``, only
    complete configurations are considered; otherwise all registered
    configurations are. Candidates are ordered by
    ``(engine priority, language mismatch, character_name)``, where engine
    priority comes from ``self._priority_order`` (unknown types rank as 999)
    and language mismatch is 0 when the voice matches the preferred language.

    Args:
        max_count: maximum number of configurations to return. Zero or a
            negative value yields an empty list.
        user_language: preferred language, ``'zh'`` or ``'en'``. When
            ``None``, ``utils.system.get_system_language()`` is consulted and
            ``'zh'`` is used if that import or call fails.

    Returns:
        list[BaseTTSConfig]: the recommended configurations; empty when none
        are registered or an unexpected error occurs (logged, not raised).
    """
    # A non-positive limit can never produce results. Returning early also
    # stops a negative value from being interpreted as a from-the-end slice,
    # which would return "all but the last k" configurations.
    if max_count is not None and max_count <= 0:
        return []

    try:
        if user_language is None:
            try:
                from utils.system import get_system_language
                user_language = get_system_language()
            except Exception:
                # Covers ImportError too (it is a subclass of Exception).
                user_language = 'zh'

        all_configs = self.get_all_configs()
        if not all_configs:
            return []

        prefer_chinese_voice = (user_language == 'zh')

        def sort_key(config):
            return (
                self._priority_order.get(config.tts_type, 999),
                0 if prefer_chinese_voice == config.is_chinese_voice else 1,
                config.character_name,
            )

        # Prefer fully downloaded models; fall back to everything registered
        # when none is complete.
        complete_configs = [config for config in all_configs if config.is_model_complete()]
        candidates = complete_configs or all_configs

        return sorted(candidates, key=sort_key)[:max_count]

    except Exception as e:
        logger.error(f"获取推荐TTS配置时发生错误: {e}", exc_info=True)
        return []
232
+
233
def get_default_config_for_system(self) -> typing.Optional[BaseTTSConfig]:
    """Resolve the default TTS configuration for system start-up.

    Detects the system language via ``utils.system.get_system_language()`` and
    delegates to ``get_default_config`` with it. If the detection module
    cannot be imported, or any other error occurs along the way, the lookup is
    retried with ``'zh'`` as the language.

    Returns:
        The configuration returned by ``get_default_config`` (which may be
        ``None``).
    """
    try:
        from utils.system import get_system_language
        detected_language = get_system_language()
        logger.info(f"系统首次启动,检测到系统语言: {detected_language}")

        chosen = self.get_default_config(user_language=detected_language)

        # Nothing selectable: report it and hand the empty result back.
        if not chosen:
            logger.error("无法为系统首次启动选择默认TTS配置")
            return chosen

        logger.info(
            f"为系统首次启动选择默认TTS配置: {chosen.tts_type.value}:{chosen.character_name}")
        # Log the details of the choice to ease debugging.
        logger.debug(f"默认配置详情: 语音类型={'中文' if chosen.is_chinese_voice else '非中文'}, "
                     f"模型完整性={'完整' if chosen.is_model_complete() else '未完整'}")
        return chosen

    except ImportError:
        logger.warning("无法导入系统语言检测模块,使用中文作为默认语言")
        return self.get_default_config(user_language='zh')
    except Exception as exc:
        logger.error(f"为系统首次启动获取默认配置时发生错误: {exc}", exc_info=True)
        return self.get_default_config(user_language='zh')
266
+
267
 
268
  # 全局TTS注册表实例
269
  tts_config_registry = TTSConfigRegistry()