liumaolin
committed on
Commit
·
cf355e6
1
Parent(s):
1ae18a4
Update TTS speaker configuration: replace static mapping with dynamic retrieval, add available speaker listing, and update CLI argument parsing for improved flexibility and maintainability.
Browse files
- src/VoiceDialogue/main.py +116 -34
src/VoiceDialogue/main.py
CHANGED
|
@@ -87,19 +87,13 @@ def launch_system(
|
|
| 87 |
answer_generator_worker.start()
|
| 88 |
threads.append(answer_generator_worker)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
'马云': 'Ma Yun',
|
| 97 |
-
}
|
| 98 |
-
role = speaker_mapping.get(speaker)
|
| 99 |
-
if role is None:
|
| 100 |
-
raise ValueError(f"不支持的TTS配置: {speaker}")
|
| 101 |
|
| 102 |
-
tts_speaker_config = tts_config_registry.get_config(TTSConfigType.MOYOYO, role)
|
| 103 |
audio_generator_worker = TTSAudioGenerator(
|
| 104 |
text_input_queue=text_input_queue,
|
| 105 |
audio_output_queue=audio_output_queue,
|
|
@@ -121,37 +115,100 @@ def launch_system(
|
|
| 121 |
thread.join()
|
| 122 |
|
| 123 |
|
| 124 |
-
def
|
| 125 |
"""
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
Args:
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
| 132 |
"""
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
def create_argument_parser():
|
| 150 |
"""创建命令行参数解析器"""
|
|
|
|
|
|
|
|
|
|
| 151 |
parser = argparse.ArgumentParser(
|
| 152 |
description="VoiceDialogue - 语音对话系统",
|
| 153 |
formatter_class=argparse.RawDescriptionHelpFormatter,
|
| 154 |
-
epilog="""
|
| 155 |
示例用法:
|
| 156 |
# 启动命令行模式(默认)
|
| 157 |
python main.py
|
|
@@ -169,7 +226,7 @@ def create_argument_parser():
|
|
| 169 |
python main.py --mode api --port 8000 --reload
|
| 170 |
|
| 171 |
支持的说话人:
|
| 172 |
-
|
| 173 |
"""
|
| 174 |
)
|
| 175 |
|
|
@@ -191,8 +248,8 @@ def create_argument_parser():
|
|
| 191 |
)
|
| 192 |
cli_group.add_argument(
|
| 193 |
'--speaker', '-s',
|
| 194 |
-
choices=
|
| 195 |
-
default='沈逸',
|
| 196 |
help='TTS说话人 (默认: 沈逸)'
|
| 197 |
)
|
| 198 |
|
|
@@ -218,6 +275,31 @@ def create_argument_parser():
|
|
| 218 |
return parser
|
| 219 |
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
def main():
|
| 222 |
"""
|
| 223 |
主程序入口函数
|
|
|
|
| 87 |
answer_generator_worker.start()
|
| 88 |
threads.append(answer_generator_worker)
|
| 89 |
|
| 90 |
+
# 动态获取TTS配置,而不是使用固定映射
|
| 91 |
+
tts_speaker_config = _get_tts_config_by_speaker_name(speaker)
|
| 92 |
+
if tts_speaker_config is None:
|
| 93 |
+
# 如果找不到指定说话人,列出所有可用说话人并抛出异常
|
| 94 |
+
available_speakers = _get_available_speaker_names()
|
| 95 |
+
raise ValueError(f"不支持的TTS说话人: {speaker}。可用说话人: {', '.join(available_speakers)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
|
|
|
| 97 |
audio_generator_worker = TTSAudioGenerator(
|
| 98 |
text_input_queue=text_input_queue,
|
| 99 |
audio_output_queue=audio_output_queue,
|
|
|
|
| 115 |
thread.join()
|
| 116 |
|
| 117 |
|
| 118 |
+
def _get_tts_config_by_speaker_name(speaker_name: str):
|
| 119 |
"""
|
| 120 |
+
根据说话人名称获取TTS配置
|
| 121 |
+
|
| 122 |
+
支持中文名称和英文名称,优先匹配中文名称映射,
|
| 123 |
+
如果找不到则直接使用英文名称搜索
|
| 124 |
|
| 125 |
Args:
|
| 126 |
+
speaker_name (str): 说话人名称
|
| 127 |
+
|
| 128 |
+
Returns:
|
| 129 |
+
BaseTTSConfig: TTS配置,如果找不到则返回None
|
| 130 |
"""
|
| 131 |
+
# 中文名称到英文名称的映射(保持向后兼容)
|
| 132 |
+
chinese_to_english_mapping = {
|
| 133 |
+
'罗翔': 'Luo Xiang',
|
| 134 |
+
'马保国': 'Ma Baoguo',
|
| 135 |
+
'沈逸': 'Shen Yi',
|
| 136 |
+
'杨幂': 'Yang Mi',
|
| 137 |
+
'周杰伦': 'Zhou Jielun',
|
| 138 |
+
'马云': 'Ma Yun',
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
# 首先尝试中文名称映射
|
| 142 |
+
english_name = chinese_to_english_mapping.get(speaker_name, speaker_name)
|
| 143 |
+
|
| 144 |
+
# 获取所有可用配置
|
| 145 |
+
all_configs = tts_config_registry.get_all_configs()
|
| 146 |
+
|
| 147 |
+
# 搜索匹配的配置
|
| 148 |
+
for config in all_configs:
|
| 149 |
+
if config.character_name == english_name:
|
| 150 |
+
return config
|
| 151 |
+
|
| 152 |
+
# 如果通过映射找不到,尝试直接匹配输入的名称
|
| 153 |
+
if speaker_name != english_name:
|
| 154 |
+
for config in all_configs:
|
| 155 |
+
if config.character_name == speaker_name:
|
| 156 |
+
return config
|
| 157 |
+
|
| 158 |
+
return None
|
| 159 |
|
| 160 |
+
|
| 161 |
+
def _get_available_speaker_names():
|
| 162 |
+
"""
|
| 163 |
+
获取所有可用的说话人名称列表
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
list[str]: 包含中文显示名称和英文原始名称的列表
|
| 167 |
+
"""
|
| 168 |
+
# 中文显示名称映射
|
| 169 |
+
english_to_chinese_mapping = {
|
| 170 |
+
'Luo Xiang': '罗翔',
|
| 171 |
+
'Ma Baoguo': '马保国',
|
| 172 |
+
'Shen Yi': '沈逸',
|
| 173 |
+
'Yang Mi': '杨幂',
|
| 174 |
+
'Zhou Jielun': '周杰伦',
|
| 175 |
+
'Ma Yun': '马云',
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
all_configs = tts_config_registry.get_all_configs()
|
| 179 |
+
speaker_names = []
|
| 180 |
+
|
| 181 |
+
for config in all_configs:
|
| 182 |
+
# 优先显示中文名称
|
| 183 |
+
chinese_name = english_to_chinese_mapping.get(config.character_name)
|
| 184 |
+
if chinese_name:
|
| 185 |
+
speaker_names.append(chinese_name)
|
| 186 |
+
else:
|
| 187 |
+
# 如果没有中文映射,使用英文原名
|
| 188 |
+
speaker_names.append(config.character_name)
|
| 189 |
+
|
| 190 |
+
return sorted(speaker_names)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def _update_argument_parser_speaker_choices():
|
| 194 |
+
"""
|
| 195 |
+
动态更新命令行参数解析器中的说话人选项
|
| 196 |
+
|
| 197 |
+
Returns:
|
| 198 |
+
list[str]: 可用的说话人选择列表
|
| 199 |
+
"""
|
| 200 |
+
return _get_available_speaker_names()
|
| 201 |
|
| 202 |
|
| 203 |
def create_argument_parser():
|
| 204 |
"""创建命令行参数解析器"""
|
| 205 |
+
# 动态获取可用说话人列表
|
| 206 |
+
available_speakers = _update_argument_parser_speaker_choices()
|
| 207 |
+
|
| 208 |
parser = argparse.ArgumentParser(
|
| 209 |
description="VoiceDialogue - 语音对话系统",
|
| 210 |
formatter_class=argparse.RawDescriptionHelpFormatter,
|
| 211 |
+
epilog=f"""
|
| 212 |
示例用法:
|
| 213 |
# 启动命令行模式(默认)
|
| 214 |
python main.py
|
|
|
|
| 226 |
python main.py --mode api --port 8000 --reload
|
| 227 |
|
| 228 |
支持的说话人:
|
| 229 |
+
{', '.join(available_speakers)}
|
| 230 |
"""
|
| 231 |
)
|
| 232 |
|
|
|
|
| 248 |
)
|
| 249 |
cli_group.add_argument(
|
| 250 |
'--speaker', '-s',
|
| 251 |
+
choices=available_speakers,
|
| 252 |
+
default='沈逸' if '沈逸' in available_speakers else (available_speakers[0] if available_speakers else '沈逸'),
|
| 253 |
help='TTS说话人 (默认: 沈逸)'
|
| 254 |
)
|
| 255 |
|
|
|
|
| 275 |
return parser
|
| 276 |
|
| 277 |
|
| 278 |
+
def launch_api_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = False):
|
| 279 |
+
"""
|
| 280 |
+
启动API服务器
|
| 281 |
+
|
| 282 |
+
Args:
|
| 283 |
+
host (str): 服务器主机地址,默认为 "0.0.0.0"
|
| 284 |
+
port (int): 服务器端口,默认为 8000
|
| 285 |
+
reload (bool): 是否启用热重载,默认为 False
|
| 286 |
+
"""
|
| 287 |
+
print(f'{"=" * 80}\n正在启动API服务器...\n{"=" * 80}')
|
| 288 |
+
print(f"服务器地址: http://{host}:{port}")
|
| 289 |
+
print(f"API文档: http://{host}:{port}/docs")
|
| 290 |
+
print(f"热重载: {'启用' if reload else '禁用'}")
|
| 291 |
+
print(f'{"=" * 80}')
|
| 292 |
+
|
| 293 |
+
# 导入并启动FastAPI应用
|
| 294 |
+
uvicorn.run(
|
| 295 |
+
"api.app:app",
|
| 296 |
+
host=host,
|
| 297 |
+
port=port,
|
| 298 |
+
reload=reload,
|
| 299 |
+
log_level="info"
|
| 300 |
+
)
|
| 301 |
+
|
| 302 |
+
|
| 303 |
def main():
|
| 304 |
"""
|
| 305 |
主程序入口函数
|