liumaolin
Update `main.py`: modify `uvicorn.run` to use updated `voice_dialogue.api.app` module path.
15e98c9
| import argparse | |
| import sys | |
| import time | |
| import typing | |
| from pathlib import Path | |
| import multiprocessing | |
| import uvicorn | |
# Put the sibling ``src`` directory at the front of ``sys.path`` (when it exists
# and is not already listed) before the ``voice_dialogue`` imports below run.
HERE = Path(__file__).parent
lib_path = HERE / "src"
if lib_path.exists():
    _src_dir = lib_path.as_posix()
    if _src_dir not in sys.path:
        sys.path.insert(0, _src_dir)
| from voice_dialogue.core.constants import ( | |
| audio_frames_queue, | |
| user_voice_queue, | |
| transcribed_text_queue, | |
| text_input_queue, | |
| audio_output_queue | |
| ) | |
| from voice_dialogue.services.audio.capture import EchoCancellingAudioCapture | |
| from voice_dialogue.services.audio.generator import TTSAudioGenerator | |
| from voice_dialogue.services.audio.generators.models import tts_config_registry | |
| from voice_dialogue.services.audio.player import AudioStreamPlayer | |
| from voice_dialogue.services.speech.monitor import SpeechStateMonitor | |
| from voice_dialogue.services.speech.recognizer import ASRWorker | |
| from voice_dialogue.services.text.generator import LLMResponseGenerator | |
# Module-level language setting, restricted by its annotation to 'zh' or 'en'.
# NOTE(review): nothing in the visible part of this file reads it - confirm
# whether other modules import it before removing.
language: typing.Literal["zh", "en"] = "en"
def launch_system(
        user_language: str,
        speaker: str
) -> None:
    """
    Start the complete voice dialogue system and block until it stops.

    Six workers are created and started; they exchange data through the
    module-level queues imported from ``voice_dialogue.core.constants``:

        1. Audio capture: ``EchoCancellingAudioCapture`` records the user's
           voice (with echo cancellation) into ``audio_frames_queue``.
        2. Speech monitoring: ``SpeechStateMonitor`` reads the audio frames
           and feeds ``user_voice_queue``.
        3. Speech recognition: ``ASRWorker`` turns the user's voice into text
           on ``transcribed_text_queue``.
        4. Text generation: ``LLMResponseGenerator`` produces the answer text
           on ``text_input_queue``.
        5. Speech synthesis: ``TTSAudioGenerator`` turns the answer into audio
           on ``audio_output_queue``.
        6. Playback: ``AudioStreamPlayer`` plays the generated audio.

    The speaker is validated *before* any worker is started, so an unknown
    speaker fails fast instead of leaving already-started workers running.

    Args:
        user_language: Language handed to ``ASRWorker`` (the CLI offers
            'zh' and 'en').
        speaker: Speaker name for speech synthesis; any name accepted by
            ``_get_tts_config_by_speaker_name``.

    Raises:
        ValueError: If no TTS configuration matches ``speaker``.

    Returns:
        None, once every worker has been joined.
    """
    # Resolve the TTS configuration up front: raising after some workers have
    # been started would leave them running with nobody to stop them.
    tts_speaker_config = _get_tts_config_by_speaker_name(speaker)
    if tts_speaker_config is None:
        available_speakers = _get_available_speaker_names()
        raise ValueError(f"不支持的TTS说话人: {speaker}。可用说话人: {', '.join(available_speakers)}")

    threads = []

    def _start(worker):
        """Start ``worker`` and remember it for the readiness wait and join."""
        worker.start()
        threads.append(worker)

    # Each worker is constructed and started in pipeline order, exactly one
    # after the other, so construction of a later stage never precedes the
    # start of an earlier one.
    _start(EchoCancellingAudioCapture(audio_frames_queue=audio_frames_queue))

    _start(SpeechStateMonitor(
        audio_frame_queue=audio_frames_queue,
        user_voice_queue=user_voice_queue,
    ))

    _start(ASRWorker(
        user_voice_queue=user_voice_queue,
        transcribed_text_queue=transcribed_text_queue,
        language=user_language
    ))

    _start(LLMResponseGenerator(
        user_question_queue=transcribed_text_queue,
        generated_answer_queue=text_input_queue
    ))

    _start(TTSAudioGenerator(
        text_input_queue=text_input_queue,
        audio_output_queue=audio_output_queue,
        tts_config=tts_speaker_config
    ))

    _start(AudioStreamPlayer(audio_playing_queue=audio_output_queue))

    # Poll until every worker reports ready.
    # NOTE(review): this never times out; a worker that fails before setting
    # ``is_ready`` would keep the loop spinning - confirm that is acceptable.
    while not all(thread.is_ready for thread in threads):
        time.sleep(0.1)

    print(f'{"=" * 80}\n服务启动成功\n{"=" * 80}')

    for thread in threads:
        thread.join()
# Single source of truth for the Chinese display name <-> registered English
# character name pairs (kept for backward compatibility with Chinese names on
# the command line). The reverse table is derived so the two can never drift.
_SPEAKER_NAME_ZH_TO_EN = {
    '罗翔': 'Luo Xiang',
    '马保国': 'Ma Baoguo',
    '沈逸': 'Shen Yi',
    '杨幂': 'Yang Mi',
    '周杰伦': 'Zhou Jielun',
    '马云': 'Ma Yun',
}
_SPEAKER_NAME_EN_TO_ZH = {
    english: chinese for chinese, english in _SPEAKER_NAME_ZH_TO_EN.items()
}


def _get_tts_config_by_speaker_name(speaker_name: str):
    """
    Look up the TTS configuration for a speaker name.

    The name is first translated through the Chinese-to-English table and
    compared against each registered config's ``character_name``. If the
    translated name matches nothing, the name as given is tried as well.

    Args:
        speaker_name: Chinese display name or registered character name.

    Returns:
        The first matching config from ``tts_config_registry``, or ``None``
        when nothing matches.
    """
    english_name = _SPEAKER_NAME_ZH_TO_EN.get(speaker_name, speaker_name)
    all_configs = tts_config_registry.get_all_configs()

    # Mapped name has priority; the raw input is only a fallback and is
    # skipped when it is the same string.
    candidates = [english_name]
    if speaker_name != english_name:
        candidates.append(speaker_name)

    for candidate in candidates:
        for config in all_configs:
            if config.character_name == candidate:
                return config

    return None


def _get_available_speaker_names():
    """
    List the names of all registered speakers, sorted.

    Returns:
        list[str]: One entry per registered config - the Chinese display name
        when the character has one in the mapping table, otherwise the
        config's own ``character_name``.
    """
    return sorted(
        _SPEAKER_NAME_EN_TO_ZH.get(config.character_name, config.character_name)
        for config in tts_config_registry.get_all_configs()
    )
def _update_argument_parser_speaker_choices():
    """
    Return the speaker names to offer as ``--speaker`` choices.

    Despite the name, nothing is updated here: this simply forwards the
    result of ``_get_available_speaker_names``.

    Returns:
        list[str]: The available speaker names.
    """
    speaker_choices = _get_available_speaker_names()
    return speaker_choices
def create_argument_parser():
    """
    Build the command-line argument parser.

    Options:
        --mode/-m      'cli' (default) or 'api'.
        --language/-l  'zh' (default) or 'en'; used in CLI mode.
        --speaker/-s   One of the currently available speakers; used in CLI
                       mode. Defaults to '沈逸' when available, otherwise to
                       the first available speaker.
        --host         API server bind address (default '0.0.0.0').
        --port/-p      API server port (default 8000).
        --reload       Enable hot reload for the API server.

    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    # The speaker list is read at parser-construction time so that choices,
    # default and help text all reflect what is actually registered.
    available_speakers = _update_argument_parser_speaker_choices()

    # Prefer '沈逸'; fall back to the first registered speaker. With no
    # speakers at all the preferred name is kept as a placeholder default.
    preferred_speaker = '沈逸'
    if preferred_speaker in available_speakers or not available_speakers:
        default_speaker = preferred_speaker
    else:
        default_speaker = available_speakers[0]

    speaker_listing = ', '.join(available_speakers)

    parser = argparse.ArgumentParser(
        description="VoiceDialogue - 语音对话系统",
        # Raw formatter: keep the epilog's line breaks as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""
示例用法:
# 启动命令行模式(默认)
python main.py
# 启动命令行模式并指定参数
python main.py --mode cli --language zh --speaker 沈逸
# 启动API服务器
python main.py --mode api
# 启动API服务器并指定端口
python main.py --mode api --port 9000
# 启动API服务器并启用热重载(开发模式)
python main.py --mode api --port 8000 --reload
支持的说话人:
{speaker_listing}
"""
    )

    # Run-mode selection.
    parser.add_argument(
        '--mode', '-m',
        choices=['cli', 'api'],
        default='cli',
        help='运行模式: cli=命令行模式, api=API服务器模式 (默认: cli)'
    )

    # CLI-mode options.
    cli_group = parser.add_argument_group('命令行模式参数')
    cli_group.add_argument(
        '--language', '-l',
        choices=['zh', 'en'],
        default='zh',
        help='用户语言: zh=中文, en=英文 (默认: zh)'
    )
    cli_group.add_argument(
        '--speaker', '-s',
        choices=available_speakers,
        default=default_speaker,
        # %(default)s lets argparse print the default that was really chosen
        # instead of a hard-coded name that may not be registered.
        help='TTS说话人 (默认: %(default)s)'
    )

    # API-server-mode options.
    api_group = parser.add_argument_group('API服务器模式参数')
    api_group.add_argument(
        '--host',
        default='0.0.0.0',
        help='服务器主机地址 (默认: 0.0.0.0)'
    )
    api_group.add_argument(
        '--port', '-p',
        type=int,
        default=8000,
        help='服务器端口 (默认: 8000)'
    )
    api_group.add_argument(
        '--reload',
        action='store_true',
        help='启用热重载(开发模式)'
    )

    return parser
def launch_api_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = False):
    """
    Print a startup banner and run the API server with uvicorn.

    Args:
        host (str): Address the server binds to. Defaults to "0.0.0.0".
        port (int): Port the server listens on. Defaults to 8000.
        reload (bool): Whether to enable uvicorn's hot reload. Defaults to
            False.
    """
    separator = "=" * 80
    base_url = f"http://{host}:{port}"
    reload_state = '启用' if reload else '禁用'

    print(f'{separator}\n正在启动API服务器...\n{separator}')
    print(f"服务器地址: {base_url}")
    print(f"API文档: {base_url}/docs")
    print(f"热重载: {reload_state}")
    print(f'{separator}')

    # The app is passed as an import string rather than an object.
    uvicorn.run(
        "voice_dialogue.api.app:app",
        host=host,
        port=port,
        reload=reload,
        log_level="info"
    )
def main():
    """
    Program entry point.

    Parses the command line, prints a banner showing the selected mode, then
    dispatches on ``--mode``:

        - cli: run the voice dialogue system via ``launch_system``.
        - api: run the HTTP API server via ``launch_api_server``.

    A ``KeyboardInterrupt`` is reported and swallowed; any other exception is
    reported and re-raised.
    """
    args = create_argument_parser().parse_args()

    separator = "=" * 80
    mode_label = args.mode.upper()
    print(f"""
{separator}
VoiceDialogue - 语音对话系统
{separator}
运行模式: {mode_label}
{separator}
""")

    try:
        if args.mode == 'api':
            launch_api_server(
                host=args.host,
                port=args.port,
                reload=args.reload
            )
        elif args.mode == 'cli':
            print(f"语言设置: {args.language}")
            print(f"说话人: {args.speaker}")
            print("正在启动命令行语音对话系统...")
            launch_system(args.language, args.speaker)
    except KeyboardInterrupt:
        print("\n程序被用户中断")
    except Exception as e:
        # Report, then let the original exception propagate to the caller.
        print(f"程序运行出错: {e}")
        raise
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    # Called first, ahead of main(), as the multiprocessing docs require for
    # frozen executables.
    multiprocessing.freeze_support()
    main()