|
|
""" |
|
|
语音对话系统启动器 |
|
|
|
|
|
负责启动和协调语音对话系统的所有组件 |
|
|
""" |
|
|
|
|
|
import time |
|
|
|
|
|
from voice_dialogue.audio.capture import AudioCapture |
|
|
from voice_dialogue.config.speaker_config import get_tts_config_by_speaker_name, get_available_speaker_names |
|
|
from voice_dialogue.core.constants import ( |
|
|
audio_frames_queue, |
|
|
user_voice_queue, |
|
|
transcribed_text_queue, |
|
|
text_input_queue, |
|
|
audio_output_queue |
|
|
) |
|
|
from voice_dialogue.services import ASRService, LLMService, AudioPlayerService, SpeechStateMonitor, TTSAudioGenerator |
|
|
from voice_dialogue.utils.logger import logger |
|
|
|
|
|
|
|
|
def launch_system( |
|
|
user_language: str, |
|
|
speaker: str, |
|
|
disable_echo_cancellation: bool = False, |
|
|
) -> None: |
|
|
""" |
|
|
启动完整的语音对话系统 |
|
|
|
|
|
该函数负责启动并协调语音对话系统的所有组件,包括音频采集、语音识别、 |
|
|
文本生成、语音合成和音频播放等功能模块。系统采用多线程架构,各组件 |
|
|
通过队列进行数据传递和通信。 |
|
|
|
|
|
系统工作流程: |
|
|
1. 音频采集:EchoCancellingAudioCapture 采集用户语音并进行回声消除 |
|
|
2. 语音监测:SpeechStateMonitor 检测用户是否在说话 |
|
|
3. 语音识别:ASRWorker 将用户语音转换为文本 |
|
|
4. 文本生成:LLMResponseGenerator 基于用户问题生成AI回答 |
|
|
5. 语音合成:TTSAudioGenerator 将AI回答转换为语音 |
|
|
6. 音频播放:AudioStreamPlayer 播放生成的语音 |
|
|
|
|
|
Args: |
|
|
user_language (str): 用户语言,支持 'zh'(中文)和 'en'(英文) |
|
|
speaker (str): 语音合成使用的说话人,支持: |
|
|
'罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云' |
|
|
|
|
|
Raises: |
|
|
ValueError: 当指定的说话人不在支持列表中时抛出异常 |
|
|
|
|
|
Returns: |
|
|
None: 函数会一直运行直到所有线程结束 |
|
|
|
|
|
Note: |
|
|
该函数会阻塞运行,直到系统被外部停止或发生异常 |
|
|
""" |
|
|
|
|
|
|
|
|
threads = [] |
|
|
|
|
|
|
|
|
asr_worker = ASRService( |
|
|
user_voice_queue=user_voice_queue, |
|
|
transcribed_text_queue=transcribed_text_queue, |
|
|
language=user_language |
|
|
) |
|
|
asr_worker.daemon = True |
|
|
asr_worker.start() |
|
|
threads.append(asr_worker) |
|
|
|
|
|
|
|
|
text_generator = LLMService( |
|
|
user_question_queue=transcribed_text_queue, |
|
|
generated_answer_queue=text_input_queue |
|
|
) |
|
|
text_generator.daemon = True |
|
|
text_generator.start() |
|
|
threads.append(text_generator) |
|
|
|
|
|
|
|
|
tts_speaker_config = get_tts_config_by_speaker_name(speaker) |
|
|
if tts_speaker_config is None: |
|
|
|
|
|
available_speakers = get_available_speaker_names() |
|
|
raise ValueError(f"不支持的TTS说话人: {speaker}。可用说话人: {', '.join(available_speakers)}") |
|
|
|
|
|
|
|
|
audio_generator = TTSAudioGenerator( |
|
|
text_input_queue=text_input_queue, |
|
|
audio_output_queue=audio_output_queue, |
|
|
tts_config=tts_speaker_config |
|
|
) |
|
|
audio_generator.daemon = True |
|
|
audio_generator.start() |
|
|
threads.append(audio_generator) |
|
|
|
|
|
|
|
|
audio_player = AudioPlayerService(audio_playing_queue=audio_output_queue) |
|
|
audio_player.daemon = True |
|
|
audio_player.start() |
|
|
threads.append(audio_player) |
|
|
|
|
|
|
|
|
enable_vad = disable_echo_cancellation |
|
|
speech_monitor = SpeechStateMonitor( |
|
|
audio_frame_queue=audio_frames_queue, |
|
|
user_voice_queue=user_voice_queue, |
|
|
enable_vad=enable_vad |
|
|
) |
|
|
speech_monitor.daemon = True |
|
|
speech_monitor.start() |
|
|
threads.append(speech_monitor) |
|
|
|
|
|
|
|
|
enable_echo_cancellation = not disable_echo_cancellation |
|
|
audio_capture = AudioCapture( |
|
|
audio_frames_queue=audio_frames_queue, |
|
|
enable_echo_cancellation=enable_echo_cancellation |
|
|
) |
|
|
audio_capture.daemon = True |
|
|
audio_capture.start() |
|
|
threads.append(audio_capture) |
|
|
|
|
|
|
|
|
while not all([thread.is_ready for thread in threads]): |
|
|
time.sleep(0.1) |
|
|
|
|
|
logger.info( |
|
|
f'\n' |
|
|
f"┌──────────────────────────────────────────┐\n" |
|
|
f"│ │\n" |
|
|
f"│ 🚀 服务启动成功 🚀 │\n" |
|
|
f"│ │\n" |
|
|
f"└──────────────────────────────────────────┘" |
|
|
) |
|
|
|
|
|
|
|
|
for thread in threads: |
|
|
thread.join() |
|
|
|