File size: 4,826 Bytes
d08a15b
 
 
 
 
 
 
 
60f8238
d08a15b
 
 
 
 
 
 
 
60f8238
851495c
d08a15b
 
 
 
4e071d3
 
d08a15b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
851495c
d08a15b
60f8238
851495c
d08a15b
 
 
7d8046a
 
 
d08a15b
 
60f8238
d08a15b
 
 
7d8046a
 
 
d08a15b
 
 
 
 
 
 
 
 
7d8046a
d08a15b
 
 
 
7d8046a
 
 
d08a15b
 
60f8238
7d8046a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d08a15b
 
 
 
 
fa296dd
 
 
 
 
 
 
 
851495c
d08a15b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""
Voice dialogue system launcher.

Starts and coordinates all components of the voice dialogue system.
"""

import time

from voice_dialogue.audio.capture import AudioCapture
from voice_dialogue.config.speaker_config import get_tts_config_by_speaker_name, get_available_speaker_names
from voice_dialogue.core.constants import (
    audio_frames_queue,
    user_voice_queue,
    transcribed_text_queue,
    text_input_queue,
    audio_output_queue
)
from voice_dialogue.services import ASRService, LLMService, AudioPlayerService, SpeechStateMonitor, TTSAudioGenerator
from voice_dialogue.utils.logger import logger


def _start_daemon(worker):
    """Flag ``worker`` as a daemon, start it, and return it to the caller."""
    worker.daemon = True
    worker.start()
    return worker


def launch_system(
        user_language: str,
        speaker: str,
        disable_echo_cancellation: bool = False,
) -> None:
    """
    Start the complete voice dialogue system and block until it stops.

    Every component is started as a daemon worker and the components exchange
    data through the shared queues imported from
    ``voice_dialogue.core.constants``:

    1. ``AudioCapture`` writes to ``audio_frames_queue``.
    2. ``SpeechStateMonitor`` reads ``audio_frames_queue`` and writes to
       ``user_voice_queue``.
    3. ``ASRService`` reads ``user_voice_queue`` and writes to
       ``transcribed_text_queue``.
    4. ``LLMService`` reads ``transcribed_text_queue`` and writes to
       ``text_input_queue``.
    5. ``TTSAudioGenerator`` reads ``text_input_queue`` and writes to
       ``audio_output_queue``.
    6. ``AudioPlayerService`` reads ``audio_output_queue``.

    The speaker is validated *before* any worker is started, so an invalid
    ``speaker`` never leaves half of the pipeline running in the background.

    Args:
        user_language: Language passed to the ASR service; the original
            documentation lists 'zh' (Chinese) and 'en' (English).
        speaker: Name of the TTS speaker. It must be known to
            ``get_tts_config_by_speaker_name``; the valid names are whatever
            ``get_available_speaker_names()`` returns (the original
            documentation gave '罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'
            as examples).
        disable_echo_cancellation: When True, audio capture runs without echo
            cancellation and the speech monitor's VAD is switched on instead.
            Defaults to False.

    Raises:
        ValueError: If ``speaker`` has no TTS configuration.

    Returns:
        None. The call blocks until every worker has been joined.
    """
    # Fail fast: resolve the TTS configuration before launching any thread so
    # that a bad speaker name cannot leave orphaned daemon workers behind.
    tts_speaker_config = get_tts_config_by_speaker_name(speaker)
    if tts_speaker_config is None:
        available_speakers = get_available_speaker_names()
        raise ValueError(f"不支持的TTS说话人: {speaker}。可用说话人: {', '.join(available_speakers)}")

    threads = []

    # Speech recognition: user voice -> transcribed text.
    threads.append(_start_daemon(ASRService(
        user_voice_queue=user_voice_queue,
        transcribed_text_queue=transcribed_text_queue,
        language=user_language
    )))

    # Text generation: transcribed question -> generated answer text.
    threads.append(_start_daemon(LLMService(
        user_question_queue=transcribed_text_queue,
        generated_answer_queue=text_input_queue
    )))

    # Speech synthesis: answer text -> audio.
    threads.append(_start_daemon(TTSAudioGenerator(
        text_input_queue=text_input_queue,
        audio_output_queue=audio_output_queue,
        tts_config=tts_speaker_config
    )))

    # Audio playback.
    threads.append(_start_daemon(AudioPlayerService(audio_playing_queue=audio_output_queue)))

    # Speech state monitoring. VAD is enabled exactly when echo cancellation
    # is disabled.
    # NOTE(review): presumably the echo-cancelling capture path does its own
    # voice detection, which is why VAD is off in that mode - confirm.
    threads.append(_start_daemon(SpeechStateMonitor(
        audio_frame_queue=audio_frames_queue,
        user_voice_queue=user_voice_queue,
        enable_vad=disable_echo_cancellation
    )))

    # Audio capture is started last, after every consumer has been started.
    threads.append(_start_daemon(AudioCapture(
        audio_frames_queue=audio_frames_queue,
        enable_echo_cancellation=not disable_echo_cancellation
    )))

    # Poll until every worker reports that it is ready.
    # NOTE(review): a worker that dies before setting ``is_ready`` would make
    # this loop spin forever - confirm whether that can happen.
    while not all(thread.is_ready for thread in threads):
        time.sleep(0.1)

    logger.info(
        f'\n'
        f"┌──────────────────────────────────────────┐\n"
        f"│                                          │\n"
        f"│             🚀 服务启动成功 🚀             │\n"
        f"│                                          │\n"
        f"└──────────────────────────────────────────┘"
    )

    # Block until all workers have finished.
    for thread in threads:
        thread.join()