liumaolin
committed on
Commit
·
bba0d84
1
Parent(s):
ef0d09e
Refactor TTS audio generation: rename queues for clarity, update `TTSAudioGenerator` initialization, and enhance docstrings for better maintainability.
Browse files
src/VoiceDialogue/main.py
CHANGED
|
@@ -22,12 +22,42 @@ language: typing.Literal['zh', 'en'] = 'en'
|
|
| 22 |
def launch_system(
|
| 23 |
user_language: str,
|
| 24 |
speaker: str
|
| 25 |
-
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
audio_frames_queue = Queue()
|
| 27 |
user_voice_queue = Queue()
|
| 28 |
transcribed_text_queue = Queue()
|
| 29 |
-
|
| 30 |
-
|
| 31 |
threads = []
|
| 32 |
#
|
| 33 |
audio_frame_probe = EchoCancellingAudioCapture(audio_frames_queue=audio_frames_queue)
|
|
@@ -52,7 +82,7 @@ def launch_system(
|
|
| 52 |
|
| 53 |
answer_generator_worker = LLMResponseGenerator(
|
| 54 |
user_question_queue=transcribed_text_queue,
|
| 55 |
-
generated_answer_queue=
|
| 56 |
)
|
| 57 |
answer_generator_worker.start()
|
| 58 |
threads.append(answer_generator_worker)
|
|
@@ -71,14 +101,14 @@ def launch_system(
|
|
| 71 |
|
| 72 |
tts_speaker_config = tts_config_registry.get_config(TTSConfigType.MOYOYO, role)
|
| 73 |
audio_generator_worker = TTSAudioGenerator(
|
| 74 |
-
|
| 75 |
-
|
| 76 |
tts_config=tts_speaker_config
|
| 77 |
)
|
| 78 |
audio_generator_worker.start()
|
| 79 |
threads.append(audio_generator_worker)
|
| 80 |
|
| 81 |
-
audio_playing_worker = AudioStreamPlayer(audio_playing_queue=
|
| 82 |
audio_playing_worker.start()
|
| 83 |
threads.append(audio_playing_worker)
|
| 84 |
|
|
@@ -92,6 +122,25 @@ def launch_system(
|
|
| 92 |
|
| 93 |
|
| 94 |
def main():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
user_language: typing.Literal['zh', 'en'] = 'zh'
|
| 96 |
|
| 97 |
# '罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'
|
|
|
|
| 22 |
def launch_system(
|
| 23 |
user_language: str,
|
| 24 |
speaker: str
|
| 25 |
+
) -> None:
|
| 26 |
+
"""
|
| 27 |
+
启动完整的语音对话系统
|
| 28 |
+
|
| 29 |
+
该函数负责启动并协调语音对话系统的所有组件,包括音频采集、语音识别、
|
| 30 |
+
文本生成、语音合成和音频播放等功能模块。系统采用多线程架构,各组件
|
| 31 |
+
通过队列进行数据传递和通信。
|
| 32 |
+
|
| 33 |
+
系统工作流程:
|
| 34 |
+
1. 音频采集:EchoCancellingAudioCapture 采集用户语音并进行回声消除
|
| 35 |
+
2. 语音监测:SpeechStateMonitor 检测用户是否在说话
|
| 36 |
+
3. 语音识别:ASRWorker 将用户语音转换为文本
|
| 37 |
+
4. 文本生成:LLMResponseGenerator 基于用户问题生成AI回答
|
| 38 |
+
5. 语音合成:TTSAudioGenerator 将AI回答转换为语音
|
| 39 |
+
6. 音频播放:AudioStreamPlayer 播放生成的语音
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
user_language (str): 用户语言,支持 'zh'(中文)和 'en'(英文)
|
| 43 |
+
speaker (str): 语音合成使用的说话人,支持:
|
| 44 |
+
'罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'
|
| 45 |
+
|
| 46 |
+
Raises:
|
| 47 |
+
ValueError: 当指定的说话人不在支持列表中时抛出异常
|
| 48 |
+
|
| 49 |
+
Returns:
|
| 50 |
+
None: 函数会一直运行直到所有线程结束
|
| 51 |
+
|
| 52 |
+
Note:
|
| 53 |
+
该函数会阻塞运行,直到系统被外部停止或发生异常
|
| 54 |
+
"""
|
| 55 |
+
|
| 56 |
audio_frames_queue = Queue()
|
| 57 |
user_voice_queue = Queue()
|
| 58 |
transcribed_text_queue = Queue()
|
| 59 |
+
text_input_queue = Queue()
|
| 60 |
+
audio_output_queue = Queue()
|
| 61 |
threads = []
|
| 62 |
#
|
| 63 |
audio_frame_probe = EchoCancellingAudioCapture(audio_frames_queue=audio_frames_queue)
|
|
|
|
| 82 |
|
| 83 |
answer_generator_worker = LLMResponseGenerator(
|
| 84 |
user_question_queue=transcribed_text_queue,
|
| 85 |
+
generated_answer_queue=text_input_queue
|
| 86 |
)
|
| 87 |
answer_generator_worker.start()
|
| 88 |
threads.append(answer_generator_worker)
|
|
|
|
| 101 |
|
| 102 |
tts_speaker_config = tts_config_registry.get_config(TTSConfigType.MOYOYO, role)
|
| 103 |
audio_generator_worker = TTSAudioGenerator(
|
| 104 |
+
text_input_queue=text_input_queue,
|
| 105 |
+
audio_output_queue=audio_output_queue,
|
| 106 |
tts_config=tts_speaker_config
|
| 107 |
)
|
| 108 |
audio_generator_worker.start()
|
| 109 |
threads.append(audio_generator_worker)
|
| 110 |
|
| 111 |
+
audio_playing_worker = AudioStreamPlayer(audio_playing_queue=audio_output_queue)
|
| 112 |
audio_playing_worker.start()
|
| 113 |
threads.append(audio_playing_worker)
|
| 114 |
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
def main():
|
| 125 |
+
"""
|
| 126 |
+
主程序入口函数
|
| 127 |
+
|
| 128 |
+
配置并启动语音对话系统的默认设置。当前配置:
|
| 129 |
+
- 用户语言:中文 ('zh')
|
| 130 |
+
- TTS说话人:沈逸
|
| 131 |
+
|
| 132 |
+
该函数可以根据需要修改默认配置,或者扩展为支持命令行参数。
|
| 133 |
+
|
| 134 |
+
Returns:
|
| 135 |
+
None
|
| 136 |
+
|
| 137 |
+
Example:
|
| 138 |
+
直接运行脚本:
|
| 139 |
+
$ python main.py
|
| 140 |
+
|
| 141 |
+
系统将使用默认配置启动语音对话服务
|
| 142 |
+
"""
|
| 143 |
+
|
| 144 |
user_language: typing.Literal['zh', 'en'] = 'zh'
|
| 145 |
|
| 146 |
# '罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'
|
src/VoiceDialogue/services/audio/audio_answer.py
CHANGED
|
@@ -9,17 +9,44 @@ from .audio_generator import tts_manager, BaseTTSConfig
|
|
| 9 |
|
| 10 |
|
| 11 |
class TTSAudioGenerator(BaseThread):
|
| 12 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
super().__init__(group, target, name, args, kwargs, daemon=daemon)
|
| 17 |
-
|
| 18 |
-
self.
|
|
|
|
| 19 |
|
| 20 |
self.tts_instance = tts_manager.create_tts(tts_config)
|
| 21 |
|
| 22 |
def run(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
self.tts_instance.setup()
|
| 25 |
self.tts_instance.warmup()
|
|
@@ -28,7 +55,7 @@ class TTSAudioGenerator(BaseThread):
|
|
| 28 |
|
| 29 |
while not self.stopped():
|
| 30 |
try:
|
| 31 |
-
voice_task: VoiceTask = self.
|
| 32 |
except Empty:
|
| 33 |
continue
|
| 34 |
|
|
@@ -57,4 +84,4 @@ class TTSAudioGenerator(BaseThread):
|
|
| 57 |
voice_task.tts_end_time = time.time()
|
| 58 |
# print(f'生成音频:{voice_task.answer_sentence}')
|
| 59 |
|
| 60 |
-
self.
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
class TTSAudioGenerator(BaseThread):
|
| 12 |
+
"""
|
| 13 |
+
TTS 音频生成器 - 负责将文本转换为音频
|
| 14 |
+
|
| 15 |
+
这个类是一个多线程音频生成器,主要功能包括:
|
| 16 |
+
1. 从处理完的答案队列中获取语音任务
|
| 17 |
+
2. 使用TTS引擎将文本转换为音频
|
| 18 |
+
3. 处理用户打断和音频缓存逻辑
|
| 19 |
+
4. 将生成的音频任务放入音频队列中
|
| 20 |
+
"""
|
| 21 |
|
| 22 |
def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
|
| 23 |
+
text_input_queue: Queue, audio_output_queue: Queue, tts_config: BaseTTSConfig):
|
| 24 |
+
"""
|
| 25 |
+
初始化TTS音频生成器
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
text_input_queue: 文本输入队列,包含待转换的文本任务
|
| 29 |
+
audio_output_queue: 音频输出队列,用于输出转换后的音频
|
| 30 |
+
tts_config: TTS配置对象,包含语音合成的相关设置
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
super().__init__(group, target, name, args, kwargs, daemon=daemon)
|
| 34 |
+
|
| 35 |
+
self.text_input_queue: Queue = text_input_queue
|
| 36 |
+
self.audio_output_queue: Queue = audio_output_queue
|
| 37 |
|
| 38 |
self.tts_instance = tts_manager.create_tts(tts_config)
|
| 39 |
|
| 40 |
def run(self):
|
| 41 |
+
"""
|
| 42 |
+
主运行循环
|
| 43 |
+
|
| 44 |
+
执行流程:
|
| 45 |
+
1. 初始化和预热TTS引擎
|
| 46 |
+
2. 持续监听处理队列
|
| 47 |
+
3. 处理语音任务和中断逻辑
|
| 48 |
+
4. 生成音频并放入输出队列
|
| 49 |
+
"""
|
| 50 |
|
| 51 |
self.tts_instance.setup()
|
| 52 |
self.tts_instance.warmup()
|
|
|
|
| 55 |
|
| 56 |
while not self.stopped():
|
| 57 |
try:
|
| 58 |
+
voice_task: VoiceTask = self.text_input_queue.get(block=False, timeout=0.1)
|
| 59 |
except Empty:
|
| 60 |
continue
|
| 61 |
|
|
|
|
| 84 |
voice_task.tts_end_time = time.time()
|
| 85 |
# print(f'生成音频:{voice_task.answer_sentence}')
|
| 86 |
|
| 87 |
+
self.audio_output_queue.put(voice_task)
|