liumaolin commited on
Commit
2ebe57f
·
1 Parent(s): d74bcbf

新增Mixin类以增强语音任务处理功能

Browse files

- 添加`TaskStatusMixin`、`HistoryMixin`和`PerformanceLogMixin`类,提供语音任务状态检查、聊天历史更新和性能日志记录功能。
- 在`TTSAudioGenerator`和`AudioStreamPlayer`类中集成新混入类,优化任务处理逻辑。
- 更新`utils.py`,新增音频帧归一化和时长计算功能。

src/voice_dialogue/services/audio/generator.py CHANGED
@@ -1,17 +1,17 @@
1
- import re
2
  import time
3
  from multiprocessing import Queue
4
  from queue import Empty
5
 
6
  from voice_dialogue.core.base import BaseThread
7
- from voice_dialogue.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager, \
8
- session_manager, is_debug_mode
9
  from voice_dialogue.models.voice_task import VoiceTask
 
 
10
  from voice_dialogue.utils.logger import logger
11
  from .generators import tts_manager, BaseTTSConfig
12
 
13
 
14
- class TTSAudioGenerator(BaseThread):
15
  """
16
  TTS 音频生成器 - 负责将文本转换为音频
17
 
@@ -62,69 +62,45 @@ class TTSAudioGenerator(BaseThread):
62
 
63
  while not self.is_exited:
64
  try:
65
- voice_task: VoiceTask = self.text_input_queue.get(block=False, timeout=1)
66
- except Empty:
67
- continue
68
-
69
- if not voice_task.answer_sentence:
70
- continue
71
-
72
- answer_id = voice_task.answer_id
73
- if user_still_speaking_event.is_set():
74
- voice_state_manager.drop_audio_task(voice_task.id)
75
- dropped_audio_cache[answer_id] = answer_id
76
- user_still_speaking_event.clear()
77
- continue
78
 
79
- if answer_id in dropped_audio_cache:
80
- continue
81
-
82
- if self.is_task_interrupted(voice_task):
83
- continue
84
-
85
- if voice_task.session_id != session_manager.current_id:
86
- continue
87
 
88
- if self.has_no_words(voice_task.answer_sentence):
89
- logger.info(f"跳过仅包含标点的文本: '{voice_task.answer_sentence}'")
90
  continue
 
 
 
91
 
92
- if is_debug_mode():
93
- logger.info(f"TTS 音频生成: {voice_task.answer_sentence}")
 
 
94
 
95
- voice_task.tts_start_time = time.time()
96
- try:
97
- tts_generated_sentence_audio = self.tts_instance.synthesize(voice_task.answer_sentence)
98
- except Exception as e:
99
- logger.error(f"TTS 音频生成失败: {e}")
100
- voice_state_manager.reset_task_id()
101
- continue
102
 
103
- voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
104
- voice_task.tts_end_time = time.time()
105
- # print(f'生成音频:{voice_task.answer_sentence}')
106
 
107
- self.audio_output_queue.put(voice_task)
 
 
108
 
109
- def is_task_interrupted(self, voice_task: VoiceTask) -> bool:
110
- """
111
- 检查语音任务是否被中断
112
 
113
- Args:
114
- voice_task: 当前处理的语音任务
 
 
 
 
 
115
 
116
- Returns:
117
- bool: 如果任务被中断返回True,否则返回False
118
- """
119
- return (voice_state_manager.interrupt_task_id and
120
- voice_task.id != voice_state_manager.interrupt_task_id)
121
 
122
- def has_no_words(self, text: str) -> bool:
123
- """
124
- 检查文本是否不包含任何单词(字母、数字或中文字符)。
125
- 如果文本只包含标点、空格等符号,则返回 True。
126
- """
127
- # 搜索任何字母、数字或中文字符
128
- if re.search(r'[\u4e00-\u9fa5a-zA-Z0-9]', text):
129
- return False
130
- return True
 
 
1
  import time
2
  from multiprocessing import Queue
3
  from queue import Empty
4
 
5
  from voice_dialogue.core.base import BaseThread
6
+ from voice_dialogue.core.constants import voice_state_manager, is_debug_mode
 
7
  from voice_dialogue.models.voice_task import VoiceTask
8
+ from voice_dialogue.services.mixins import TaskStatusMixin
9
+ from voice_dialogue.services.utils import has_no_words
10
  from voice_dialogue.utils.logger import logger
11
  from .generators import tts_manager, BaseTTSConfig
12
 
13
 
14
+ class TTSAudioGenerator(BaseThread, TaskStatusMixin):
15
  """
16
  TTS 音频生成器 - 负责将文本转换为音频
17
 
 
62
 
63
  while not self.is_exited:
64
  try:
65
+ voice_task: VoiceTask = self.text_input_queue.get(block=True, timeout=1)
66
+ if not voice_task:
67
+ continue
 
 
 
 
 
 
 
 
 
 
68
 
69
+ self._process_task(voice_task)
 
 
 
 
 
 
 
70
 
71
+ except Empty:
 
72
  continue
73
+ except Exception as e:
74
+ logger.error(f"TTSAudioGenerator 主循环错误: {e}")
75
+ time.sleep(0.1)
76
 
77
+ def _process_task(self, voice_task: VoiceTask):
78
+ """处理单个文本到语音任务"""
79
+ if not voice_task.answer_sentence:
80
+ return
81
 
82
+ if self.handle_user_speaking_interruption(voice_task):
83
+ return
 
 
 
 
 
84
 
85
+ if not self.is_task_valid(voice_task):
86
+ return
 
87
 
88
+ if has_no_words(voice_task.answer_sentence):
89
+ logger.info(f"跳过仅包含标点的文本: '{voice_task.answer_sentence}'")
90
+ return
91
 
92
+ if is_debug_mode():
93
+ logger.info(f"TTS 音频生成: {voice_task.answer_sentence}")
 
94
 
95
+ voice_task.tts_start_time = time.time()
96
+ try:
97
+ tts_generated_sentence_audio = self.tts_instance.synthesize(voice_task.answer_sentence)
98
+ except Exception as e:
99
+ logger.error(f"TTS 音频生成失败: {e}")
100
+ voice_state_manager.reset_task_id()
101
+ return
102
 
103
+ voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
104
+ voice_task.tts_end_time = time.time()
 
 
 
105
 
106
+ self.audio_output_queue.put(voice_task)
 
 
 
 
 
 
 
 
src/voice_dialogue/services/audio/player.py CHANGED
@@ -1,21 +1,22 @@
1
  import tempfile
2
- from collections import OrderedDict
3
  from multiprocessing import Queue
4
  from queue import Empty
 
5
 
6
  import soundfile as sf
7
  from playsound import playsound
8
 
9
  from voice_dialogue.core.base import BaseThread
10
  from voice_dialogue.core.constants import (
11
- user_still_speaking_event, voice_state_manager, dropped_audio_cache, chat_history_cache,
12
- silence_over_threshold_event, session_manager, is_debug_mode
13
  )
14
  from voice_dialogue.models.voice_task import VoiceTask, AnswerDisplayMessage
 
15
  from voice_dialogue.utils.logger import logger
16
 
17
 
18
- class AudioStreamPlayer(BaseThread):
19
  """音频流播放器 - 负责播放生成的音频并管理播放状态"""
20
 
21
  def __init__(
@@ -27,116 +28,76 @@ class AudioStreamPlayer(BaseThread):
27
  self.audio_playing_queue: Queue = audio_playing_queue
28
  self.websocket_message_queue: Queue = websocket_message_queue
29
 
30
- def run(self):
31
- self.is_ready = True
 
 
32
 
 
 
 
33
  while not self.is_exited:
 
 
34
 
35
- try:
36
- voice_task: VoiceTask = self.audio_playing_queue.get(block=False, timeout=1)
37
- except Empty:
 
 
 
38
  continue
39
 
40
- while True:
41
- task_id = voice_task.id
42
- answer_id = voice_task.answer_id
43
- if user_still_speaking_event.is_set():
44
- logger.info('用户还有说话')
45
- voice_state_manager.drop_audio_task(task_id)
46
- dropped_audio_cache[answer_id] = answer_id
47
- user_still_speaking_event.clear()
48
- break
49
-
50
- if self.is_task_interrupted(voice_task):
51
- break
52
-
53
- if voice_task.session_id != session_manager.current_id:
54
- break
55
-
56
- if answer_id in dropped_audio_cache:
57
- # print('Drop answer audio')
58
- break
59
-
60
- if not silence_over_threshold_event.is_set():
61
- continue
62
-
63
- if self.websocket_message_queue:
64
- self.websocket_message_queue.put_nowait(
65
- AnswerDisplayMessage(
66
- session_id=voice_task.session_id,
67
- task_id=task_id,
68
- answer_index=voice_task.answer_index,
69
- answer=voice_task.answer_sentence,
70
- )
71
  )
 
72
 
73
- if is_debug_mode():
74
- self._log_task_info(voice_task)
75
 
76
- self.update_chat_history(voice_task)
77
 
78
- voice_state_manager.set_audio_playing(task_id)
79
- voice_state_manager.reset_task_id()
80
 
81
- if not self.is_stopped:
82
- audio_data, sample_rate = voice_task.tts_generated_sentence_audio
83
- self.playing_audio(audio_data, sample_rate)
84
 
85
- if self.audio_playing_queue.empty():
86
- logger.info(f'��答播放完了')
87
 
88
- break
 
 
 
89
 
90
- def is_task_interrupted(self, voice_task: VoiceTask) -> bool:
 
 
 
91
  """
92
- 检查语音任务是否被中断
 
93
 
94
- Args:
95
- voice_task: 当前处理的语音任务
96
 
97
- Returns:
98
- bool: 如果任务被中断返回True,否则返回False
99
- """
100
- return (voice_state_manager.interrupt_task_id and
101
- voice_task.id != voice_state_manager.interrupt_task_id)
102
-
103
- def _log_task_info(self, voice_task):
104
- import librosa
105
- asr_duration = voice_task.whisper_end_time - voice_task.whisper_start_time
106
- llm_duration = voice_task.llm_end_time - voice_task.llm_start_time
107
- tts_duration = voice_task.tts_end_time - voice_task.tts_start_time
108
- audio_data, sample_rate = voice_task.tts_generated_sentence_audio
109
- audio_duration = librosa.get_duration(y=audio_data, sr=sample_rate)
110
- logger.info(
111
- f"\n"
112
- f"┌───────────────────────── 任务信息 ───────────────────────┐\n"
113
- f"│ 任务ID: {voice_task.id}\n"
114
- f"├───────────────────────── 性能统计 ────────────────────────┤\n"
115
- f"│ ASR 耗时: {asr_duration:.2f}s\n"
116
- f"│ LLM 耗时: {llm_duration:.2f}s\n"
117
- f"│ TTS 耗时: {tts_duration:.2f}s\n"
118
- f"│ 音频长度: {audio_duration:.2f}s\n"
119
- f"├───────────────────────── 生成内容 ────────────────────────┤\n"
120
- f"│-> {voice_task.answer_sentence}\n"
121
- f"└──────────────────────────────────────────────────────────┘"
122
- )
123
-
124
- def update_chat_history(self, voice_task):
125
- chat_history = chat_history_cache.get(voice_task.session_id, OrderedDict())
126
- task_answer_id = voice_task.answer_id
127
- user_question = f'{task_answer_id}:human'
128
- chat_history[user_question] = voice_task.transcribed_text
129
-
130
- ai_answer = f'{task_answer_id}:ai'
131
- cached_ai_answer = chat_history.get(ai_answer, [])
132
- cached_ai_answer.append(voice_task.answer_sentence)
133
- chat_history[ai_answer] = cached_ai_answer
134
-
135
- chat_history_cache[voice_task.session_id] = chat_history
136
-
137
- def playing_audio(self, audio_data, sample_rate=16000):
138
- with tempfile.NamedTemporaryFile('w+b', suffix='.wav') as soundfile:
139
- # print(f'================soundfile : {soundfile.name}')
140
- sf.write(soundfile, audio_data, samplerate=sample_rate, subtype='PCM_16', closefd=False)
141
- # print(soundfile.name)
142
- playsound(soundfile.name, block=True)
 
1
  import tempfile
2
+ import time
3
  from multiprocessing import Queue
4
  from queue import Empty
5
+ from typing import Optional
6
 
7
  import soundfile as sf
8
  from playsound import playsound
9
 
10
  from voice_dialogue.core.base import BaseThread
11
  from voice_dialogue.core.constants import (
12
+ voice_state_manager, silence_over_threshold_event, is_debug_mode
 
13
  )
14
  from voice_dialogue.models.voice_task import VoiceTask, AnswerDisplayMessage
15
+ from voice_dialogue.services.mixins import TaskStatusMixin, HistoryMixin, PerformanceLogMixin
16
  from voice_dialogue.utils.logger import logger
17
 
18
 
19
+ class AudioStreamPlayer(BaseThread, TaskStatusMixin, HistoryMixin, PerformanceLogMixin):
20
  """音频流播放器 - 负责播放生成的音频并管理播放状态"""
21
 
22
  def __init__(
 
28
  self.audio_playing_queue: Queue = audio_playing_queue
29
  self.websocket_message_queue: Queue = websocket_message_queue
30
 
31
+ def _get_task_from_queue(self) -> Optional[VoiceTask]:
32
+ """从音频播放队列中获取任务。"""
33
+ # 使用阻塞式获取,当队列为空时,run循环中的Empty异常会处理它
34
+ return self.audio_playing_queue.get(block=True, timeout=1)
35
 
36
+ def _process_task(self, voice_task: VoiceTask):
37
+ """处理单个音频播放任务。"""
38
+ # 这个内部循环用于等待一个外部事件(用户静音),同时检查任务是否被中断
39
  while not self.is_exited:
40
+ if self.handle_user_speaking_interruption(voice_task):
41
+ return # 任务被中断,结束处理
42
 
43
+ if not self.is_task_valid(voice_task):
44
+ return # 任务无效,结束处理
45
+
46
+ # 等待用户彻底静音的信号
47
+ if not silence_over_threshold_event.is_set():
48
+ time.sleep(0.05) # 短暂等待,避免CPU空转
49
  continue
50
 
51
+ # --- 开始播放逻辑 ---
52
+ if self.websocket_message_queue:
53
+ self.websocket_message_queue.put_nowait(
54
+ AnswerDisplayMessage(
55
+ session_id=voice_task.session_id,
56
+ task_id=voice_task.id,
57
+ answer_index=voice_task.answer_index,
58
+ answer=voice_task.answer_sentence,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  )
60
+ )
61
 
62
+ if is_debug_mode():
63
+ self.log_task_performance(voice_task, "音频播放")
64
 
65
+ self.update_chat_history(voice_task)
66
 
67
+ voice_state_manager.set_audio_playing(voice_task.id)
68
+ voice_state_manager.reset_task_id()
69
 
70
+ if not self.is_stopped:
71
+ audio_data, sample_rate = voice_task.tts_generated_sentence_audio
72
+ self._play_audio(audio_data, sample_rate)
73
 
74
+ # 任务处理完毕,跳出内部循环
75
+ break
76
 
77
+ def _play_audio(self, audio_data, sample_rate=16000):
78
+ with tempfile.NamedTemporaryFile('w+b', suffix='.wav') as soundfile:
79
+ sf.write(soundfile, audio_data, samplerate=sample_rate, subtype='PCM_16', closefd=False)
80
+ playsound(soundfile.name, block=True)
81
 
82
+ def run(self):
83
+ """
84
+ 主运行循环。
85
+ 不断从队列获取任务,并调用_process_task进行处理。
86
  """
87
+ if not hasattr(self, 'is_ready'):
88
+ logger.warning(f"{self.__class__.__name__} 中缺少 'is_ready' 属性。")
89
 
90
+ self.is_ready = True
 
91
 
92
+ while not self.is_exited:
93
+ try:
94
+ task = self._get_task_from_queue()
95
+ if task:
96
+ self._process_task(task)
97
+
98
+ except Empty:
99
+ # 队列在1秒内没有新项目,这是正常现象,继续循环
100
+ continue
101
+ except Exception as e:
102
+ logger.error(f"在 AudioStreamPlayer 环节发生错误: {e}")
103
+ time.sleep(0.1) # 发生未知错误时短暂休眠
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/voice_dialogue/services/mixins.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+
3
+ from voice_dialogue.core.constants import (
4
+ voice_state_manager, session_manager, dropped_audio_cache,
5
+ user_still_speaking_event, chat_history_cache
6
+ )
7
+ from voice_dialogue.models.voice_task import VoiceTask
8
+ from voice_dialogue.utils.logger import logger
9
+
10
+
11
+ class TaskStatusMixin:
12
+ """提供语音任务状态检查和中断处理的通用功能"""
13
+
14
+ def is_task_interrupted(self, voice_task: VoiceTask) -> bool:
15
+ """检查语音任务是否被其他任务中断"""
16
+ return (voice_state_manager.interrupt_task_id and
17
+ voice_task.id != voice_state_manager.interrupt_task_id)
18
+
19
+ def is_task_valid(self, voice_task: VoiceTask) -> bool:
20
+ """检查语音任务是否有效(会话匹配、未被丢弃等)"""
21
+ if self.is_task_interrupted(voice_task):
22
+ return False
23
+ if voice_task.session_id != session_manager.current_id:
24
+ return False
25
+ if voice_task.answer_id in dropped_audio_cache:
26
+ return False
27
+ return True
28
+
29
+ def handle_user_speaking_interruption(self, voice_task: VoiceTask) -> bool:
30
+ """处理用户继续说话导致的中断"""
31
+ if user_still_speaking_event.is_set():
32
+ logger.info(f'用户仍在说话,丢弃任务 {voice_task.id}')
33
+ voice_state_manager.drop_audio_task(voice_task.id)
34
+ dropped_audio_cache[voice_task.answer_id] = voice_task.answer_id
35
+ user_still_speaking_event.clear()
36
+ return True
37
+ return False
38
+
39
+
40
+ class HistoryMixin:
41
+ """提供更新聊天历史记录的功能"""
42
+
43
+ def update_chat_history(self, voice_task: VoiceTask) -> None:
44
+ """更新会话的聊天历史"""
45
+ chat_history = chat_history_cache.get(voice_task.session_id, OrderedDict())
46
+ task_answer_id = voice_task.answer_id
47
+
48
+ user_question_key = f'{task_answer_id}:human'
49
+ if user_question_key not in chat_history:
50
+ chat_history[user_question_key] = voice_task.transcribed_text
51
+
52
+ ai_answer_key = f'{task_answer_id}:ai'
53
+ cached_ai_answer = chat_history.get(ai_answer_key, [])
54
+ cached_ai_answer.append(voice_task.answer_sentence)
55
+ chat_history[ai_answer_key] = cached_ai_answer
56
+
57
+ chat_history_cache[voice_task.session_id] = chat_history
58
+
59
+
60
+ class PerformanceLogMixin:
61
+ """提供记录任务性能日志的功能"""
62
+
63
+ def log_task_performance(self, voice_task: VoiceTask, task_name: str = "任务"):
64
+ """记录ASR, LLM, TTS各阶段耗时和音频长度"""
65
+ try:
66
+ from voice_dialogue.services.utils import calculate_audio_duration
67
+
68
+ asr_duration = getattr(voice_task, 'whisper_end_time', 0) - getattr(voice_task, 'whisper_start_time', 0)
69
+ llm_duration = getattr(voice_task, 'llm_end_time', 0) - getattr(voice_task, 'llm_start_time', 0)
70
+ tts_duration = getattr(voice_task, 'tts_end_time', 0) - getattr(voice_task, 'tts_start_time', 0)
71
+
72
+ audio_duration = 0
73
+ if hasattr(voice_task, 'tts_generated_sentence_audio') and voice_task.tts_generated_sentence_audio:
74
+ audio_data, sample_rate = voice_task.tts_generated_sentence_audio
75
+ audio_duration = calculate_audio_duration(audio_data, sample_rate)
76
+
77
+ logger.info(
78
+ f"\n"
79
+ f"┌───────────────────────── 任务信息 ───────────────────────┐\n"
80
+ f"│ 任务ID: {voice_task.id}\n"
81
+ f"├───────────────────────── 性能统计 ────────────────────────┤\n"
82
+ f"│ ASR 耗时: {asr_duration:.2f}s\n"
83
+ f"│ LLM 耗时: {llm_duration:.2f}s\n"
84
+ f"│ TTS 耗时: {tts_duration:.2f}s\n"
85
+ f"│ 音频长度: {audio_duration:.2f}s\n"
86
+ f"├───────────────────────── 生成内容 ────────────────────────┤\n"
87
+ f"│-> {voice_task.answer_sentence}\n"
88
+ f"└──────────────────────────────────────────────────────────┘"
89
+ )
90
+ except Exception as e:
91
+ logger.error(f"记录任务性能信息时出错: {e}")
src/voice_dialogue/services/speech/monitor.py CHANGED
@@ -20,6 +20,7 @@ from voice_dialogue.core.constants import (
20
  from voice_dialogue.core.enums import AudioState
21
  from voice_dialogue.models.voice_task import VoiceTask
22
  from voice_dialogue.services.audio.vad import SileroVAD
 
23
  from voice_dialogue.utils.logger import logger
24
 
25
 
@@ -110,7 +111,7 @@ class SpeechStateMonitor(BaseThread):
110
 
111
  def _normalize_audio_frame(self, data: bytes) -> np.ndarray:
112
  """将 int16 格式的音频字节数据转换为 [-1.0, 1.0] 范围的 numpy 浮点数组。"""
113
- return np.frombuffer(data, dtype=np.int16).astype(np.float32) / np.iinfo(np.int16).max
114
 
115
  def _detect_speech(self, audio_frame: np.ndarray) -> bool:
116
  return self._vad_instance.is_voice_active(audio_frame, self.sample_rate)
@@ -119,11 +120,11 @@ class SpeechStateMonitor(BaseThread):
119
  """从队列获取音频帧"""
120
  try:
121
  if self._enable_vad:
122
- data = self.audio_frame_queue.get(block=False, timeout=self.config.QUEUE_TIMEOUT)
123
  audio_frame = self._normalize_audio_frame(data)
124
  is_voice_active = self._detect_speech(audio_frame)
125
  else:
126
- data, is_voice_active = self.audio_frame_queue.get(block=False, timeout=self.config.QUEUE_TIMEOUT)
127
  audio_frame = self._normalize_audio_frame(data)
128
  return audio_frame, is_voice_active
129
  except Empty:
@@ -131,7 +132,7 @@ class SpeechStateMonitor(BaseThread):
131
 
132
  def _calculate_frame_duration_ms(self, audio_frame):
133
  """计算音频帧时长(毫秒)"""
134
- return librosa.get_duration(y=audio_frame, sr=self.sample_rate) * 1000
135
 
136
  def _process_active_voice_frame(self, audio_frame: np.ndarray):
137
  """
 
20
  from voice_dialogue.core.enums import AudioState
21
  from voice_dialogue.models.voice_task import VoiceTask
22
  from voice_dialogue.services.audio.vad import SileroVAD
23
+ from voice_dialogue.services.utils import normalize_audio_frame, calculate_audio_duration
24
  from voice_dialogue.utils.logger import logger
25
 
26
 
 
111
 
112
  def _normalize_audio_frame(self, data: bytes) -> np.ndarray:
113
  """将 int16 格式的音频字节数据转换为 [-1.0, 1.0] 范围的 numpy 浮点数组。"""
114
+ return normalize_audio_frame(data)
115
 
116
  def _detect_speech(self, audio_frame: np.ndarray) -> bool:
117
  return self._vad_instance.is_voice_active(audio_frame, self.sample_rate)
 
120
  """从队列获取音频帧"""
121
  try:
122
  if self._enable_vad:
123
+ data = self.audio_frame_queue.get(block=True, timeout=self.config.QUEUE_TIMEOUT)
124
  audio_frame = self._normalize_audio_frame(data)
125
  is_voice_active = self._detect_speech(audio_frame)
126
  else:
127
+ data, is_voice_active = self.audio_frame_queue.get(block=True, timeout=self.config.QUEUE_TIMEOUT)
128
  audio_frame = self._normalize_audio_frame(data)
129
  return audio_frame, is_voice_active
130
  except Empty:
 
132
 
133
  def _calculate_frame_duration_ms(self, audio_frame):
134
  """计算音频帧时长(毫秒)"""
135
+ return calculate_audio_duration(audio_data=audio_frame, sample_rate=self.sample_rate) * 1000
136
 
137
  def _process_active_voice_frame(self, audio_frame: np.ndarray):
138
  """
src/voice_dialogue/services/speech/recognizer.py CHANGED
@@ -33,7 +33,7 @@ class ASRWorker(BaseThread):
33
 
34
  while not self.is_exited:
35
  try:
36
- voice_task: VoiceTask = self.user_voice_queue.get(block=False, timeout=1)
37
  except Empty:
38
  continue
39
 
 
33
 
34
  while not self.is_exited:
35
  try:
36
+ voice_task: VoiceTask = self.user_voice_queue.get(block=True, timeout=1)
37
  except Empty:
38
  continue
39
 
src/voice_dialogue/services/text/generator.py CHANGED
@@ -214,7 +214,7 @@ class LLMResponseGenerator(BaseThread):
214
  """主运行循环"""
215
  while not self.is_exited:
216
  try:
217
- voice_task: VoiceTask = self.user_question_queue.get(block=False, timeout=1)
218
  self._process_voice_task(voice_task)
219
  except Empty:
220
  continue
 
214
  """主运行循环"""
215
  while not self.is_exited:
216
  try:
217
+ voice_task: VoiceTask = self.user_question_queue.get(block=True, timeout=1)
218
  self._process_voice_task(voice_task)
219
  except Empty:
220
  continue
src/voice_dialogue/services/utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import librosa
4
+ import numpy as np
5
+
6
+ from voice_dialogue.utils.logger import logger
7
+
8
+
9
+ def has_no_words(text: str) -> bool:
10
+ """
11
+ 检查文本是否不包含任何单词(字母、数字或中文字符)。
12
+ 如果文本只包含标点、空格等符号,则返回 True。
13
+ """
14
+ if not text:
15
+ return True
16
+ # 搜索任何字母、数字或中文字符
17
+ if re.search(r'[\u4e00-\u9fa5a-zA-Z0-9]', text):
18
+ return False
19
+ return True
20
+
21
+
22
+ def normalize_audio_frame(data: bytes) -> 'np.ndarray':
23
+ """
24
+ 将 int16 格式的音频字节数据转换为 [-1.0, 1.0] 范围的 numpy 浮点数组。
25
+ """
26
+ return np.frombuffer(data, dtype=np.int16).astype(np.float32) / np.iinfo(np.int16).max
27
+
28
+
29
+ def calculate_audio_duration(audio_data: 'np.ndarray', sample_rate: int = 16000) -> float:
30
+ """
31
+ 计算音频时长(秒)。
32
+ """
33
+ try:
34
+ return librosa.get_duration(y=audio_data, sr=sample_rate)
35
+ except Exception as e:
36
+ logger.error(f"计算音频时长时发生错误: {e}")
37
+ return 0.0