liumaolin committed on
Commit
57b0084
·
1 Parent(s): 99e8988

Refactor audio processing pipeline to normalize data in `SpeechStateMonitor` and streamline queuing in `AudioCapture`

Browse files
src/voice_dialogue/services/audio/capture.py CHANGED
@@ -11,7 +11,6 @@ import threading
11
  import time
12
  from multiprocessing import Queue
13
 
14
- import numpy as np
15
  import pyaudio
16
 
17
  from voice_dialogue.config.paths import LIBRARIES_PATH
@@ -96,12 +95,9 @@ class AudioCapture(BaseThread):
96
  data = stream.read(chunk)
97
 
98
  if self.is_paused:
99
- time.sleep(0.01)
100
  continue
101
 
102
- # 将音频数据转换为 [-1.0, 1.0] 范围内的浮点数
103
- audio_frame = np.frombuffer(data, dtype=np.int16).astype(np.float32) / np.iinfo(np.int16).max
104
- self.audio_frames_queue.put(audio_frame)
105
 
106
  except Exception as e:
107
  logger.error(f'PyAudio 音频捕获器运行时发生错误: {e}')
@@ -134,13 +130,10 @@ class AudioCapture(BaseThread):
134
 
135
  if data_ptr and size.value > 0:
136
  audio_data = bytes(data_ptr[: size.value])
137
- # 将音频数据转换为 [-1.0, 1.0] 范围内的浮点数
138
- audio_frame = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / np.iinfo(
139
- np.int16).max
140
 
141
  if not self.is_paused:
142
  # 将音频帧和语音活动状态一同放入队列
143
- self.audio_frames_queue.put((audio_frame, is_voice_active.value))
144
 
145
  # 释放原生库分配的内存
146
  audio_recorder.freeAudioData(data_ptr)
 
11
  import time
12
  from multiprocessing import Queue
13
 
 
14
  import pyaudio
15
 
16
  from voice_dialogue.config.paths import LIBRARIES_PATH
 
95
  data = stream.read(chunk)
96
 
97
  if self.is_paused:
 
98
  continue
99
 
100
+ self.audio_frames_queue.put(data)
 
 
101
 
102
  except Exception as e:
103
  logger.error(f'PyAudio 音频捕获器运行时发生错误: {e}')
 
130
 
131
  if data_ptr and size.value > 0:
132
  audio_data = bytes(data_ptr[: size.value])
 
 
 
133
 
134
  if not self.is_paused:
135
  # 将音频帧和语音活动状态一同放入队列
136
+ self.audio_frames_queue.put((audio_data, is_voice_active.value))
137
 
138
  # 释放原生库分配的内存
139
  audio_recorder.freeAudioData(data_ptr)
src/voice_dialogue/services/speech/monitor.py CHANGED
@@ -45,24 +45,23 @@ class SpeechStateMonitor(BaseThread):
45
  - 音频帧的缓存和处理
46
  """
47
 
48
- def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None,
49
- audio_frame_queue: Queue,
50
- user_voice_queue: Queue,
51
- device_sample_rate: int = 16000
52
- ):
53
  """
54
  初始化语音状态监控器
55
 
56
  Args:
57
  audio_frame_queue: 音频帧队列
58
  user_voice_queue: 用户语音队列
59
- device_sample_rate: 设备采样率,默认16000Hz
60
  """
61
  super().__init__(group, target, name, args, kwargs, daemon=daemon)
62
 
63
  self.audio_frame_queue = audio_frame_queue
64
  self.user_voice_queue = user_voice_queue
65
- self.sample_rate = device_sample_rate
66
 
67
  # 配置参数
68
  self.config = SpeechMonitorConfig()
@@ -101,10 +100,16 @@ class SpeechStateMonitor(BaseThread):
101
  if self.user_silence_duration >= self.config.USER_SILENCE_THRESHOLD:
102
  silence_over_threshold_event.set()
103
 
 
 
 
 
104
  def _get_audio_frame_from_queue(self):
105
  """从队列获取音频帧"""
106
  try:
107
- return self.audio_frame_queue.get(block=False, timeout=self.config.QUEUE_TIMEOUT)
 
 
108
  except Empty:
109
  return None, None
110
 
 
45
  - 音频帧的缓存和处理
46
  """
47
 
48
+ def __init__(
49
+ self, group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None,
50
+ audio_frame_queue: Queue,
51
+ user_voice_queue: Queue,
52
+ ):
53
  """
54
  初始化语音状态监控器
55
 
56
  Args:
57
  audio_frame_queue: 音频帧队列
58
  user_voice_queue: 用户语音队列
 
59
  """
60
  super().__init__(group, target, name, args, kwargs, daemon=daemon)
61
 
62
  self.audio_frame_queue = audio_frame_queue
63
  self.user_voice_queue = user_voice_queue
64
+ self.sample_rate = 16000
65
 
66
  # 配置参数
67
  self.config = SpeechMonitorConfig()
 
100
  if self.user_silence_duration >= self.config.USER_SILENCE_THRESHOLD:
101
  silence_over_threshold_event.set()
102
 
103
+ def _normalize_audio_frame(self, data: bytes) -> np.ndarray:
104
+ """将 int16 格式的音频字节数据转换为 [-1.0, 1.0] 范围的 numpy 浮点数组。"""
105
+ return np.frombuffer(data, dtype=np.int16).astype(np.float32) / np.iinfo(np.int16).max
106
+
107
  def _get_audio_frame_from_queue(self):
108
  """从队列获取音频帧"""
109
  try:
110
+ data, is_voice_active = self.audio_frame_queue.get(block=False, timeout=self.config.QUEUE_TIMEOUT)
111
+ audio_frame = self._normalize_audio_frame(data)
112
+ return audio_frame, is_voice_active
113
  except Empty:
114
  return None, None
115