liumaolin
committed on
Commit
·
99e8988
1
Parent(s):
a453c72
Update `AudioCapture` to support both PyAudio and macOS native AEC+VAD libraries
Browse files
- Add PyAudio-based standard audio capture as an option.
- Enable runtime switching between PyAudio and macOS native libraries based on echo cancellation settings.
- Refactor capture methods for better modularity and maintainability.
src/voice_dialogue/services/audio/capture.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import ctypes
|
|
@@ -9,6 +12,7 @@ import time
|
|
| 9 |
from multiprocessing import Queue
|
| 10 |
|
| 11 |
import numpy as np
|
|
|
|
| 12 |
|
| 13 |
from voice_dialogue.config.paths import LIBRARIES_PATH
|
| 14 |
from voice_dialogue.core.base import BaseThread
|
|
@@ -17,8 +21,10 @@ from voice_dialogue.utils.logger import logger
|
|
| 17 |
|
| 18 |
class AudioCapture(BaseThread):
|
| 19 |
"""
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
|
| 24 |
def __init__(
|
|
@@ -26,6 +32,15 @@ class AudioCapture(BaseThread):
|
|
| 26 |
audio_frames_queue: Queue = None,
|
| 27 |
enable_echo_cancellation: bool = True,
|
| 28 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
super().__init__(group, target, name, args, kwargs, daemon=daemon)
|
| 30 |
|
| 31 |
self.audio_frames_queue = audio_frames_queue
|
|
@@ -33,43 +48,104 @@ class AudioCapture(BaseThread):
|
|
| 33 |
self._enable_echo_cancellation = enable_echo_cancellation
|
| 34 |
|
| 35 |
@property
|
| 36 |
-
def is_paused(self):
|
|
|
|
| 37 |
return self._pause_event.is_set()
|
| 38 |
|
| 39 |
def pause(self):
|
|
|
|
| 40 |
self._pause_event.set()
|
| 41 |
|
| 42 |
def resume(self):
|
|
|
|
| 43 |
self._pause_event.clear()
|
| 44 |
|
| 45 |
def run(self):
|
| 46 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
audio_recorder = ctypes.CDLL(LIBRARIES_PATH / 'libAudioCapture.dylib')
|
| 48 |
audio_recorder.getAudioData.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_bool)]
|
| 49 |
audio_recorder.getAudioData.restype = ctypes.POINTER(ctypes.c_ubyte)
|
| 50 |
audio_recorder.freeAudioData.argtypes = [ctypes.POINTER(ctypes.c_ubyte)]
|
| 51 |
-
audio_recorder.startRecord()
|
| 52 |
|
|
|
|
| 53 |
self.is_ready = True
|
| 54 |
|
| 55 |
try:
|
| 56 |
while not self.is_exited:
|
| 57 |
size = ctypes.c_int(0)
|
| 58 |
is_voice_active = ctypes.c_bool(False)
|
| 59 |
-
#
|
| 60 |
data_ptr = audio_recorder.getAudioData(ctypes.byref(size), ctypes.byref(is_voice_active))
|
| 61 |
|
| 62 |
if data_ptr and size.value > 0:
|
| 63 |
audio_data = bytes(data_ptr[: size.value])
|
| 64 |
-
|
|
|
|
|
|
|
| 65 |
|
| 66 |
if not self.is_paused:
|
|
|
|
| 67 |
self.audio_frames_queue.put((audio_frame, is_voice_active.value))
|
| 68 |
|
| 69 |
-
#
|
| 70 |
audio_recorder.freeAudioData(data_ptr)
|
| 71 |
else:
|
| 72 |
-
#
|
| 73 |
time.sleep(0.01)
|
| 74 |
except Exception as e:
|
| 75 |
logger.error(f'回声消除音频捕获器运行时发生错误: {e}')
|
|
|
|
| 1 |
"""
|
| 2 |
+
音频捕获模块
|
| 3 |
+
|
| 4 |
+
提供两种音频采集方式:
|
| 5 |
+
1. 使用 PyAudio 进行标准音频采集。
|
| 6 |
+
2. 使用集成了声学回声消除(AEC)和语音活动检测(VAD)的 macOS 原生库进行音频采集。
|
| 7 |
"""
|
| 8 |
|
| 9 |
import ctypes
|
|
|
|
| 12 |
from multiprocessing import Queue
|
| 13 |
|
| 14 |
import numpy as np
|
| 15 |
+
import pyaudio
|
| 16 |
|
| 17 |
from voice_dialogue.config.paths import LIBRARIES_PATH
|
| 18 |
from voice_dialogue.core.base import BaseThread
|
|
|
|
| 21 |
|
| 22 |
class AudioCapture(BaseThread):
|
| 23 |
"""
|
| 24 |
+
音频捕获器。
|
| 25 |
+
|
| 26 |
+
根据配置选择使用 PyAudio 或带回声消除(AEC)的 macOS 原生库进行音频采集。
|
| 27 |
+
作为一个后台线程运行,将捕获的音频帧放入队列中。
|
| 28 |
"""
|
| 29 |
|
| 30 |
def __init__(
|
|
|
|
| 32 |
audio_frames_queue: Queue = None,
|
| 33 |
enable_echo_cancellation: bool = True,
|
| 34 |
):
|
| 35 |
+
"""
|
| 36 |
+
初始化音频捕获器。
|
| 37 |
+
|
| 38 |
+
Args:
|
| 39 |
+
audio_frames_queue (Queue): 用于存放捕获的音频帧的队列。
|
| 40 |
+
enable_echo_cancellation (bool): 是否启用回声消除功能。
|
| 41 |
+
若为 True,则使用原生库进行捕获;
|
| 42 |
+
否则,使用 PyAudio。
|
| 43 |
+
"""
|
| 44 |
super().__init__(group, target, name, args, kwargs, daemon=daemon)
|
| 45 |
|
| 46 |
self.audio_frames_queue = audio_frames_queue
|
|
|
|
| 48 |
self._enable_echo_cancellation = enable_echo_cancellation
|
| 49 |
|
| 50 |
@property
def is_paused(self) -> bool:
    """Report whether the capturer is currently paused.

    Returns:
        True when the internal pause event is set, False otherwise.
    """
    pause_flag = self._pause_event
    return pause_flag.is_set()
|
| 54 |
|
| 55 |
def pause(self):
    """Pause audio capture by setting the internal pause event."""
    pause_flag = self._pause_event
    pause_flag.set()
|
| 58 |
|
| 59 |
def resume(self):
    """Resume audio capture by clearing the internal pause event."""
    pause_flag = self._pause_event
    pause_flag.clear()
|
| 62 |
|
| 63 |
def run(self):
    """Thread entry point.

    Dispatches to the capture loop selected by the
    ``_enable_echo_cancellation`` flag: ``_run_with_aec`` when the flag is
    truthy, ``_run`` otherwise.
    """
    capture_loop = self._run_with_aec if self._enable_echo_cancellation else self._run
    capture_loop()
|
| 73 |
+
|
| 74 |
+
def _run(self):
    """Capture audio with PyAudio (no echo cancellation, no VAD).

    Opens a mono, 16 kHz, 16-bit input stream and reads it in 512-sample
    chunks until ``self.is_exited`` becomes true. Each chunk is converted to
    float32, divided by the int16 maximum, and put on
    ``self.audio_frames_queue``. While ``self.is_paused`` is true, chunks are
    still read from the device but discarded.

    Errors raised while opening or reading the stream are logged instead of
    propagated, and the stream and the PyAudio instance are released on exit.
    """
    chunk = 512
    sample_rate = 16000
    int16_max = np.iinfo(np.int16).max

    p = pyaudio.PyAudio()
    stream = None
    try:
        # Open inside the try block so that a failure to open the device is
        # logged like any other capture error and `p` is still terminated.
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk,
        )

        logger.info("使用 PyAudio 开始音频采集...")
        self.is_ready = True

        while not self.is_exited:
            # Do not raise on input overflow: a transient overrun should drop
            # samples, not terminate the capture thread.
            data = stream.read(chunk, exception_on_overflow=False)

            if self.is_paused:
                # The blocking read above already paces the loop, so no extra
                # sleep is needed; the chunk is simply discarded.
                continue

            # Convert int16 samples to float32 scaled by the int16 maximum.
            audio_frame = np.frombuffer(data, dtype=np.int16).astype(np.float32) / int16_max
            # NOTE(review): this path enqueues the bare frame, while the AEC
            # path enqueues a (frame, is_voice_active) tuple; confirm that the
            # queue consumer handles both shapes.
            self.audio_frames_queue.put(audio_frame)

    except Exception as e:
        logger.error(f'PyAudio 音频捕获器运行时发生错误: {e}')
    finally:
        logger.info("停止 PyAudio 音频采集...")
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            # Always release PortAudio, even if stopping the stream failed.
            p.terminate()
|
| 113 |
+
|
| 114 |
+
def _run_with_aec(self):
|
| 115 |
+
"""
|
| 116 |
+
使用 macOS 原生库进行音频捕获。
|
| 117 |
+
|
| 118 |
+
此方法通过 ctypes 调用外部动态库,支持声学回声消除(AEC)和语音活动检测(VAD)。
|
| 119 |
+
"""
|
| 120 |
audio_recorder = ctypes.CDLL(LIBRARIES_PATH / 'libAudioCapture.dylib')
|
| 121 |
audio_recorder.getAudioData.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_bool)]
|
| 122 |
audio_recorder.getAudioData.restype = ctypes.POINTER(ctypes.c_ubyte)
|
| 123 |
audio_recorder.freeAudioData.argtypes = [ctypes.POINTER(ctypes.c_ubyte)]
|
|
|
|
| 124 |
|
| 125 |
+
audio_recorder.startRecord()
|
| 126 |
self.is_ready = True
|
| 127 |
|
| 128 |
try:
|
| 129 |
while not self.is_exited:
|
| 130 |
size = ctypes.c_int(0)
|
| 131 |
is_voice_active = ctypes.c_bool(False)
|
| 132 |
+
# 从原生库获取音频数据
|
| 133 |
data_ptr = audio_recorder.getAudioData(ctypes.byref(size), ctypes.byref(is_voice_active))
|
| 134 |
|
| 135 |
if data_ptr and size.value > 0:
|
| 136 |
audio_data = bytes(data_ptr[: size.value])
|
| 137 |
+
# 将音频数据转换为 [-1.0, 1.0] 范围内的浮点数
|
| 138 |
+
audio_frame = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / np.iinfo(
|
| 139 |
+
np.int16).max
|
| 140 |
|
| 141 |
if not self.is_paused:
|
| 142 |
+
# 将音频帧和语音活动状态一同放入队列
|
| 143 |
self.audio_frames_queue.put((audio_frame, is_voice_active.value))
|
| 144 |
|
| 145 |
+
# 释放原生库分配的内存
|
| 146 |
audio_recorder.freeAudioData(data_ptr)
|
| 147 |
else:
|
| 148 |
+
# 无数据时短暂休眠,避免CPU空转
|
| 149 |
time.sleep(0.01)
|
| 150 |
except Exception as e:
|
| 151 |
logger.error(f'回声消除音频捕获器运行时发生错误: {e}')
|