import json
import os
import sys
import threading
import time
from pathlib import Path
from typing import Callable, List, Optional

from vosk import Model, KaldiRecognizer, SetLogLevel
from pypinyin import lazy_pinyin
import pyaudio

from src.constants.constants import AudioConfig
from src.utils.config_manager import ConfigManager
from src.utils.logging_config import get_logger

# Module-level logger, named after this module, used by every method below.
logger = get_logger(__name__)


class WakeWordDetector:
    """Wake-word detector built on Vosk speech recognition.

    Audio frames are read on a background daemon thread from one of three
    sources: the ``input_stream`` of an AudioCodec, a stream object supplied
    by the caller, or a PyAudio input stream opened by the detector itself.
    Recognised text is converted to pinyin and compared against the pinyin of
    the configured wake words; every registered callback is invoked on a hit.
    """

    # Consecutive loop failures tolerated before the detector stops itself.
    _MAX_LOOP_ERRORS = 5
    # Back-off after an error or while the stream is unavailable (seconds).
    _RETRY_DELAY = 0.5
    # Poll interval while detection is paused (seconds).
    _PAUSE_POLL = 0.1
    # Wait applied while no usable AudioCodec is attached (seconds).
    _CODEC_WAIT = 3.0
    # Short nap while the codec stream has not produced a frame yet; without
    # it the codec loop would spin at full CPU speed (seconds).
    _IDLE_SLEEP = 0.01

    def __init__(self,
                 sample_rate=AudioConfig.INPUT_SAMPLE_RATE,
                 buffer_size=AudioConfig.INPUT_FRAME_SIZE,
                 audio_codec=None):
        """Initialise the detector and load the recognition model.

        Nothing is raised on failure: when the feature is disabled in the
        configuration, or the model cannot be loaded, ``self.enabled`` is
        left ``False`` and ``start()`` will refuse to run.

        Args:
            sample_rate: Audio sample rate handed to the recogniser.
            buffer_size: Number of frames read from the stream per iteration.
            audio_codec: Optional AudioCodec whose ``input_stream`` is used
                as the audio source.
        """
        self.audio_codec = audio_codec

        # Runtime state. Assigned before anything below can fail so that
        # stop() and __del__ always find these attributes.
        self.on_detected_callbacks: List[Callable[[str, str], None]] = []
        self.running = False
        self.detection_thread: Optional[threading.Thread] = None
        self.paused = False
        self.audio = None
        self.stream = None
        self.external_stream = False
        self.stream_lock = threading.Lock()
        self.on_error = None

        # Defaults that keep attribute access safe on a disabled detector.
        self.sample_rate = sample_rate
        self.buffer_size = buffer_size
        self.wake_words: List[str] = []
        self.wake_words_pinyin: List[str] = []
        self.model = None
        self.recognizer = None

        config = ConfigManager.get_instance()
        if not config.get_config('WAKE_WORD_OPTIONS.USE_WAKE_WORD', False):
            logger.info("唤醒词功能已禁用")
            self.enabled = False
            return

        self.enabled = True
        # Read from the configuration; not consulted elsewhere in this class.
        self.sensitivity = config.get_config("WAKE_WORD_OPTIONS.SENSITIVITY", 0.5)

        self.wake_words = config.get_config('WAKE_WORD_OPTIONS.WAKE_WORDS', [
            "你好小明", "你好小智", "你好小天", "小爱同学", "贾维斯"
        ])
        # Spaces are removed because the recognised text is compared with its
        # spaces removed as well (see _check_wake_word).
        self.wake_words_pinyin = [
            ''.join(lazy_pinyin(word)).replace(' ', '')
            for word in self.wake_words
        ]

        try:
            model_path = self._get_model_path(config)
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"模型路径不存在: {model_path}")

            logger.info(f"加载语音识别模型: {model_path}")
            SetLogLevel(-1)
            self.model = Model(model_path=model_path)
            self.recognizer = KaldiRecognizer(self.model, self.sample_rate)
            self.recognizer.SetWords(True)
            logger.info("模型加载完成")

            logger.info(f"已配置 {len(self.wake_words)} 个唤醒词")
            for idx, (word, pinyin) in enumerate(zip(self.wake_words, self.wake_words_pinyin)):
                logger.debug(f"唤醒词 {idx+1}: {word.ljust(8)} => {pinyin}")

        except Exception as e:
            # Any initialisation problem disables the detector instead of
            # propagating to the caller.
            logger.error(f"初始化失败: {e}", exc_info=True)
            self.enabled = False

    def _get_model_path(self, config) -> str:
        """Locate the model directory and return it as a string.

        The configured ``WAKE_WORD_OPTIONS.MODEL_PATH`` is looked up under a
        list of candidate base directories; a bare name is first placed under
        ``models/``. If nothing exists, a default location is returned anyway
        and the caller is expected to check for existence.

        Args:
            config: Configuration object providing ``get_config(key, default)``.

        Returns:
            The model path that was found, or the default fallback path.
        """
        model_name = config.get_config(
            'WAKE_WORD_OPTIONS.MODEL_PATH',
            'vosk-model-small-cn-0.22'
        )
        model_path = Path(model_name)

        # A bare model name is looked up inside the "models" sub-directory.
        if len(model_path.parts) == 1:
            model_path = Path('models') / model_path

        project_root = Path(__file__).parent.parent.parent
        possible_base_dirs = [project_root, Path.cwd()]

        frozen = getattr(sys, 'frozen', False)
        if frozen:
            # Frozen (packaged) build: also search around the executable and,
            # when present, the PyInstaller extraction directory.
            exe_dir = Path(sys.executable).parent
            possible_base_dirs.append(exe_dir)

            meipass_dir = None
            if hasattr(sys, '_MEIPASS'):
                meipass_dir = Path(sys._MEIPASS)
                possible_base_dirs.append(meipass_dir)
                possible_base_dirs.append(meipass_dir.parent)

            possible_base_dirs.append(exe_dir.parent)

            logger.debug(f"可执行文件目录: {exe_dir}")
            if meipass_dir is not None:
                logger.debug(f"PyInstaller临时目录: {meipass_dir}")

        model_file_path = None
        has_models_prefix = len(model_path.parts) > 1 and model_path.parts[0] == 'models'

        for base_dir in possible_base_dirs:
            path_to_check = base_dir / model_path
            if path_to_check.exists():
                model_file_path = path_to_check
                logger.info(f"找到模型文件: {model_file_path}")
                break

            # Second chance: the same path without the leading "models" part.
            if has_models_prefix:
                alt_path = base_dir / Path(*model_path.parts[1:])
                if alt_path.exists():
                    model_file_path = alt_path
                    logger.info(f"在替代位置找到模型: {model_file_path}")
                    break

        if model_file_path is None and frozen:
            # Extra locations next to the executable of a frozen build.
            exe_parent = Path(sys.executable).parent
            special_paths = [
                exe_parent / "_internal" / model_path,
                exe_parent / "models" / model_path.name,
                exe_parent / model_path.name,
            ]
            for path in special_paths:
                if path.exists():
                    model_file_path = path
                    logger.info(f"在特殊位置找到模型: {model_file_path}")
                    break

        if model_file_path is None:
            # Nothing exists: hand back a default so the caller's existence
            # check can report the path it looked at.
            if model_path.is_absolute():
                model_file_path = model_path
            else:
                model_file_path = project_root / model_path
            logger.warning(f"未找到模型,将使用默认路径: {model_file_path}")

        model_path_str = str(model_file_path)
        logger.debug(f"最终模型路径: {model_path_str}")
        return model_path_str

    def start(self, audio_codec_or_stream=None) -> bool:
        """Start detection on a background thread.

        Args:
            audio_codec_or_stream: Optional audio source. An object exposing
                both ``read`` and ``is_active`` is treated as a ready-made
                stream; any other truthy object is treated as an AudioCodec.
                When omitted, the codec given to the constructor is used, or
                a standalone PyAudio stream is opened if there is none.

        Returns:
            ``True`` if the detection thread was started, ``False`` otherwise.
        """
        if not self.enabled:
            logger.warning("唤醒词功能未启用")
            return False

        if audio_codec_or_stream:
            looks_like_stream = (hasattr(audio_codec_or_stream, 'read')
                                 and hasattr(audio_codec_or_stream, 'is_active'))
            if looks_like_stream:
                self.stream = audio_codec_or_stream
                self.external_stream = True
                return self._start_with_external_stream()
            self.audio_codec = audio_codec_or_stream

        if self.audio_codec:
            return self._start_with_audio_codec()
        return self._start_standalone()

    def _start_with_audio_codec(self) -> bool:
        """Start detection reading directly from the AudioCodec input stream.

        Returns:
            ``True`` on success, ``False`` if the codec or its stream is
            missing or the thread could not be started.
        """
        try:
            if not self.audio_codec or not self.audio_codec.input_stream:
                logger.error("音频编解码器无效或输入流不可用")
                return False

            self.stream = self.audio_codec.input_stream
            self.external_stream = True

            # Read with the shared input configuration in this mode.
            self.sample_rate = AudioConfig.INPUT_SAMPLE_RATE
            self.buffer_size = AudioConfig.INPUT_FRAME_SIZE

            self.running = True
            self.paused = False
            self.detection_thread = threading.Thread(
                target=self._audio_codec_detection_loop,
                daemon=True,
                name="WakeWordDetector-AudioCodec"
            )
            self.detection_thread.start()

            logger.info("唤醒词检测已启动(直接使用AudioCodec输入流)")
            return True
        except Exception as e:
            logger.error(f"通过AudioCodec启动失败: {e}")
            # Do not report "running" when no thread was actually started.
            self.running = False
            return False

    def _start_standalone(self) -> bool:
        """Start detection on a PyAudio input stream owned by the detector.

        Returns:
            ``True`` on success, ``False`` if the device could not be opened
            or the thread could not be started.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=AudioConfig.CHANNELS,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.buffer_size
            )

            self.running = True
            self.paused = False
            self.detection_thread = threading.Thread(
                target=self._detection_loop,
                daemon=True,
                name="WakeWordDetector-Standalone"
            )
            self.detection_thread.start()

            logger.info("唤醒词检测已启动(独立音频模式)")
            return True
        except Exception as e:
            logger.error(f"独立模式启动失败: {e}")
            # Release whatever was acquired before the failure so the PyAudio
            # instance (and a possibly opened stream) is not leaked.
            self.running = False
            self._release_audio_resources()
            return False

    def _start_with_external_stream(self) -> bool:
        """Start detection on the caller-supplied stream in ``self.stream``.

        Returns:
            ``True`` on success, ``False`` if the thread could not be started.
        """
        try:
            # Read with the shared input configuration in this mode.
            self.sample_rate = AudioConfig.INPUT_SAMPLE_RATE
            self.buffer_size = AudioConfig.INPUT_FRAME_SIZE

            self.running = True
            self.paused = False
            self.detection_thread = threading.Thread(
                target=self._detection_loop,
                daemon=True,
                name="WakeWordDetector-ExternalStream"
            )
            self.detection_thread.start()

            logger.info("唤醒词检测已启动(使用外部音频流)")
            return True
        except Exception as e:
            logger.error(f"使用外部流启动失败: {e}")
            # Do not report "running" when no thread was actually started.
            self.running = False
            return False

    def _audio_codec_detection_loop(self):
        """Worker loop for AudioCodec mode.

        The codec's ``input_stream`` is looked up again on every iteration.
        After ``_MAX_LOOP_ERRORS`` consecutive failures the detector stops
        itself.
        """
        logger.info("进入AudioCodec检测循环")
        error_count = 0

        while self.running:
            try:
                if self.paused:
                    time.sleep(self._PAUSE_POLL)
                    continue

                if not self.audio_codec or not hasattr(self.audio_codec, 'input_stream'):
                    logger.warning("AudioCodec不可用,等待中...")
                    time.sleep(self._CODEC_WAIT)
                    continue

                stream = self.audio_codec.input_stream
                if not stream or not stream.is_active():
                    logger.debug("AudioCodec输入流不活跃,等待恢复...")
                    try:
                        if stream and hasattr(stream, 'start_stream'):
                            stream.start_stream()
                        else:
                            time.sleep(self._RETRY_DELAY)
                            continue
                    except Exception as e:
                        logger.warning(f"激活流失败: {e}")
                        time.sleep(self._RETRY_DELAY)
                        continue

                data = self._read_audio_data_direct(stream)
                if not data:
                    # No full frame available yet: yield briefly instead of
                    # spinning on the stream.
                    time.sleep(self._IDLE_SLEEP)
                    continue

                self._process_audio_data(data)
                error_count = 0

            except Exception as e:
                error_count += 1
                logger.error(f"检测循环错误({error_count}/{self._MAX_LOOP_ERRORS}): {str(e)}")

                if error_count >= self._MAX_LOOP_ERRORS:
                    logger.critical("达到最大错误次数,停止检测")
                    # stop() recognises that it runs on this thread and does
                    # not try to join it.
                    self.stop()
                    break
                time.sleep(self._RETRY_DELAY)

    def _read_audio_data_direct(self, stream) -> Optional[bytes]:
        """Read one frame from ``stream`` without blocking on a short buffer.

        Args:
            stream: Stream object to read from. If it offers
                ``get_read_available`` the read is skipped while fewer than
                ``buffer_size`` frames are buffered.

        Returns:
            The data read, or ``None`` when not enough data is buffered or
            the read failed.
        """
        try:
            with self.stream_lock:
                if hasattr(stream, 'get_read_available'):
                    available = stream.get_read_available()
                    if available < self.buffer_size:
                        return None

                return stream.read(self.buffer_size, exception_on_overflow=False)
        except OSError as e:
            error_msg = str(e)
            logger.warning(f"音频流错误: {error_msg}")

            # For these messages the codec is asked to rebuild its input stream.
            critical_errors = ["Input overflowed", "Device unavailable"]
            if any(msg in error_msg for msg in critical_errors) and self.audio_codec:
                logger.info("触发音频流重置...")
                try:
                    self.audio_codec._reinitialize_input_stream()
                except Exception as reset_error:
                    logger.error(f"流重置失败: {reset_error}")

            time.sleep(self._RETRY_DELAY)
            return None
        except Exception as e:
            logger.error(f"读取音频数据异常: {e}")
            return None

    def _read_frame_locked(self) -> Optional[bytes]:
        """Read one frame from ``self.stream`` while holding the stream lock.

        Returns:
            The data read, or ``None`` when no stream is set or an inactive
            stream could not be started. Read errors propagate to the caller.
        """
        with self.stream_lock:
            stream = self.stream
            if not stream:
                logger.warning("音频流不可用")
                return None

            if not stream.is_active():
                try:
                    stream.start_stream()
                except Exception as e:
                    logger.error(f"启动音频流失败: {e}")
                    return None

            return stream.read(self.buffer_size, exception_on_overflow=False)

    def _detection_loop(self):
        """Worker loop for the external-stream and standalone modes.

        Read failures are retried after a delay. After ``_MAX_LOOP_ERRORS``
        consecutive processing failures the detector stops itself.
        """
        logger.info("进入标准检测循环")
        error_count = 0

        while self.running:
            try:
                if self.paused:
                    time.sleep(self._PAUSE_POLL)
                    continue

                try:
                    data = self._read_frame_locked()
                except Exception as e:
                    logger.error(f"读取音频数据失败: {e}")
                    time.sleep(self._RETRY_DELAY)
                    continue

                if data is None:
                    # Back off outside the lock so update_stream() can swap
                    # in a usable stream in the meantime.
                    time.sleep(self._RETRY_DELAY)
                    continue

                if data:
                    self._process_audio_data(data)
                    error_count = 0

            except Exception as e:
                error_count += 1
                logger.error(f"检测循环错误({error_count}/{self._MAX_LOOP_ERRORS}): {e}")

                if error_count >= self._MAX_LOOP_ERRORS:
                    logger.critical("达到最大错误次数,停止检测")
                    # stop() recognises that it runs on this thread and does
                    # not try to join it.
                    self.stop()
                    break
                time.sleep(self._RETRY_DELAY)

    def _release_audio_resources(self):
        """Close the detector-owned stream and PyAudio instance, reset state.

        A stream is closed only when it was opened by the detector itself,
        i.e. it is neither an external stream nor a codec stream.
        """
        if not self.external_stream and not self.audio_codec and self.stream:
            try:
                if self.stream.is_active():
                    self.stream.stop_stream()
                self.stream.close()
            except Exception as e:
                logger.error(f"关闭音频流失败: {e}")

        if self.audio:
            try:
                self.audio.terminate()
            except Exception as e:
                logger.error(f"终止音频设备失败: {e}")

        self.stream = None
        self.audio = None
        self.external_stream = False

    def stop(self):
        """Stop detection and release the audio resources the detector owns.

        Safe to call when not running, and safe to call from the detection
        thread itself (the loops do so after too many errors).
        """
        if not self.running:
            return

        logger.info("正在停止唤醒词检测...")
        self.running = False

        worker = self.detection_thread
        # A thread cannot join itself (Thread.join raises RuntimeError), so
        # the join is skipped when stop() runs on the detection thread.
        if (worker is not None
                and worker is not threading.current_thread()
                and worker.is_alive()):
            worker.join(timeout=1.0)

        self._release_audio_resources()
        logger.info("唤醒词检测已停止")

    def is_running(self) -> bool:
        """Return ``True`` while detection is running and not paused."""
        return self.running and not self.paused

    def update_stream(self, new_stream) -> bool:
        """Replace the stream the detector reads from.

        A previous stream is closed only if the detector opened it itself.

        Args:
            new_stream: Stream object to read from; treated as externally
                owned from now on.

        Returns:
            ``True`` if the stream was replaced, ``False`` if the detector is
            not running.
        """
        if not self.running:
            logger.warning("唤醒词检测器未运行,无法更新流")
            return False

        with self.stream_lock:
            if not self.external_stream and not self.audio_codec and self.stream:
                try:
                    if self.stream.is_active():
                        self.stream.stop_stream()
                    self.stream.close()
                except Exception as e:
                    logger.warning(f"关闭旧流时出错: {e}")

            self.stream = new_stream
            self.external_stream = True
            logger.info("已更新唤醒词检测器的音频流")
        return True

    def _process_audio_data(self, data):
        """Feed one audio frame to the recogniser and check its output.

        Both the final result (when the recogniser reports one) and the
        current partial result are checked for wake words.

        Args:
            data: Raw audio data as read from the stream.
        """
        if self.recognizer.AcceptWaveform(data):
            result = json.loads(self.recognizer.Result())
            if text := result.get('text', ''):
                logger.debug(f"完整识别: {text}")
                self._check_wake_word(text)

        partial = json.loads(self.recognizer.PartialResult()).get('partial', '')
        if partial:
            logger.debug(f"部分识别: {partial}")
            self._check_wake_word(partial, is_partial=True)

    def _check_wake_word(self, text, is_partial=False):
        """Compare recognised text with the wake words by pinyin substring.

        On the first match the callbacks are triggered and the recogniser is
        reset; remaining wake words are not checked.

        Args:
            text: Recognised text.
            is_partial: Whether ``text`` is a partial result; accepted for
                callers, not used by the matching itself.
        """
        text_pinyin = ''.join(lazy_pinyin(text)).replace(' ', '')
        for word, pinyin in zip(self.wake_words, self.wake_words_pinyin):
            # An empty pinyin string is a substring of every text and would
            # fire on every result, so it is never treated as a match.
            if pinyin and pinyin in text_pinyin:
                logger.info(f"检测到唤醒词 '{word}' (匹配拼音: {pinyin})")
                self._trigger_callbacks(word, text)
                self.recognizer.Reset()
                return

    def pause(self):
        """Pause detection; the worker thread keeps running but idles."""
        if self.running and not self.paused:
            self.paused = True
            logger.info("检测已暂停")

    def resume(self):
        """Resume detection after ``pause()``."""
        if self.running and self.paused:
            self.paused = False
            logger.info("检测已恢复")

    def on_detected(self, callback):
        """Register a callback invoked as ``callback(wake_word, text)``."""
        self.on_detected_callbacks.append(callback)

    def _trigger_callbacks(self, wake_word, text):
        """Invoke every registered callback, logging failures individually.

        Args:
            wake_word: The configured wake word that matched.
            text: The recognised text in which it was found.
        """
        # Iterate over a snapshot so a callback may register further callbacks.
        for cb in tuple(self.on_detected_callbacks):
            try:
                cb(wake_word, text)
            except Exception as e:
                logger.error(f"回调执行失败: {e}", exc_info=True)

    def __del__(self):
        """Best-effort cleanup when the detector is garbage collected."""
        try:
            self.stop()
        except Exception:
            # A finalizer may run on a half-built instance or during
            # interpreter teardown; errors here cannot be handled usefully.
            pass