Xin Zhang committed on
Commit
f5ce097
·
2 Parent(s): 2d7fc97 9bda9f9

Merge branch 'main' of hf.co:MoYoYoTech/VoiceDialogue

Browse files
.gitignore CHANGED
@@ -262,3 +262,4 @@ electron-app/dist
262
  tests/tts_test_output
263
  .DS_Store
264
  package-lock.json
 
 
262
  tests/tts_test_output
263
  .DS_Store
264
  package-lock.json
265
+ output
src/voice_dialogue/services/audio/player.py CHANGED
@@ -59,8 +59,7 @@ class AudioStreamPlayer(BaseThread, TaskStatusMixin, HistoryMixin, PerformanceLo
59
  )
60
  )
61
 
62
- if is_debug_mode():
63
- self.log_task_performance(voice_task, "音频播放")
64
 
65
  self.update_chat_history(voice_task)
66
 
 
59
  )
60
  )
61
 
62
+ self.log_task_performance(voice_task, "音频播放")
 
63
 
64
  self.update_chat_history(voice_task)
65
 
src/voice_dialogue/services/mixins.py CHANGED
@@ -2,7 +2,7 @@ from collections import OrderedDict
2
 
3
  from voice_dialogue.core.constants import (
4
  voice_state_manager, session_manager, dropped_audio_cache,
5
- user_still_speaking_event, chat_history_cache
6
  )
7
  from voice_dialogue.models.voice_task import VoiceTask
8
  from voice_dialogue.utils.logger import logger
@@ -62,6 +62,9 @@ class PerformanceLogMixin:
62
 
63
  def log_task_performance(self, voice_task: VoiceTask, task_name: str = "任务"):
64
  """记录ASR, LLM, TTS各阶段耗时和音频长度"""
 
 
 
65
  try:
66
  from voice_dialogue.services.utils import calculate_audio_duration
67
 
@@ -89,3 +92,16 @@ class PerformanceLogMixin:
89
  )
90
  except Exception as e:
91
  logger.error(f"记录任务性能信息时出错: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  from voice_dialogue.core.constants import (
4
  voice_state_manager, session_manager, dropped_audio_cache,
5
+ user_still_speaking_event, chat_history_cache, is_debug_mode
6
  )
7
  from voice_dialogue.models.voice_task import VoiceTask
8
  from voice_dialogue.utils.logger import logger
 
62
 
63
  def log_task_performance(self, voice_task: VoiceTask, task_name: str = "任务"):
64
  """记录ASR, LLM, TTS各阶段耗时和音频长度"""
65
+ if not is_debug_mode():
66
+ return
67
+
68
  try:
69
  from voice_dialogue.services.utils import calculate_audio_duration
70
 
 
92
  )
93
  except Exception as e:
94
  logger.error(f"记录任务性能信息时出错: {e}")
95
+
96
+ def log_task_user_question(self, voice_task: VoiceTask):
97
+ if not is_debug_mode():
98
+ return
99
+
100
+ from voice_dialogue.config.paths import PROJECT_ROOT
101
+ output_path = PROJECT_ROOT / "output"
102
+ if not output_path.exists():
103
+ output_path.mkdir(parents=True, exist_ok=True)
104
+
105
+ import soundfile as sf
106
+ output_filename = output_path / (voice_task.id + ".wav")
107
+ sf.write(output_filename.as_posix(), voice_task.user_voice, 16000, subtype="PCM_16")
src/voice_dialogue/services/speech/monitor.py CHANGED
@@ -30,7 +30,7 @@ class SpeechMonitorConfig:
30
  QUEUE_TIMEOUT = 0.1 # 队列获取超时时间(秒)
31
 
32
  # 时间阈值(毫秒)
33
- ACTIVE_FRAME_THRESHOLD = 0.32 * 1000 # 连续活跃帧数阈值
34
  USER_SILENCE_THRESHOLD = 1 * 1000 # 用户静音阈值
35
  SILENCE_THRESHOLD = 0.3 * 1000 # 静音检测阈值
36
  AUDIO_FRAMES_THRESHOLD = 5 * 1000 # 音频帧时长阈值
 
30
  QUEUE_TIMEOUT = 0.1 # 队列获取超时时间(秒)
31
 
32
  # 时间阈值(毫秒)
33
+ ACTIVE_FRAME_THRESHOLD = 0.1 * 1000 # 连续活跃帧数阈值
34
  USER_SILENCE_THRESHOLD = 1 * 1000 # 用户静音阈值
35
  SILENCE_THRESHOLD = 0.3 * 1000 # 静音检测阈值
36
  AUDIO_FRAMES_THRESHOLD = 5 * 1000 # 音频帧时长阈值
src/voice_dialogue/services/speech/recognizer.py CHANGED
@@ -7,11 +7,12 @@ import numpy as np
7
  from voice_dialogue.core.base import BaseThread
8
  from voice_dialogue.core.constants import user_still_speaking_event, voice_state_manager, dropped_audio_cache
9
  from voice_dialogue.models.voice_task import VoiceTask
 
10
  from voice_dialogue.utils.cache import LRUCacheDict
11
  from .recognizers import asr_manager
12
 
13
 
14
- class ASRWorker(BaseThread):
15
  def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None,
16
  user_voice_queue: Queue,
17
  transcribed_text_queue: Queue,
@@ -37,6 +38,7 @@ class ASRWorker(BaseThread):
37
  except Empty:
38
  continue
39
 
 
40
  voice_task.language = self.language
41
  voice_task.whisper_start_time = time.time()
42
 
@@ -45,6 +47,8 @@ class ASRWorker(BaseThread):
45
  if not transcribed_text.strip():
46
  voice_state_manager.reset_task_id()
47
  continue
 
 
48
 
49
  voice_task.whisper_end_time = time.time()
50
 
 
7
  from voice_dialogue.core.base import BaseThread
8
  from voice_dialogue.core.constants import user_still_speaking_event, voice_state_manager, dropped_audio_cache
9
  from voice_dialogue.models.voice_task import VoiceTask
10
+ from voice_dialogue.services.mixins import PerformanceLogMixin
11
  from voice_dialogue.utils.cache import LRUCacheDict
12
  from .recognizers import asr_manager
13
 
14
 
15
+ class ASRWorker(BaseThread, PerformanceLogMixin):
16
  def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None,
17
  user_voice_queue: Queue,
18
  transcribed_text_queue: Queue,
 
38
  except Empty:
39
  continue
40
 
41
+
42
  voice_task.language = self.language
43
  voice_task.whisper_start_time = time.time()
44
 
 
47
  if not transcribed_text.strip():
48
  voice_state_manager.reset_task_id()
49
  continue
50
+
51
+ self.log_task_user_question(voice_task)
52
 
53
  voice_task.whisper_end_time = time.time()
54