Xin Zhang committed on
Commit
4029b29
·
2 Parent(s): 797cd52 2c7e742

Merge branches 'main' and 'main' of hf.co:MoYoYoTech/VoiceDialogue

Browse files
src/voice_dialogue/services/audio/generator.py CHANGED
@@ -103,4 +103,4 @@ class TTSAudioGenerator(BaseThread, TaskStatusMixin):
103
  voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
104
  voice_task.tts_end_time = time.time()
105
 
106
- self.audio_output_queue.put(voice_task)
 
103
  voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
104
  voice_task.tts_end_time = time.time()
105
 
106
+ self.audio_output_queue.put(voice_task.model_copy())
src/voice_dialogue/services/speech/monitor.py CHANGED
@@ -291,7 +291,7 @@ class SpeechStateMonitor(BaseThread):
291
  # 8. 检查是否需要发送语音任务
292
  if self._should_send_voice_task(is_audio_sent_for_processing):
293
  voice_task = self._create_voice_task(audio_frames)
294
- self.user_voice_queue.put(voice_task)
295
 
296
  # 更新状态
297
  is_audio_sent_for_processing = True
 
291
  # 8. 检查是否需要发送语音任务
292
  if self._should_send_voice_task(is_audio_sent_for_processing):
293
  voice_task = self._create_voice_task(audio_frames)
294
+ self.user_voice_queue.put(voice_task.model_copy(deep=True))
295
 
296
  # 更新状态
297
  is_audio_sent_for_processing = True
src/voice_dialogue/services/speech/recognizer.py CHANGED
@@ -38,7 +38,6 @@ class ASRWorker(BaseThread, PerformanceLogMixin):
38
  except Empty:
39
  continue
40
 
41
-
42
  voice_task.language = self.language
43
  voice_task.whisper_start_time = time.time()
44
 
@@ -47,16 +46,16 @@ class ASRWorker(BaseThread, PerformanceLogMixin):
47
  if not transcribed_text.strip():
48
  voice_state_manager.reset_task_id()
49
  continue
50
-
51
  self.log_task_user_question(voice_task)
52
 
53
  voice_task.whisper_end_time = time.time()
54
 
55
  task_id = voice_task.id
56
- cached_user_question = self.cached_user_questions.get(task_id, [])
57
- cached_user_question.append(transcribed_text)
58
 
 
59
  if voice_task.is_over_audio_frames_threshold:
 
60
  self.cached_user_questions[task_id] = cached_user_question
61
 
62
  answer_id = voice_task.answer_id
@@ -72,4 +71,4 @@ class ASRWorker(BaseThread, PerformanceLogMixin):
72
  voice_task.transcribed_text = ' '.join(cached_user_question) if cached_user_question else transcribed_text
73
 
74
  voice_task.user_voice = []
75
- self.transcribed_text_queue.put(voice_task)
 
38
  except Empty:
39
  continue
40
 
 
41
  voice_task.language = self.language
42
  voice_task.whisper_start_time = time.time()
43
 
 
46
  if not transcribed_text.strip():
47
  voice_state_manager.reset_task_id()
48
  continue
49
+
50
  self.log_task_user_question(voice_task)
51
 
52
  voice_task.whisper_end_time = time.time()
53
 
54
  task_id = voice_task.id
 
 
55
 
56
+ cached_user_question = self.cached_user_questions.get(task_id, [])
57
  if voice_task.is_over_audio_frames_threshold:
58
+ cached_user_question.append(transcribed_text)
59
  self.cached_user_questions[task_id] = cached_user_question
60
 
61
  answer_id = voice_task.answer_id
 
71
  voice_task.transcribed_text = ' '.join(cached_user_question) if cached_user_question else transcribed_text
72
 
73
  voice_task.user_voice = []
74
+ self.transcribed_text_queue.put(voice_task.model_copy())
src/voice_dialogue/services/text/generator.py CHANGED
@@ -1,5 +1,6 @@
1
  import copy
2
  import time
 
3
  from queue import Queue, Empty
4
 
5
  from langchain.memory import ConversationBufferWindowMemory
@@ -99,12 +100,30 @@ class LLMResponseGenerator(BaseThread):
99
  """重置 chunks 列表"""
100
  return [remain_content] if remain_content else []
101
 
 
 
 
 
 
 
 
102
  def _process_chunk_content(self, chunk_content: str) -> tuple:
103
- """处理 chunk 内容,分离句子结束标记和剩余内容"""
104
- if len(chunk_content) > 1:
105
- return chunk_content[0], chunk_content[1:]
106
- else:
107
- return chunk_content, ''
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  def _process_voice_task(self, voice_task: VoiceTask) -> None:
110
  """处理单个语音任务"""
@@ -140,8 +159,11 @@ class LLMResponseGenerator(BaseThread):
140
 
141
  chunk_content = f'{chunk.content}'
142
 
143
- sentence_end_mark, remain_content = self._process_chunk_content(chunk_content)
144
- chunks.append(sentence_end_mark)
 
 
 
145
 
146
  sentence = preprocess_sentence_text(chunks)
147
  if not sentence:
 
1
  import copy
2
  import time
3
+ import unicodedata
4
  from queue import Queue, Empty
5
 
6
  from langchain.memory import ConversationBufferWindowMemory
 
100
  """重置 chunks 列表"""
101
  return [remain_content] if remain_content else []
102
 
103
+ def _is_punctuation(self, char: str) -> bool:
104
+ """判断一个字符是否是标点符号"""
105
+ if not char or len(char) != 1:
106
+ return False
107
+ # 检查字符的 Unicode 类别是否为标点符号 (Punctuation)
108
+ return unicodedata.category(char).startswith('P')
109
+
110
  def _process_chunk_content(self, chunk_content: str) -> tuple:
111
+ """处理 chunk 内容,从右到左找标点符号并分割成三部分"""
112
+ if not chunk_content:
113
+ return '', '', ''
114
+
115
+ # 从右到左迭代,找到第一个标点符号
116
+ for i in range(len(chunk_content) - 1, -1, -1):
117
+ char = chunk_content[i]
118
+ if self._is_punctuation(char):
119
+ # 找到标点符号,分割成三部分
120
+ before_punct = chunk_content[:i] # 标点之前的部分
121
+ punct = char # 标点符号本身
122
+ after_punct = chunk_content[i + 1:] # 标点之后的部分
123
+ return before_punct, punct, after_punct
124
+
125
+ # 如果没有找到标点符号,返回整个内容作为第一部分
126
+ return chunk_content, '', ''
127
 
128
  def _process_voice_task(self, voice_task: VoiceTask) -> None:
129
  """处理单个语音任务"""
 
159
 
160
  chunk_content = f'{chunk.content}'
161
 
162
+ before_punct, sentence_end_mark, remain_content = self._process_chunk_content(chunk_content)
163
+ if before_punct:
164
+ chunks.append(before_punct)
165
+ if sentence_end_mark:
166
+ chunks.append(sentence_end_mark)
167
 
168
  sentence = preprocess_sentence_text(chunks)
169
  if not sentence:
src/voice_dialogue/services/text/processor.py CHANGED
@@ -9,9 +9,6 @@ from langchain_core.prompts import (
9
  from langchain_core.runnables import RunnableWithMessageHistory
10
 
11
  from voice_dialogue.utils.logger import logger
12
- from voice_dialogue.utils.strings import (
13
- remove_emojis, convert_comma_separated_numbers, convert_uppercase_words_to_lowercase
14
- )
15
 
16
 
17
  def create_langchain_chat_llamacpp_instance(
@@ -54,9 +51,6 @@ def warmup_langchain_pipeline(pipeline):
54
 
55
  def preprocess_sentence_text(sentences):
56
  sentence_text = ''.join(sentences)
57
- sentence_text = remove_emojis(sentence_text)
58
- sentence_text = convert_comma_separated_numbers(sentence_text)
59
- sentence_text = convert_uppercase_words_to_lowercase(sentence_text)
60
  if sentence_text:
61
  sentence_mark = sentence_text[-1]
62
  sentence_content = sentence_text[:-1].replace('!', ',').replace('?', ',').replace('.', ',')
 
9
  from langchain_core.runnables import RunnableWithMessageHistory
10
 
11
  from voice_dialogue.utils.logger import logger
 
 
 
12
 
13
 
14
  def create_langchain_chat_llamacpp_instance(
 
51
 
52
  def preprocess_sentence_text(sentences):
53
  sentence_text = ''.join(sentences)
 
 
 
54
  if sentence_text:
55
  sentence_mark = sentence_text[-1]
56
  sentence_content = sentence_text[:-1].replace('!', ',').replace('?', ',').replace('.', ',')