liumaolin committed
Commit 2988b10 · 1 Parent(s): ecc005d

Add multilingual support and optimize LLM pipeline configuration.

models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c9d82509108e9f758ff9ad1034d2c23088c611c76eaec308ad246caf8decac5
+size 8517725952
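The three added lines are a Git LFS pointer, not the model itself: `oid` is the SHA-256 of the actual blob and `size` puts it at roughly 8.5 GB, fetched by LFS on checkout. A minimal sketch (a hypothetical helper, not part of the commit) for checking whether the pointer has been materialized into real weights:

import pathlib

def is_materialized_gguf(path: pathlib.Path) -> bool:
    # Real GGUF weights begin with the magic bytes b'GGUF'; an unfetched
    # LFS pointer is a tiny text file beginning with the version line above.
    if not path.exists():
        return False
    with path.open('rb') as f:
        return f.read(4) == b'GGUF'

model = pathlib.Path('models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf')
if not is_materialized_gguf(model):
    print('Pointer only - run `git lfs pull` to fetch the ~8.5 GB weights.')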
src/VoiceDialogue/main.py CHANGED
@@ -6,7 +6,6 @@ from config.paths import load_third_party
 
 load_third_party()
 
-from models.language_model import language_model_registry
 from models.voice_model import voice_model_registry
 from services.audio.aec_audio_capture import EchoCancellingAudioCapture
 from services.audio.audio_answer import TTSAudioGenerator
@@ -15,16 +14,13 @@ from services.speech.speech_monitor import SpeechStateMonitor
 from services.speech.asr_service import ASRWorker
 from services.text.text_generator import LLMResponseGenerator
 
-
 HERE = Path(__file__).parent
 language: typing.Literal['zh', 'en'] = 'en'
 
 
 def launch_system(
         user_language: str,
-        system_prompt: str,
-        tts_speaker: str,
-        llm: typing.Literal['7B', '14B'] = '14B'
+        tts_speaker: str
 ):
     audio_frames_queue = Queue()
     user_voice_queue = Queue()
@@ -53,31 +49,9 @@ def launch_system(
     whisper_worker.start()
     threads.append(whisper_worker)
 
-    if llm == '8B':
-        selected_llm_model = language_model_registry[-1]
-    elif llm == '7B':
-        selected_llm_model = language_model_registry[-3]
-    else:
-        selected_llm_model = language_model_registry[-2]
-
-    selected_llm_model.download_model()
-    default_llm_params = {
-        'streaming': True,
-        'n_gpu_layers': -1,
-        'n_batch': 512,
-        'n_ctx': 2048,
-        'f16_kv': True,
-        'temperature': 0.8,
-        # 'n_predict': -1,
-        'top_k': 50,
-        'top_p': 1.0,
-    }
     answer_generator_worker = LLMResponseGenerator(
         user_question_queue=transcribed_text_queue,
-        generated_answer_queue=generated_answer_queue,
-        local_model_path=selected_llm_model.pretrained_model_path,
-        model_params=default_llm_params,
-        prompt_template=system_prompt
+        generated_answer_queue=generated_answer_queue
     )
     answer_generator_worker.start()
     threads.append(answer_generator_worker)
@@ -114,16 +88,10 @@ def launch_system(
 def main():
     user_language: typing.Literal['zh', 'en'] = 'zh'
 
-    SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。"
-                     "回答时,必须首先生成一个不超过5个字的简短句子,"
-                     "比如:\"让我想一下\"、\"在我看来\"、\"稍等我理一理\"、\"不错的问题\"、\"稍等片刻\"等,然后再进行正式回答,"
-                     "保持中文口语化表达,禁用emoji和系统相关描述,确保衔接词与内容存在合理逻辑关联。")
     # '罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'
     tts_speaker = '沈逸'
-    # QWen2.5 7B or 14B
-    llm = '14B'
 
-    launch_system(user_language, SYSTEM_PROMPT, tts_speaker, llm=llm)
+    launch_system(user_language, tts_speaker)
 
 
 if __name__ == '__main__':
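Net effect in main.py: the entry point no longer selects a model size or carries a system prompt; both now live inside LLMResponseGenerator (see text_generator.py below). A rough sketch of the worker pipeline launch_system wires together, inferred from the imports and queue names above (constructor details omitted, stage order is an assumption):

# audio in -> EchoCancellingAudioCapture -> audio_frames_queue
#          -> SpeechStateMonitor         -> user_voice_queue
#          -> ASRWorker                  -> transcribed_text_queue
#          -> LLMResponseGenerator       -> generated_answer_queue
#          -> TTSAudioGenerator          -> spoken answer out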
src/VoiceDialogue/models/voice_task.py CHANGED
@@ -8,6 +8,7 @@ class VoiceTask(BaseModel):
     id: str
 
     session_id: str = Field(default="")
+    language: str = Field(default="zh")
     is_speaking_over_threshold: bool = Field(default=False)
     is_over_audio_frames_threshold: bool = Field(default=False)
     user_voice: np.array = Field(default=np.array([]))
src/VoiceDialogue/services/speech/asr_service.py CHANGED
@@ -180,6 +180,7 @@ class ASRWorker(BaseThread):
 
         while not self.stopped():
            voice_task: VoiceTask = self.user_voice_queue.get()
+            voice_task.language = self.language
            voice_task.whisper_start_time = time.time()
            user_voice: np.array = voice_task.user_voice
            transcribed_text = self.client.transcribe(user_voice)
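Together with the new language field on VoiceTask (previous file), this single added line is the whole multilingual hand-off: the ASR worker stamps its configured language onto every task it dequeues, and the text generator later maps that stamp to a system prompt. A trimmed sketch of the flow, assuming ASRWorker carries a language attribute as the diff implies:

from pydantic import BaseModel, Field

class VoiceTask(BaseModel):
    # Reduced to the fields relevant here; the real model has several more.
    session_id: str = Field(default="")
    language: str = Field(default="zh")  # new in this commit

# What ASRWorker now does for each dequeued task (self.language on the worker):
task = VoiceTask(session_id="demo")
task.language = "en"

# What LLMResponseGenerator does with the stamp downstream (see below):
prompt = "Chinese system prompt" if task.language == "zh" else "English system prompt"
print(prompt)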
src/VoiceDialogue/services/text/{llm.py → langchain_llm.py} RENAMED
@@ -1,13 +1,8 @@
-import hashlib
-import os
 import pathlib
-import threading
 import typing
-from collections import OrderedDict
 
 from langchain_community.chat_models.llamacpp import ChatLlamaCpp
 from langchain_core.callbacks import StreamingStdOutCallbackHandler, CallbackManager
-from langchain_core.language_models.llms import LLM
 from langchain_core.messages import SystemMessage
 from langchain_core.prompts import (
     ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
@@ -16,74 +11,8 @@ from langchain_core.runnables import RunnableWithMessageHistory
 
 from utils.strings import remove_emojis, convert_comma_separated_numbers, convert_uppercase_words_to_lowercase
 
-default_llm_params = OrderedDict({
-    'streaming': True,
-    'n_gpu_layers': -1,
-    'n_batch': 512,
-    'n_ctx': 2048,
-    'f16_kv': True,
-    'temperature': 0.7,
-    'n_predict': -1,
-    'top_k': 50,
-    'top_p': 1.0,
-})
-
-singleton_chat_langchain_instance: typing.Optional[LLM] = None
-singleton_chat_langchain_instance_uid: str = ''
-single_chat_instance_locker = threading.Lock()
-
-
-def setup_chat_langchain_pipeline(
-        local_model_path: str,
-        model_params: dict | None = None,
-        prompt_template: str = '',
-        get_session_history: typing.Callable = None
-):
-    model_path = pathlib.Path(local_model_path)
-    if not model_path.exists():
-        raise RuntimeError(f'Model path not exists: {model_path}')
-
-    if get_session_history is None:
-        raise RuntimeError(f'Function<get_session_history> can\'t be None.')
-
-    if not isinstance(model_params, dict):
-        model_params = default_llm_params
-
-    current_model_uid = generate_unique_id(model_path, model_params)
-
-    with single_chat_instance_locker:
-        global singleton_chat_langchain_instance_uid, singleton_chat_langchain_instance
-        if current_model_uid == singleton_chat_langchain_instance_uid:
-            instance = singleton_chat_langchain_instance
-            langchain_pipeline_is_warmup = True
-        else:
-            singleton_chat_langchain_instance_uid = current_model_uid
-            instance = setup_chat_llamacpp_langchain_instance(local_model_path, model_params)
-            singleton_chat_langchain_instance = instance
-            langchain_pipeline_is_warmup = False
-
-    pipeline = build_chat_langchain_pipeline(instance, prompt_template, get_session_history)
-
-    if not langchain_pipeline_is_warmup:
-        warmup_chat_langchain_pipeline(pipeline)
-
-    return pipeline
-
-
-def generate_unique_id(
-        model_path: str | os.PathLike,
-        model_params: dict,
-        multimodal_path: str | os.PathLike = ''
-):
-    model_uid_params = [f'llm_path={model_path}']
-    if multimodal_path:
-        model_uid_params.append(f'multimodal={multimodal_path}')
-    model_uid_params.extend(f'{k}:{v}' for k, v in model_params.items())
-    current_model_uid = hashlib.md5('&'.join(model_uid_params).encode()).hexdigest()
-    return current_model_uid
-
 
-def setup_chat_llamacpp_langchain_instance(
+def create_langchain_chat_llamacpp_instance(
         local_model_path: str,
         model_params: dict | None = None
 ) -> ChatLlamaCpp:
@@ -109,7 +38,7 @@ def setup_chat_llamacpp_langchain_instance(
     return llamacpp_langchain_instance
 
 
-def build_chat_langchain_pipeline(langchain_instance: LLM, system_prompt: str, get_session_history: typing.Callable):
+def create_langchain_pipeline(langchain_instance, system_prompt: str, get_session_history: typing.Callable):
     prompt = ChatPromptTemplate(messages=[
         SystemMessage(content=system_prompt),
         MessagesPlaceholder(variable_name="history"),
@@ -123,7 +52,7 @@ def build_chat_langchain_pipeline(langchain_instance: LLM, system_prompt: str, get_session_history: typing.Callable):
     return chain_with_history
 
 
-def warmup_chat_langchain_pipeline(pipeline):
+def warmup_langchain_pipeline(pipeline):
     print("Warmup chat pipeline...")
 
     user_input = 'Hello, this is warming up step, if you understand, output "Ok".'
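Besides the rename, the module sheds its module-level state: the default parameter dict, the cached singleton instance, the MD5-based instance uid, and the lock are all gone, leaving three plain factory functions whose lifetimes the caller owns. Their intended usage, mirroring what text_generator.py's run() does below (the model path, params, and prompt here are placeholders):

from services.text.langchain_llm import (
    create_langchain_chat_llamacpp_instance, create_langchain_pipeline, warmup_langchain_pipeline
)

def get_session_history(session_id: str):
    ...  # supplied by the caller, e.g. LLMResponseGenerator.get_session_history

# 1. Load the GGUF weights once - this is the expensive step.
instance = create_langchain_chat_llamacpp_instance(
    local_model_path='models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf',
    model_params={'streaming': True, 'n_ctx': 2048},
)
# 2. Wrap the instance in a prompt template plus message history - cheap,
#    so a fresh pipeline can be built per request with a per-language prompt.
pipeline = create_langchain_pipeline(instance, 'placeholder system prompt', get_session_history)
# 3. Run one throwaway generation so the first real request is not cold.
warmup_langchain_pipeline(pipeline)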
src/VoiceDialogue/services/text/text_generator.py CHANGED
@@ -5,10 +5,22 @@ from queue import Queue, Empty
 from langchain.memory import ConversationBufferWindowMemory
 from langchain_core.chat_history import InMemoryChatMessageHistory
 
+from config import paths
 from models.voice_task import VoiceTask
 from services.core.base import BaseThread
 from services.core.constants import chat_history_cache
-from services.text.llm import setup_chat_langchain_pipeline, preprocess_sentence_text
+from services.text.langchain_llm import preprocess_sentence_text, \
+    create_langchain_chat_llamacpp_instance, create_langchain_pipeline, warmup_langchain_pipeline
+
+CHINESE_SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。"
+                         "回答时,必须首先生成一个不超过5个字的简短句子,"
+                         "比如:\"让我想一下\"、\"在我看来\"、\"稍等我理一理\"、\"不错的问题\"、\"稍等片刻\"等,然后再进行正式回答,"
+                         "保持中文口语化表达,禁用emoji和系统相关描述,确保衔接词与内容存在合理逻辑关联。")
+
+ENGLISH_SYSTEM_PROMPT = ("You are an AI assistant skilled at simulating authentic thinking processes. "
+                         "When responding, you must first generate a brief phrase of no more than 5 words, "
+                         "such as: 'Let me think', 'I see', 'Let me process this', 'Good question', 'One moment', etc., then proceed with your formal response. "
+                         "Maintain natural conversational English expression, avoid emojis and system-related descriptions, and ensure logical coherence between transitional phrases and content.")
 
 
 class LLMResponseGenerator(BaseThread):
@@ -16,18 +28,19 @@ class LLMResponseGenerator(BaseThread):
 
     def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
                  user_question_queue: Queue,
-                 generated_answer_queue: Queue,
-                 local_model_path: str,
-                 model_params: dict | None = None,
-                 prompt_template: str = ''):
+                 generated_answer_queue: Queue
+                 ):
         super().__init__(group, target, name, args, kwargs, daemon=daemon)
 
         self.user_question_queue = user_question_queue
         self.generated_answer_queue = generated_answer_queue
 
-        self.langchain_pipeline = setup_chat_langchain_pipeline(
-            local_model_path, model_params, prompt_template, self.get_session_history
-        )
+    def _get_prompt_by_language(self, language: str) -> str:
+        """根据语言获取对应的 prompt"""
+        if language == "zh":
+            return CHINESE_SYSTEM_PROMPT
+        else:
+            return ENGLISH_SYSTEM_PROMPT
 
     def get_session_history(self, session_id: str) -> InMemoryChatMessageHistory:
         message_history = InMemoryChatMessageHistory()
@@ -104,10 +117,13 @@ class LLMResponseGenerator(BaseThread):
             print(f'用户问题: {user_question}')
             voice_task.llm_start_time = time.time()
 
+            system_prompt = self._get_prompt_by_language(voice_task.language)
+            pipeline = create_langchain_pipeline(self.model_instance, system_prompt, self.get_session_history)
+
             config = {"configurable": {"session_id": voice_task.session_id}}
 
             try:
-                for chunk in self.langchain_pipeline.stream(input={'input': user_question}, config=config):
+                for chunk in pipeline.stream(input={'input': user_question}, config=config):
                     chunk_content = f'{chunk.content.strip()}'
                     if not chunk_content:
                         continue
@@ -148,6 +164,24 @@ class LLMResponseGenerator(BaseThread):
             self._send_sentence_to_queue(voice_task, sentence, answer_index)
 
     def run(self):
+        model_path = paths.MODELS_PATH / 'llm' / 'Qwen2.5-14B-Instruct.Q4_0.gguf'
+        model_params = {
+            'streaming': True,
+            'n_gpu_layers': -1,
+            'n_batch': 512,
+            'n_ctx': 2048,
+            'f16_kv': True,
+            'temperature': 0.8,
+            # 'n_predict': -1,
+            'top_k': 50,
+            'top_p': 1.0,
+        }
+        self.model_instance = create_langchain_chat_llamacpp_instance(
+            local_model_path=model_path, model_params=model_params
+        )
+        pipeline = create_langchain_pipeline(self.model_instance, CHINESE_SYSTEM_PROMPT, self.get_session_history)
+        warmup_langchain_pipeline(pipeline)
+
        """主运行循环"""
         while not self.stopped():
             try:
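Two consequences of moving model construction into run(): the heavy llama.cpp load now happens on the generator thread rather than in __init__ on whichever thread constructs the worker, and since only the cheap pipeline is rebuilt per request, the system prompt can follow each task's language while conversation history stays keyed by session_id. Note the warmup pass is hard-wired to CHINESE_SYSTEM_PROMPT regardless of the configured language.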