liumaolin committed
Commit · 2988b10
1 Parent(s): ecc005d

Add multilingual support and optimize LLM pipeline configuration.
Changed files:
- models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf +3 -0
- src/VoiceDialogue/main.py +3 -35
- src/VoiceDialogue/models/voice_task.py +1 -0
- src/VoiceDialogue/services/speech/asr_service.py +1 -0
- src/VoiceDialogue/services/text/{llm.py → langchain_llm.py} +3 -74
- src/VoiceDialogue/services/text/text_generator.py +43 -9
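
In short, this commit threads a language tag from speech recognition through to prompt selection, and moves LLM construction out of main.py into the generator thread. A condensed sketch of the new flow, using only names that appear in the diffs below (the surrounding class bodies are elided):

    # ASRWorker (asr_service.py): stamp each task with the recognizer's language
    voice_task.language = self.language

    # VoiceTask (voice_task.py): the tag travels with the task
    language: str = Field(default="zh")

    # LLMResponseGenerator (text_generator.py): pick the matching system prompt per task
    system_prompt = self._get_prompt_by_language(voice_task.language)
    pipeline = create_langchain_pipeline(self.model_instance, system_prompt, self.get_session_history)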
models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c9d82509108e9f758ff9ad1034d2c23088c611c76eaec308ad246caf8decac5
+size 8517725952
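The three added lines are a Git LFS pointer (spec version, SHA-256 of the blob, and its byte size, roughly 8.5 GB), not the model weights themselves. A small sketch of how one could verify a locally downloaded copy against this pointer; the local path is illustrative:

    import hashlib
    import pathlib

    MODEL = pathlib.Path('models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf')
    EXPECTED_OID = '1c9d82509108e9f758ff9ad1034d2c23088c611c76eaec308ad246caf8decac5'
    EXPECTED_SIZE = 8517725952

    # size check is cheap; do it first
    assert MODEL.stat().st_size == EXPECTED_SIZE

    # stream the file in 1 MiB blocks to hash 8.5 GB without loading it all
    digest = hashlib.sha256()
    with MODEL.open('rb') as fh:
        for block in iter(lambda: fh.read(1 << 20), b''):
            digest.update(block)
    assert digest.hexdigest() == EXPECTED_OID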
src/VoiceDialogue/main.py
CHANGED
@@ -6,7 +6,6 @@ from config.paths import load_third_party
 
 load_third_party()
 
-from models.language_model import language_model_registry
 from models.voice_model import voice_model_registry
 from services.audio.aec_audio_capture import EchoCancellingAudioCapture
 from services.audio.audio_answer import TTSAudioGenerator
@@ -15,16 +14,13 @@ from services.speech.speech_monitor import SpeechStateMonitor
 from services.speech.asr_service import ASRWorker
 from services.text.text_generator import LLMResponseGenerator
 
-
 HERE = Path(__file__).parent
 language: typing.Literal['zh', 'en'] = 'en'
 
 
 def launch_system(
         user_language: str,
-
-        tts_speaker: str,
-        llm: typing.Literal['7B', '14B'] = '14B'
+        tts_speaker: str
 ):
     audio_frames_queue = Queue()
     user_voice_queue = Queue()
@@ -53,31 +49,9 @@ def launch_system(
     whisper_worker.start()
     threads.append(whisper_worker)
 
-    if llm == '8B':
-        selected_llm_model = language_model_registry[-1]
-    elif llm == '7B':
-        selected_llm_model = language_model_registry[-3]
-    else:
-        selected_llm_model = language_model_registry[-2]
-
-    selected_llm_model.download_model()
-    default_llm_params = {
-        'streaming': True,
-        'n_gpu_layers': -1,
-        'n_batch': 512,
-        'n_ctx': 2048,
-        'f16_kv': True,
-        'temperature': 0.8,
-        # 'n_predict': -1,
-        'top_k': 50,
-        'top_p': 1.0,
-    }
     answer_generator_worker = LLMResponseGenerator(
         user_question_queue=transcribed_text_queue,
-        generated_answer_queue=generated_answer_queue,
-        local_model_path=selected_llm_model.pretrained_model_path,
-        model_params=default_llm_params,
-        prompt_template=system_prompt
+        generated_answer_queue=generated_answer_queue
     )
     answer_generator_worker.start()
     threads.append(answer_generator_worker)
@@ -114,16 +88,10 @@ def launch_system(
 def main():
     user_language: typing.Literal['zh', 'en'] = 'zh'
 
-    SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。"
-                     "回答时,必须首先生成一个不超过5个字的简短句子,"
-                     "比如:\"让我想一下\"、\"在我看来\"、\"稍等我理一理\"、\"不错的问题\"、\"稍等片刻\"等,然后再进行正式回答,"
-                     "保持中文口语化表达,禁用emoji和系统相关描述,确保衔接词与内容存在合理逻辑关联。")
     # '罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'
    tts_speaker = '沈逸'
-    # QWen2.5 7B or 14B
-    llm = '14B'
 
-    launch_system(user_language, tts_speaker, llm)
+    launch_system(user_language, tts_speaker)
 
 
 if __name__ == '__main__':
src/VoiceDialogue/models/voice_task.py
CHANGED
@@ -8,6 +8,7 @@ class VoiceTask(BaseModel):
     id: str
 
     session_id: str = Field(default="")
+    language: str = Field(default="zh")
     is_speaking_over_threshold: bool = Field(default=False)
     is_over_audio_frames_threshold: bool = Field(default=False)
     user_voice: np.array = Field(default=np.array([]))
src/VoiceDialogue/services/speech/asr_service.py
CHANGED
@@ -180,6 +180,7 @@ class ASRWorker(BaseThread):
 
         while not self.stopped():
             voice_task: VoiceTask = self.user_voice_queue.get()
+            voice_task.language = self.language
             voice_task.whisper_start_time = time.time()
             user_voice: np.array = voice_task.user_voice
             transcribed_text = self.client.transcribe(user_voice)
src/VoiceDialogue/services/text/{llm.py → langchain_llm.py}
RENAMED
@@ -1,13 +1,8 @@
-import hashlib
-import os
 import pathlib
-import threading
 import typing
-from collections import OrderedDict
 
 from langchain_community.chat_models.llamacpp import ChatLlamaCpp
 from langchain_core.callbacks import StreamingStdOutCallbackHandler, CallbackManager
-from langchain_core.language_models.llms import LLM
 from langchain_core.messages import SystemMessage
 from langchain_core.prompts import (
     ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
@@ -16,74 +11,8 @@ from langchain_core.runnables import RunnableWithMessageHistory
 
 from utils.strings import remove_emojis, convert_comma_separated_numbers, convert_uppercase_words_to_lowercase
 
-default_llm_params = OrderedDict({
-    'streaming': True,
-    'n_gpu_layers': -1,
-    'n_batch': 512,
-    'n_ctx': 2048,
-    'f16_kv': True,
-    'temperature': 0.7,
-    'n_predict': -1,
-    'top_k': 50,
-    'top_p': 1.0,
-})
 
-
-singleton_chat_langchain_instance_uid: str = ''
-single_chat_instance_locker = threading.Lock()
-
-
-def setup_chat_langchain_pipeline(
-        local_model_path: str,
-        model_params: dict | None = None,
-        prompt_template: str = '',
-        get_session_history: typing.Callable = None
-):
-    model_path = pathlib.Path(local_model_path)
-    if not model_path.exists():
-        raise RuntimeError(f'Model path not exists: {model_path}')
-
-    if get_session_history is None:
-        raise RuntimeError(f'Function<get_session_history> can\'t be None.')
-
-    if not isinstance(model_params, dict):
-        model_params = default_llm_params
-
-    current_model_uid = generate_unique_id(model_path, model_params)
-
-    with single_chat_instance_locker:
-        global singleton_chat_langchain_instance_uid, singleton_chat_langchain_instance
-        if current_model_uid == singleton_chat_langchain_instance_uid:
-            instance = singleton_chat_langchain_instance
-            langchain_pipeline_is_warmup = True
-        else:
-            singleton_chat_langchain_instance_uid = current_model_uid
-            instance = setup_chat_llamacpp_langchain_instance(local_model_path, model_params)
-            singleton_chat_langchain_instance = instance
-            langchain_pipeline_is_warmup = False
-
-    pipeline = build_chat_langchain_pipeline(instance, prompt_template, get_session_history)
-
-    if not langchain_pipeline_is_warmup:
-        warmup_chat_langchain_pipeline(pipeline)
-
-    return pipeline
-
-
-def generate_unique_id(
-        model_path: str | os.PathLike,
-        model_params: dict,
-        multimodal_path: str | os.PathLike = ''
-):
-    model_uid_params = [f'llm_path={model_path}']
-    if multimodal_path:
-        model_uid_params.append(f'multimodal={multimodal_path}')
-    model_uid_params.extend(f'{k}:{v}' for k, v in model_params.items())
-    current_model_uid = hashlib.md5('&'.join(model_uid_params).encode()).hexdigest()
-    return current_model_uid
-
-
-def setup_chat_llamacpp_langchain_instance(
+def create_langchain_chat_llamacpp_instance(
         local_model_path: str,
         model_params: dict | None = None
 ) -> ChatLlamaCpp:
@@ -109,7 +38,7 @@ def setup_chat_llamacpp_langchain_instance(
     return llamacpp_langchain_instance
 
 
-def build_chat_langchain_pipeline(langchain_instance: LLM, system_prompt: str, get_session_history: typing.Callable):
+def create_langchain_pipeline(langchain_instance, system_prompt: str, get_session_history: typing.Callable):
     prompt = ChatPromptTemplate(messages=[
         SystemMessage(content=system_prompt),
         MessagesPlaceholder(variable_name="history"),
@@ -123,7 +52,7 @@ def build_chat_langchain_pipeline(langchain_instance: LLM, system_prompt: str, get_session_history: typing.Callable):
     return chain_with_history
 
 
-def warmup_chat_langchain_pipeline(pipeline):
+def warmup_langchain_pipeline(pipeline):
     print("Warmup chat pipeline...")
 
     user_input = 'Hello, this is warming up step, if you understand, output "Ok".'
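With the singleton cache, module-level lock, and generate_unique_id hashing removed, the renamed module exposes three plain factories and leaves instance reuse to the caller. A minimal composition sketch under that reading; the model path, params, and the get_history helper are illustrative, not taken from the repository:

    from langchain_core.chat_history import InMemoryChatMessageHistory

    from services.text.langchain_llm import (
        create_langchain_chat_llamacpp_instance,
        create_langchain_pipeline,
        warmup_langchain_pipeline,
    )

    _histories: dict[str, InMemoryChatMessageHistory] = {}

    def get_history(session_id: str) -> InMemoryChatMessageHistory:
        # hypothetical provider: one in-memory history per session
        return _histories.setdefault(session_id, InMemoryChatMessageHistory())

    model = create_langchain_chat_llamacpp_instance(
        local_model_path='models/llm/Qwen2.5-14B-Instruct.Q4_0.gguf',
        model_params={'streaming': True, 'n_ctx': 2048},
    )
    pipeline = create_langchain_pipeline(model, 'You are a helpful assistant.', get_history)
    warmup_langchain_pipeline(pipeline)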
src/VoiceDialogue/services/text/text_generator.py
CHANGED
@@ -5,10 +5,22 @@ from queue import Queue, Empty
 from langchain.memory import ConversationBufferWindowMemory
 from langchain_core.chat_history import InMemoryChatMessageHistory
 
+from config import paths
 from models.voice_task import VoiceTask
 from services.core.base import BaseThread
 from services.core.constants import chat_history_cache
-from services.text.llm import ...
+from services.text.langchain_llm import preprocess_sentence_text, \
+    create_langchain_chat_llamacpp_instance, create_langchain_pipeline, warmup_langchain_pipeline
+
+CHINESE_SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。"
+                         "回答时,必须首先生成一个不超过5个字的简短句子,"
+                         "比如:\"让我想一下\"、\"在我看来\"、\"稍等我理一理\"、\"不错的问题\"、\"稍等片刻\"等,然后再进行正式回答,"
+                         "保持中文口语化表达,禁用emoji和系统相关描述,确保衔接词与内容存在合理逻辑关联。")
+
+ENGLISH_SYSTEM_PROMPT = ("You are an AI assistant skilled at simulating authentic thinking processes. "
+                         "When responding, you must first generate a brief phrase of no more than 5 words, "
+                         "such as: 'Let me think', 'I see', 'Let me process this', 'Good question', 'One moment', etc., then proceed with your formal response. "
+                         "Maintain natural conversational English expression, avoid emojis and system-related descriptions, and ensure logical coherence between transitional phrases and content.")
 
 
 class LLMResponseGenerator(BaseThread):
@@ -16,18 +28,19 @@ class LLMResponseGenerator(BaseThread):
 
     def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
                  user_question_queue: Queue,
-                 generated_answer_queue: Queue,
-                 local_model_path: str,
-                 model_params: dict | None = None,
-                 prompt_template: str = ''):
+                 generated_answer_queue: Queue
+                 ):
         super().__init__(group, target, name, args, kwargs, daemon=daemon)
 
         self.user_question_queue = user_question_queue
         self.generated_answer_queue = generated_answer_queue
 
-        self.local_model_path = local_model_path
-        self.model_params = model_params
-        self.prompt_template = prompt_template
+    def _get_prompt_by_language(self, language: str) -> str:
+        """根据语言获取对应的 prompt"""
+        if language == "zh":
+            return CHINESE_SYSTEM_PROMPT
+        else:
+            return ENGLISH_SYSTEM_PROMPT
 
     def get_session_history(self, session_id: str) -> InMemoryChatMessageHistory:
         message_history = InMemoryChatMessageHistory()
@@ -104,10 +117,13 @@ class LLMResponseGenerator(BaseThread):
         print(f'用户问题: {user_question}')
         voice_task.llm_start_time = time.time()
 
+        system_prompt = self._get_prompt_by_language(voice_task.language)
+        pipeline = create_langchain_pipeline(self.model_instance, system_prompt, self.get_session_history)
+
         config = {"configurable": {"session_id": voice_task.session_id}}
 
         try:
-            for chunk in ...
+            for chunk in pipeline.stream(input={'input': user_question}, config=config):
                 chunk_content = f'{chunk.content.strip()}'
                 if not chunk_content:
                     continue
@@ -148,6 +164,24 @@ class LLMResponseGenerator(BaseThread):
             self._send_sentence_to_queue(voice_task, sentence, answer_index)
 
     def run(self):
+        model_path = paths.MODELS_PATH / 'llm' / 'Qwen2.5-14B-Instruct.Q4_0.gguf'
+        model_params = {
+            'streaming': True,
+            'n_gpu_layers': -1,
+            'n_batch': 512,
+            'n_ctx': 2048,
+            'f16_kv': True,
+            'temperature': 0.8,
+            # 'n_predict': -1,
+            'top_k': 50,
+            'top_p': 1.0,
+        }
+        self.model_instance = create_langchain_chat_llamacpp_instance(
+            local_model_path=model_path, model_params=model_params
+        )
+        pipeline = create_langchain_pipeline(self.model_instance, CHINESE_SYSTEM_PROMPT, self.get_session_history)
+        warmup_langchain_pipeline(pipeline)
+
        """主运行循环"""
         while not self.stopped():
             try:
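
Note how the cost is split in the new text_generator.py: the ChatLlamaCpp instance that holds the GGUF weights is built once in run(), while create_langchain_pipeline is re-invoked per task, which should be comparatively cheap since it only wraps the existing instance in a prompt template and history runnable. A condensed view of that per-task step, assuming the names from the diff above:

    # per task: rebuild only the lightweight prompt/history wrapper
    system_prompt = self._get_prompt_by_language(voice_task.language)
    pipeline = create_langchain_pipeline(self.model_instance, system_prompt, self.get_session_history)
    config = {"configurable": {"session_id": voice_task.session_id}}
    for chunk in pipeline.stream(input={'input': user_question}, config=config):
        ...  # chunks are cleaned and split into sentences for TTS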