added USE_LLAMA_2_PROMPT_TEMPLATE

Files changed:
- .env.example (+1, -0)
- app_modules/llm_chat_chain.py (+22, -1)
.env.example
@@ -19,6 +19,7 @@ HF_PIPELINE_DEVICE_TYPE=
 # LOAD_QUANTIZED_MODEL=4bit
 # LOAD_QUANTIZED_MODEL=8bit
 
+USE_LLAMA_2_PROMPT_TEMPLATE=true
 DISABLE_MODEL_PRELOADING=true
 CHAT_HISTORY_ENABLED=true
 SHOW_PARAM_SETTINGS=false
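
Note: environment values are plain strings, so the new flag only takes effect when set to the literal value "true". A minimal sketch of the check, mirroring the comparison added in app_modules/llm_chat_chain.py below:

    import os

    # Only the exact string "true" enables the Llama 2 template;
    # "True", "1", or an unset variable leave the default template in place.
    use_llama_2_template = os.environ.get("USE_LLAMA_2_PROMPT_TEMPLATE") == "true"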
app_modules/llm_chat_chain.py
@@ -1,3 +1,5 @@
+import os
+
 from langchain import LLMChain, PromptTemplate
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains.base import Chain
@@ -6,19 +8,38 @@ from langchain.memory import ConversationBufferMemory
 from app_modules.llm_inference import LLMInference
 
 
+def get_llama_2_prompt_template():
+    B_INST, E_INST = "[INST]", "[/INST]"
+    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+
+    instruction = "Chat History:\n\n{chat_history} \n\nUser: {question}"
+    system_prompt = "You are a helpful assistant, you always only answer for the assistant then you stop. read the chat history to get context"
+
+    SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
+    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
+    return prompt_template
+
+
 class ChatChain(LLMInference):
     def __init__(self, llm_loader):
         super().__init__(llm_loader)
 
     def create_chain(self) -> Chain:
-        template = """You are a chatbot having a conversation with a human.
+        template = (
+            get_llama_2_prompt_template()
+            if os.environ.get("USE_LLAMA_2_PROMPT_TEMPLATE") == "true"
+            else """You are a chatbot having a conversation with a human.
 {chat_history}
 Human: {question}
 Chatbot:"""
+        )
+
+        print(f"template: {template}")
 
         prompt = PromptTemplate(
             input_variables=["chat_history", "question"], template=template
         )
+
         memory = ConversationBufferMemory(memory_key="chat_history")
 
         llm_chain = LLMChain(
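
For reference, a standalone sketch (not part of the commit) that reproduces the string get_llama_2_prompt_template() builds and renders it with sample values; the sample chat_history and question are illustrative only, since at runtime LLMChain fills them in through the PromptTemplate:

    # Reproduces the helper added above, outside the class for quick inspection.
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    instruction = "Chat History:\n\n{chat_history} \n\nUser: {question}"
    system_prompt = (
        "You are a helpful assistant, you always only answer for the assistant "
        "then you stop. read the chat history to get context"
    )

    template = B_INST + B_SYS + system_prompt + E_SYS + instruction + E_INST

    # Prints a single-turn Llama 2 prompt of the form:
    # [INST]<<SYS>>
    # You are a helpful assistant, ... read the chat history to get context
    # <</SYS>>
    #
    # Chat History:
    #
    # Human: hi
    # AI: hello
    #
    # User: How are you?[/INST]
    print(template.format(chat_history="Human: hi\nAI: hello", question="How are you?"))

Note the trailing space in the instruction string after {chat_history}; it is carried through into the rendered prompt.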