# Multimodal_RAG / src/llama_cpp_chains.py
# Al1Abdullah's picture
# Create llama_cpp_chains.py
# ca1edba verified
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableSequence
from src.utils import load_config
class LlamaChain:
    """Conversational wrapper around a llama.cpp model using a Llama-3 style prompt.

    The prompt has two variables, ``chat_history`` and ``input``. History is
    kept in a windowed conversation memory (last 3 exchanges) backed by the
    ``chat_memory`` store supplied by the caller.
    """

    def __init__(self, chat_memory) -> None:
        """Build the prompt, the windowed memory and the LLM pipeline.

        Args:
            chat_memory: Message-history store handed to
                ``ConversationBufferWindowMemory`` as its ``chat_memory``.
                NOTE(review): presumably a LangChain chat message history
                object -- confirm against the caller.
        """
        prompt = PromptTemplate(
            template="""<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
You are a helpful and knowledgeable AI assistant.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Previous conversation={chat_history}
Question: {input}
Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
            input_variables=['chat_history', 'input']
        )
        self.memory = ConversationBufferWindowMemory(
            memory_key='chat_history',
            chat_memory=chat_memory,
            k=3,
            # The prompt is a plain string template, so the history must be
            # rendered as text; message objects would be interpolated as
            # their Python repr.
            return_messages=False
        )
        config = load_config()
        llm = LlamaCpp(**config['chat_model'])
        # A memory object is not a Runnable and cannot be a pipeline step
        # (piping it raises TypeError). History is loaded and saved
        # explicitly in run() instead.
        self.llm_chain = prompt | llm | StrOutputParser()

    def run(self, user_input: str) -> str:
        """Answer ``user_input`` with conversation history, and record the exchange.

        Args:
            user_input: The user's question for this turn.

        Returns:
            The model's answer as a plain string (the output of
            ``StrOutputParser``).
        """
        history = self.memory.load_memory_variables({})['chat_history']
        # The prompt declares two variables, so the chain needs a mapping,
        # not a bare string.
        response = self.llm_chain.invoke(
            {'chat_history': history, 'input': user_input}
        )
        # Persist this turn so it appears in the next call's history.
        self.memory.save_context({'input': user_input}, {'output': response})
        return response