"""Chainlit chat app: retrieval-augmented conversation over a Pinecone index.

On chat start, a ConversationalRetrievalChain is assembled (OpenAI chat model,
Cohere embeddings, Pinecone retriever, token-buffer memory) and stored in the
Chainlit user session; each incoming message is answered through that chain.
"""

import os
import time  # noqa: F401  (kept from original; presumably used elsewhere/planned)

import chainlit as cl
import pinecone
from chainlit import on_chat_start, on_message  # noqa: F401  (shadowed by the handlers below)
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.memory import ConversationTokenBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.vectorstores import Pinecone
from promptwatch import PromptWatch  # noqa: F401  (kept from original)

from prompts import load_query_gen_prompt, load_simchat_prompt

# Name of the pre-populated Pinecone index holding the document embeddings.
index_name = "chat-index-v1"

# System prompt for the answering chain and the standalone-question
# (condense) prompt for the question-generator chain.
simchat = load_simchat_prompt()
query_gen_prompt = load_query_gen_prompt()
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(query_gen_prompt)

pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment="gcp-starter",
)


@cl.on_chat_start
async def on_chat_start():
    """Build the retrieval conversation chain and stash it in the user session.

    Side effects: stores "conversation_chain" and a seed "message_history"
    in `cl.user_session`. Requires OPENAI_API_KEY and COHERE_API_KEY env vars.
    """
    # Streaming chat model; temperature 0 for deterministic answers.
    llm = ChatOpenAI(
        temperature=0.0,
        verbose=False,
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        streaming=True,
        model_name="gpt-3.5-turbo",
    )

    # Token-bounded memory so long chats don't overflow the model context.
    memory = ConversationTokenBufferMemory(
        llm=llm,
        memory_key="chat_history",
        return_messages=True,
        input_key="question",
        max_token_limit=1000,
    )

    # Retriever over the existing Pinecone index, embedding queries with Cohere.
    embeddings = CohereEmbeddings(
        model="embed-english-light-v2.0",
        cohere_api_key=os.environ.get("COHERE_API_KEY"),
    )
    docsearch = Pinecone.from_existing_index(
        index_name=index_name, embedding=embeddings
    )
    retriever = docsearch.as_retriever(search_kwargs={"k": 4})

    # Answering prompt: system persona + the (rephrased) user question.
    messages = [
        SystemMessagePromptTemplate.from_template(simchat),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
    prompt = ChatPromptTemplate.from_messages(messages)

    # Chain that condenses chat history + follow-up into a standalone query.
    question_generator = LLMChain(
        llm=llm, prompt=CONDENSE_QUESTION_PROMPT, verbose=False
    )
    # "stuff" chain: all retrieved docs are packed into a single prompt.
    doc_chain = load_qa_with_sources_chain(
        llm, chain_type="stuff", verbose=False, prompt=prompt
    )

    chain = ConversationalRetrievalChain(
        retriever=retriever,
        question_generator=question_generator,
        combine_docs_chain=doc_chain,
        verbose=False,
        memory=memory,
        rephrase_question=False,
    )

    cl.user_session.set("conversation_chain", chain)
    cl.user_session.set(
        "message_history",
        [{"role": "system", "content": "You are a helpful assistant."}],
    )


@cl.on_message
async def on_message(message: cl.Message):
    """Answer an incoming user message via the session's conversation chain.

    Appends both the user turn and the assistant turn to "message_history"
    and sends the chain's answer back to the UI.
    """
    message_history = cl.user_session.get("message_history")
    message_history.append({"role": "user", "content": message.content})

    chain = cl.user_session.get("conversation_chain")
    # NOTE(review): 'stream' is not a declared input of the chain; streaming is
    # governed by the LLM's streaming=True. Kept for parity — confirm and drop.
    res = await chain.arun({"question": message.content, "stream": True})

    msg = cl.Message(content=res)
    message_history.append({"role": "assistant", "content": msg.content})
    await msg.send()