Spaces:

rubabuddin
/

no-ragrets

Sleeping

App Files Files Community

no-ragrets / app.py

rubabuddin

Add initial application

2d71cfe almost 2 years ago

raw

history blame contribute delete

4.21 kB

	import json
	from langchain_openai import ChatOpenAI
	from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain.schema.runnable import Runnable
	from langchain.schema.runnable.config import RunnableConfig

	from langchain.memory import ChatMessageHistory
	from langchain_core.chat_history import BaseChatMessageHistory
	from langchain_core.runnables.history import RunnableWithMessageHistory

	from langchain.chains import create_history_aware_retriever, create_retrieval_chain
	from langchain.chains.combine_documents import create_stuff_documents_chain

	import chainlit as cl
	from retriever import fetch_retriever_or_load_local_retriever

	# Local development: chainlit run app.py -w
	# Architecture diagram: https://python.langchain.com/v0.1/assets/images/conversational_retrieval_chain-5c7a96abe29e582bc575a0a0d63f86b0.png

	# Chat model used by both the question-rewriting step and the answering step below.
	llm = ChatOpenAI(
	    model="gpt-4o",
	    temperature=0,
	    streaming=True,
	)

	# Retriever comes from the project-local `retriever` module (per the original note:
	# a local FAISS retriever with preloaded embeddings).
	retriever = fetch_retriever_or_load_local_retriever()

	### Contextualize question ###
	# System instruction for the rewrite step: the model must restate the latest user
	# question as a standalone question and must not answer it.
	contextualize_q_system_prompt = """Given a chat history and the latest user question \
	which might reference context in the chat history, formulate a standalone question \
	which can be understood without the chat history. Do NOT answer the question, \
	just reformulate it if needed and otherwise return it as is."""

	# Message order: system instruction, prior turns ("chat_history"), newest user input.
	contextualize_q_prompt = ChatPromptTemplate.from_messages(
	    [
	        ("system", contextualize_q_system_prompt),
	        MessagesPlaceholder("chat_history"),
	        ("human", "{input}"),
	    ]
	)

	# Per the original note: this also handles an empty chat_history, and otherwise
	# applies prompt | llm | StrOutputParser() | retriever in sequence.
	history_aware_retriever = create_history_aware_retriever(
	    llm=llm,
	    retriever=retriever,
	    prompt=contextualize_q_prompt,
	)
	### Answer question ###
	# System instruction for the answering step. The retrieved documents are injected
	# through the {context} placeholder. The literal is assembled from single-line
	# strings so that file indentation can never leak into the prompt text, and the
	# opening sentence no longer ends in the dangling fragment
	# ". and eloquent answers to questions about movies." left by an earlier edit.
	qa_system_prompt = (
	    "You're an assistant that provides eloquent answers to questions about "
	    "movies and films. Use the following pieces of retrieved context to "
	    "answer the question. Use three sentences maximum and keep the answer "
	    "concise.\n\n"
	    "{context}"
	)

	# Message order: system instruction (with context), prior turns, newest user input.
	qa_prompt = ChatPromptTemplate.from_messages(
	    [
	        ("system", qa_system_prompt),
	        MessagesPlaceholder("chat_history"),
	        ("human", "{input}"),
	    ]
	)

	# Chain that takes the retrieved context together with the conversation history
	# and the query, and generates an answer.
	question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

	# Applies history_aware_retriever and question_answer_chain in sequence, retaining
	# intermediate outputs such as the retrieved context (read later under the
	# "context" key by the message handler).
	rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

	### Statefully manage chat history
	# Module-level map of session id -> chat history object. Nothing in this file
	# removes entries from it.
	store = {}


	def get_session_history(session_id: str) -> BaseChatMessageHistory:
	    """Return the chat history for ``session_id``, creating it on first use.

	    Args:
	        session_id: Key identifying the conversation in ``store``.

	    Returns:
	        The history object stored under ``session_id``; a new, empty
	        ``ChatMessageHistory`` is stored and returned when the key is absent.
	    """
	    try:
	        return store[session_id]
	    except KeyError:
	        history = store[session_id] = ChatMessageHistory()
	        return history


	@cl.on_chat_start
	async def on_chat_start():
	    """Build the history-aware RAG runnable and store it in the user session.

	    The runnable is saved under the "runnable" key, where the message handler
	    looks it up.
	    """
	    # Keys telling the history wrapper where the user input, the prior messages
	    # and the generated answer live in the chain's input/output dicts.
	    history_keys = {
	        "input_messages_key": "input",
	        "history_messages_key": "chat_history",
	        "output_messages_key": "answer",
	    }
	    cl.user_session.set(
	        "runnable",
	        RunnableWithMessageHistory(rag_chain, get_session_history, **history_keys),
	    )


	@cl.on_message
	async def on_message(message: cl.Message):
	    """Run the session's runnable on an incoming message and stream the answer.

	    Args:
	        message: The incoming Chainlit message; its ``content`` is passed to the
	            runnable as the "input" value.
	    """
	    runnable = cl.user_session.get("runnable")  # type: Runnable
	    msg = cl.Message(content="")

	    payload = {"input": message.content}
	    # The session id selects which chat history the runnable reads and appends to.
	    run_config = RunnableConfig(
	        callbacks=[cl.LangchainCallbackHandler()],
	        configurable={"session_id": cl.user_session.get("id")},
	    )

	    async for chunk in runnable.astream(payload, config=run_config):
	        # Replace retrieved Documents in the chunk with a JSON string of their
	        # page_content/metadata so the chunk is JSON serializable; the context
	        # itself is never streamed to the user.
	        if "context" in chunk:
	            serializable_docs = []
	            for doc in chunk["context"]:
	                serializable_docs.append(
	                    {"page_content": doc.page_content, "metadata": doc.metadata}
	                )
	            chunk["context"] = json.dumps(serializable_docs)
	        # Only the answer tokens are forwarded to the chat message.
	        if "answer" in chunk:
	            await msg.stream_token(chunk["answer"])

	    await msg.send()