# Hugging Face Space: RAG chatbot (Gradio UI over LangChain + FAISS).
| # import bs4 | |
| from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint | |
| from langchain.prompts import PromptTemplate | |
| import gradio as gr | |
# Remote Zephyr-7B chat model served via the Hugging Face Inference API.
repo_id = "HuggingFaceH4/zephyr-7b-beta"
# Low temperature keeps answers close to the retrieved context.
# NOTE(review): `max_length` is forwarded to the endpoint as a generation
# cap — TODO confirm the parameter name against the current
# langchain_huggingface HuggingFaceEndpoint signature (vs `max_new_tokens`).
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.1
)
def web_load(path):
    """Fetch a single web page and return it as LangChain documents."""
    # The bs4 SoupStrainer filter (post-content/title/header) was left
    # disabled by the author, so the whole page body is ingested.
    page_loader = WebBaseLoader(
        web_paths=(path,),
        # bs_kwargs=dict(
        #     parse_only=bs4.SoupStrainer(
        #         class_=("post-content", "post-title", "post-header")
        #     )
        # ),
    )
    return page_loader.load()
def pdf_load(path):
    """Load the PDF at *path*, split into one document per page."""
    return PyPDFLoader(path).load_and_split()
def vector_store(path):
    """Build a FAISS vector store from a PDF file or a web page.

    Parameters
    ----------
    path : str
        Filesystem path to a ``.pdf`` file, or a URL starting with
        ``http`` or ``www``.

    Returns
    -------
    tuple
        ``(vectorstore, message)`` — the populated FAISS store and a
        progress string shown in the UI.

    Raises
    ------
    ValueError
        If *path* is neither a PDF path nor a recognizable URL.
    """
    if path.endswith(".pdf"):
        docs = pdf_load(path)
    # BUG FIX: the original used `path.startswith("http" or "www")`, which
    # evaluates to `path.startswith("http")` and silently ignored "www..."
    # URLs. str.startswith accepts a tuple of prefixes.
    elif path.startswith(("http", "www")):
        docs = web_load(path)
    else:
        # Previously `docs` was left undefined here, producing an opaque
        # NameError below; fail loudly with a clear message instead.
        raise ValueError(f"Unsupported source: {path!r} (expected a .pdf file or a URL)")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
    )
    return vectorstore, "Done setup! You may proceed to Chatbot. "
def invoke(user_input, retriever):
    """Answer *user_input* via a RAG chain: retrieve, prompt the LLM, parse text."""
    # Zephyr chat template: the system turn carries the retrieved context,
    # the user turn carries the question.
    template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
"""
    rag_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    # LCEL pipeline: the retriever fills {context}; the raw input passes
    # through unchanged into {question}.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | rag_prompt
        | llm
        | StrOutputParser()
    )
    return chain.invoke(user_input)
def rag_chatbot(vectorstore, user_input, chat_history):
    """Gradio callback: answer *user_input* against *vectorstore*.

    Appends the (question, answer) pair to *chat_history*; returns an empty
    string (clears the input textbox) plus the updated history.
    """
    answer = invoke(user_input, vectorstore.as_retriever())
    chat_history.append((user_input, answer))
    return "", chat_history
def source(radio, source1, source2):
    """Pick the active source path from the radio selection.

    Returns *source1* (the URL textbox value) for "website", *source2*
    (the uploaded file) for "PDF", and None for any other selection.
    """
    return {"website": source1, "PDF": source2}.get(radio)
with gr.Blocks() as demo:
    # Shared FAISS store: written on the Setup tab, read on the Chatbot tab.
    vectorstore = gr.State()
    with gr.Tab("Setup"):
        # FIX: corrected user-facing typo "ULR" -> "URL".
        gr.Markdown("Input a website URL or upload a PDF file")
        with gr.Row():
            source1 = gr.Textbox(label="Input website")
            source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"])
        radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
        # Read-only mirror of whichever source is currently selected.
        path = gr.Textbox(label="Path of source", visible=True, interactive=False)
        # Keep `path` in sync on every change of the selector or either input.
        radio.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source1.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source2.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        done = gr.Textbox(label="Progress", interactive=False)
        setup_btn = gr.Button("Initialize vectorstore")
        setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        with gr.Row():
            clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
            send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")
        # Enter in the textbox and the Send button dispatch the same callback.
        msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
        send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()