File size: 3,794 Bytes
3197c1b
987a1d3
 
9abf004
73757d5
987a1d3
 
b7779d7
4df370d
9abf004
 
 
 
 
 
 
 
 
 
b7779d7
 
 
7a2447c
 
 
 
 
b7779d7
 
 
 
 
9abf004
7a2447c
 
 
 
 
 
 
 
 
 
 
 
b7779d7
 
 
9abf004
7a2447c
b7779d7
 
 
 
 
 
9abf004
b7779d7
4df370d
b7779d7
 
 
 
 
9abf004
b7779d7
 
 
 
 
 
 
 
7a2447c
b7779d7
 
 
 
 
 
 
 
 
 
7a2447c
 
 
b7779d7
7a2447c
b7779d7
7a2447c
b7779d7
 
7a2447c
 
 
 
 
4df370d
7a2447c
 
 
 
 
a971189
7a2447c
 
3197c1b
48230d0
 
7a2447c
3197c1b
 
23c03a7
7a2447c
 
 
 
 
a971189
 
 
 
 
7a2447c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader 
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
import gradio as gr


# HF Inference API model used for answer generation in the RAG chain.
repo_id = "HuggingFaceH4/zephyr-7b-beta"

# NOTE(review): `max_length` may not be a recognized generation parameter for
# HuggingFaceEndpoint (the usual knob is `max_new_tokens`) — confirm against
# the langchain_huggingface docs; 128 tokens is also quite short for RAG answers.
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.1
)



def web_load(path):
  """Fetch a web page and return it as a list of LangChain documents.

  Args:
    path: URL of the page to load.

  Returns:
    The documents produced by WebBaseLoader for the page.
  """
  # No bs4 SoupStrainer filter is applied, so the whole page body is
  # loaded rather than only blog-post sections.
  page_loader = WebBaseLoader(web_paths=(path,))
  return page_loader.load()


def pdf_load(path):
  """Load a PDF file and return it split into per-page LangChain documents.

  Args:
    path: Filesystem path of the PDF.

  Returns:
    The page documents produced by PyPDFLoader.load_and_split().
  """
  return PyPDFLoader(path).load_and_split()


def vector_store(path):
  """Build a FAISS vector store from a PDF file or a web page.

  Args:
    path: A file path ending in ".pdf", or a URL starting with
      "http"/"www".

  Returns:
    A (vectorstore, status_message) tuple matching the Gradio outputs
    [vectorstore, done].

  Raises:
    ValueError: If `path` is neither a PDF path nor a recognizable URL.
  """
  if path.endswith(".pdf"):
    docs = pdf_load(path)
  # BUG FIX: the original `path.startswith("http" or "www")` evaluated to
  # `path.startswith("http")` ("http" or "www" short-circuits to "http"),
  # so "www..." URLs never matched and `docs` was referenced unbound below.
  # str.startswith accepts a tuple of prefixes.
  elif path.startswith(("http", "www")):
    docs = web_load(path)
  else:
    # Fail loudly instead of crashing later with NameError on `docs`.
    raise ValueError(f"Unsupported source: {path!r} (expected a .pdf file or a URL)")
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
  splits = text_splitter.split_documents(docs)
  vectorstore = FAISS.from_documents(
      documents=splits,
      embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
  )

  return vectorstore, "Done setup! You may proceed to Chatbot. "


def invoke(user_input, retriever):
  """Answer a question with the RAG chain (retrieve context, then generate).

  Args:
    user_input: The user's question.
    retriever: A LangChain retriever supplying context documents.

  Returns:
    The model's answer as a plain string.
  """
  # Zephyr-style chat template with slots for retrieved context and question.
  template = """
  <|system|>
  Answer the question based on your knowledge. Use the following context to help:

  {context}

  </s>
  <|user|>
  {question}
  </s>
  <|assistant|>

  """

  qa_prompt = PromptTemplate(
      input_variables=["context", "question"],
      template=template,
  )

  # The question flows through unchanged; the retriever fills in context.
  chain = (
      {"context": retriever, "question": RunnablePassthrough()}
      | qa_prompt
      | llm
      | StrOutputParser()
  )

  return chain.invoke(user_input)



def rag_chatbot(vectorstore, user_input, chat_history):
  """Handle one chat turn: answer the question and record it in the history.

  Args:
    vectorstore: The FAISS store built during setup.
    user_input: The question typed into the textbox.
    chat_history: Gradio chat history, a list of (user, bot) pairs;
      mutated in place.

  Returns:
    ("", chat_history) — the empty string clears the input textbox.
  """
  reply = invoke(user_input, vectorstore.as_retriever())
  chat_history.append((user_input, reply))
  return "", chat_history


def source(radio, source1, source2):
  """Pick the active source path from the selected radio option.

  Args:
    radio: Selected option — "website" or "PDF".
    source1: Website URL textbox value.
    source2: Uploaded PDF file path.

  Returns:
    The matching source value, or None when no option is selected.
  """
  return {"website": source1, "PDF": source2}.get(radio)



# Two-tab UI: "Setup" builds the vector store from a URL or PDF,
# "Chatbot" runs the RAG conversation against it.
with gr.Blocks() as demo:
  # Session-scoped holder for the FAISS store produced in the Setup tab.
  vectorstore = gr.State()
  with gr.Tab("Setup"):
    # FIX: user-facing typo "ULR" -> "URL".
    gr.Markdown("Input a website URL or upload a PDF file")
    with gr.Row():
      source1 = gr.Textbox(label="Input website",)
      source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"]) 
    radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
    # Read-only display of the resolved path; kept in sync by the three
    # change handlers below so either input or the radio updates it.
    path = gr.Textbox(label="Path of source", visible=True, interactive=False)
    radio.change(fn=source, inputs=[radio,source1,source2], outputs=path)
    source1.change(fn=source, inputs=[radio,source1,source2], outputs=path)
    source2.change(fn=source, inputs=[radio,source1,source2], outputs=path)
    done = gr.Textbox(label="Progress", interactive=False)
    setup_btn = gr.Button("Initialize vectorstore")
    setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
  with gr.Tab("Chatbot"):
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    with gr.Row(): 
      clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
      send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")   
      # Enter key and Send button both run a chat turn; rag_chatbot returns
      # "" for msg, which clears the textbox after each submission.
      msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
      send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])


# Launch the Gradio app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()