File size: 3,794 Bytes
3197c1b
987a1d3
 
9abf004
73757d5
987a1d3
 
b7779d7
4df370d
9abf004
 
 
 
 
 
 
 
 
 
b7779d7
 
 
7a2447c
 
 
 
 
b7779d7
 
 
 
 
9abf004
7a2447c
 
 
 
 
 
 
 
 
 
 
 
b7779d7
 
 
9abf004
7a2447c
b7779d7
 
 
 
 
 
9abf004
b7779d7
4df370d
b7779d7
 
 
 
 
9abf004
b7779d7
 
 
 
 
 
 
 
7a2447c
b7779d7
 
 
 
 
 
 
 
 
 
7a2447c
 
 
b7779d7
7a2447c
b7779d7
7a2447c
b7779d7
 
7a2447c
 
 
 
 
4df370d
7a2447c
 
 
 
 
a971189
7a2447c
 
3197c1b
48230d0
 
7a2447c
3197c1b
 
23c03a7
7a2447c
 
 
 
 
a971189
 
 
 
 
7a2447c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader 
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
import gradio as gr


# HF Inference API model used for answer generation in the RAG chain.
repo_id = "HuggingFaceH4/zephyr-7b-beta"

# NOTE(review): `max_length` may not be a recognized generation parameter for
# HuggingFaceEndpoint (the usual knob is `max_new_tokens`) — confirm against
# the langchain_huggingface docs; 128 tokens is also quite short for RAG answers.
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.1
)



def web_load(path):
  """Fetch a web page and return it as a list of LangChain documents.

  Args:
    path: URL of the page to load.

  Returns:
    The documents produced by WebBaseLoader for the page.
  """
  # No bs4 SoupStrainer filter is applied, so the whole page body is
  # loaded rather than only blog-post sections.
  page_loader = WebBaseLoader(web_paths=(path,))
  return page_loader.load()


def pdf_load(path):
  """Load a PDF file and return it split into per-page LangChain documents.

  Args:
    path: Filesystem path of the PDF.

  Returns:
    The page documents produced by PyPDFLoader.load_and_split().
  """
  return PyPDFLoader(path).load_and_split()


def vector_store(path):
  """Build a FAISS vector store from a PDF file or a web page.

  Args:
    path: A file path ending in ".pdf", or a URL starting with
      "http"/"www".

  Returns:
    A (vectorstore, status_message) tuple matching the Gradio outputs
    [vectorstore, done].

  Raises:
    ValueError: If `path` is neither a PDF path nor a recognizable URL.
  """
  if path.endswith(".pdf"):
    docs = pdf_load(path)
  # BUG FIX: the original `path.startswith("http" or "www")` evaluated to
  # `path.startswith("http")` ("http" or "www" short-circuits to "http"),
  # so "www..." URLs never matched and `docs` was referenced unbound below.
  # str.startswith accepts a tuple of prefixes.
  elif path.startswith(("http", "www")):
    docs = web_load(path)
  else:
    # Fail loudly instead of crashing later with NameError on `docs`.
    raise ValueError(f"Unsupported source: {path!r} (expected a .pdf file or a URL)")
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
  splits = text_splitter.split_documents(docs)
  vectorstore = FAISS.from_documents(
      documents=splits,
      embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
  )

  return vectorstore, "Done setup! You may proceed to Chatbot. "


def invoke(user_input, retriever):
  """Answer a question with the RAG chain (retrieve context, then generate).

  Args:
    user_input: The user's question.
    retriever: A LangChain retriever supplying context documents.

  Returns:
    The model's answer as a plain string.
  """
  # Zephyr-style chat template with slots for retrieved context and question.
  template = """
  <|system|>
  Answer the question based on your knowledge. Use the following context to help:

  {context}

  </s>
  <|user|>
  {question}
  </s>
  <|assistant|>

  """

  qa_prompt = PromptTemplate(
      input_variables=["context", "question"],
      template=template,
  )

  # The question flows through unchanged; the retriever fills in context.
  chain = (
      {"context": retriever, "question": RunnablePassthrough()}
      | qa_prompt
      | llm
      | StrOutputParser()
  )

  return chain.invoke(user_input)



def rag_chatbot(vectorstore, user_input, chat_history):
  """Handle one chat turn: answer the question and record it in the history.

  Args:
    vectorstore: The FAISS store built during setup.
    user_input: The question typed into the textbox.
    chat_history: Gradio chat history, a list of (user, bot) pairs;
      mutated in place.

  Returns:
    ("", chat_history) — the empty string clears the input textbox.
  """
  reply = invoke(user_input, vectorstore.as_retriever())
  chat_history.append((user_input, reply))
  return "", chat_history


def source(radio, source1, source2):
  """Pick the active source path from the selected radio option.

  Args:
    radio: Selected option — "website" or "PDF".
    source1: Website URL textbox value.
    source2: Uploaded PDF file path.

  Returns:
    The matching source value, or None when no option is selected.
  """
  return {"website": source1, "PDF": source2}.get(radio)



# Two-tab UI: "Setup" builds the vector store from a URL or PDF,
# "Chatbot" runs the RAG conversation against it.
with gr.Blocks() as demo:
  # Session-scoped holder for the FAISS store produced in the Setup tab.
  vectorstore = gr.State()
  with gr.Tab("Setup"):
    # FIX: user-facing typo "ULR" -> "URL".
    gr.Markdown("Input a website URL or upload a PDF file")
    with gr.Row():
      source1 = gr.Textbox(label="Input website",)
      source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"]) 
    radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
    # Read-only display of the resolved path; kept in sync by the three
    # change handlers below so either input or the radio updates it.
    path = gr.Textbox(label="Path of source", visible=True, interactive=False)
    radio.change(fn=source, inputs=[radio,source1,source2], outputs=path)
    source1.change(fn=source, inputs=[radio,source1,source2], outputs=path)
    source2.change(fn=source, inputs=[radio,source1,source2], outputs=path)
    done = gr.Textbox(label="Progress", interactive=False)
    setup_btn = gr.Button("Initialize vectorstore")
    setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
  with gr.Tab("Chatbot"):
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    with gr.Row(): 
      clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
      send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")   
      # Enter key and Send button both run a chat turn; rag_chatbot returns
      # "" for msg, which clears the textbox after each submission.
      msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
      send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])


# Launch the Gradio app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()