# (Removed: Hugging Face Spaces file-viewer residue — blame hashes and a
# line-number gutter captured by scraping; not part of the program.)
# import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
import gradio as gr
# Remote model served via the Hugging Face Inference API; no local weights.
repo_id = "HuggingFaceH4/zephyr-7b-beta"
# Low temperature (0.1) keeps answers close to the retrieved context.
# NOTE(review): newer langchain-huggingface releases prefer `max_new_tokens`
# over `max_length` — confirm against the installed version.
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.1
)
def web_load(path):
    """Fetch a single web page and return it as a list of LangChain documents.

    Args:
        path: URL of the page to load.

    Returns:
        The documents produced by ``WebBaseLoader.load()``.
    """
    # A bs4.SoupStrainer via bs_kwargs could restrict parsing to specific
    # CSS classes (e.g. "post-content"), but is left off so arbitrary
    # pages load in full.
    loader = WebBaseLoader(web_paths=(path,))
    return loader.load()
def pdf_load(path):
    """Load a PDF from *path* and return its content split into pages.

    Args:
        path: Filesystem path of the PDF file.

    Returns:
        The page documents produced by ``PyPDFLoader.load_and_split()``.
    """
    return PyPDFLoader(path).load_and_split()
def vector_store(path):
    """Build a FAISS vectorstore from a PDF file or a web page.

    Args:
        path: Filesystem path of a ``.pdf`` file, or a URL beginning with
            "http" or "www".

    Returns:
        Tuple of (vectorstore, status message). For an unrecognized
        source, returns ``(None, error message)`` so the Gradio UI can
        surface the problem instead of crashing with a NameError.
    """
    if path and path.endswith(".pdf"):
        docs = pdf_load(path)
    # BUG FIX: the original used `path.startswith("http" or "www")`, which
    # evaluates to `path.startswith("http")` — "www." URLs were never
    # matched. `startswith` accepts a tuple of prefixes.
    elif path and path.startswith(("http", "www")):
        docs = web_load(path)
    else:
        # Previously `docs` was unbound here, raising NameError downstream.
        return None, "Unrecognized source: provide a .pdf file or a URL."
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
    )
    return vectorstore, "Done setup! You may proceed to Chatbot. "
def invoke(user_input, retriever):
    """Answer *user_input* by running the RAG chain once.

    Args:
        user_input: The user's question as a plain string.
        retriever: A LangChain retriever that supplies context documents.

    Returns:
        The model's answer as a plain string.
    """
    # Zephyr chat template: system context, user question, assistant turn.
    prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
 """
    rag_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    # Pipeline: the retriever fills {context}, the raw question passes
    # through unchanged, the prompt renders, the endpoint generates, and
    # the parser reduces the output to a plain string.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | rag_prompt
        | llm
        | StrOutputParser()
    )
    return chain.invoke(user_input)
def rag_chatbot(vectorstore, user_input, chat_history):
    """Handle one chat turn: answer the question and extend the history.

    Args:
        vectorstore: The FAISS store built during Setup.
        user_input: The question typed into the message box.
        chat_history: Gradio chat history, mutated in place.

    Returns:
        ``("", chat_history)`` — the empty string clears the textbox.
    """
    answer = invoke(user_input, vectorstore.as_retriever())
    chat_history.append((user_input, answer))
    return "", chat_history
def source(radio, source1, source2):
    """Select the active source path for the chosen radio option.

    Args:
        radio: Selected option label ("website" or "PDF").
        source1: Website URL from the textbox.
        source2: Uploaded PDF path from the file widget.

    Returns:
        The matching source value, or ``None`` for any other selection
        (same implicit fallthrough as before).
    """
    return {"website": source1, "PDF": source2}.get(radio)
# --- Gradio UI: Setup tab builds the vectorstore, Chatbot tab queries it ---
with gr.Blocks() as demo:
    # Session-scoped holder for the FAISS vectorstore built in Setup.
    vectorstore = gr.State()
    with gr.Tab("Setup"):
        gr.Markdown("Input a website ULR or upload a PDF file")
        with gr.Row():
            source1 = gr.Textbox(label="Input website",)
            source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"])
        radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
        # Read-only echo of whichever source is currently selected.
        path = gr.Textbox(label="Path of source", visible=True, interactive=False)
        # Keep `path` in sync with the radio choice and with either input.
        radio.change(fn=source, inputs=[radio,source1,source2], outputs=path)
        source1.change(fn=source, inputs=[radio,source1,source2], outputs=path)
        source2.change(fn=source, inputs=[radio,source1,source2], outputs=path)
        done = gr.Textbox(label="Progress", interactive=False)
        setup_btn = gr.Button("Initialize vectorstore")
        # Builds the vectorstore from `path` and reports status in `done`.
        setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        with gr.Row():
            clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
            send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")
        # Enter key and the Send button both trigger one chat turn.
        msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
        send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()