# Hugging Face Space: RAG chatbot (Gradio UI over LangChain + FAISS).
| # import bs4 | |
| from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint | |
| from langchain.prompts import PromptTemplate | |
| import gradio as gr | |
# Remote Zephyr-7B chat model served via the Hugging Face Inference API.
repo_id = "HuggingFaceH4/zephyr-7b-beta"
# Low temperature keeps answers close to the retrieved context.
# NOTE(review): `max_length` is forwarded to the endpoint as a generation
# cap — TODO confirm the parameter name against the current
# langchain_huggingface HuggingFaceEndpoint signature (vs `max_new_tokens`).
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.1
)
def web_load(path):
    """Fetch a single web page and return it as LangChain documents."""
    # The bs4 SoupStrainer filter (post-content/title/header) was left
    # disabled by the author, so the whole page body is ingested.
    page_loader = WebBaseLoader(
        web_paths=(path,),
        # bs_kwargs=dict(
        #     parse_only=bs4.SoupStrainer(
        #         class_=("post-content", "post-title", "post-header")
        #     )
        # ),
    )
    return page_loader.load()
def pdf_load(path):
    """Load the PDF at *path*, split into one document per page."""
    return PyPDFLoader(path).load_and_split()
def vector_store(path):
    """Build a FAISS vector store from a PDF file or a web page.

    Parameters
    ----------
    path : str
        Filesystem path to a ``.pdf`` file, or a URL starting with
        ``http`` or ``www``.

    Returns
    -------
    tuple
        ``(vectorstore, message)`` — the populated FAISS store and a
        progress string shown in the UI.

    Raises
    ------
    ValueError
        If *path* is neither a PDF path nor a recognizable URL.
    """
    if path.endswith(".pdf"):
        docs = pdf_load(path)
    # BUG FIX: the original used `path.startswith("http" or "www")`, which
    # evaluates to `path.startswith("http")` and silently ignored "www..."
    # URLs. str.startswith accepts a tuple of prefixes.
    elif path.startswith(("http", "www")):
        docs = web_load(path)
    else:
        # Previously `docs` was left undefined here, producing an opaque
        # NameError below; fail loudly with a clear message instead.
        raise ValueError(f"Unsupported source: {path!r} (expected a .pdf file or a URL)")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
    )
    return vectorstore, "Done setup! You may proceed to Chatbot. "
def invoke(user_input, retriever):
    """Answer *user_input* via a RAG chain: retrieve, prompt the LLM, parse text."""
    # Zephyr chat template: the system turn carries the retrieved context,
    # the user turn carries the question.
    template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
"""
    rag_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    # LCEL pipeline: the retriever fills {context}; the raw input passes
    # through unchanged into {question}.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | rag_prompt
        | llm
        | StrOutputParser()
    )
    return chain.invoke(user_input)
def rag_chatbot(vectorstore, user_input, chat_history):
    """Gradio callback: answer *user_input* against *vectorstore*.

    Appends the (question, answer) pair to *chat_history*; returns an empty
    string (clears the input textbox) plus the updated history.
    """
    answer = invoke(user_input, vectorstore.as_retriever())
    chat_history.append((user_input, answer))
    return "", chat_history
def source(radio, source1, source2):
    """Pick the active source path from the radio selection.

    Returns *source1* (the URL textbox value) for "website", *source2*
    (the uploaded file) for "PDF", and None for any other selection.
    """
    return {"website": source1, "PDF": source2}.get(radio)
with gr.Blocks() as demo:
    # Shared FAISS store: written on the Setup tab, read on the Chatbot tab.
    vectorstore = gr.State()
    with gr.Tab("Setup"):
        # FIX: corrected user-facing typo "ULR" -> "URL".
        gr.Markdown("Input a website URL or upload a PDF file")
        with gr.Row():
            source1 = gr.Textbox(label="Input website")
            source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"])
        radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
        # Read-only mirror of whichever source is currently selected.
        path = gr.Textbox(label="Path of source", visible=True, interactive=False)
        # Keep `path` in sync on every change of the selector or either input.
        radio.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source1.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source2.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        done = gr.Textbox(label="Progress", interactive=False)
        setup_btn = gr.Button("Initialize vectorstore")
        setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        with gr.Row():
            clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
            send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")
        # Enter in the textbox and the Send button dispatch the same callback.
        msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
        send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()