# Scraped from a HuggingFace Spaces page; the Space was reporting "Runtime error" at capture time.
| import gradio as gr | |
| import os | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.retrievers.document_compressors import LLMChainExtractor | |
| from langchain.retrievers.multi_query import MultiQueryRetriever | |
| from langchain.retrievers import ContextualCompressionRetriever | |
| from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate | |
| from langchain.vectorstores import Chroma | |
# Shared chat model, used both to answer questions and to power the
# retriever-side query generation / compression below.
# NOTE(review): ChatOpenAI() reads OPENAI_API_KEY from the environment —
# confirm it is configured in the Space, or this raises at startup.
chat = ChatOpenAI()
# Sentence-embedding model for the vector store. BGE models recommend
# L2-normalized embeddings, hence normalize_embeddings=True; runs on CPU.
embedding_function = HuggingFaceEmbeddings(model_name = "BAAI/bge-large-en-v1.5",model_kwargs={'device': 'cpu'},encode_kwargs={"normalize_embeddings": True})
def add_docs(path):
    """Load a PDF, split it into overlapping chunks, and index it in Chroma.

    Parameters
    ----------
    path : str or file-like
        Path to the PDF. Gradio's ``File`` component may pass a tempfile
        wrapper rather than a plain string, so both are accepted.

    Returns
    -------
    Chroma
        The populated vector store, persisted under
        ``output/general_knowledge`` so ``answer_query`` can reopen it.
    """
    # Gradio can hand over a tempfile wrapper; PyPDFLoader needs a real path.
    file_path = getattr(path, "name", path)
    loader = PyPDFLoader(file_path=file_path)
    # 500-char chunks with 100-char overlap keep retrieval granular while
    # preserving context across chunk boundaries.
    docs = loader.load_and_split(
        text_splitter=RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len,
            is_separator_regex=False,
        )
    )
    # Build and persist the collection (the original aliased Chroma through a
    # local variable for no reason — call the classmethod directly).
    db = Chroma.from_documents(
        documents=docs,
        embedding=embedding_function,
        persist_directory="output/general_knowledge",
    )
    return db
def answer_query(message, chat_history):
    """Answer a question against the indexed PDF via compressed retrieval.

    Parameters
    ----------
    message : str
        The user's question.
    chat_history : list[tuple[str, str]]
        Gradio chatbot history; the new (question, answer) pair is appended
        in place.

    Returns
    -------
    tuple[str, list]
        An empty string (clears the input textbox) and the updated history.
    """
    # Reopen the collection persisted by add_docs().
    db = Chroma(persist_directory="output/general_knowledge",
                embedding_function=embedding_function)
    base_retriever = db.as_retriever()
    # MultiQueryRetriever rephrases the question several ways to widen recall;
    # LLMChainExtractor then trims each hit down to the relevant passages.
    mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=chat)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=LLMChainExtractor.from_llm(chat),
        base_retriever=mq_retriever,
    )
    matched_docs = compression_retriever.get_relevant_documents(query=message)
    # join() replaces the original quadratic `+=` loop; each chunk still ends
    # with a blank-line separator, exactly as before.
    context = "".join(doc.page_content + "\n\n" for doc in matched_docs)

    # "emtpy" typo in the original prompt fixed ("empty").
    template = """
Answer the following question only by using the context given below in the triple backticks, do not use any other information to answer the question.
If you can't answer the given question with the given context, you can return an empty string ('')
Context: ```{context}```
----------------------------
Question: {query}
----------------------------
Answer: """
    human_message_prompt = HumanMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(query=message, context=context)
    response = chat(messages=prompt.to_messages()).content
    chat_history.append((message, response))
    # First return value clears the question textbox in the UI.
    return "", chat_history
# --- Gradio UI: a single PDF upload feeds the vector store; the textbox
# --- under the chatbot submits questions against it.
with gr.Blocks() as demo:
    gr.HTML("<h1 align = 'center'>Smart Assistant</h1>")
    gr.HTML("<h2 align = 'center'>Upload any PDF and ask your questions.</h2>")

    with gr.Row():
        pdf_input = gr.File(label='Upload a PDF', file_types=['.pdf'], file_count='single')

    chat_window = gr.Chatbot()
    question_box = gr.Textbox(label="Enter your question here")

    # Index the document as soon as it is uploaded.
    pdf_input.upload(add_docs, pdf_input)
    # Submitting a question clears the box and appends to the chat history.
    question_box.submit(answer_query, [question_box, chat_window], [question_box, chat_window])

if __name__ == "__main__":
    demo.launch()