Spaces:
Runtime error
Runtime error
| from pypdf import PdfReader | |
| import torch | |
| import PyPDF2 | |
| from io import BytesIO | |
| from langchain.prompts import PromptTemplate | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.chains import RetrievalQA | |
| import gradio as gr | |
| import time | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.llms.huggingface_pipeline import HuggingFacePipeline | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig | |
| from langchain.document_loaders import PyPDFDirectoryLoader | |
| CHUNK_SIZE = 1000 | |
| # Using HuggingFaceEmbeddings with the chosen embedding model | |
| embeddings = HuggingFaceEmbeddings( | |
| model_name="sentence-transformers/all-mpnet-base-v2",model_kwargs = {"device": "cuda"}) | |
| # transformer model configuration | |
| quant_config = BitsAndBytesConfig( | |
| bnb_4bit_compute_dtype=torch.bfloat16 | |
| ) | |
| def load_llm(): | |
| model_id = "Deci/DeciLM-6b-instruct" | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| model = AutoModelForCausalLM.from_pretrained(model_id, | |
| trust_remote_code=True, | |
| device_map = "auto", | |
| quantization_config=quant_config) | |
| pipe = pipeline("text-generation", | |
| model=model, | |
| tokenizer=tokenizer, | |
| temperature=0, | |
| num_beams=5, | |
| no_repeat_ngram_size=4, | |
| early_stopping=True, | |
| max_new_tokens=50, | |
| ) | |
| llm = HuggingFacePipeline(pipeline=pipe) | |
| return llm | |
| def add_text(history, text): | |
| if not text: | |
| raise gr.Error('Enter text') | |
| history = history + [(text, '')] | |
| return history | |
| def upload_file(file): | |
| # file_path = [file.name for file in files] | |
| print(type(file)) | |
| return file | |
| def process_file(files): | |
| # loader = PyPDFLoader(file_path= file.name) | |
| # document = loader.load() | |
| pdf_text = "" | |
| for file in files: | |
| # pdf_stream = BytesIO(file.name.content) | |
| pdf = PyPDF2.PdfReader(file.name) | |
| for page in pdf.pages: | |
| pdf_text += page.extract_text() | |
| # split into smaller chunks | |
| text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=200) | |
| splits = text_splitter.create_documents([pdf_text]) | |
| # create a FAISS vector store db | |
| # embedd the chunks and store in the db | |
| vectorstore_db = FAISS.from_documents(splits, embeddings) | |
| #create a custom prompt | |
| custom_prompt_template = """Given the uploaded files, generate a pecise answer to the question asked by the user. | |
| If you don't know the answer, just say that you don't know, don't try to make up an answer. | |
| Context= {context} | |
| History = {history} | |
| Question= {question} | |
| Helpful Answer: | |
| """ | |
| prompt = PromptTemplate(template=custom_prompt_template, input_variables=["question", "context", "history"]) | |
| # set QA chain with memory | |
| qa_chain_with_memory = RetrievalQA.from_chain_type(llm=load_llm(), | |
| chain_type='stuff', | |
| return_source_documents=True, | |
| retriever=vectorstore_db.as_retriever(), | |
| chain_type_kwargs={"verbose": True, | |
| "prompt": prompt, | |
| "memory": ConversationBufferMemory( | |
| input_key="question", | |
| memory_key="history", | |
| return_messages=True) }) | |
| # get answers | |
| return qa_chain_with_memory | |
| def generate_bot_response(history,query, btn): | |
| if not btn: | |
| raise gr.Error(message='Upload a PDF') | |
| qa_chain_with_memory = process_file(btn) | |
| bot_response = qa_chain_with_memory({"query": query}) | |
| # return bot_response["result"] | |
| for char in bot_response['result']: | |
| history[-1][-1] += char | |
| time.sleep(0.05) | |
| yield history,'' | |
| with gr.Blocks() as demo: | |
| with gr.Row(): | |
| with gr.Row(): | |
| chatbot = gr.Chatbot(label="DeciLM-6b-instruct bot", value=[], elem_id='chatbot') | |
| with gr.Row(): | |
| file_output = gr.File(label="Your PDFs") | |
| with gr.Column(): | |
| btn = gr.UploadButton("📁 Upload a PDF(s)", file_types=[".pdf"], file_count="multiple") | |
| with gr.Column(): | |
| with gr.Column(): | |
| txt = gr.Text(show_label=False, placeholder="Enter question") | |
| with gr.Column(): | |
| submit_btn = gr.Button('Ask') | |
| # Event handler for uploading a PDF | |
| btn.upload(fn=upload_file, inputs=[btn], outputs=[file_output]) | |
| submit_btn.click( | |
| fn= add_text, | |
| inputs=[chatbot, txt], | |
| outputs=[chatbot], | |
| queue=False | |
| ).success( | |
| fn=generate_bot_response, | |
| inputs=[chatbot, txt, btn], | |
| outputs=[chatbot, txt] | |
| ).success( | |
| fn=upload_file, | |
| inputs=[btn], | |
| outputs=[file_output] | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() |