"""Gradio app that answers questions about an uploaded PDF.

The PDF text is extracted with pdfplumber, cut into chunks, embedded with
OpenAI embeddings into a FAISS index, and queried through a LangChain
question-answering-with-sources chain.
"""
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.chains import VectorDBQAWithSourcesChain
from langchain import OpenAI
import openai
import os
import gradio as gr
from pathlib import Path
import pdfplumber

# Maximum number of characters per text piece handed to the splitter.
CHUNK_SIZE = 2048
# Marker inserted between pieces; the text splitter splits on it again.
PAGE_SEPARATOR = '\n\nNEW PAGE\n\n'

# The key is supplied through the "openaiapi" environment variable. Only copy
# it when it is actually set: assigning None to os.environ raises TypeError.
_api_key = os.environ.get("openaiapi")
if _api_key is not None:
    os.environ["OPENAI_API_KEY"] = _api_key


def _split_page(text, limit=CHUNK_SIZE):
    """Split *text* into near-equal pieces of at most *limit* characters.

    Text that already fits is returned as a single piece. A text between
    ``limit`` and ``2 * limit`` characters is cut exactly in half
    (first piece ``len(text) // 2`` characters long).
    """
    size = len(text)
    if size <= limit:
        return [text]
    n_parts = -(-size // limit)  # ceiling division
    bounds = [i * size // n_parts for i in range(n_parts + 1)]
    return [text[start:end] for start, end in zip(bounds, bounds[1:])]


def _extract_pieces(file_path):
    """Return the text of every page of the PDF at *file_path* as pieces.

    Pages without extractable text are skipped; long pages are split so no
    piece exceeds ``CHUNK_SIZE`` characters.
    """
    pieces = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            # extract_text() may give back nothing for image-only pages
            # (None in some pdfplumber releases), so normalise to ''.
            page_text = page.extract_text() or ''
            if page_text:
                pieces.extend(_split_page(page_text))
    return pieces


def get_info(pdf_obj, query):
    """Answer *query* using the contents of the uploaded PDF.

    Args:
        pdf_obj: The uploaded file object (anything exposing a ``.name``
            path), or a list of such objects, of which only the first is
            used. ``None`` or an empty list means nothing was uploaded.
        query: The question to ask about the document.

    Returns:
        The ``'answer'`` string produced by the QA chain, or a short
        explanatory message when the file, the question, or the PDF text
        is missing.
    """
    if isinstance(pdf_obj, list):
        pdf_obj = pdf_obj[0] if pdf_obj else None
    if pdf_obj is None:
        return "Please upload a PDF file."
    if query is None or not query.strip():
        return "Please enter a question."

    pieces = _extract_pieces(Path(pdf_obj.name))

    # Join without a trailing separator so the last chunk does not end in a
    # stray "NEW PAGE" marker once the text is stripped and split again.
    output = PAGE_SEPARATOR.join(pieces)
    text_splitter = CharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=0, separator=PAGE_SEPARATOR
    )
    texts = text_splitter.split_text(output.strip())
    if not texts:
        # An index cannot be built from zero texts (e.g. a scanned PDF).
        return "No extractable text was found in the PDF."

    embeddings = OpenAIEmbeddings()
    # Add in a fake source information: the sources chain expects every
    # document to carry a 'source' metadata entry.
    metadatas = [{'source': f"{i}-pl"} for i in range(len(texts))]
    docsearch = FAISS.from_texts(texts, embeddings, metadatas=metadatas)

    chain = VectorDBQAWithSourcesChain.from_chain_type(
        OpenAI(temperature=0), chain_type="stuff", vectorstore=docsearch
    )
    return chain({"question": query}, return_only_outputs=True)['answer']


demo = gr.Blocks()

with demo:
    gr.Markdown("# Legal AI Q/A Bot😄")
    with gr.Row():
        with gr.Column():
            uploaded_file = gr.File(label="Upload a PDF file", file_count="single", type="file")
            # Alternative multi-file input (get_info uses only the first file):
            # uploaded_file = gr.Files(label="Upload a PDF file(s)", type="file")
            input_1 = gr.Textbox(lines=1, label="Search or Ask a ques?")
            convert_button = gr.Button(variant="primary")
        with gr.Column():
            output_1 = gr.components.Textbox(label="Output")

    # Event wiring has to happen inside the Blocks context.
    convert_button.click(fn=get_info, inputs=[uploaded_file, input_1], outputs=[output_1])

# demo.launch(enable_queue=True)
demo.launch()