| from langchain.embeddings.openai import OpenAIEmbeddings |
| from langchain.text_splitter import CharacterTextSplitter |
| from langchain.vectorstores.faiss import FAISS |
| from langchain.chains import VectorDBQAWithSourcesChain |
| from langchain import OpenAI |
|
|
| import openai |
| import os |
|
|
| import gradio as gr |
| from pathlib import Path |
| import pdfplumber |
|
|
# The deployment stores the OpenAI key under the custom secret name "openaiapi",
# while the OpenAI/LangChain clients read the standard OPENAI_API_KEY variable.
# Copy it across only when the secret is actually present: assigning None into
# os.environ raises TypeError at import time, and an OPENAI_API_KEY that is
# already set in the environment should keep working on its own.
_api_key = os.environ.get("openaiapi")
if _api_key:
    os.environ["OPENAI_API_KEY"] = _api_key
|
|
|
|
# Marker placed between page-sized pieces; CharacterTextSplitter splits on it.
_PAGE_SEPARATOR = '\n\nNEW PAGE\n\n'
# Upper bound, in characters, for any single piece handed to the splitter.
_MAX_CHUNK_CHARS = 2048


def _page_pieces(text, limit=_MAX_CHUNK_CHARS):
    """Cut ``text`` into the fewest near-equal slices of at most ``limit`` characters."""
    size = len(text)
    if size == 0:
        return []
    count = -(-size // limit)  # ceiling division: number of slices needed
    # Integer boundaries i*size//count give slices of at most ceil(size/count)
    # characters; for two slices the cut falls at size//2.
    bounds = [i * size // count for i in range(count + 1)]
    return [text[lo:hi] for lo, hi in zip(bounds, bounds[1:])]


def get_info(pdf_obj, query):
    """Answer ``query`` from the text of an uploaded PDF.

    The PDF text is extracted page by page, cut into pieces of at most
    ``_MAX_CHUNK_CHARS`` characters, embedded with ``OpenAIEmbeddings`` into a
    FAISS index, and queried through a "stuff" ``VectorDBQAWithSourcesChain``.

    Args:
        pdf_obj: The uploaded file: an object exposing the file path as
            ``.name``, a plain path, or a list of these (only the first
            entry is used).
        query: The question to ask about the document.

    Returns:
        The chain's ``'answer'`` string, or a short user-facing message when
        no file or question was supplied or the PDF yields no text.
    """
    if isinstance(pdf_obj, list):
        pdf_obj = pdf_obj[0] if pdf_obj else None
    if pdf_obj is None:
        return "Please upload a PDF file."
    if not query or not query.strip():
        return "Please enter a question."

    # Accept either a temp-file wrapper (path in .name) or a bare path.
    file_path = Path(getattr(pdf_obj, "name", pdf_obj))

    pieces = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            # Depending on the pdfplumber version, a page without a text layer
            # may give None rather than ''; treat both as "no text".
            pieces.extend(_page_pieces(page.extract_text() or ""))

    text_splitter = CharacterTextSplitter(
        chunk_size=_MAX_CHUNK_CHARS,
        chunk_overlap=0,
        separator=_PAGE_SEPARATOR,
    )
    # Joining (instead of appending a separator after every piece) avoids a
    # dangling "NEW PAGE" marker being left on the final chunk after strip().
    texts = text_splitter.split_text(_PAGE_SEPARATOR.join(pieces).strip())
    if not texts:
        return "No extractable text was found in the PDF."

    embeddings = OpenAIEmbeddings()
    # Tag each chunk with a source id through the public `metadatas` argument
    # rather than mutating the docstore's private dictionary afterwards.
    metadatas = [{'source': f"{i}-pl"} for i in range(len(texts))]
    docsearch = FAISS.from_texts(texts, embeddings, metadatas=metadatas)

    chain = VectorDBQAWithSourcesChain.from_chain_type(
        OpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
    )

    return chain({"question": query}, return_only_outputs=True)['answer']
|
|
|
|
# Two-column layout: the left column collects the PDF and the question,
# the right column shows the answer returned by get_info.
with gr.Blocks() as demo:
    gr.Markdown("# Legal AI Q/A Bot😄")

    with gr.Row():
        with gr.Column():
            uploaded_file = gr.File(
                label="Upload a PDF file",
                file_count="single",
                type="file",
            )
            input_1 = gr.Textbox(lines=1, label="Search or Ask a ques?")
            convert_button = gr.Button(variant="primary")

        with gr.Column():
            output_1 = gr.Textbox(label="Output")

    # Clicking the button sends (file, question) to get_info and writes the
    # returned string into the output box.
    convert_button.click(
        fn=get_info,
        inputs=[uploaded_file, input_1],
        outputs=[output_1],
    )

demo.launch()