lekkalar commited on
Commit
031b10b
·
0 Parent(s):

Duplicate from lekkalar/mortgage-mate-ask-moli

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +111 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mortgage Mate - Ask Moli
3
+ emoji: 👁
4
+ colorFrom: indigo
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 3.33.1
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: lekkalar/mortgage-mate-ask-moli
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+ import pandas as pd
5
+
6
+
7
+ from langchain.document_loaders import OnlinePDFLoader #for loading the pdf
8
+ from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
9
+ from langchain.vectorstores import Chroma # for the vectorization part
10
+ from langchain.chains import RetrievalQA # for conversing with chatGPT
11
+ from langchain.chat_models import ChatOpenAI # the LLM model we'll use (ChatGPT)
12
+ from langchain import PromptTemplate # to format the response
13
+
14
+
15
def load_pdf_and_generate_embeddings(pdf_doc, open_ai_key, relevant_pages):
    """Load a PDF, embed the requested pages and build the global QA chain.

    Args:
        pdf_doc: Gradio file upload; only its ``.name`` (local path) is used.
        open_ai_key: OpenAI API key string, or None when the user supplied none.
        relevant_pages: comma-separated 1-based page numbers (e.g. "3,4,5").
            A blank string — or numbers that all fall outside the PDF — means
            the entire document is used.

    Returns:
        "Ready" once the RetrievalQA chain is configured, otherwise a message
        asking the user for an API key.
    """
    # Bug fix: the original tested `openai_key` — the module-level Gradio
    # Textbox component, which is always non-None — instead of the
    # `open_ai_key` argument, so the missing-key message was unreachable.
    if open_ai_key is None:
        return "Please provide an OpenAI gpt-4 API key"

    os.environ['OPENAI_API_KEY'] = open_ai_key

    # Load the pdf file and split it into per-page documents.
    loader = OnlinePDFLoader(pdf_doc.name)
    pages = loader.load_and_split()

    # OpenAIEmbeddings is responsible for generating embeddings for text.
    embeddings = OpenAIEmbeddings()

    # Keep only the user-requested pages (1-based, comma separated).
    pages_to_be_loaded = []
    if relevant_pages:
        for page_number in relevant_pages.split(","):
            page_number = page_number.strip()  # tolerate "3, 4, 5"
            if page_number.isdigit():
                page_index = int(page_number) - 1
                if 0 <= page_index < len(pages):
                    pages_to_be_loaded.append(pages[page_index])

    # In the scenario where none of the page numbers supplied exist in the
    # PDF (or none were supplied), revert to using the entire PDF.
    if not pages_to_be_loaded:
        pages_to_be_loaded = pages.copy()

    # Chroma builds the vector store from the documents and the embeddings.
    vectordb = Chroma.from_documents(pages_to_be_loaded, embedding=embeddings)

    # Stored globally so answer_query() can reach the configured chain.
    global pdf_qa

    # Configuring the Prompt Template is the key to getting the desired
    # response in the desired format.
    prompt_template = """Use the following pieces of context to answer the question at the end. If you encounter a date, return it in mm/dd/yyyy format. If there is a Preface section in the document, extract the chapter# and the short description from the Preface section of the document. Chapter numbers are listed to the left in Preface and always start with an alphabet, for example A1-1

{context}

Question: {question}
Return the answer. Where applicable, break the answer into bullet points. When the sentences are long, try and break them into sub sections and include all the information and do not skip any information. If there is an exception to the answer, please do include it in a 'Note:' section. If the document has a Preface section, include a 'For additional details refer to' section with chapter# and a short description. List only the chapters that contain information or skip this section altogether. Do not use page numbers as chapter numbers as they are different. If additional information is found in multiple pages within the same chapter, list the chapter only once. If chapter information cannot be extracted, include section# from the document"""

    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    # "stuff" chain: retrieved pages are stuffed directly into the prompt;
    # k=4 limits retrieval to the four most similar chunks.
    pdf_qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4"),
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_kwargs={"k": 4}),
        chain_type_kwargs={"prompt": PROMPT},
        return_source_documents=False,
    )

    return "Ready"
63
+
64
def answer_query(query):
    """Answer *query* using the globally configured RetrievalQA chain.

    Requires load_pdf_and_generate_embeddings() to have run first, since
    that is what assigns the module-level `pdf_qa` chain.
    """
    return pdf_qa.run(query)
67
+
68
+
69
# Page styling: cap the main column width and center it horizontally.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Static HTML header rendered at the top of the app.
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Ask Moli - Chatbot for complex documents</h1>
<p style="text-align: center;">'Load a File', click the "Upload file to Moli" button, <br />
wait for the Status to show Ready. Type your question, click on "Ask Moli" <br />
The app is built on GPT-4</p>
</div>
"""

# Build the Gradio UI. NOTE(review): this layout was reconstructed from a
# whitespace-mangled diff — confirm the nesting matches the deployed Space.
with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            # NOTE(review): `openai_key` is read as a global by the original
            # load_pdf_and_generate_embeddings guard — do not rename.
            openai_key = gr.Textbox(label="Your GPT-4 OpenAI API key", type="password")
            pdf_doc = gr.File(label="Load a file",file_types=['.pdf'],type='file')
            relevant_pages = gr.Textbox(label="*Optional - Leave this field blank to use the entire PDF or provide comma separated page numbers like 3,4,5")

        with gr.Row():
            # Read-only status box updated by the upload button's callback.
            status = gr.Textbox(label="Status", placeholder="", interactive=False)
            load_pdf = gr.Button("Upload file to Moli").style(full_width=False)

        with gr.Row():
            # `input` shadows the builtin of the same name — kept as-is to
            # preserve the module-level identifier.
            input = gr.Textbox(label="Type in your question")
            output = gr.Textbox(label="Answer")
            submit_query = gr.Button("Ask Moli").style(full_width=False)

    # Upload button: builds embeddings + QA chain, reports into `status`.
    load_pdf.click(load_pdf_and_generate_embeddings, inputs=[pdf_doc, openai_key, relevant_pages], outputs=status)

    # Ask button: routes the question through the QA chain into `output`.
    submit_query.click(answer_query,input,output)


demo.launch()
108
+
109
+
110
+
111
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openai
2
+ tiktoken
3
+ chromadb
4
+ langchain
5
+ unstructured
6
+ unstructured[local-inference]