NAB1108 commited on
Commit
290f4f8
·
1 Parent(s): f70825a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from langchain.document_loaders import OnlinePDFLoader
4
+
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+
7
+ from langchain.llms import HuggingFaceHub
8
+
9
+ from langchain.embeddings import HuggingFaceHubEmbeddings
10
+
11
+ from langchain.vectorstores import Chroma
12
+
13
+ from langchain.chains import RetrievalQA
14
+
15
+ import os
16
+ import panel as pn
17
+ import tempfile
18
+ import openai
19
+ import json
20
+ import re
21
+ from langchain.docstore.document import Document
22
+ from langchain.document_loaders import TextLoader
23
+ from langchain.text_splitter import CharacterTextSplitter
24
+ from langchain.embeddings.openai import OpenAIEmbeddings
25
+ from langchain.vectorstores import FAISS
26
+ from langchain.chains import RetrievalQA
27
+ from langchain.llms import OpenAI
28
+ from langchain.document_loaders import PyPDFLoader
29
+ from langchain.indexes import VectorstoreIndexCreator
30
+ import tempfile
31
+
32
# --- Credentials & shared conversation state -------------------------------
# NOTE(security): the original committed a literal OpenAI API key here.
# Never hard-code secrets — supply them via the environment, e.g.
#   export OPENAI_API_KEY=sk-...
# A leaked key must be revoked. We preserve the "ensure the env var exists"
# side effect without embedding a secret in source control.
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "")

# Rolling conversation summary shared between infer() calls (declared global
# there); starts as a single space so string concatenation works immediately.
query1 = " "
34
def loading_pdf():
    """Return the status message shown while a PDF is being processed."""
    return "Loading..."
36
+
37
def pdf_changes(pdf_doc, repo_id=None):
    """Build the retrieval-QA chain for an uploaded PDF.

    Args:
        pdf_doc: uploaded file object from the gradio ``File`` widget; its
            ``.name`` attribute is the path of the temp file on disk.
        repo_id: unused remnant of the commented-out HuggingFace-model
            dropdown. Given a default value so the ``load_pdf.click`` wiring
            (which passes only ``pdf_doc``) no longer raises TypeError;
            existing two-argument callers still work.

    Returns:
        ``"Ready"`` once the module-global ``chain`` has been (re)built.

    Side effects:
        Rebinds the global ``chain`` used by ``infer``.
    """
    loader = OnlinePDFLoader(pdf_doc.name)
    documents = loader.load()

    # Chunking parameters used to split the PDF text before embedding.
    separator = "\n"
    chunk_size_limit = 1000
    max_chunk_overlap = 50

    text_splitter = CharacterTextSplitter(
        separator=separator,
        chunk_size=chunk_size_limit,
        chunk_overlap=max_chunk_overlap,
    )
    split_docs = text_splitter.split_documents(documents)

    # Embed the chunks and index them for similarity search.
    embeddings = OpenAIEmbeddings()
    vector_store1 = FAISS.from_documents(split_docs, embeddings)

    # Local imports (as in the original) keep the chat-prompt machinery
    # loaded lazily, only when a PDF is actually processed.
    from langchain.prompts import (
        ChatPromptTemplate,
        SystemMessagePromptTemplate,
        HumanMessagePromptTemplate,
    )

    system_template = """You are a helpful chatbot used by the user to chat with pdf documents. Only answer the questions by using information provided in the context provided to you. If there is no relavant context, tell 'Hmm, I'm not sure'.
----------------
{summaries}"""

    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
    prompt2 = ChatPromptTemplate.from_messages(messages)

    from langchain.chat_models import ChatOpenAI
    from langchain.chains import RetrievalQAWithSourcesChain

    chain_type_kwargs = {"prompt": prompt2}
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=512)  # Modify model_name if you have access to GPT-4

    global chain
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store1.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
    )
    return "Ready"
84
+
85
def add_text(history, text):
    """Append the user's message to the chat history and clear the input box.

    Returns a new history list (the input list is not mutated) paired with an
    empty string that gradio writes back into the question textbox.
    """
    updated = list(history)
    updated.append((text, None))
    return updated, ""
88
+
89
def bot(history):
    """Answer the most recent user message and store the reply in-place.

    The last history entry holds (question, None); its second slot is filled
    with the model's answer before the history is handed back to the Chatbot.
    """
    latest_question = history[-1][0]
    history[-1][1] = infer(latest_question)
    return history
93
+
94
def infer(question):
    """Answer *question* against the loaded PDF and update the running summary.

    Relies on the global ``chain`` built by ``pdf_changes``, and on the global
    ``query1`` transcript, which is re-summarised after each exchange so the
    context passed to the chain stays short.

    Args:
        question: the user's latest message.

    Returns:
        The chain's answer string; a prompt to type a question when the input
        is empty (the original code hit an undefined ``result`` — a NameError
        — on empty input).
    """
    global query1
    # NOTE(security): the original hard-coded an API key here. Credentials
    # must come from the environment instead.
    openai.api_key = os.environ.get("OPENAI_API_KEY", openai.api_key)

    prompt_text = question
    if not prompt_text:
        # Guard against empty input; previously this path raised NameError.
        return "Please type a question first."

    # Extend the transcript with the new turn and query the QA chain.
    query1 = query1 + "\nUser: " + prompt_text + "\nBot: "
    result = chain(query1)
    query1 = query1 + result['answer']

    # Compress the transcript into a short summary for the next turn.
    query1 = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are provided with chat history and latset conversation between user and bot. Summarise the history and latest conversationin minimum most tokens possible. Do not include greetings in the summary like hi, hello, etc."},
            {"role": "user", "content": query1},
        ],
    )["choices"][0]["message"]["content"].replace("'", "")
    return result['answer']
110
+
111
# Page styling and header markup for the gradio UI.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chat with PDF</h1>
<p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF" button, <br />
when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""


with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Load pdf")

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
        submit_btn = gr.Button("Send message")

        # Clicking "Load pdf" builds the QA chain; the status box shows the result.
        load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)

        # Pressing Enter and clicking "Send message" behave identically:
        # append the user's text to the history, then let the bot answer.
        question.submit(add_text, [chatbot, question], [chatbot, question]).then(
            bot, chatbot, chatbot
        )
        submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
            bot, chatbot, chatbot
        )

demo.launch()