danieldager commited on
Commit
b28439c
·
verified ·
1 Parent(s): b5f0bd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -49
app.py CHANGED
@@ -1,63 +1,214 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
3
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
9
 
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
 
25
 
26
- messages.append({"role": "user", "content": message})
 
27
 
28
- response = ""
 
 
 
 
 
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
 
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- if __name__ == "__main__":
63
- demo.launch()
 
1
+ """
2
+ This code uses the PyMuPDF package.
3
+
4
+ PyMuPDF is AGPL licensed, please refer to:
5
+ https://pymupdf.readthedocs.io/en/latest/about.html#license-and-copyright
6
+
7
+ https://github.com/pymupdf/RAG
8
+ https://medium.com/@pymupdf/building-a-rag-chatbot-gui-with-the-chatgpt-api-and-pymupdf-9ea8c7fc4ab5
9
 
10
  """
11
+
12
  """
13
+ Code below is based on an implementation by Sunil Kumar Dash:
14
 
15
+ MIT License
16
 
17
+ Copyright (c) 2023 Sunil Kumar Dash
 
 
 
 
 
 
 
 
18
 
19
+ Permission is hereby granted, free of charge, to any person obtaining a copy
20
+ of this software and associated documentation files (the "Software"), to deal
21
+ in the Software without restriction, including without limitation the rights
22
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23
+ copies of the Software, and to permit persons to whom the Software is
24
+ furnished to do so, subject to the following conditions:
25
 
26
+ The above copyright notice and this permission notice shall be included in all
27
+ copies or substantial portions of the Software.
28
 
29
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35
+ SOFTWARE.
36
+ """
37
 
38
+ import os, re
39
+ import pymupdf
40
+ import gradio as gr
41
+ from PIL import Image
42
+ from typing import Any
43
+ from dotenv import load_dotenv
 
 
44
 
45
+ from langchain_openai import ChatOpenAI
46
+ from langchain_openai import OpenAIEmbeddings
47
+ from langchain_community.vectorstores import Chroma
48
+ from langchain.chains import ConversationalRetrievalChain
49
+ from langchain_community.document_loaders import PyMuPDFLoader
50
 
51
+
52
+
53
# Load OPENAI_API_KEY from a local .env file (if present) into the environment.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Two interchangeable states for the API-key textbox: editable before a key
# has been submitted, locked afterwards (swapped in by set_apikey).
enable_box = gr.Textbox(
    value=None, placeholder="Upload your OpenAI API key", interactive=True
)
disable_box = gr.Textbox(value="OpenAI API key is set", interactive=False)
60
+
61
def set_apikey(api_key: str):
    """Store the user-supplied OpenAI API key on the shared app instance.

    Args:
        api_key: key pasted into the textbox (stripped of stray whitespace,
            which commonly appears when keys are copy-pasted).

    Returns:
        The locked "key is set" textbox, so the UI reflects the new state.

    Raises:
        gr.Error: when the submitted key is empty or whitespace-only —
            the original silently reported success for an empty key.
    """
    if not api_key or not api_key.strip():
        raise gr.Error("enter an OpenAI API key")
    print("API Key set")
    # NOTE(review): if a chain was already built (app.count > 0) the new key
    # is only picked up after the next PDF upload — confirm this is intended.
    app.OPENAI_API_KEY = api_key.strip()
    return disable_box
65
+
66
def enable_api_box():
    """Return the editable API-key textbox, re-enabling key entry in the UI."""
    return enable_box
68
+
69
def add_text(history, text: str):
    """Append the user's message to the chat history with an empty bot slot.

    Args:
        history: list of (user, bot) message pairs from the Chatbot component.
        text: the user's new message.

    Returns:
        A new history list with ``(text, "")`` appended; the bot half is
        filled in later by get_response.

    Raises:
        gr.Error: when the text is empty or whitespace-only (the original
            guard let whitespace-only messages through).
    """
    if not text or not text.strip():
        raise gr.Error("enter text")
    return history + [(text, "")]
74
+
75
class my_app:
    """Shared application state: the OpenAI key and a lazily-built
    ConversationalRetrievalChain over the currently uploaded PDF.

    Attributes:
        OPENAI_API_KEY: key used for embeddings and chat completions.
        chain: cached ConversationalRetrievalChain, built on first call.
        chat_history: list of (question, answer) tuples fed back to the chain.
        N: page index of the most recently cited source document.
        count: 0 until a chain has been built for the current document.
    """

    def __init__(self, OPENAI_API_KEY: str = None) -> None:
        self.OPENAI_API_KEY: str = OPENAI_API_KEY
        self.chain = None
        self.chat_history: list = []
        self.N: int = 0
        self.count: int = 0

    def __call__(self, file: str) -> Any:
        """Return the chain for `file`, building it only on the first call."""
        if self.count == 0:
            self.chain = self.build_chain(file)
            self.count += 1
        return self.chain

    def process_file(self, file: str):
        """Load an uploaded PDF into LangChain documents.

        Args:
            file: a Gradio upload object exposing a ``.name`` path.

        Returns:
            (documents, file_name): the loaded pages and the bare file name,
            later used as the Chroma collection name.
        """
        loader = PyMuPDFLoader(file.name)
        documents = loader.load()
        # os.path.basename replaces the original regex + bare `except:`,
        # whose fallback passed the upload *object* (not file.name) to
        # basename and would have raised TypeError.
        file_name = os.path.basename(file.name)
        return documents, file_name

    def build_chain(self, file: str):
        """Build a ConversationalRetrievalChain over the uploaded PDF."""
        documents, file_name = self.process_file(file)
        # Embed the document pages and index them in a Chroma collection.
        embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
        pdfsearch = Chroma.from_documents(
            documents,
            embeddings,
            collection_name=file_name,
        )
        # k=1: retrieve only the single best-matching page per question.
        chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
            retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
        )
        return chain
116
+
117
+
118
def get_response(history, query, file):
    """Answer `query` against the uploaded PDF, streaming the reply.

    Side effects: appends (query, answer) to app.chat_history and records
    the cited page index in app.N so render_file can display that page.
    Yields the growing history one character at a time (typing effect).

    Raises:
        gr.Error: when no PDF has been uploaded yet.
    """
    if not file:
        raise gr.Error(message="Upload a PDF")
    # Build (or reuse) the retrieval chain for the uploaded file.
    chain = app(file)
    result = chain(
        {"question": query, "chat_history": app.chat_history}, return_only_outputs=True
    )
    app.chat_history += [(query, result["answer"])]
    # Extract the page number of the top source document.
    # NOTE(review): this relies on the ordering of the Document's items —
    # confirm against the installed langchain version.
    app.N = list(result["source_documents"][0])[1][1]["page"]
    # Gradio passes history rows back as mutable lists, so the in-place
    # += on history[-1][-1] works even though add_text appended tuples.
    for char in result["answer"]:
        history[-1][-1] += char
        yield history, ""
130
+
131
+
132
def render_file(file):
    """Render the page last cited by get_response (app.N) as a PIL image.

    Args:
        file: a Gradio upload object exposing a ``.name`` path.

    Returns:
        A PIL.Image of page app.N rendered at 150 DPI.
    """
    # Use the document as a context manager so it is closed after rendering
    # (the original leaked the open file handle).
    with pymupdf.open(file.name) as doc:
        page = doc[app.N]
        # Render the page as a PNG image with a resolution of 150 DPI
        pix = page.get_pixmap(dpi=150)
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    return image
139
+
140
+
141
def purge_chat_and_render_first(file):
    """Reset chat state for a newly uploaded PDF and render its first page.

    Args:
        file: a Gradio upload object exposing a ``.name`` path.

    Returns:
        (image, []): a PIL image of page 0 and an empty history for the
        Chatbot component.
    """
    print("purge_chat_and_render_first")
    # Purge the previous session so the bot has no concept of old documents.
    # Also reset the displayed-page index: the original left app.N pointing
    # at a page of the previous document, which could be out of range here.
    app.chat_history = []
    app.count = 0
    app.N = 0

    # Use PyMuPDF to render the first page at 150 DPI; the context manager
    # closes the document afterwards (the original leaked the handle).
    with pymupdf.open(file.name) as doc:
        pix = doc[0].get_pixmap(dpi=150)
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    return image, []
154
+
155
# Single shared application-state instance used by all event handlers.
app = my_app()
156
+
157
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Row():

            with gr.Column(scale=1):
                # API-key entry; swapped for a locked box once submitted.
                api_key = gr.Textbox(
                    placeholder="Enter OpenAI API key and hit <RETURN>",
                    show_label=False,
                    interactive=True
                )

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Row():
                    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Enter text and press submit",
                        scale=2
                    )
                    submit_btn = gr.Button("submit", scale=1)

            with gr.Column(scale=1):
                with gr.Row():
                    # Displays the PDF page cited by the latest answer.
                    show_img = gr.Image(label="Upload PDF")
                with gr.Row():
                    btn = gr.UploadButton("📁 upload a PDF", file_types=[".pdf"])

    # Hitting <RETURN> stores the key on the app instance and locks the box.
    api_key.submit(
        fn=set_apikey,
        inputs=[api_key],
        outputs=[
            api_key,
        ],
    )

    # A new upload purges the chat session and shows the first page.
    btn.upload(
        fn=purge_chat_and_render_first,
        inputs=[btn],
        outputs=[show_img, chatbot],
    )

    # submit: echo the user text, then stream the answer, then show the
    # cited page — each step runs only if the previous one succeeded.
    submit_btn.click(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[
            chatbot,
        ],
        queue=False,
    ).success(
        fn=get_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt]
    ).success(
        fn=render_file, inputs=[btn], outputs=[show_img]
    )
212
 
213
+ demo.queue()
214
+ demo.launch()