binqiangliu committed on
Commit
359ae65
·
1 Parent(s): 6f96b5e

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +1 -7
  2. app.py +151 -0
  3. requirements.txt +7 -0
  4. valuation.pdf +0 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
  title: NewPDFChatbot
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.34.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: NewPDFChatbot
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 3.34.0
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #完整代码:https://www.analyticsvidhya.com/blog/2023/05/build-a-chatgpt-for-pdfs-with-langchain/
2
+
3
+ #import csv
4
+ import gradio as gr
5
+ #在Gradio deploy到Huggingface的时候遇到问题(Traceback (most recent call last): File "/home/user/app/fullcode.py", line 5, in <module> from langchain.embeddings.openai import OpenAIEmbeddings ModuleNotFoundError: No module named 'langchain'),所以尝试增加下面的代码import langchain
6
+ import langchain
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.text_splitter import CharacterTextSplitter
9
+ from langchain.vectorstores import Chroma
10
+
11
+ from langchain.chains import ConversationalRetrievalChain
12
+ from langchain.chat_models import ChatOpenAI
13
+
14
+ from langchain.document_loaders import PyPDFLoader
15
+ import os
16
+
17
+ import fitz
18
+ from PIL import Image
19
+
20
+ # Global variables
21
# Module-level state shared by the Gradio callbacks below.
# COUNT: stays 0 until generate_response has built the chain, then is incremented.
COUNT = 0
# N: page index of the most recent source document; render_file shows this page.
N = 0
# Running list of (question, answer) pairs passed back to the chain on each call.
chat_history = []
# Placeholder until generate_response replaces it with the result of process_file.
chain = ''

# Textbox update that re-opens the API key field for editing.
enable_box = gr.Textbox.update(
    value=None,
    placeholder='Upload your OpenAI API key',
    interactive=True,
)
# Textbox update that locks the API key field and shows a confirmation message.
disable_box = gr.Textbox.update(
    value='OpenAI API key is Set',
    interactive=False,
)
27
+
28
+ # Function to set the OpenAI API key
29
def set_apikey(api_key):
    """Store the submitted OpenAI API key in the process environment.

    Args:
        api_key: Text submitted from the API key textbox.

    Returns:
        The module-level ``disable_box`` update, which locks the textbox and
        replaces its content with a confirmation message.

    Raises:
        gr.Error: If the submitted key is empty or only whitespace.
    """
    key = (api_key or '').strip()
    # Reject blank input: otherwise an empty variable would be stored and the
    # UI would claim the key is set.
    if not key:
        raise gr.Error('Upload your OpenAI API key')
    # NOTE: os.environ is process-wide, so the key is shared by everything
    # running in this process.
    os.environ['OPENAI_API_KEY'] = key
    return disable_box
32
+
33
+ # Function to enable the API key input box
34
def enable_api_box():
    """Return the ``enable_box`` update that makes the API key textbox editable again."""
    return enable_box
36
+
37
+ # Function to add text to the chat history
38
def add_text(history, text):
    """Append the user's message to the chat history with an empty reply slot.

    Args:
        history: Current chatbot history (a list of message pairs).
        text: The message typed by the user.

    Returns:
        A new list equal to ``history`` plus one ``(text, '')`` pair; the
        input list is not modified.

    Raises:
        gr.Error: If ``text`` is empty.
    """
    if text:
        pending_turn = (text, '')
        return history + [pending_turn]
    raise gr.Error('Enter text')
43
+
44
+ # Function to process the PDF file and create a conversation chain
45
def process_file(file):
    """Build a conversational retrieval chain over an uploaded PDF.

    The PDF at ``file.name`` is loaded with ``PyPDFLoader``, embedded with
    ``OpenAIEmbeddings`` into a Chroma store, and wrapped in a
    ``ConversationalRetrievalChain`` that retrieves a single document per
    question and returns it alongside the answer.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path) is used.

    Returns:
        The configured ``ConversationalRetrievalChain``.

    Raises:
        gr.Error: If ``OPENAI_API_KEY`` is missing or empty in the environment.
    """
    # An empty value is treated the same as a missing one; a bare membership
    # test would let a blank key through and fail later inside the client.
    if not os.environ.get('OPENAI_API_KEY'):
        raise gr.Error('Upload your OpenAI API key')

    documents = PyPDFLoader(file.name).load()
    pdfsearch = Chroma.from_documents(documents, OpenAIEmbeddings())

    return ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0.3),
        # k=1: only the single best-matching document is retrieved.
        retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True,
    )
60
+
61
+ # Function to generate a response based on the chat history and query
62
def generate_response(history, query, btn):
    """Answer ``query`` against the uploaded PDF and stream the reply.

    This is a generator: it yields the chat history once per character of the
    answer so the reply appears incrementally, together with an empty string
    that clears the input textbox.

    Args:
        history: Chatbot history whose last entry is the pending turn; its
            last element is extended in place, so that entry must be mutable.
        query: The user's question.
        btn: The uploaded PDF file object (falsy when nothing was uploaded).

    Yields:
        ``(history, '')`` after each appended character.

    Raises:
        gr.Error: If no PDF has been uploaded.

    Side effects:
        Updates the module globals ``COUNT``, ``chain``, ``chat_history`` and
        ``N`` (the page that ``render_file`` displays).
    """
    global COUNT, N, chat_history, chain

    if not btn:
        raise gr.Error(message='Upload a PDF')
    if COUNT == 0:
        # NOTE(review): the chain is built only on the first call, so a PDF
        # uploaded afterwards is not re-indexed.
        chain = process_file(btn)
        COUNT += 1

    result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
    answer = result["answer"]
    chat_history += [(query, answer)]

    # Read the page number from the document's metadata directly instead of
    # relying on the iteration order of the Document model, and keep the
    # previous page when the retriever returned nothing.
    sources = result.get('source_documents') or []
    if sources:
        N = sources[0].metadata.get('page', N)

    for char in answer:
        history[-1][-1] += char
        yield history, ''
78
+
79
+ # Function to render a specific page of a PDF file as an image
80
def render_file(file):
    """Render page ``N`` of the uploaded PDF as a PIL image.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path) is used.

    Returns:
        An RGB ``PIL.Image`` of the page rendered at 300 DPI.
    """
    doc = fitz.open(file.name)
    try:
        # N may come from a previously discussed, longer PDF; clamp it so a
        # shorter document does not raise IndexError.
        page_index = min(N, len(doc) - 1)
        page = doc[page_index]
        # PDF user space is 72 units per inch, so 300/72 scales to 300 DPI.
        pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
        return Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    finally:
        # Always release the document handle, even if rendering fails.
        doc.close()
88
+
89
+ # Gradio application setup
90
with gr.Blocks() as demo:
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed; confirm it against the intended layout.

    with gr.Column():
        # Top row: API key entry and a button to re-enable editing it.
        with gr.Row():
            with gr.Column(scale=0.8):
                api_key = gr.Textbox(
                    placeholder='Enter OpenAI API key',
                    show_label=False,
                    interactive=True
                ).style(container=False)
            with gr.Column(scale=0.2):
                change_api_key = gr.Button('Change Key')

        # Middle row: chat window next to the rendered PDF page.
        with gr.Row():
            chatbot = gr.Chatbot(value=[], elem_id='chatbot').style(height=650)
            show_img = gr.Image(label='Upload PDF', tool='select').style(height=680)

    # Bottom row: question box, submit button and PDF upload button.
    with gr.Row():
        with gr.Column(scale=0.70):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter"
            ).style(container=False)

        with gr.Column(scale=0.15):
            submit_btn = gr.Button('Submit')

        with gr.Column(scale=0.15):
            btn = gr.UploadButton("📁 Upload a PDF", file_types=[".pdf"]).style()

    # Set up event handlers

    # Submitting the key stores it and locks the textbox.
    api_key.submit(fn=set_apikey, inputs=[api_key], outputs=[api_key])

    # "Change Key" makes the textbox editable again.
    change_api_key.click(fn=enable_api_box, outputs=[api_key])

    # Uploading a PDF immediately shows a page preview.
    btn.upload(fn=render_file, inputs=[btn], outputs=[show_img])

    # Asking a question: record the user's turn, stream the answer, then show
    # the page the answer came from. The placeholder tells users to press
    # Enter, so Enter in the textbox triggers the same chain as the button.
    for ask_event in (submit_btn.click, txt.submit):
        ask_event(
            fn=add_text,
            inputs=[chatbot, txt],
            outputs=[chatbot],
            queue=False
        ).success(
            fn=generate_response,
            inputs=[chatbot, txt, btn],
            outputs=[chatbot, txt]
        ).success(
            fn=render_file,
            inputs=[btn],
            outputs=[show_img]
        )

demo.queue()
if __name__ == "__main__":
    # External link sharing is temporarily turned off while testing
    # deployment of the app to Hugging Face.
    # demo.launch(share=True)
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio == 3.34.0
2
+ openai == 0.27.8
3
+ langchain == 0.0.195
4
+ chromadb == 0.3.26
5
+ tiktoken == 0.4.0
6
+ pypdf == 3.9.1
7
+ pymupdf == 1.22.3
valuation.pdf ADDED
Binary file (278 kB). View file