Lucas committed on
Commit
b9018ab
·
1 Parent(s): fce9f0e

Change to summarization.

Browse files
Files changed (1) hide show
  1. main.py +95 -6
main.py CHANGED
@@ -5,6 +5,9 @@ from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.vectorstores import Chroma
7
  from langchain.document_loaders import PyPDFLoader
 
 
 
8
  import panel as pn
9
  import PyPDF2
10
 
@@ -24,6 +27,8 @@ prompt = pn.widgets.TextEditor(
24
  )
25
  run_button = pn.widgets.Button(name="Run!")
26
 
 
 
27
  select_k = pn.widgets.IntSlider(
28
  name="Number of relevant chunks", start=1, end=5, step=1, value=2
29
  )
@@ -41,6 +46,10 @@ widgets = pn.Row(
41
  ), width=600
42
  )
43
 
 
 
 
 
44
 
45
  def is_valid_pdf(file_path):
46
  try:
@@ -74,12 +83,57 @@ def qa(file, query, chain_type, k):
74
  # create a chain to answer questions
75
 
76
  qa = RetrievalQA.from_chain_type(
77
- llm=OpenAI(temperature=0), chain_type=chain_type, retriever=retriever, return_source_documents=True)
78
  result = qa({"query": query})
79
  print(result['result'])
80
  return result
81
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  convos = [] # store all panel objects in a list
84
 
85
 
@@ -91,11 +145,11 @@ def qa_result(_):
91
 
92
  # save pdf file to a temp file
93
  if file_input.value is not None:
94
- file_input.save("/.cache/temp.pdf")
95
 
96
  prompt_text = prompt.value
97
  if prompt_text:
98
- result = qa(file="/.cache/temp.pdf", query=prompt_text, chain_type=select_chain_type.value,
99
  k=select_k.value)
100
  if result.get('error') is None:
101
  convos.extend([
@@ -123,9 +177,43 @@ def qa_result(_):
123
  pn.state.notifications.error('Missing file.', duration=2000)
124
  return pn.Column(*convos, margin=15, width=575, min_height=400)
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  qa_interactive = pn.panel(
128
- pn.bind(qa_result, run_button),
 
129
  loading_indicator=True,
130
  )
131
 
@@ -137,11 +225,12 @@ pn.Column(
137
  ## \U0001F4D3 Perguntas e respostas com um PDF
138
  (original implementation: @sophiamyang)
139
 
140
- 1) Suba o PDF. 2) Entre com a OpenAI API key. 3) Digite a pergunta e clique "Run".
141
 
142
  """),
143
  pn.Row(file_input, openaikey),
 
144
  output,
145
- widgets
146
 
147
  ).servable()
 
5
  from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.vectorstores import Chroma
7
  from langchain.document_loaders import PyPDFLoader
8
+ from langchain import PromptTemplate
9
+ from langchain.chains.summarize import load_summarize_chain
10
+ import textwrap
11
  import panel as pn
12
  import PyPDF2
13
 
 
27
  )
28
  run_button = pn.widgets.Button(name="Run!")
29
 
30
+ summary_button = pn.widgets.Button(name="Resumo!")
31
+
32
  select_k = pn.widgets.IntSlider(
33
  name="Number of relevant chunks", start=1, end=5, step=1, value=2
34
  )
 
46
  ), width=600
47
  )
48
 
49
+ summary_filed = pn.Row(
50
+ pn.Column(summary_button),
51
+ width=630
52
+ )
53
 
54
  def is_valid_pdf(file_path):
55
  try:
 
83
  # create a chain to answer questions
84
 
85
  qa = RetrievalQA.from_chain_type(
86
+ llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0), chain_type=chain_type, retriever=retriever, return_source_documents=False)
87
  result = qa({"query": query})
88
  print(result['result'])
89
  return result
90
 
91
 
92
def _wrap_paragraphs(text, width=100):
    """Wrap every line of *text* on its own, keeping existing line breaks.

    ``textwrap.fill`` with ``replace_whitespace=False`` treats an embedded
    newline as an ordinary character, so wrapping a multi-line string in one
    call yields lines of uneven length. Wrapping line by line avoids that.
    """
    return '\n'.join(
        textwrap.fill(
            line,
            width=width,
            break_long_words=False,
            replace_whitespace=False,
        )
        for line in text.splitlines()
    )


def summary(file):
    """Summarize a PDF in Portuguese with a map-reduce summarization chain.

    Args:
        file: Path of the PDF to summarize.

    Returns:
        ``{'error': 'Invalid PDF file.'}`` when ``is_valid_pdf(file)`` is
        falsy. Otherwise a dict with two string entries, both wrapped at
        100 columns:

        * ``'summary'`` - the chain's combined ``output_text``;
        * ``'steps'`` - the chain's ``intermediate_steps`` (one summary per
          loaded document - presumably one per PDF page), newline-separated.
    """
    if not is_valid_pdf(file):
        return {'error': 'Invalid PDF file.'}

    # load document
    documents = PyPDFLoader(file).load()

    # Prompt applied to the concatenation of the partial summaries.
    combine_template = (
        "Write a summary of the following in Portuguese in 100 words:\n"
        "\n"
        "{text}\n"
        "\n"
        "SUMMARY IN PORTUGUESE IN 100 WORDS:"
    )
    combine_prompt = PromptTemplate(
        template=combine_template, input_variables=["text"]
    )

    # Prompt applied to each loaded document individually.
    map_template = (
        "Write a concise summary of the following in Portuguese in 40 words or less:\n"
        "\n"
        "{text}\n"
        "\n"
        "CONCISE SUMMARY IN PORTUGUESE IN 40 WORDS OR LESS:"
    )
    map_prompt = PromptTemplate(
        template=map_template, input_variables=["text"]
    )

    chain = load_summarize_chain(
        OpenAI(temperature=0),
        chain_type="map_reduce",
        return_intermediate_steps=True,  # needed to expose the per-document summaries
        combine_prompt=combine_prompt,
        map_prompt=map_prompt,
    )
    output_summary = chain({"input_documents": documents}, return_only_outputs=True)

    return {
        'summary': _wrap_paragraphs(output_summary['output_text']),
        # Each partial summary is wrapped separately so one long entry does
        # not shift the line breaks of the entries that follow it.
        'steps': '\n'.join(
            _wrap_paragraphs(step)
            for step in output_summary['intermediate_steps']
        ),
    }
134
+
135
+
136
+
137
  convos = [] # store all panel objects in a list
138
 
139
 
 
145
 
146
  # save pdf file to a temp file
147
  if file_input.value is not None:
148
+ file_input.save("temp.pdf")
149
 
150
  prompt_text = prompt.value
151
  if prompt_text:
152
+ result = qa(file="temp.pdf", query=prompt_text, chain_type=select_chain_type.value,
153
  k=select_k.value)
154
  if result.get('error') is None:
155
  convos.extend([
 
177
  pn.state.notifications.error('Missing file.', duration=2000)
178
  return pn.Column(*convos, margin=15, width=575, min_height=400)
179
 
180
def summary_result(_):
    """Button callback: summarize the uploaded PDF and render the conversation.

    The positional argument is the value of the bound widget and is ignored.
    The OpenAI key typed by the user is exported to the environment, the
    uploaded file is saved as ``temp.pdf`` and passed to ``summary``. On
    success two rows (overall summary and per-document summaries) are appended
    to the module-level ``convos`` list; on any problem a notification is
    shown instead.

    Returns:
        A ``pn.Column`` holding every entry accumulated in ``convos``.
    """

    def conversation():
        # Built lazily so it always reflects the current content of convos.
        return pn.Column(*convos, margin=15, width=575, min_height=400)

    api_key = openaikey.value
    os.environ["OPENAI_API_KEY"] = api_key
    if not api_key:
        pn.state.notifications.error('Missing API key.', duration=2000)
        return conversation()

    if file_input.value is None:
        pn.state.notifications.error('Missing file.', duration=2000)
        return conversation()

    # save pdf file to a temp file
    file_input.save("temp.pdf")

    result = summary(file="temp.pdf")
    if result.get('error') is not None:
        pn.state.notifications.error(result['error'], duration=2000)
        return conversation()

    overall_row = pn.Row(
        pn.panel("\U0001F60A", width=10),
        "Resumo geral: ",
        result['summary'],
        width=600
    )
    steps_row = pn.Row(
        pn.panel("\U0001F916", width=10),
        pn.Column(
            "Resumo por página:",
            result['steps']
        )
    )
    convos.extend([overall_row, steps_row])
    return conversation()
212
+
213
 
214
  qa_interactive = pn.panel(
215
+ #pn.bind(qa_result, run_button),
216
+ pn.bind(summary_result, summary_button),
217
  loading_indicator=True,
218
  )
219
 
 
225
  ## \U0001F4D3 Perguntas e respostas com um PDF
226
  (original implementation: @sophiamyang)
227
 
228
+ 1) Suba o PDF. 2) Entre com a OpenAI API key. 3) Clique "Resumo!".
229
 
230
  """),
231
  pn.Row(file_input, openaikey),
232
+ summary_filed,
233
  output,
234
+ #widgets
235
 
236
  ).servable()