GiantPandas committed on
Commit
c10a7e3
·
verified ·
1 Parent(s): b2afe40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -35
app.py CHANGED
@@ -63,9 +63,10 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
63
  return response
64
 
65
 
 
 
66
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
67
  """使用aiohttp异步发送PDF"""
68
- # url = f"http://{server_ip}:{port}{route}"
69
  url = f"{server_ip}{route}"
70
  headers = {}
71
  if Authorization:
@@ -108,7 +109,7 @@ client = AsyncOpenAI(
108
 
109
 
110
  async def request(messages):
111
-
112
  chat_completion_from_base64 = await client.chat.completions.create(
113
  messages=messages,
114
  extra_headers={
@@ -116,27 +117,24 @@ async def request(messages):
116
  },
117
  model="Qwen2_5VL",
118
  max_completion_tokens=4096,
119
- extra_headers={
120
- "Authorization": f"Bearer {Authorization}"
121
- },
122
  stream=True,
123
  temperature=0.0,
124
  top_p=0.95
125
  )
126
-
127
  page = ""
128
  async for chunk in chat_completion_from_base64:
129
  if chunk.choices[0].delta.content:
130
  content = chunk.choices[0].delta.content
131
-
132
  choice = chunk.choices[0]
133
  if choice.finish_reason is not None:
134
  print(f"end reason = {choice.finish_reason}")
135
  break
136
  page += content
137
-
138
  yield content
139
-
140
 
141
  def images_to_pdf(img_paths, pdf_path):
142
 
@@ -170,7 +168,7 @@ def encode_image(image_path):
170
  return base64.b64encode(image_file.read()).decode("utf-8")
171
 
172
  def build_message(image_path, prompt):
173
-
174
  content = [
175
  {
176
  "type": "image_url",
@@ -180,14 +178,14 @@ def build_message(image_path, prompt):
180
  },
181
  {"type": "text", 'text': prompt}
182
  ]
183
-
184
-
185
  messages = [
186
  {"role": "system", "content": "You are a helpful assistant."},
187
  {'role': 'user', 'content': content}
188
-
189
  ]
190
-
191
  return messages
192
 
193
 
@@ -216,14 +214,14 @@ async def doc_parser(doc_path, prompt):
216
  for idx, page in enumerate(pages, start=1):
217
  img_path = tmpdir / f"page_{idx}.png"
218
  page.save(img_path, "PNG")
219
-
220
  messages = build_message(img_path, prompt)
221
  queries.append(messages)
222
-
223
  else:
224
  messages = build_message(doc_path, prompt)
225
  queries.append(messages)
226
-
227
  all_pages = []
228
  all_pages_raw = []
229
  for query in queries:
@@ -236,19 +234,19 @@ async def doc_parser(doc_path, prompt):
236
  print(all_pages)
237
  yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
238
 
239
-
240
  def compress_directory_to_zip(directory_path, output_zip_path):
241
  try:
242
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
243
 
244
-
245
  for root, dirs, files in os.walk(directory_path):
246
  for file in files:
247
-
248
  file_path = os.path.join(root, file)
249
 
250
  arcname = os.path.relpath(file_path, directory_path)
251
-
252
  zipf.write(file_path, arcname)
253
  return 0
254
  except Exception as e:
@@ -268,7 +266,7 @@ def check_prompt(prompt):
268
  return prompt
269
 
270
  def to_file(image_path):
271
-
272
  if image_path.endswith("Academic_Papers.png"):
273
  image_path = image_path.replace("Academic_Papers.png", "Academic_Papers.pdf")
274
 
@@ -307,7 +305,7 @@ if __name__ == '__main__':
307
  with gr.Blocks() as demo:
308
  with gr.Row():
309
  with gr.Column(variant='panel', scale=5):
310
-
311
  file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
312
  prompts = gr.Dropdown(
313
  choices=preset_prompts,
@@ -325,7 +323,7 @@ if __name__ == '__main__':
325
  pdf_show = PDF(label='Preview', interactive=False, visible=True, height=800)
326
 
327
 
328
-
329
  example_root = os.path.join(os.path.dirname(__file__), 'examples')
330
  images = [
331
  os.path.join(example_root, f)
@@ -339,9 +337,9 @@ if __name__ == '__main__':
339
  file_path = [
340
  os.path.join(example_root, f)
341
  for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
342
-
343
  ]
344
-
345
  with gr.Row():
346
  for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
347
  with gr.Column(scale=1, min_width=120):
@@ -353,11 +351,11 @@ if __name__ == '__main__':
353
  show_download_button=False
354
  )
355
  gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
356
-
357
-
358
  download_btn = gr.Button("⬇️ Generate download link", size="sm")
359
  output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
360
-
361
  gr.HTML("""
362
  <style>
363
  #down-file-box {
@@ -372,9 +370,9 @@ if __name__ == '__main__':
372
  line_breaks=True)
373
  with gr.Tab('Markdown text'):
374
  md_text = gr.TextArea(lines=45, show_copy_button=True)
375
-
376
-
377
-
378
  file.change(fn=process_file, inputs=file, outputs=pdf_show)
379
 
380
 
@@ -391,9 +389,9 @@ if __name__ == '__main__':
391
  inputs=[file, prompts],
392
  outputs=[md, md_text]
393
  )
394
-
395
  clear_bu.add([file, md, pdf_show, md_text])
396
-
397
  download_btn.click(
398
  fn=download_markdown_file,
399
  inputs=md_text,
@@ -405,4 +403,4 @@ if __name__ == '__main__':
405
  )
406
 
407
 
408
- demo.launch(server_name='0.0.0.0',share=True)
 
63
  return response
64
 
65
 
66
+
67
+
68
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
69
  """使用aiohttp异步发送PDF"""
 
70
  url = f"{server_ip}{route}"
71
  headers = {}
72
  if Authorization:
 
109
 
110
 
111
  async def request(messages):
112
+
113
  chat_completion_from_base64 = await client.chat.completions.create(
114
  messages=messages,
115
  extra_headers={
 
117
  },
118
  model="Qwen2_5VL",
119
  max_completion_tokens=4096,
 
 
 
120
  stream=True,
121
  temperature=0.0,
122
  top_p=0.95
123
  )
124
+
125
  page = ""
126
  async for chunk in chat_completion_from_base64:
127
  if chunk.choices[0].delta.content:
128
  content = chunk.choices[0].delta.content
129
+
130
  choice = chunk.choices[0]
131
  if choice.finish_reason is not None:
132
  print(f"end reason = {choice.finish_reason}")
133
  break
134
  page += content
135
+
136
  yield content
137
+
138
 
139
  def images_to_pdf(img_paths, pdf_path):
140
 
 
168
  return base64.b64encode(image_file.read()).decode("utf-8")
169
 
170
  def build_message(image_path, prompt):
171
+
172
  content = [
173
  {
174
  "type": "image_url",
 
178
  },
179
  {"type": "text", 'text': prompt}
180
  ]
181
+
182
+
183
  messages = [
184
  {"role": "system", "content": "You are a helpful assistant."},
185
  {'role': 'user', 'content': content}
186
+
187
  ]
188
+
189
  return messages
190
 
191
 
 
214
  for idx, page in enumerate(pages, start=1):
215
  img_path = tmpdir / f"page_{idx}.png"
216
  page.save(img_path, "PNG")
217
+
218
  messages = build_message(img_path, prompt)
219
  queries.append(messages)
220
+
221
  else:
222
  messages = build_message(doc_path, prompt)
223
  queries.append(messages)
224
+
225
  all_pages = []
226
  all_pages_raw = []
227
  for query in queries:
 
234
  print(all_pages)
235
  yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
236
 
237
+
238
  def compress_directory_to_zip(directory_path, output_zip_path):
239
  try:
240
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
241
 
242
+
243
  for root, dirs, files in os.walk(directory_path):
244
  for file in files:
245
+
246
  file_path = os.path.join(root, file)
247
 
248
  arcname = os.path.relpath(file_path, directory_path)
249
+
250
  zipf.write(file_path, arcname)
251
  return 0
252
  except Exception as e:
 
266
  return prompt
267
 
268
  def to_file(image_path):
269
+
270
  if image_path.endswith("Academic_Papers.png"):
271
  image_path = image_path.replace("Academic_Papers.png", "Academic_Papers.pdf")
272
 
 
305
  with gr.Blocks() as demo:
306
  with gr.Row():
307
  with gr.Column(variant='panel', scale=5):
308
+
309
  file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
310
  prompts = gr.Dropdown(
311
  choices=preset_prompts,
 
323
  pdf_show = PDF(label='Preview', interactive=False, visible=True, height=800)
324
 
325
 
326
+
327
  example_root = os.path.join(os.path.dirname(__file__), 'examples')
328
  images = [
329
  os.path.join(example_root, f)
 
337
  file_path = [
338
  os.path.join(example_root, f)
339
  for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
340
+
341
  ]
342
+
343
  with gr.Row():
344
  for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
345
  with gr.Column(scale=1, min_width=120):
 
351
  show_download_button=False
352
  )
353
  gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
354
+
355
+
356
  download_btn = gr.Button("⬇️ Generate download link", size="sm")
357
  output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
358
+
359
  gr.HTML("""
360
  <style>
361
  #down-file-box {
 
370
  line_breaks=True)
371
  with gr.Tab('Markdown text'):
372
  md_text = gr.TextArea(lines=45, show_copy_button=True)
373
+
374
+
375
+
376
  file.change(fn=process_file, inputs=file, outputs=pdf_show)
377
 
378
 
 
389
  inputs=[file, prompts],
390
  outputs=[md, md_text]
391
  )
392
+
393
  clear_bu.add([file, md, pdf_show, md_text])
394
+
395
  download_btn.click(
396
  fn=download_markdown_file,
397
  inputs=md_text,
 
403
  )
404
 
405
 
406
+ demo.launch(server_name='0.0.0.0',share=True)