GiantPandas committed on
Commit
a88f6a2
·
verified ·
1 Parent(s): 53ef3cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -29,7 +29,7 @@ from io import BytesIO
29
  from pdf2image import convert_from_bytes, convert_from_path # pip install pdf2image
30
 
31
  import requests
32
- from utils import convert_json_to_markdown
33
 
34
  def setup_poppler_linux():
35
  poppler_dir = "/tmp/poppler"
@@ -255,11 +255,18 @@ async def doc_parser(doc_path, prompt, model_id):
255
  async for chunk in request(query, model_name, client, Authorization):
256
  pages += chunk
257
  yield extract_makrdown(pages), pages
258
- all_pages.append(extract_makrdown(pages))
259
-
260
- pages = convert_json_to_markdown(pages)
261
- all_pages_raw.append(pages)
262
- print(all_pages)
 
 
 
 
 
 
 
263
  yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
264
 
265
 
 
29
  from pdf2image import convert_from_bytes, convert_from_path # pip install pdf2image
30
 
31
  import requests
32
+ from utils import convert_json_to_markdown, extract_json_content
33
 
34
  def setup_poppler_linux():
35
  poppler_dir = "/tmp/poppler"
 
255
  async for chunk in request(query, model_name, client, Authorization):
256
  pages += chunk
257
  yield extract_makrdown(pages), pages
258
+
259
+ try:
260
+ json_pages = extract_json_content(pages)
261
+ json_pages = json.dumps(json.loads(json_pages), indent=4, ensure_ascii=False)
262
+ except Exception as e:
263
+ json_pages = pages
264
+ all_pages.append(extract_makrdown(json_pages))
265
+ try:
266
+ markdown_pages = convert_json_to_markdown(pages)
267
+ except Exception as e:
268
+ markdown_pages = pages
269
+ all_pages_raw.append(markdown_pages)
270
  yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
271
 
272