Update app.py
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ from io import BytesIO
|
|
| 29 |
from pdf2image import convert_from_bytes, convert_from_path # pip install pdf2image
|
| 30 |
|
| 31 |
import requests
|
| 32 |
-
from utils import convert_json_to_markdown
|
| 33 |
|
| 34 |
def setup_poppler_linux():
|
| 35 |
poppler_dir = "/tmp/poppler"
|
|
@@ -255,11 +255,18 @@ async def doc_parser(doc_path, prompt, model_id):
|
|
| 255 |
async for chunk in request(query, model_name, client, Authorization):
|
| 256 |
pages += chunk
|
| 257 |
yield extract_makrdown(pages), pages
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
|
| 264 |
|
| 265 |
|
|
|
|
| 29 |
from pdf2image import convert_from_bytes, convert_from_path # pip install pdf2image
|
| 30 |
|
| 31 |
import requests
|
| 32 |
+
from utils import convert_json_to_markdown, extract_json_content
|
| 33 |
|
| 34 |
def setup_poppler_linux():
|
| 35 |
poppler_dir = "/tmp/poppler"
|
|
|
|
| 255 |
async for chunk in request(query, model_name, client, Authorization):
|
| 256 |
pages += chunk
|
| 257 |
yield extract_makrdown(pages), pages
|
| 258 |
+
|
| 259 |
+
try:
|
| 260 |
+
json_pages = extract_json_content(pages)
|
| 261 |
+
json_pages = json.dumps(json.loads(json_pages), indent=4, ensure_ascii=False)
|
| 262 |
+
except Exception as e:
|
| 263 |
+
json_pages = pages
|
| 264 |
+
all_pages.append(extract_makrdown(json_pages))
|
| 265 |
+
try:
|
| 266 |
+
markdown_pages = convert_json_to_markdown(pages)
|
| 267 |
+
except Exception as e:
|
| 268 |
+
markdown_pages = pages
|
| 269 |
+
all_pages_raw.append(markdown_pages)
|
| 270 |
yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
|
| 271 |
|
| 272 |
|