Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from docling.document_converter import DocumentConverter
|
| 3 |
import json
|
|
|
|
| 4 |
|
| 5 |
# 1) Initialize DocumentConverter
|
| 6 |
converter = DocumentConverter()
|
|
@@ -20,8 +21,20 @@ def convert(file, out_format):
|
|
| 20 |
elif out_format == "HTML":
|
| 21 |
return doc.document.export_to_html()
|
| 22 |
elif out_format == "DocTags":
|
| 23 |
-
#
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
return json.dumps(doctags, indent=4)
|
| 26 |
else:
|
| 27 |
return "Unsupported output format"
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from docling.document_converter import DocumentConverter
|
| 3 |
import json
|
| 4 |
+
import os
|
| 5 |
|
| 6 |
# 1) Initialize DocumentConverter
|
| 7 |
converter = DocumentConverter()
|
|
|
|
| 21 |
elif out_format == "HTML":
|
| 22 |
return doc.document.export_to_html()
|
| 23 |
elif out_format == "DocTags":
|
| 24 |
+
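# Fixed scratch filename in the working directory (not a unique temp file; NOTE(review): concurrent conversions would overwrite each other)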
|
| 25 |
+
temp_filename = "converted_document.doctags"
|
| 26 |
+
|
| 27 |
+
# Save the document as DocTags (a tag-based markup, not JSON; NOTE(review): the json.load below will likely fail on it, confirm)
|
| 28 |
+
doc.document.save_as_doctags(temp_filename)
|
| 29 |
+
|
| 30 |
+
# Read the saved DocTags file back into memory
|
| 31 |
+
with open(temp_filename, 'r') as file:
|
| 32 |
+
doctags = json.load(file)
|
| 33 |
+
|
| 34 |
+
# Clean up the temporary file (optional, to avoid clutter)
|
| 35 |
+
os.remove(temp_filename)
|
| 36 |
+
|
| 37 |
+
# Return the content as a formatted JSON string
|
| 38 |
return json.dumps(doctags, indent=4)
|
| 39 |
else:
|
| 40 |
return "Unsupported output format"
|