aimal-khan committed on
Commit
916651f
·
verified ·
1 Parent(s): d13b416

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
3
  import json
 
4
 
5
  # 1) Initialize DocumentConverter
6
  converter = DocumentConverter()
@@ -20,8 +21,20 @@ def convert(file, out_format):
20
  elif out_format == "HTML":
21
  return doc.document.export_to_html()
22
  elif out_format == "DocTags":
23
- # Export as DocTags and convert to JSON
24
- doctags = doc.document.save_as_doctags() # This is the JSON-like format
 
 
 
 
 
 
 
 
 
 
 
 
25
  return json.dumps(doctags, indent=4)
26
  else:
27
  return "Unsupported output format"
 
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
3
  import json
4
+ import os
5
 
6
  # 1) Initialize DocumentConverter
7
  converter = DocumentConverter()
 
21
  elif out_format == "HTML":
22
  return doc.document.export_to_html()
23
  elif out_format == "DocTags":
24
+ # Define a temporary file path
25
+ temp_filename = "converted_document.doctags"
26
+
27
+ # Save the document as DocTags (JSON-like format)
28
+ doc.document.save_as_doctags(temp_filename)
29
+
30
+ # Read the saved DocTags file back into memory
31
+ with open(temp_filename, 'r') as file:
32
+ doctags = json.load(file)
33
+
34
+ # Clean up the temporary file (optional, to avoid clutter)
35
+ os.remove(temp_filename)
36
+
37
+ # Return the content as a formatted JSON string
38
  return json.dumps(doctags, indent=4)
39
  else:
40
  return "Unsupported output format"