aimal-khan committed on
Commit
d23abdf
·
verified ·
1 Parent(s): 8376c49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -23
app.py CHANGED
@@ -4,10 +4,10 @@ import json
4
  import os
5
  import tempfile
6
 
7
- # Initialize DocumentConverter
8
  converter = DocumentConverter()
9
 
10
- # Conversion function
11
  def convert(file, out_format):
12
  if file is None:
13
  return "Please upload a file first."
@@ -18,39 +18,52 @@ def convert(file, out_format):
18
 
19
  # Export based on the selected format
20
  if out_format == "Markdown":
21
- # Show Markdown content directly in the output box
22
- return doc.document.export_to_markdown()
 
 
 
 
 
23
  elif out_format == "HTML":
24
- # Show HTML content directly in the output box
25
- return doc.document.export_to_html()
 
 
 
 
 
26
  elif out_format == "JSON":
27
- # Export to JSON format
28
- doctags = doc.document.export_to_dict()
29
- return json.dumps(doctags, indent=4)
 
 
 
 
 
30
  elif out_format == "Text":
31
- # Extract plain text
32
- return doc.document.export_to_markdown() # Updated to the correct method
 
 
 
 
 
33
  elif out_format == "Doctags":
34
- # Create a temporary file to save Doctags and allow download
35
  with tempfile.NamedTemporaryFile(delete=False, suffix=".doctags") as temp_file:
36
  temp_filename = temp_file.name
37
-
38
- # Save the document as Doctags (JSON-like format)
39
  doc.document.save_as_doctags(temp_filename)
 
40
 
41
- # Ensure the file is not empty
42
- if os.path.getsize(temp_filename) > 0:
43
- # Return the file path for download
44
- return temp_filename
45
- else:
46
- return "Error: The Doctags file is empty."
47
  else:
48
  return "Unsupported output format"
49
 
50
  except Exception as e:
51
  return f"Error converting document: {str(e)}"
52
 
53
- # Build Gradio interface
54
  with gr.Blocks() as demo:
55
  # Top: explanatory text about Docling and a link to the official repository
56
  gr.Markdown(
@@ -89,12 +102,12 @@ with gr.Blocks() as demo:
89
  )
90
  btn = gr.Button("Convert")
91
 
92
- # Output panel (file output for Doctags)
93
  out = gr.File(label="📄 Download Output File")
94
 
95
  # Wire it up
96
  btn.click(fn=convert, inputs=[inp, fmt], outputs=out)
97
 
98
- # Launch
99
  if __name__ == "__main__":
100
  demo.launch()
 
4
  import os
5
  import tempfile
6
 
7
+ # 1) Initialize DocumentConverter
8
  converter = DocumentConverter()
9
 
10
+ # 2) Conversion function
11
  def convert(file, out_format):
12
  if file is None:
13
  return "Please upload a file first."
 
18
 
19
  # Export based on the selected format
20
  if out_format == "Markdown":
21
+ # Save as Markdown file
22
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".md") as temp_file:
23
+ temp_filename = temp_file.name
24
+ with open(temp_filename, 'w') as f:
25
+ f.write(doc.document.export_to_markdown())
26
+ return temp_filename # Return file for download
27
+
28
  elif out_format == "HTML":
29
+ # Save as HTML file
30
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as temp_file:
31
+ temp_filename = temp_file.name
32
+ with open(temp_filename, 'w') as f:
33
+ f.write(doc.document.export_to_html())
34
+ return temp_filename # Return file for download
35
+
36
  elif out_format == "JSON":
37
+ # Save as JSON file (lossless serialization)
38
+ doctags = doc.document.export_to_dict() # Correct method for JSON export
39
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
40
+ temp_filename = temp_file.name
41
+ with open(temp_filename, 'w') as f:
42
+ json.dump(doctags, f, indent=4)
43
+ return temp_filename # Return file for download
44
+
45
  elif out_format == "Text":
46
+ # Save as Text file (plain text)
47
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
48
+ temp_filename = temp_file.name
49
+ with open(temp_filename, 'w') as f:
50
+ f.write(doc.document.export_to_text()) # Correct method for plain text extraction
51
+ return temp_filename # Return file for download
52
+
53
  elif out_format == "Doctags":
54
+ # Save as Doctags file
55
  with tempfile.NamedTemporaryFile(delete=False, suffix=".doctags") as temp_file:
56
  temp_filename = temp_file.name
 
 
57
  doc.document.save_as_doctags(temp_filename)
58
+ return temp_filename # Return file for download
59
 
 
 
 
 
 
 
60
  else:
61
  return "Unsupported output format"
62
 
63
  except Exception as e:
64
  return f"Error converting document: {str(e)}"
65
 
66
+ # 3) Build Gradio interface
67
  with gr.Blocks() as demo:
68
  # Top: explanatory text about Docling and a link to the official repository
69
  gr.Markdown(
 
102
  )
103
  btn = gr.Button("Convert")
104
 
105
+ # Output panel (file output for all formats)
106
  out = gr.File(label="📄 Download Output File")
107
 
108
  # Wire it up
109
  btn.click(fn=convert, inputs=[inp, fmt], outputs=out)
110
 
111
+ # 4) Launch
112
  if __name__ == "__main__":
113
  demo.launch()