aimal-khan committed on
Commit
8376c49
·
verified ·
1 Parent(s): 4b5fe0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -36
app.py CHANGED
@@ -4,10 +4,10 @@ import json
4
  import os
5
  import tempfile
6
 
7
- # 1) Initialize DocumentConverter
8
  converter = DocumentConverter()
9
 
10
- # 2) Conversion function
11
  def convert(file, out_format):
12
  if file is None:
13
  return "Please upload a file first."
@@ -18,52 +18,39 @@ def convert(file, out_format):
18
 
19
  # Export based on the selected format
20
  if out_format == "Markdown":
21
- # Save as Markdown file
22
- with tempfile.NamedTemporaryFile(delete=False, suffix=".md") as temp_file:
23
- temp_filename = temp_file.name
24
- with open(temp_filename, 'w') as f:
25
- f.write(doc.document.export_to_markdown())
26
- return temp_filename # Return file for download
27
-
28
  elif out_format == "HTML":
29
- # Save as HTML file
30
- with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as temp_file:
31
- temp_filename = temp_file.name
32
- with open(temp_filename, 'w') as f:
33
- f.write(doc.document.export_to_html())
34
- return temp_filename # Return file for download
35
-
36
  elif out_format == "JSON":
37
- # Save as JSON file (lossless serialization)
38
- doctags = doc.document.save_as_doctags()
39
- with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
40
- temp_filename = temp_file.name
41
- with open(temp_filename, 'w') as f:
42
- json.dump(doctags, f, indent=4)
43
- return temp_filename # Return file for download
44
-
45
  elif out_format == "Text":
46
- # Save as Text file (plain text)
47
- with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
48
- temp_filename = temp_file.name
49
- with open(temp_filename, 'w') as f:
50
- f.write(doc.document.text)
51
- return temp_filename # Return file for download
52
-
53
  elif out_format == "Doctags":
54
- # Save as Doctags file
55
  with tempfile.NamedTemporaryFile(delete=False, suffix=".doctags") as temp_file:
56
  temp_filename = temp_file.name
 
 
57
  doc.document.save_as_doctags(temp_filename)
58
- return temp_filename # Return file for download
59
 
 
 
 
 
 
 
60
  else:
61
  return "Unsupported output format"
62
 
63
  except Exception as e:
64
  return f"Error converting document: {str(e)}"
65
 
66
- # 3) Build Gradio interface
67
  with gr.Blocks() as demo:
68
  # Top: explanatory text about Docling and a link to the official repository
69
  gr.Markdown(
@@ -102,12 +89,12 @@ with gr.Blocks() as demo:
102
  )
103
  btn = gr.Button("Convert")
104
 
105
- # Output panel (file output for all formats)
106
  out = gr.File(label="📄 Download Output File")
107
 
108
  # Wire it up
109
  btn.click(fn=convert, inputs=[inp, fmt], outputs=out)
110
 
111
- # 4) Launch
112
  if __name__ == "__main__":
113
  demo.launch()
 
4
  import os
5
  import tempfile
6
 
7
+ # Initialize DocumentConverter
8
  converter = DocumentConverter()
9
 
10
+ # Conversion function
11
  def convert(file, out_format):
12
  if file is None:
13
  return "Please upload a file first."
 
18
 
19
  # Export based on the selected format
20
  if out_format == "Markdown":
21
+ # Show Markdown content directly in the output box
22
+ return doc.document.export_to_markdown()
 
 
 
 
 
23
  elif out_format == "HTML":
24
+ # Show HTML content directly in the output box
25
+ return doc.document.export_to_html()
 
 
 
 
 
26
  elif out_format == "JSON":
27
+ # Export to JSON format
28
+ doctags = doc.document.export_to_dict()
29
+ return json.dumps(doctags, indent=4)
 
 
 
 
 
30
  elif out_format == "Text":
31
+ # Extract plain text
32
+ return doc.document.export_to_markdown() # Updated to the correct method
 
 
 
 
 
33
  elif out_format == "Doctags":
34
+ # Create a temporary file to save Doctags and allow download
35
  with tempfile.NamedTemporaryFile(delete=False, suffix=".doctags") as temp_file:
36
  temp_filename = temp_file.name
37
+
38
+ # Save the document as Doctags (JSON-like format)
39
  doc.document.save_as_doctags(temp_filename)
 
40
 
41
+ # Ensure the file is not empty
42
+ if os.path.getsize(temp_filename) > 0:
43
+ # Return the file path for download
44
+ return temp_filename
45
+ else:
46
+ return "Error: The Doctags file is empty."
47
  else:
48
  return "Unsupported output format"
49
 
50
  except Exception as e:
51
  return f"Error converting document: {str(e)}"
52
 
53
+ # Build Gradio interface
54
  with gr.Blocks() as demo:
55
  # Top: explanatory text about Docling and a link to the official repository
56
  gr.Markdown(
 
89
  )
90
  btn = gr.Button("Convert")
91
 
92
+ # Output panel (file output for Doctags)
93
  out = gr.File(label="📄 Download Output File")
94
 
95
  # Wire it up
96
  btn.click(fn=convert, inputs=[inp, fmt], outputs=out)
97
 
98
+ # Launch
99
  if __name__ == "__main__":
100
  demo.launch()