nazib61 committed on
Commit
0c3692a
·
verified ·
1 Parent(s): b50c33c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -32
app.py CHANGED
@@ -1,26 +1,34 @@
1
  import gradio as gr
2
  import subprocess
3
  import os
4
- import shutil
 
5
 
6
- def convert_docx_to_pdf(docx_file):
7
- if docx_file is None:
8
- return None
9
 
10
- # Get the file path from the Gradio temp file object
11
- input_path = docx_file.name
12
- filename = os.path.basename(input_path)
13
- output_dir = "output_pdfs"
14
-
15
- # Create output directory if it doesn't exist
16
  if not os.path.exists(output_dir):
17
  os.makedirs(output_dir)
18
-
19
  try:
20
- # Run LibreOffice headless conversion command
21
- # --headless: Runs without a GUI
22
- # --convert-to pdf: Specifies target format
23
- # --outdir: Specifies where the result should go
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  subprocess.run([
25
  'libreoffice',
26
  '--headless',
@@ -28,28 +36,54 @@ def convert_docx_to_pdf(docx_file):
28
  '--outdir', output_dir,
29
  input_path
30
  ], check=True)
31
-
32
- # Determine the name of the generated PDF
 
 
33
  pdf_filename = os.path.splitext(filename)[0] + ".pdf"
34
  pdf_path = os.path.join(output_dir, pdf_filename)
35
-
36
  if os.path.exists(pdf_path):
37
- return pdf_path
38
- else:
39
- return "Error: PDF was not generated."
 
40
 
 
 
 
 
 
 
 
 
41
  except Exception as e:
42
- return f"An error occurred: {str(e)}"
43
-
44
- # Define Gradio Interface
45
- demo = gr.Interface(
46
- fn=convert_docx_to_pdf,
47
- inputs=gr.File(label="Upload DOCX File", file_types=[".docx"]),
48
- outputs=gr.File(label="Download PDF"),
49
- title="DOCX to PDF Converter",
50
- description="Upload a .docx file and convert it to .pdf using LibreOffice Headless. High accuracy conversion for Hugging Face Spaces.",
51
- allow_flagging="never"
52
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  if __name__ == "__main__":
55
  demo.launch()
 
1
  import gradio as gr
2
  import subprocess
3
  import os
4
+ import base64
5
+ import uuid
6
 
7
def process_conversion(file_input, base64_input):
    """Convert a DOCX document to PDF with headless LibreOffice.

    The document is taken from one of two sources; an uploaded file takes
    precedence when both are supplied.

    Args:
        file_input: Uploaded file from the Gradio ``File`` input, or ``None``.
            Either an object exposing its path as ``.name`` or a plain path
            string is accepted.
        base64_input: Base64 text of the DOCX bytes, optionally prefixed with
            a ``data:...;base64,`` style header. Only used when ``file_input``
            is ``None``.

    Returns:
        A ``(pdf_path, text)`` tuple. On success ``pdf_path`` is the path of
        the generated PDF and ``text`` is that PDF encoded as Base64. On
        failure ``pdf_path`` is ``None`` and ``text`` is a message starting
        with ``"Error: "``; exceptions raised while decoding or converting
        are reported this way instead of propagating.
    """
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)

    # Each request converts into its own sub-directory so that two uploads
    # with the same filename cannot overwrite or pick up each other's PDF.
    job_id = uuid.uuid4().hex
    job_dir = os.path.join(output_dir, job_id)
    temp_docx = None  # set only when this call writes the DOCX itself

    try:
        # 1. Determine the input source.
        if file_input is not None:
            # NOTE(review): depending on the Gradio version the File
            # component hands over either a file-like object or a path
            # string -- both are handled here; confirm against the deployed
            # version.
            input_path = getattr(file_input, "name", file_input)
            os.makedirs(job_dir)
        elif base64_input and base64_input.strip():
            payload = base64_input.strip()
            # Drop a leading header such as "data:application/...;base64,".
            if "," in payload:
                payload = payload.split(",", 1)[1]

            # Decode before touching the disk so that invalid Base64 leaves
            # no empty file behind.
            docx_bytes = base64.b64decode(payload)

            os.makedirs(job_dir)
            temp_docx = os.path.join(job_dir, f"input_{job_id}.docx")
            with open(temp_docx, "wb") as f:
                f.write(docx_bytes)
            input_path = temp_docx
        else:
            return None, "Error: Please provide either a file or a Base64 string."

        # 2. Run the LibreOffice conversion.
        subprocess.run([
            'libreoffice',
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', job_dir,
            input_path
        ], check=True)

        # 3. Locate the output PDF.
        # LibreOffice names the output the same as the input but with .pdf.
        filename = os.path.basename(input_path)
        pdf_filename = os.path.splitext(filename)[0] + ".pdf"
        pdf_path = os.path.join(job_dir, pdf_filename)

        if not os.path.exists(pdf_path):
            return None, "Error: Conversion failed."

        # Read the PDF back so it can also be returned as a Base64 string.
        with open(pdf_path, "rb") as f:
            pdf_base64 = base64.b64encode(f.read()).decode('utf-8')

        return pdf_path, pdf_base64

    except Exception as e:
        # UI boundary: report the failure in the text output instead of
        # raising into the Gradio event handler.
        return None, f"Error: {str(e)}"

    finally:
        # Remove the temporary DOCX on every path, not only on success.
        if temp_docx is not None and os.path.exists(temp_docx):
            os.remove(temp_docx)
        # Drop the per-request directory when it ended up empty (failed or
        # rejected request); rmdir refuses to delete a directory that still
        # holds the generated PDF, which is exactly what we want.
        try:
            os.rmdir(job_dir)
        except OSError:
            pass
62
+
63
# Custom CSS for a better look: cap the app's width and centre it. The rule
# targets the element whose id is "container", which is assigned to the outer
# column below through elem_id -- without that id the rule matches nothing.
css = """
#container { max-width: 800px; margin: auto; }
"""

# Two-column layout: inputs (file upload or pasted Base64) on the left,
# results (downloadable PDF and its Base64 text) on the right.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        gr.Markdown("# 📄 DOCX to PDF Converter")
        gr.Markdown("Upload a Word file **OR** paste a Base64 encoded string of the binary DOCX.")

        with gr.Row():
            with gr.Column():
                file_in = gr.File(label="Upload .docx", file_types=[".docx"])
                text_in = gr.Textbox(label="Paste Base64 DOCX", placeholder="Base64 data here...", lines=5)
                submit_btn = gr.Button("Convert to PDF", variant="primary")

            with gr.Column():
                file_out = gr.File(label="Download PDF")
                text_out = gr.Textbox(label="Result Base64 PDF", show_copy_button=True, lines=5)

        # The handler receives (file, base64 text) and returns
        # (pdf path or None, base64 text or error message), in that order.
        submit_btn.click(
            fn=process_conversion,
            inputs=[file_in, text_in],
            outputs=[file_out, text_out]
        )

if __name__ == "__main__":
    demo.launch()