doc2pdf / app.py
nazib61's picture
Update app.py
ef75d31 verified
import gradio as gr
import subprocess
import os
import base64
import uuid
def process_conversion(base64_input):
    """Convert a Base64-encoded .docx document to PDF via headless LibreOffice.

    Args:
        base64_input: Base64 text of the .docx file. If the text contains a
            comma (for example a ``data:...;base64,`` header), only the
            segment immediately following the first comma is decoded.

    Returns:
        A 2-tuple. On success it is ``(gr.FileData for the generated PDF,
        Base64 text of the PDF)``. On any failure it is ``(None, message)``
        where ``message`` describes the error; no exception is raised for
        errors that occur during decoding or conversion.
    """
    # Paths live under /tmp (writable in the Hugging Face environment); the
    # random id keeps input/output names of separate calls from colliding.
    unique_id = uuid.uuid4().hex
    temp_docx = f"/tmp/input_{unique_id}.docx"
    output_dir = "/tmp/outputs"
    # exist_ok avoids the check-then-create race between simultaneous calls.
    os.makedirs(output_dir, exist_ok=True)

    try:
        # 1. Handle Base64 input
        if not base64_input or not base64_input.strip():
            return None, "Error: No Base64 data provided."

        # Clean base64 string (remove data header if present)
        if "," in base64_input:
            base64_input = base64_input.split(",")[1]

        # Decode first so that malformed input never creates a file on disk.
        docx_bytes = base64.b64decode(base64_input)
        with open(temp_docx, "wb") as f:
            f.write(docx_bytes)

        # 2. Run LibreOffice conversion (argument list, no shell involved)
        result = subprocess.run(
            [
                'libreoffice', '--headless', '--convert-to', 'pdf',
                '--outdir', output_dir, temp_docx,
            ],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode != 0:
            return None, f"LibreOffice Error: {result.stderr}"

        # 3. Locate and read the resulting PDF; LibreOffice names it after
        # the input file's stem.
        pdf_path = os.path.join(output_dir, f"input_{unique_id}.pdf")
        if not os.path.exists(pdf_path):
            return None, "Error: Conversion failed - PDF not generated."

        with open(pdf_path, "rb") as f:
            pdf_base64 = base64.b64encode(f.read()).decode('utf-8')

        # Return File object for download and the Base64 string for API use.
        # The PDF itself is intentionally left on disk because the returned
        # FileData refers to it by path.
        return gr.FileData(path=pdf_path, mime_type="application/pdf"), pdf_base64
    except Exception as e:
        # Top-level boundary for the UI/API: report the failure as text.
        return None, f"System Error: {str(e)}"
    finally:
        # Always remove the temporary .docx, on success and on every failure
        # path (bad exit code, timeout, missing PDF, unexpected exception).
        try:
            os.remove(temp_docx)
        except OSError:
            # Best-effort cleanup: the file may never have been created.
            pass
# Gradio interface: a Base64 input column on the left, results on the right.
with gr.Blocks() as demo:
    gr.Markdown(value="# 📄 DOCX to PDF (Base64 Only)")
    gr.Markdown(
        value="Paste the Base64 string of a binary `.docx` file to convert it to `.pdf`."
    )

    with gr.Row():
        # Left column: the Base64 text to convert and the trigger button.
        with gr.Column():
            text_in = gr.Textbox(
                lines=8,
                label="Paste Base64 DOCX",
                placeholder="Input base64 string here...",
            )
            submit_btn = gr.Button(value="Convert to PDF", variant="primary")

        # Right column: downloadable PDF plus its Base64 representation.
        with gr.Column():
            file_out = gr.File(label="Download PDF File")
            # 'show_copy_button' is deliberately not passed here; it was
            # removed to fix a TypeError.
            text_out = gr.Textbox(lines=8, label="Result Base64 PDF")

    # Clicking the button runs the conversion and fills both outputs.
    submit_btn.click(
        fn=process_conversion,
        inputs=[text_in],
        outputs=[file_out, text_out],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()