import gradio as gr
import subprocess
import os
import base64
import uuid
def process_conversion(base64_input):
    """Convert a Base64-encoded .docx document to PDF with headless LibreOffice.

    Args:
        base64_input: Base64 text of the .docx file. If the text contains a
            comma (e.g. a ``data:...;base64,`` header), only the segment
            between the first and second comma is decoded.

    Returns:
        A 2-tuple. On success it is ``(gr.FileData pointing at the generated
        PDF, Base64 text of the PDF)``. On any failure it is
        ``(None, error_message)``; exceptions raised during the conversion
        are reported through the message rather than propagated.
    """
    # Setup paths in /tmp for Hugging Face environment
    unique_id = uuid.uuid4().hex
    temp_docx = f"/tmp/input_{unique_id}.docx"
    output_dir = "/tmp/outputs"
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_dir, exist_ok=True)

    try:
        # 1. Handle Base64 input
        if not base64_input or not base64_input.strip():
            return None, "Error: No Base64 data provided."

        # Clean base64 string (remove data header if present)
        if "," in base64_input:
            base64_input = base64_input.split(",")[1]

        # Write the binary data to a temporary .docx file
        with open(temp_docx, "wb") as f:
            f.write(base64.b64decode(base64_input))

        # 2. Run LibreOffice conversion
        result = subprocess.run(
            [
                "libreoffice", "--headless", "--convert-to", "pdf",
                "--outdir", output_dir, temp_docx,
            ],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode != 0:
            return None, f"LibreOffice Error: {result.stderr}"

        # 3. Locate and read the resulting PDF (same stem as the input file)
        pdf_path = os.path.join(output_dir, f"input_{unique_id}.pdf")
        if not os.path.exists(pdf_path):
            return None, "Error: Conversion failed - PDF not generated."

        with open(pdf_path, "rb") as f:
            pdf_base64 = base64.b64encode(f.read()).decode("utf-8")

        # Return File object for download and the Base64 string for API use
        return gr.FileData(path=pdf_path, mime_type="application/pdf"), pdf_base64
    except Exception as e:
        # Boundary handler: the UI expects a message, not a traceback.
        return None, f"System Error: {str(e)}"
    finally:
        # Remove the temporary .docx on every exit path, not just on success,
        # so failed or timed-out conversions do not accumulate files in /tmp.
        # Best effort: a cleanup problem must not replace the result above.
        try:
            os.remove(temp_docx)
        except OSError:
            pass
# Gradio interface: Base64 DOCX goes in on the left, the converted PDF
# (downloadable file plus its Base64 text) comes out on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 DOCX to PDF (Base64 Only)")
    gr.Markdown("Paste the Base64 string of a binary `.docx` file to convert it to `.pdf`.")

    with gr.Row():
        # Input side: text area for the Base64 payload and the trigger button.
        with gr.Column():
            text_in = gr.Textbox(
                lines=8,
                label="Paste Base64 DOCX",
                placeholder="Input base64 string here...",
            )
            submit_btn = gr.Button("Convert to PDF", variant="primary")

        # Output side: file download and the Base64 text of the PDF.
        with gr.Column():
            file_out = gr.File(label="Download PDF File")
            # 'show_copy_button' is deliberately not passed here; it was
            # removed to fix a TypeError.
            text_out = gr.Textbox(lines=8, label="Result Base64 PDF")

    # Wire the button to the converter: one input, two outputs (file, text).
    submit_btn.click(
        process_conversion,
        inputs=[text_in],
        outputs=[file_out, text_out],
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()