File size: 2,872 Bytes
0f9b225
 
 
0c3692a
 
0f9b225
aec0998
 
2964053
 
 
0f9b225
 
 
0c3692a
0f9b225
aec0998
 
 
0c3692a
aec0998
 
 
 
 
 
 
 
 
2964053
 
aec0998
2964053
0c3692a
2964053
 
 
aec0998
 
0f9b225
0c3692a
0f9b225
0c3692a
aec0998
 
 
 
 
 
 
 
d8f1a51
2964053
aec0998
0c3692a
0f9b225
2964053
0c3692a
aec0998
9355d4d
aec0998
ef75d31
2964053
aec0998
 
 
 
 
 
 
 
 
 
 
ef75d31
 
aec0998
 
 
 
 
 
0f9b225
aec0998
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import subprocess
import os
import base64
import uuid

def process_conversion(base64_input):
    """Convert a Base64-encoded ``.docx`` document to PDF with headless LibreOffice.

    Args:
        base64_input: Base64 text of the DOCX file. An optional data-URI
            style header (everything up to the first comma) is dropped, and
            whitespace/newlines inside the payload are removed before
            decoding.

    Returns:
        A 2-tuple matching the ``(file, text)`` outputs of the UI:
        ``(gr.FileData pointing at the generated PDF, Base64 of the PDF)``
        on success, or ``(None, error message)`` on failure. Exceptions
        raised during conversion are caught and reported through the
        message string rather than propagated.
    """
    # 1. Validate the input before touching the filesystem, so rejected
    #    requests leave no files behind.
    if not base64_input or not base64_input.strip():
        return None, "Error: No Base64 data provided."

    # Drop a data header such as "data:...;base64," if present.
    if "," in base64_input:
        base64_input = base64_input.split(",", 1)[1]

    # Pasted Base64 is frequently line-wrapped; strip all whitespace so the
    # strict decode below only rejects genuinely invalid characters.
    payload = "".join(base64_input.split())
    if not payload:
        return None, "Error: No Base64 data provided."

    try:
        # validate=True: fail loudly instead of silently discarding bad
        # characters and feeding a corrupt document to LibreOffice.
        docx_bytes = base64.b64decode(payload, validate=True)
    except ValueError:  # binascii.Error is a ValueError subclass
        return None, "Error: Invalid Base64 data."

    # Paths live in /tmp for the Hugging Face environment; the unique id keeps
    # concurrent requests from overwriting each other's files.
    unique_id = uuid.uuid4().hex
    temp_docx = f"/tmp/input_{unique_id}.docx"
    output_dir = "/tmp/outputs"

    try:
        os.makedirs(output_dir, exist_ok=True)

        with open(temp_docx, "wb") as f:
            f.write(docx_bytes)

        # 2. Run LibreOffice conversion (argument list, no shell).
        result = subprocess.run([
            'libreoffice', '--headless', '--convert-to', 'pdf',
            '--outdir', output_dir, temp_docx
        ], capture_output=True, text=True, timeout=30)

        if result.returncode != 0:
            return None, f"LibreOffice Error: {result.stderr}"

        # 3. Locate and read the resulting PDF (same stem as the input file).
        pdf_path = os.path.join(output_dir, f"input_{unique_id}.pdf")
        if not os.path.exists(pdf_path):
            return None, "Error: Conversion failed - PDF not generated."

        with open(pdf_path, "rb") as f:
            pdf_base64 = base64.b64encode(f.read()).decode('utf-8')

        # The PDF is intentionally left on disk: the returned FileData
        # references it by path for the download component.
        return gr.FileData(path=pdf_path, mime_type="application/pdf"), pdf_base64

    except Exception as e:
        # UI boundary: report any failure (timeout, missing binary, I/O
        # error, ...) as text instead of crashing the request.
        return None, f"System Error: {str(e)}"

    finally:
        # Always remove the temporary DOCX, on success and on every failure
        # path. Best-effort: the file may never have been created.
        try:
            os.remove(temp_docx)
        except OSError:
            pass

# Gradio interface: a Base64 DOCX input column next to a column holding the
# downloadable PDF and its Base64 representation.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 DOCX to PDF (Base64 Only)")
    gr.Markdown("Paste the Base64 string of a binary `.docx` file to convert it to `.pdf`.")

    with gr.Row():
        # Input column: Base64 text area plus the trigger button.
        with gr.Column():
            text_in = gr.Textbox(
                lines=8,
                label="Paste Base64 DOCX",
                placeholder="Input base64 string here...",
            )
            submit_btn = gr.Button("Convert to PDF", variant="primary")

        # Output column: PDF download and the PDF's Base64 text.
        with gr.Column():
            file_out = gr.File(label="Download PDF File")
            # 'show_copy_button' is deliberately not passed; it was removed
            # to fix a TypeError.
            text_out = gr.Textbox(lines=8, label="Result Base64 PDF")

    # Clicking the button runs the conversion and fills both outputs.
    submit_btn.click(
        process_conversion,
        inputs=[text_in],
        outputs=[file_out, text_out],
    )

if __name__ == "__main__":
    demo.launch()