File size: 5,036 Bytes
49c4d8f
 
 
 
 
e45c224
49c4d8f
 
e45c224
49c4d8f
e45c224
49c4d8f
 
 
e45c224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e83f3b8
 
 
 
e45c224
 
 
49c4d8f
 
 
e45c224
49c4d8f
 
 
e45c224
49c4d8f
e45c224
 
49c4d8f
 
e45c224
49c4d8f
e45c224
49c4d8f
 
 
 
e45c224
 
 
49c4d8f
e45c224
49c4d8f
e45c224
49c4d8f
e45c224
 
49c4d8f
e45c224
49c4d8f
 
e45c224
 
49c4d8f
 
 
e45c224
 
 
 
 
49c4d8f
 
 
 
 
 
 
 
e83f3b8
49c4d8f
 
 
 
 
 
 
 
 
 
 
e45c224
49c4d8f
e45c224
49c4d8f
 
 
 
 
e45c224
49c4d8f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# app.py
import gradio as gr
import os
import zipfile
import shutil
import subprocess
from pathlib import Path

# --- Configuration ---
# Scratch workspace for a conversion run; the path is relative to the
# process's current working directory.
TEMP_DIR = Path("./temp_conversion_data")
# Uploaded Word files are copied here before being converted.
TEMP_INPUT_DIR = TEMP_DIR.joinpath("input")
# Passed to unoconv as the output location for the generated PDFs.
TEMP_OUTPUT_DIR = TEMP_DIR.joinpath("output")

def setup_conversion_dirs():
    """Reset the temporary workspace so a conversion starts from empty folders.

    Any existing ``TEMP_DIR`` tree is deleted, then the input and output
    directories are created again.

    Returns:
        True when the workspace is ready; False if removing or creating the
        directories raised, in which case the error is printed.
    """
    try:
        # Wipe leftovers from a previous run before recreating the folders.
        if TEMP_DIR.exists():
            shutil.rmtree(TEMP_DIR)

        for workspace_dir in (TEMP_INPUT_DIR, TEMP_OUTPUT_DIR):
            workspace_dir.mkdir(parents=True, exist_ok=True)
    except Exception as err:
        print(f"Error setting up directories: {err}")
        return False
    return True

def convert_single_file_with_unoconv(input_path, output_dir):
    """Run ``unoconv`` once to turn a single DOC/DOCX file into a PDF.

    Args:
        input_path: Path of the Word document to convert.
        output_dir: Location handed to unoconv's ``-o`` option.

    Returns:
        True if unoconv exited with status 0; False if it failed, exceeded
        the 60-second timeout, is not installed, or any other error occurred.
        Every outcome is reported with ``print``.
    """
    source_name = Path(input_path).name
    converted = False

    try:
        # Equivalent shell command: unoconv -f pdf -o [output_dir] [input_file]
        command = ['unoconv', '-f', 'pdf', '-o', str(output_dir), str(input_path)]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=60,  # upper bound, in seconds, for one document
            check=True,  # a non-zero exit status raises CalledProcessError
        )
        print(f"Successfully converted {source_name}. Output: {completed.stdout}")
        converted = True
    except subprocess.CalledProcessError as err:
        # Log both streams so the failure can be diagnosed afterwards.
        print(f"UNOCONV FAILED for {source_name}. Stderr: {err.stderr}, Stdout: {err.stdout}")
    except subprocess.TimeoutExpired:
        print(f"Conversion of {source_name} timed out.")
    except FileNotFoundError:
        # Raised when the 'unoconv' executable itself cannot be located.
        print("CRITICAL ERROR: 'unoconv' command not found. It is not installed or not in PATH.")
    except Exception as err:
        print(f"An unexpected error occurred during conversion of {source_name}: {err}")

    return converted

def convert_docs_to_pdf(doc_files):
    """Convert uploaded Word documents to PDF and bundle the PDFs into a ZIP.

    Each upload is copied into ``TEMP_INPUT_DIR``, converted into
    ``TEMP_OUTPUT_DIR`` by ``convert_single_file_with_unoconv``, and the
    resulting PDFs are written to ``converted_pdfs.zip`` inside ``TEMP_DIR``.

    Args:
        doc_files: Uploaded files from the Gradio file input. Each item may
            be a path (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute holds the path of the uploaded file.

    Returns:
        A ``(download_update, status)`` tuple: a ``gr.update(...)`` for the
        download component (hidden on failure, pointing at the ZIP on
        success) and a status message string.
    """
    if not doc_files:
        return gr.update(visible=False), "Please upload one or more .docx or .doc files."

    if not setup_conversion_dirs():
        return gr.update(visible=False), "Error: Could not set up temporary directories."

    nothing_converted = "No files were converted successfully. Check the Space logs for details."
    success_count = 0
    total_count = len(doc_files)
    # Stems already staged, case-folded. The staged copies share one input
    # directory and are converted into one output directory, so uploads with
    # the same stem (e.g. "report.doc" and "report.docx") would otherwise
    # overwrite each other.
    used_stems = set()

    # 1. Process each uploaded file
    for file_obj in doc_files:
        # Accept both plain paths and file wrappers exposing the path as .name.
        if isinstance(file_obj, (str, os.PathLike)):
            original_filepath = Path(file_obj)
        else:
            original_filepath = Path(file_obj.name)

        # Pick a stem that has not been used yet in this batch.
        stem = original_filepath.stem
        unique_stem = stem
        duplicate_index = 1
        while unique_stem.casefold() in used_stems:
            unique_stem = f"{stem}_{duplicate_index}"
            duplicate_index += 1
        used_stems.add(unique_stem.casefold())

        # Copy the file to the clean input directory for unoconv
        input_file_copy = TEMP_INPUT_DIR / f"{unique_stem}{original_filepath.suffix}"
        try:
            shutil.copy(original_filepath, input_file_copy)
        except OSError as e:
            # One unreadable upload must not abort the rest of the batch.
            print(f"Could not stage {original_filepath.name} for conversion: {e}")
            continue

        if convert_single_file_with_unoconv(input_file_copy, TEMP_OUTPUT_DIR):
            success_count += 1

    if success_count == 0:
        # Hide download component if conversion failed
        return gr.update(visible=False), nothing_converted

    # 2. Zip the successful results: only regular .pdf files, in a stable order
    pdf_files = sorted(
        path
        for path in TEMP_OUTPUT_DIR.iterdir()
        if path.is_file() and path.suffix.lower() == ".pdf"
    )
    if not pdf_files:
        # Conversions were reported as successful but produced no PDF to offer.
        return gr.update(visible=False), nothing_converted

    zip_filename = TEMP_DIR / "converted_pdfs.zip"
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for pdf_path in pdf_files:
            zipf.write(pdf_path, arcname=pdf_path.name)

    # 3. Return the results
    status = f"✅ Successfully converted {success_count} of {total_count} files and zipped them. Download ready."

    # Show download component and return zip path
    return gr.update(value=str(zip_filename), visible=True), status

# --- Gradio Interface Definition ---
# Layout: an introductory blurb on top, then one row holding the upload widget
# next to a column with the action button, the status line and the download slot.
with gr.Blocks(title="Multi DOC/DOCX to PDF Converter") as demo:
    gr.Markdown(
        """
        # Multi DOC/DOCX to PDF Converter 📄➡️📜
        Upload multiple Microsoft Word files (.doc or .docx) and get them all converted to PDF in a single downloadable ZIP file.
        
        **Note**: The system needs LibreOffice and `unoconv` installed to function.
        """
    )

    with gr.Row():
        file_input = gr.File(
            label="Upload Word Files (.docx or .doc)",
            file_types=[".doc", ".docx"],
            file_count="multiple",
        )

        with gr.Column():
            convert_button = gr.Button("Convert to PDF", variant="primary")
            status_message = gr.Textbox(
                value="Upload your files and click Convert.",
                label="Status",
                interactive=False,
            )
            # Starts hidden; convert_docs_to_pdf reveals it once a ZIP exists.
            download_zip = gr.File(visible=False, label="Download Converted PDFs (ZIP)")

    # A click runs the conversion; its two return values update the download
    # slot and the status line, in that order.
    convert_button.click(
        fn=convert_docs_to_pdf,
        outputs=[download_zip, status_message],
        inputs=[file_input],
    )

# Start the web app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()