# app.py
import gradio as gr
import os
import zipfile
import shutil
import subprocess
from pathlib import Path
# --- Configuration ---
# Scratch workspace for a conversion run: uploaded documents are copied into
# the "input" subdirectory and unoconv writes its PDFs to "output".
TEMP_DIR = Path("./temp_conversion_data")
TEMP_INPUT_DIR = TEMP_DIR.joinpath("input")
TEMP_OUTPUT_DIR = TEMP_DIR.joinpath("output")
def setup_conversion_dirs():
    """Reset the temporary workspace ahead of a new conversion run.

    Deletes ``TEMP_DIR`` (and everything below it) when it exists, then
    recreates empty ``TEMP_INPUT_DIR`` and ``TEMP_OUTPUT_DIR`` directories.

    Returns:
        True when both directories are ready; False when a filesystem error
        occurred (the error is printed to stdout).
    """
    try:
        if TEMP_DIR.exists():
            shutil.rmtree(TEMP_DIR)
        TEMP_INPUT_DIR.mkdir(parents=True, exist_ok=True)
        TEMP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # rmtree/mkdir report failures (permissions, busy files, a file in
        # the way of a directory, ...) as OSError; anything else is a
        # programming error and should not be hidden behind a False return.
        print(f"Error setting up directories: {e}")
        return False
    return True
def convert_single_file_with_unoconv(input_path, output_dir, timeout=60):
    """Convert a single DOC/DOCX file to PDF with the ``unoconv`` CLI.

    Runs ``unoconv -f pdf -o <output_dir> <input_path>`` and prints the
    outcome to stdout. Failures of the subprocess are reported through the
    return value rather than raised.

    Args:
        input_path: Path (``str`` or ``Path``) of the document to convert.
        output_dir: Value handed to unoconv's ``-o`` option.
        timeout: Seconds to wait for the conversion before giving up.
            Defaults to 60, the limit that used to be hard-coded.

    Returns:
        True if unoconv exited with status 0; False on a non-zero exit,
        a timeout, a missing ``unoconv`` executable, or any other error.
    """
    filename = Path(input_path).name
    try:
        # Command: unoconv -f pdf -o [output_dir] [input_file]
        result = subprocess.run(
            ['unoconv', '-f', 'pdf', '-o', str(output_dir), str(input_path)],
            check=True,  # Raises CalledProcessError on non-zero exit code
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.CalledProcessError as e:
        # Detailed error log for debugging
        print(f"UNOCONV FAILED for {filename}. Stderr: {e.stderr}, Stdout: {e.stdout}")
        return False
    except subprocess.TimeoutExpired:
        print(f"Conversion of {filename} timed out.")
        return False
    except FileNotFoundError:
        # subprocess raises this ([Errno 2]) when the 'unoconv' executable
        # itself cannot be found, i.e. it is not installed or not on PATH.
        print("CRITICAL ERROR: 'unoconv' command not found. It is not installed or not in PATH.")
        return False
    except Exception as e:
        # Last-resort boundary: one bad file must not abort the whole batch.
        print(f"An unexpected error occurred during conversion of {filename}: {e}")
        return False
    else:
        print(f"Successfully converted {filename}. Output: {result.stdout}")
        return True
def _unique_upload_name(filename, used_stems):
    """Return *filename*, suffixed with ``_<n>`` if its stem is already taken.

    unoconv names each PDF after the input file's stem, so two uploads that
    share a stem (``a.doc`` and ``a.docx``, or the same name uploaded twice)
    would otherwise overwrite each other. ``used_stems`` is updated in place;
    stems are compared case-insensitively.
    """
    candidate = Path(filename)
    stem, suffix = candidate.stem, candidate.suffix
    unique_stem = stem
    counter = 1
    while unique_stem.casefold() in used_stems:
        unique_stem = f"{stem}_{counter}"
        counter += 1
    used_stems.add(unique_stem.casefold())
    return f"{unique_stem}{suffix}"


def convert_docs_to_pdf(doc_files):
    """Convert uploaded Word documents to PDF and bundle them into a ZIP.

    Each upload is copied into ``TEMP_INPUT_DIR``, converted with unoconv
    into ``TEMP_OUTPUT_DIR``, and every file found in the output directory
    is then written to ``converted_pdfs.zip`` inside ``TEMP_DIR``.

    Args:
        doc_files: The uploads supplied by the Gradio file component. Each
            item may be a plain path (``str`` / ``os.PathLike``) or a file
            wrapper that exposes its path through ``.name``.

    Returns:
        A ``(download_update, status_message)`` tuple: a ``gr.update`` for
        the download component (hidden on failure, pointing at the ZIP on
        success) and a human-readable status string.
    """
    if not doc_files:
        return gr.update(visible=False), "Please upload one or more .docx or .doc files."
    if not setup_conversion_dirs():
        return gr.update(visible=False), "Error: Could not set up temporary directories."

    success_count = 0
    total_count = len(doc_files)
    used_stems = set()

    # 1. Process each uploaded file
    for file_obj in doc_files:
        # Plain paths are used as-is; file wrappers carry their path in
        # `.name`. (A bare getattr would be wrong for Path objects, whose
        # `.name` is only the basename.)
        if isinstance(file_obj, (str, os.PathLike)):
            original_filepath = os.fspath(file_obj)
        else:
            original_filepath = file_obj.name

        # Copy the file to the clean input directory for unoconv, under a
        # name whose stem is unique within this batch.
        filename = _unique_upload_name(Path(original_filepath).name, used_stems)
        input_file_copy = TEMP_INPUT_DIR / filename
        shutil.copy(original_filepath, input_file_copy)

        if convert_single_file_with_unoconv(input_file_copy, TEMP_OUTPUT_DIR):
            success_count += 1

    if success_count == 0:
        # Hide download component if conversion failed
        return gr.update(visible=False), "No files were converted successfully. Check the Space logs for details."

    # 2. Zip the successful results
    zip_filename = TEMP_DIR / "converted_pdfs.zip"
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Only the output directory is archived, so failed inputs are not included
        for file in TEMP_OUTPUT_DIR.iterdir():
            zipf.write(file, arcname=file.name)

    # 3. Return the results
    status = f"✅ Successfully converted {success_count} of {total_count} files and zipped them. Download ready."
    # Show download component and return zip path
    return gr.update(value=str(zip_filename), visible=True), status
# --- Gradio Interface Definition ---
# NOTE(review): the indentation of this section was lost in this copy of the
# file, so the nesting of the `with` blocks below cannot be recovered from the
# text alone. Confirm the intended layout (in particular whether gr.Column
# belongs inside gr.Row) before re-indenting.
# Builds the Blocks app bound to `demo`, which the __main__ guard launches.
with gr.Blocks(title="Multi DOC/DOCX to PDF Converter") as demo:
# Introductory text passed to gr.Markdown.
gr.Markdown(
"""
# Multi DOC/DOCX to PDF Converter 📄➡️📜
Upload multiple Microsoft Word files (.doc or .docx) and get them all converted to PDF in a single downloadable ZIP file.
**Note**: The system needs LibreOffice and `unoconv` installed to function.
"""
)
with gr.Row():
# Upload widget: accepts multiple files, restricted to .doc/.docx.
file_input = gr.File(
file_count="multiple",
label="Upload Word Files (.docx or .doc)",
file_types=[".doc", ".docx"]
)
with gr.Column():
convert_button = gr.Button("Convert to PDF", variant="primary")
# Read-only textbox that shows the status string returned by convert_docs_to_pdf.
status_message = gr.Textbox(label="Status", value="Upload your files and click Convert.", interactive=False)
# Created hidden; convert_docs_to_pdf returns a gr.update that makes it
# visible and points it at the ZIP when at least one file converts.
download_zip = gr.File(label="Download Converted PDFs (ZIP)", visible=False)
# Connect the button click to the conversion function
# The order of `outputs` matches the (download update, status text) tuple
# that convert_docs_to_pdf returns.
convert_button.click(
fn=convert_docs_to_pdf,
inputs=[file_input],
outputs=[download_zip, status_message]
)
# Start the Gradio server only when this file is run as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()