doc2pdf / app.py
mobenta's picture
Update app.py
e83f3b8 verified
# app.py
import gradio as gr
import os
import zipfile
import shutil
import subprocess
from pathlib import Path
# --- Configuration ---
# Scratch workspace for conversions; the path is relative, so it resolves
# against the process's current working directory.
TEMP_DIR = Path("./temp_conversion_data")
# Staging area that receives copies of the uploaded documents.
TEMP_INPUT_DIR = TEMP_DIR.joinpath("input")
# Directory handed to the converter as the destination for generated PDFs.
TEMP_OUTPUT_DIR = TEMP_DIR.joinpath("output")
def setup_conversion_dirs():
    """Reset the conversion workspace to an empty state.

    Removes the whole ``TEMP_DIR`` tree when it is present, then recreates
    the input and output directories underneath it.

    Returns:
        True when both directories are ready; False if any step raised, in
        which case the error is printed.
    """
    try:
        # Drop leftovers from a previous run so old files never leak into
        # the next batch.
        if TEMP_DIR.exists():
            shutil.rmtree(TEMP_DIR)
        for workdir in (TEMP_INPUT_DIR, TEMP_OUTPUT_DIR):
            workdir.mkdir(parents=True, exist_ok=True)
    except Exception as err:
        print(f"Error setting up directories: {err}")
        return False
    return True
def convert_single_file_with_unoconv(input_path, output_dir):
    """Render one DOC/DOCX file as a PDF by invoking the ``unoconv`` CLI.

    Args:
        input_path: Path of the document to convert.
        output_dir: Value passed to unoconv's ``-o`` option.

    Returns:
        True when unoconv exits with status 0 within 60 seconds; False on a
        non-zero exit, a timeout, a missing ``unoconv`` executable, or any
        other exception. Every outcome is reported with ``print``.
    """
    source_name = Path(input_path).name
    # Equivalent shell form: unoconv -f pdf -o [output_dir] [input_file]
    command = ['unoconv', '-f', 'pdf', '-o', str(output_dir), str(input_path)]
    try:
        completed = subprocess.run(
            command,
            check=True,  # non-zero exit status becomes CalledProcessError
            capture_output=True,
            text=True,
            timeout=60,  # upper bound, in seconds, for a single document
        )
        print(f"Successfully converted {source_name}. Output: {completed.stdout}")
        return True
    except subprocess.CalledProcessError as err:
        # Keep both streams in the log so failures can be diagnosed.
        print(f"UNOCONV FAILED for {source_name}. Stderr: {err.stderr}, Stdout: {err.stdout}")
    except subprocess.TimeoutExpired:
        print(f"Conversion of {source_name} timed out.")
    except FileNotFoundError:
        # Raised when the 'unoconv' executable itself cannot be located.
        print("CRITICAL ERROR: 'unoconv' command not found. It is not installed or not in PATH.")
    except Exception as err:
        print(f"An unexpected error occurred during conversion of {source_name}: {err}")
    # Every handled failure path ends up here.
    return False
def convert_docs_to_pdf(doc_files):
    """Convert uploaded Word documents to PDF and bundle the PDFs in a ZIP.

    Args:
        doc_files: Value delivered by the ``gr.File`` input: a list whose
            items are either filesystem paths (``str`` / ``os.PathLike``) or
            objects exposing the path as ``.name``. May be None or empty.

    Returns:
        A ``(download_update, status_text)`` pair for the ``download_zip``
        and ``status_message`` outputs. The download component is hidden on
        every failure path and shown with the ZIP path on success.
    """
    if not doc_files:
        return gr.update(visible=False), "Please upload one or more .docx or .doc files."
    # NOTE(review): the workspace is one shared directory that is wiped on
    # every call, so overlapping requests would clobber each other's files.
    if not setup_conversion_dirs():
        return gr.update(visible=False), "Error: Could not set up temporary directories."

    success_count = 0
    total_count = len(doc_files)
    # Stems already staged. The converted file is named after the input's
    # stem, so "a.doc" and "a.docx" (or two uploads both called "a.docx")
    # would otherwise overwrite each other.
    used_stems = set()

    # 1. Process each uploaded file
    for file_obj in doc_files:
        # Accept plain paths as well as wrapper objects carrying `.name`.
        if isinstance(file_obj, (str, os.PathLike)):
            original_filepath = Path(file_obj)
        else:
            original_filepath = Path(file_obj.name)

        stem = original_filepath.stem
        unique_stem = stem
        duplicate_no = 1
        # casefold() keeps names distinct on case-insensitive filesystems too.
        while unique_stem.casefold() in used_stems:
            duplicate_no += 1
            unique_stem = f"{stem}_{duplicate_no}"
        used_stems.add(unique_stem.casefold())

        # Copy the file to the clean input directory for unoconv
        input_file_copy = TEMP_INPUT_DIR / f"{unique_stem}{original_filepath.suffix}"
        try:
            shutil.copy(original_filepath, input_file_copy)
        except OSError as e:
            # One unreadable upload must not abort the rest of the batch.
            print(f"Could not stage {original_filepath.name} for conversion: {e}")
            continue

        if convert_single_file_with_unoconv(input_file_copy, TEMP_OUTPUT_DIR):
            success_count += 1

    # Only real PDF files belong in the archive; sorted for a stable order.
    pdf_files = sorted(
        entry
        for entry in TEMP_OUTPUT_DIR.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    )
    if success_count == 0 or not pdf_files:
        # Hide download component if conversion failed
        return gr.update(visible=False), "No files were converted successfully. Check the Space logs for details."

    # 2. Zip the successful results
    zip_filename = TEMP_DIR / "converted_pdfs.zip"
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for pdf_file in pdf_files:
            zipf.write(pdf_file, arcname=pdf_file.name)

    # 3. Return the results
    status = f"✅ Successfully converted {success_count} of {total_count} files and zipped them. Download ready."
    # Show download component and return zip path
    return gr.update(value=str(zip_filename), visible=True), status
# --- Gradio Interface Definition ---
# NOTE(review): this file was received with its indentation stripped; the
# nesting of the layout blocks below is a reconstruction — confirm it
# against the deployed app before relying on it.
with gr.Blocks(title="Multi DOC/DOCX to PDF Converter") as demo:
    # Page header and usage note shown above the controls.
    gr.Markdown(
        """
# Multi DOC/DOCX to PDF Converter 📄➡️📜
Upload multiple Microsoft Word files (.doc or .docx) and get them all converted to PDF in a single downloadable ZIP file.
**Note**: The system needs LibreOffice and `unoconv` installed to function.
"""
    )
    with gr.Row():
        # Multi-file upload widget limited to the two Word extensions.
        file_input = gr.File(
            file_count="multiple",
            label="Upload Word Files (.docx or .doc)",
            file_types=[".doc", ".docx"]
        )
    with gr.Column():
        convert_button = gr.Button("Convert to PDF", variant="primary")
        # Read-only box that displays the handler's status text.
        status_message = gr.Textbox(label="Status", value="Upload your files and click Convert.", interactive=False)
        # Created hidden; the click handler's first return value updates it.
        download_zip = gr.File(label="Download Converted PDFs (ZIP)", visible=False)
    # Connect the button click to the conversion function
    # The handler returns two values, mapped in order onto `outputs`.
    convert_button.click(
        fn=convert_docs_to_pdf,
        inputs=[file_input],
        outputs=[download_zip, status_message]
    )
# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()