| |
| import gradio as gr |
| import os |
| import zipfile |
| import shutil |
| import subprocess |
| from pathlib import Path |
|
|
| |
| |
# Root of the scratch workspace, relative to the current working directory.
TEMP_DIR = Path("./temp_conversion_data")
# Uploaded Word files are copied here before being converted.
TEMP_INPUT_DIR = TEMP_DIR.joinpath("input")
# Directory passed to unoconv as the destination for generated PDFs.
TEMP_OUTPUT_DIR = TEMP_DIR.joinpath("output")
|
|
def setup_conversion_dirs():
    """Reset the temporary workspace so a conversion starts from empty directories.

    Any existing ``TEMP_DIR`` tree is removed, then the input and output
    directories are (re)created.

    Returns:
        True once both directories exist; False if any step raised, in which
        case the error is printed.
    """
    try:
        # Drop leftovers from a previous run before recreating the layout.
        if TEMP_DIR.exists():
            shutil.rmtree(TEMP_DIR)

        for workspace_dir in (TEMP_INPUT_DIR, TEMP_OUTPUT_DIR):
            workspace_dir.mkdir(parents=True, exist_ok=True)
    except Exception as setup_error:
        print(f"Error setting up directories: {setup_error}")
        return False

    return True
|
|
def convert_single_file_with_unoconv(input_path, output_dir):
    """Convert one DOC/DOCX file to PDF by running the ``unoconv`` command.

    Args:
        input_path: Path of the Word document to convert.
        output_dir: Directory handed to unoconv's ``-o`` option.

    Returns:
        True if unoconv exits with status 0 within the 60-second timeout.
        False on a non-zero exit status, a timeout, a missing ``unoconv``
        executable, or any other exception. Each outcome is printed.
    """
    filename = Path(input_path).name

    try:
        command = ['unoconv', '-f', 'pdf', '-o', str(output_dir), str(input_path)]
        completed = subprocess.run(
            command,
            check=True,  # a non-zero exit status raises CalledProcessError
            capture_output=True,
            text=True,
            timeout=60,
        )
        print(f"Successfully converted {filename}. Output: {completed.stdout}")
        return True
    except subprocess.CalledProcessError as failure:
        problem = f"UNOCONV FAILED for {filename}. Stderr: {failure.stderr}, Stdout: {failure.stdout}"
    except subprocess.TimeoutExpired:
        problem = f"Conversion of {filename} timed out."
    except FileNotFoundError:
        # Raised by subprocess when the executable itself cannot be found.
        problem = "CRITICAL ERROR: 'unoconv' command not found. It is not installed or not in PATH."
    except Exception as unexpected:
        problem = f"An unexpected error occurred during conversion of {filename}: {unexpected}"

    # Every failure path reports its message and signals failure the same way.
    print(problem)
    return False
|
|
def _resolve_upload_path(file_obj):
    """Return the on-disk path of an upload given as a path or a file wrapper."""
    # gr.File hands over plain path strings when type="filepath" and
    # tempfile-style wrappers (exposing ``.name``) otherwise; accept both.
    if isinstance(file_obj, (str, os.PathLike)):
        return Path(file_obj)
    return Path(file_obj.name)


def _reserve_input_copy_path(filename, taken_stems):
    """Return a path in TEMP_INPUT_DIR whose stem is unused in this batch."""
    original = Path(filename)
    stem = original.stem
    suffix_number = 1
    # The PDF is named after the input stem, so "a.doc" and "a.docx" (or two
    # uploads with the same name) would otherwise overwrite each other.
    # Compare case-insensitively so the names also stay distinct on
    # case-insensitive filesystems.
    while stem.lower() in taken_stems:
        stem = f"{original.stem}_{suffix_number}"
        suffix_number += 1
    taken_stems.add(stem.lower())
    return TEMP_INPUT_DIR / f"{stem}{original.suffix}"


def convert_docs_to_pdf(doc_files):
    """Convert uploaded Word files to PDF with unoconv and zip the results.

    Args:
        doc_files: The uploads from the file input, either filesystem paths
            or file-like objects whose ``.name`` is the path. May be empty
            or None.

    Returns:
        A ``(download_update, status_text)`` pair. On success the update
        points the download component at the ZIP and makes it visible;
        otherwise the component is hidden and the text explains why.

    NOTE(review): the workspace under TEMP_DIR is shared and wiped at the
    start of every call, so overlapping calls would interfere with each
    other - confirm that requests are processed one at a time.
    """
    if not doc_files:
        return gr.update(visible=False), "Please upload one or more .docx or .doc files."

    if not setup_conversion_dirs():
        return gr.update(visible=False), "Error: Could not set up temporary directories."

    success_count = 0
    total_count = len(doc_files)
    taken_stems = set()

    for file_obj in doc_files:
        source_path = _resolve_upload_path(file_obj)
        input_file_copy = _reserve_input_copy_path(source_path.name, taken_stems)

        # A single unreadable upload must not abort the rest of the batch.
        try:
            shutil.copy(source_path, input_file_copy)
        except OSError as e:
            print(f"Could not stage {source_path.name} for conversion: {e}")
            continue

        if convert_single_file_with_unoconv(input_file_copy, TEMP_OUTPUT_DIR):
            success_count += 1

    if success_count == 0:
        return gr.update(visible=False), "No files were converted successfully. Check the Space logs for details."

    # The archive lives in TEMP_DIR itself, not in the output directory, so
    # it is never picked up by the loop that fills it.
    zip_filename = TEMP_DIR / "converted_pdfs.zip"

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file in TEMP_OUTPUT_DIR.iterdir():
            # Store entries flat, without the temporary directory prefix.
            zipf.write(file, arcname=file.name)

    status = f"✅ Successfully converted {success_count} of {total_count} files and zipped them. Download ready."

    return gr.update(value=str(zip_filename), visible=True), status
|
|
| |
# Build the Gradio interface: an intro text, a multi-file upload, a convert
# button, a read-only status box and an initially hidden download slot.
# NOTE(review): the original indentation of this section was lost; the Column
# is assumed to be nested inside the Row - confirm against the intended layout.
with gr.Blocks(title="Multi DOC/DOCX to PDF Converter") as demo:
    gr.Markdown(
        """
        # Multi DOC/DOCX to PDF Converter 📄➡️📜
        Upload multiple Microsoft Word files (.doc or .docx) and get them all converted to PDF in a single downloadable ZIP file.

        **Note**: The system needs LibreOffice and `unoconv` installed to function.
        """
    )

    with gr.Row():
        # Accepts several files at once, restricted to Word extensions.
        file_input = gr.File(
            file_count="multiple",
            label="Upload Word Files (.docx or .doc)",
            file_types=[".doc", ".docx"]
        )

        with gr.Column():
            convert_button = gr.Button("Convert to PDF", variant="primary")
            # Read-only text area that shows the outcome of the last conversion.
            status_message = gr.Textbox(label="Status", value="Upload your files and click Convert.", interactive=False)
            # Hidden at start; convert_docs_to_pdf returns an update that reveals it.
            download_zip = gr.File(label="Download Converted PDFs (ZIP)", visible=False)

    # The handler's two return values map, in order, onto the two outputs.
    convert_button.click(
        fn=convert_docs_to_pdf,
        inputs=[file_input],
        outputs=[download_zip, status_message]
    )
|
|
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()