#!/usr/bin/env python3
"""
Document conversion utilities for the DOCX to PDF converter
"""

import base64
import logging
import os
import subprocess
import tempfile
from typing import Optional

logger = logging.getLogger(__name__)


class DocumentConverter:
    """Handle document conversion operations.

    Conversions are performed by running LibreOffice in headless mode as a
    subprocess. None of the public methods raise: failures are logged and
    reported through the return value.

    Attributes:
        max_conversion_time: Seconds a single conversion may run before the
            subprocess is aborted.
        libreoffice_binary: Executable name or path used to start LibreOffice.
    """

    def __init__(
        self,
        max_conversion_time: int = 120,
        libreoffice_binary: str = "libreoffice",
    ):
        """Create a converter.

        Args:
            max_conversion_time: Timeout in seconds for one conversion
                (defaults to 2 minutes).
            libreoffice_binary: Executable to invoke; override when the
                binary is installed under another name or path.
        """
        self.max_conversion_time = max_conversion_time
        self.libreoffice_binary = libreoffice_binary

    def convert_docx_to_pdf(self, input_path: str, output_path: str) -> bool:
        """Convert DOCX to PDF using LibreOffice.

        LibreOffice chooses the output file name itself (input base name
        plus ``.pdf`` inside ``--outdir``), so the conversion is written to
        a scratch directory and then moved onto ``output_path``. This makes
        any output file name work and prevents a stale file already sitting
        at ``output_path`` from being mistaken for a fresh result.

        Args:
            input_path: Path of the document to convert.
            output_path: Path where the resulting PDF must end up. Missing
                parent directories are created.

        Returns:
            True if the PDF was produced and placed at ``output_path``,
            False on any failure (missing input, non-zero exit status,
            timeout, no PDF produced, or an unexpected error).
        """
        try:
            # Validate input file exists
            if not os.path.exists(input_path):
                logger.error("Input file does not exist: %s", input_path)
                return False

            # abspath guarantees a non-empty directory even when the caller
            # passes a bare file name such as "out.pdf".
            target_path = os.path.abspath(output_path)
            target_dir = os.path.dirname(target_path)
            os.makedirs(target_dir, exist_ok=True)

            # The scratch directory lives next to the target so the final
            # os.replace stays on one filesystem.
            with tempfile.TemporaryDirectory(dir=target_dir) as work_dir:
                # Use LibreOffice headless mode for conversion
                cmd = [
                    self.libreoffice_binary,
                    "--headless",
                    "--convert-to",
                    "pdf",
                    "--outdir",
                    work_dir,
                    input_path,
                ]

                logger.info("Converting %s to PDF...", input_path)
                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    # Undecodable tool output must not abort a good run.
                    errors="replace",
                    timeout=self.max_conversion_time,
                )

                if result.returncode != 0:
                    logger.error("Conversion failed: %s", result.stderr)
                    return False

                # The scratch directory started empty, so any PDF in it is
                # the conversion result regardless of how it was named.
                produced = [
                    name
                    for name in os.listdir(work_dir)
                    if name.lower().endswith(".pdf")
                ]
                if not produced:
                    logger.error("PDF file was not created")
                    return False

                os.replace(os.path.join(work_dir, produced[0]), target_path)

            logger.info(
                "Successfully converted %s to %s", input_path, output_path
            )
            return True

        except subprocess.TimeoutExpired:
            logger.error("Conversion timed out")
            return False
        except Exception as e:
            # Boundary method: callers rely on a bool, never an exception.
            logger.error("Conversion error: %s", e)
            return False

    def decode_base64_content(self, base64_content: str) -> Optional[bytes]:
        """Decode base64 encoded content.

        Decoding uses ``base64.b64decode`` in its default non-validating
        mode, so characters outside the base64 alphabet are discarded
        rather than rejected.

        Args:
            base64_content: Base64 text to decode.

        Returns:
            The decoded bytes, or None if the input cannot be decoded
            (bad padding, non-ASCII text, or a non-string value).
        """
        try:
            return base64.b64decode(base64_content)
        except (ValueError, TypeError) as e:
            # binascii.Error is a ValueError subclass.
            logger.error("Failed to decode base64 content: %s", e)
            return None

    def validate_libreoffice(self) -> bool:
        """Validate LibreOffice installation.

        Runs the configured binary with ``--version`` and a 10 second
        timeout.

        Returns:
            True if the command exits with status 0, False if it exits
            non-zero, cannot be started, or times out.
        """
        try:
            result = subprocess.run(
                [self.libreoffice_binary, "--version"],
                capture_output=True,
                text=True,
                errors="replace",
                timeout=10,
            )

            if result.returncode != 0:
                logger.error("LibreOffice not found or not working")
                return False

            logger.info("LibreOffice version: %s", result.stdout.strip())
            return True

        except Exception as e:
            # Covers a missing executable (OSError) and timeouts alike.
            logger.error("LibreOffice validation error: %s", e)
            return False