|
|
|
|
|
""" |
|
|
Document conversion utilities for the DOCX to PDF converter |
|
|
""" |
|
|
|
|
|
import os |
|
|
import subprocess |
|
|
import logging |
|
|
import base64 |
|
|
from typing import Optional |
|
|
|
|
|
logger = logging.getLogger(__name__)


class DocumentConverter:
    """Convert DOCX documents to PDF by driving a headless LibreOffice process.

    Attributes:
        max_conversion_time: Seconds a single conversion may run before the
            LibreOffice subprocess is aborted and the conversion is reported
            as failed.
    """

    def __init__(self, max_conversion_time: int = 120):
        """Create a converter.

        Args:
            max_conversion_time: Timeout, in seconds, applied to each
                LibreOffice conversion run. Defaults to 120.
        """
        self.max_conversion_time = max_conversion_time

    @staticmethod
    def _libreoffice_output_path(input_path: str, out_dir: str) -> str:
        """Return the path LibreOffice writes to for ``input_path`` in ``out_dir``.

        LibreOffice names the converted file after the input file, swapping
        only the final extension for ``.pdf``.
        """
        stem = os.path.splitext(os.path.basename(input_path))[0]
        return os.path.join(out_dir, stem + ".pdf")

    def convert_docx_to_pdf(self, input_path: str, output_path: str) -> bool:
        """Convert a DOCX file to PDF using LibreOffice.

        Args:
            input_path: Path of the document to convert.
            output_path: Path at which the resulting PDF should exist.

        Returns:
            True if the PDF exists at ``output_path`` after the conversion;
            False if the input is missing, LibreOffice fails or times out,
            no PDF was produced, or any other error occurs. Errors are
            logged rather than raised.
        """
        try:
            if not os.path.exists(input_path):
                logger.error("Input file does not exist: %s", input_path)
                return False

            # dirname() is "" for a bare filename; LibreOffice needs a real
            # directory argument, so fall back to the current directory.
            out_dir = os.path.dirname(output_path) or "."

            cmd = [
                "libreoffice",
                "--headless",
                "--convert-to", "pdf",
                "--outdir", out_dir,
                input_path,
            ]

            logger.info("Converting %s to PDF...", input_path)

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=self.max_conversion_time,
            )

            if result.returncode != 0:
                logger.error("Conversion failed: %s", result.stderr)
                return False

            # LibreOffice ignores the requested output filename and writes
            # "<input stem>.pdf" into out_dir, so move the result to the
            # path the caller asked for when the two differ.
            produced_path = self._libreoffice_output_path(input_path, out_dir)
            if (
                os.path.exists(produced_path)
                and os.path.abspath(produced_path) != os.path.abspath(output_path)
            ):
                os.replace(produced_path, output_path)

            # LibreOffice can exit with status 0 without writing a file, so
            # the exit code alone is not proof of success.
            if not os.path.exists(output_path):
                logger.error("PDF file was not created")
                return False

            logger.info(
                "Successfully converted %s to %s", input_path, output_path
            )
            return True

        except subprocess.TimeoutExpired:
            logger.error("Conversion timed out")
            return False
        except Exception as e:
            # Deliberate catch-all: callers rely on a boolean result, e.g.
            # when the libreoffice executable is missing (FileNotFoundError).
            logger.error("Conversion error: %s", e)
            return False

    def decode_base64_content(self, base64_content: str) -> Optional[bytes]:
        """Decode base64 encoded content.

        Args:
            base64_content: The base64 text to decode.

        Returns:
            The decoded bytes, or None (after logging the error) if the
            input cannot be decoded.
        """
        try:
            return base64.b64decode(base64_content)
        except (ValueError, TypeError) as e:
            # binascii.Error (bad padding / invalid data) subclasses
            # ValueError; TypeError covers inputs that are not str/bytes.
            logger.error("Failed to decode base64 content: %s", e)
            return None

    def validate_libreoffice(self) -> bool:
        """Check that the ``libreoffice`` executable can be run.

        Runs ``libreoffice --version`` with a 10 second timeout.

        Returns:
            True if the command exits with status 0 (the reported version is
            logged); False if it exits non-zero, cannot be started, or times
            out.
        """
        try:
            result = subprocess.run(
                ["libreoffice", "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
        except (OSError, subprocess.SubprocessError) as e:
            # OSError: executable missing or not runnable;
            # SubprocessError: includes TimeoutExpired.
            logger.error("LibreOffice validation error: %s", e)
            return False

        if result.returncode != 0:
            logger.error("LibreOffice not found or not working")
            return False

        logger.info("LibreOffice version: %s", result.stdout.strip())
        return True