pdf-4 / src /utils /converter.py
fokan's picture
first
61b4298
#!/usr/bin/env python3
"""
Document conversion utilities for the DOCX to PDF converter
"""
import os
import subprocess
import logging
import base64
from typing import Optional
logger = logging.getLogger(__name__)


class DocumentConverter:
    """Convert DOCX documents to PDF by running LibreOffice in headless mode.

    Every public method reports failure through its return value (``False``
    or ``None``) and logs the reason; none of them raises to the caller.
    """

    def __init__(self):
        # Upper bound, in seconds, for a single LibreOffice conversion run.
        self.max_conversion_time = 120  # 2 minutes

    @staticmethod
    def _libreoffice_output_path(input_path: str, out_dir: str) -> str:
        """Return the path LibreOffice writes for *input_path* in *out_dir*.

        ``--convert-to pdf`` names the result after the input file (its last
        extension replaced by ``.pdf``) and places it in ``--outdir``; the
        caller has no way to choose the file name on the command line.
        """
        stem = os.path.splitext(os.path.basename(input_path))[0]
        return os.path.join(out_dir, stem + ".pdf")

    def convert_docx_to_pdf(self, input_path: str, output_path: str) -> bool:
        """Convert the DOCX at *input_path* to a PDF stored at *output_path*.

        Args:
            input_path: Path of the existing source document.
            output_path: Path where the resulting PDF must end up. Its
                basename does not have to match the input's basename.

        Returns:
            True if the PDF exists at *output_path* after the conversion,
            False on any failure (missing input, non-zero exit status,
            timeout, missing output, or any unexpected error).
        """
        try:
            # Validate input file exists
            if not os.path.exists(input_path):
                logger.error(f"Input file does not exist: {input_path}")
                return False

            # dirname() of a bare file name is "", which is not a usable
            # value for --outdir; fall back to the current directory.
            out_dir = os.path.dirname(output_path) or "."
            os.makedirs(out_dir, exist_ok=True)

            # Use LibreOffice headless mode for conversion
            cmd = [
                "libreoffice",
                "--headless",
                "--convert-to", "pdf",
                "--outdir", out_dir,
                input_path,
            ]
            logger.info(f"Converting {input_path} to PDF...")
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=self.max_conversion_time,
            )
            if result.returncode != 0:
                logger.error(f"Conversion failed: {result.stderr}")
                return False

            # LibreOffice picks the output name itself; when the caller asked
            # for a different name, move the produced file into place. Both
            # paths are in the same directory, so os.replace is sufficient.
            produced_path = self._libreoffice_output_path(input_path, out_dir)
            names_differ = (
                os.path.abspath(produced_path) != os.path.abspath(output_path)
            )
            if names_differ and os.path.exists(produced_path):
                os.replace(produced_path, output_path)

            # Check if PDF was created
            if not os.path.exists(output_path):
                logger.error("PDF file was not created")
                return False

            logger.info(f"Successfully converted {input_path} to {output_path}")
            return True
        except subprocess.TimeoutExpired:
            logger.error("Conversion timed out")
            return False
        except Exception as e:
            # Also covers a missing "libreoffice" executable (FileNotFoundError).
            logger.error(f"Conversion error: {e}")
            return False

    def decode_base64_content(self, base64_content: str) -> Optional[bytes]:
        """Decode base64 encoded content.

        Whitespace (for example line breaks in wrapped base64) is ignored.
        Any other character outside the base64 alphabet makes the input
        invalid instead of being silently skipped.

        Args:
            base64_content: Base64 text (ASCII ``str``; ``bytes`` is also
                accepted).

        Returns:
            The decoded bytes, or None if the input is not valid base64.
        """
        try:
            # Strip whitespace ourselves so strict validation below does not
            # reject legitimately wrapped input.
            if isinstance(base64_content, str):
                compact = "".join(base64_content.split())
            else:
                compact = b"".join(bytes(base64_content).split())
            # validate=True: reject stray characters rather than dropping them
            # and returning corrupted bytes.
            return base64.b64decode(compact, validate=True)
        except Exception as e:
            logger.error(f"Failed to decode base64 content: {e}")
            return None

    def validate_libreoffice(self) -> bool:
        """Check that the ``libreoffice`` command can be executed.

        Runs ``libreoffice --version`` with a 10 second timeout.

        Returns:
            True if the command exits with status 0, False if it exits with
            another status, cannot be started, or times out.
        """
        try:
            result = subprocess.run(
                ["libreoffice", "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode != 0:
                logger.error("LibreOffice not found or not working")
                return False
            logger.info(f"LibreOffice version: {result.stdout.strip()}")
            return True
        except Exception as e:
            logger.error(f"LibreOffice validation error: {e}")
            return False