File size: 2,944 Bytes
623e14e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61b4298
623e14e
 
 
 
 
 
 
 
 
61b4298
623e14e
 
 
61b4298
623e14e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61b4298
623e14e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python3
"""
Document conversion utilities for the DOCX to PDF converter
"""

import base64
import logging
import os
import subprocess
import tempfile
from typing import Optional

logger = logging.getLogger(__name__)


class DocumentConverter:
    """Convert DOCX documents to PDF by driving LibreOffice in headless mode.

    The public methods report failure through their return value
    (``False`` or ``None``) and log the cause.
    """

    def __init__(self):
        # Upper bound, in seconds, passed as the subprocess timeout for a
        # single conversion run.
        self.max_conversion_time = 120  # 2 minutes

    @staticmethod
    def _generated_pdf_path(input_path: str, output_dir: str) -> str:
        """Return the file LibreOffice produces for *input_path* in *output_dir*.

        LibreOffice derives the output name from the input file: the input's
        base name with its extension replaced by ``.pdf``.
        """
        stem = os.path.splitext(os.path.basename(input_path))[0]
        return os.path.join(output_dir, stem + ".pdf")

    def convert_docx_to_pdf(self, input_path: str, output_path: str) -> bool:
        """Convert the document at *input_path* to a PDF at *output_path*.

        LibreOffice only accepts an output *directory* and names the result
        after the input file, so the conversion is written to a scratch
        directory created inside the destination directory and the produced
        file is then moved onto *output_path*. This honours any requested
        output file name and prevents a stale file that already sits at
        *output_path* from being mistaken for a fresh conversion result.

        Args:
            input_path: Path of the document to convert.
            output_path: Path where the resulting PDF must end up.

        Returns:
            ``True`` if the PDF was produced and stored at *output_path*;
            ``False`` if the input is missing, LibreOffice fails or times
            out, or any other error occurs (the cause is logged).
        """
        try:
            # Validate input file exists
            if not os.path.exists(input_path):
                logger.error(f"Input file does not exist: {input_path}")
                return False

            # dirname() is "" for a bare file name; fall back to the current
            # directory so LibreOffice always gets a usable --outdir.
            output_dir = os.path.dirname(output_path) or os.curdir
            os.makedirs(output_dir, exist_ok=True)

            logger.info(f"Converting {input_path} to PDF...")

            # The scratch directory lives inside output_dir so the final
            # os.replace() stays on one filesystem.
            with tempfile.TemporaryDirectory(
                dir=output_dir, prefix=".docx2pdf-"
            ) as scratch_dir:
                # Use LibreOffice headless mode for conversion
                cmd = [
                    "libreoffice",
                    "--headless",
                    "--convert-to", "pdf",
                    "--outdir", scratch_dir,
                    input_path,
                ]

                result = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=self.max_conversion_time,
                )

                if result.returncode != 0:
                    logger.error(f"Conversion failed: {result.stderr}")
                    return False

                # A zero exit status alone is not trusted: check that the
                # PDF was actually created.
                generated_path = self._generated_pdf_path(input_path, scratch_dir)
                if not os.path.exists(generated_path):
                    logger.error("PDF file was not created")
                    return False

                # Move the result to the exact path the caller asked for,
                # replacing any previous file there.
                os.replace(generated_path, output_path)

            logger.info(f"Successfully converted {input_path} to {output_path}")
            return True

        except subprocess.TimeoutExpired:
            logger.error("Conversion timed out")
            return False
        except Exception as e:
            # Boundary handler: callers rely on a boolean result, so any
            # unexpected error is logged and reported as a failure.
            logger.error(f"Conversion error: {e}")
            return False

    def decode_base64_content(self, base64_content: str) -> Optional[bytes]:
        """Decode base64 encoded content.

        Args:
            base64_content: Base64 text to decode.

        Returns:
            The decoded bytes, or ``None`` (after logging the error) when the
            input cannot be decoded.
        """
        try:
            return base64.b64decode(base64_content)
        except (ValueError, TypeError) as e:
            # binascii.Error (malformed data / bad padding) is a ValueError;
            # TypeError covers inputs that are neither str nor bytes-like.
            logger.error(f"Failed to decode base64 content: {e}")
            return None

    def validate_libreoffice(self) -> bool:
        """Check that the ``libreoffice`` executable can be run.

        Runs ``libreoffice --version`` with a 10 second timeout.

        Returns:
            ``True`` if the command exits with status 0 (its output is
            logged); ``False`` if it exits non-zero, cannot be started, or
            times out.
        """
        try:
            result = subprocess.run(
                ["libreoffice", "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
        except (OSError, subprocess.SubprocessError) as e:
            # OSError: executable missing or not runnable;
            # SubprocessError: includes TimeoutExpired.
            logger.error(f"LibreOffice validation error: {e}")
            return False

        if result.returncode != 0:
            logger.error("LibreOffice not found or not working")
            return False

        logger.info(f"LibreOffice version: {result.stdout.strip()}")
        return True