File size: 3,830 Bytes
1df1e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable, Generator, List, Optional, Tuple

from core.base_translator import BaseTranslator
from core.exceptions import ProcessorError

class DocumentProcessor(ABC):
    """Abstract base class for document processors.

    Subclasses supply the format-specific parts (extracting text elements,
    writing translations back, and listing supported extensions), while
    ``process_document`` provides the shared extract -> translate -> apply
    pipeline on top of them.
    """

    def __init__(self, translator: "BaseTranslator"):
        """
        Store the translator used by ``process_document``.

        Args:
            translator: Object whose ``translate_text(text, source_lang,
                target_lang)`` method is called for each non-blank element
        """
        self.translator = translator

    @abstractmethod
    def extract_text_elements(self, file_path: Path) -> Generator[Tuple[str, dict], None, None]:
        """
        Extract text elements from document

        Args:
            file_path: Path to the document

        Yields:
            Tuple of (text_content, metadata) for each translatable element
        """

    @abstractmethod
    def apply_translations(self, file_path: Path, translations: List[Tuple[str, dict]]) -> Path:
        """
        Apply translations back to the document

        Args:
            file_path: Path to original document
            translations: List of (translated_text, metadata) tuples

        Returns:
            Path to the translated document
        """

    def process_document(
        self,
        file_path: Path,
        source_lang: str,
        target_lang: str,
        progress_callback: Optional[Callable[[float, str], None]] = None
    ) -> Tuple[Path, str]:
        """
        Process entire document translation

        Every extracted element is passed on to ``apply_translations`` in its
        original order: elements with non-blank text are translated, while
        empty or whitespace-only elements are forwarded unchanged.

        Args:
            file_path: Path to document
            source_lang: Source language
            target_lang: Target language
            progress_callback: Optional callable invoked after each element
                with ``(fraction_done, message)``, where ``fraction_done`` is
                ``(index + 1) / total_elements``

        Returns:
            Tuple of (output_file_path, summary_text). The summary is every
            translated (non-blank) element followed by a newline, in order.

        Raises:
            ProcessorError: If no elements are extracted, or if any step
                raises. The message is prefixed with
                "Document processing failed: " and the triggering exception
                is attached as ``__cause__``.
        """
        try:
            # Materialize the generator up front: the element count is needed
            # both for the emptiness check and for progress reporting.
            text_elements = list(self.extract_text_elements(file_path))
            total_elements = len(text_elements)

            if not text_elements:
                raise ProcessorError("No translatable text found in document")

            translations = []
            # Collect translated pieces and join once at the end rather than
            # growing a string with += on every iteration.
            translated_parts = []

            for position, (text, metadata) in enumerate(text_elements, start=1):
                if text.strip():  # Only translate non-empty text
                    translated = self.translator.translate_text(text, source_lang, target_lang)
                    translations.append((translated, metadata))
                    translated_parts.append(translated)
                else:
                    # Keep blank elements so translations stays aligned with
                    # the extracted elements.
                    translations.append((text, metadata))

                if progress_callback is not None:
                    progress_callback(
                        position / total_elements,
                        f"Translating element {position}/{total_elements}"
                    )

            output_path = self.apply_translations(file_path, translations)

            all_translated_text = "".join(part + "\n" for part in translated_parts)
            return output_path, all_translated_text

        except Exception as e:
            # Single failure type for callers; "from e" keeps the original
            # exception reachable as the explicit cause.
            raise ProcessorError(f"Document processing failed: {e}") from e

    def generate_output_path(self, original_path: Path, suffix: str = "translated") -> Path:
        """
        Generate output file path

        The result lives in the same directory as ``original_path`` and keeps
        its extension, e.g. ``report.docx`` -> ``report_translated.docx``.

        Args:
            original_path: Original file path
            suffix: Suffix to add to filename

        Returns:
            New file path with suffix
        """
        new_name = f"{original_path.stem}_{suffix}{original_path.suffix}"
        return original_path.parent / new_name

    @property
    @abstractmethod
    def supported_extensions(self) -> List[str]:
        """Return list of supported file extensions"""