from abc import ABC, abstractmethod
from typing import Callable, Generator, List, Optional, Tuple
from pathlib import Path
import os

from core.base_translator import BaseTranslator
from core.exceptions import ProcessorError


class DocumentProcessor(ABC):
    """Abstract base class for document processors.

    Subclasses implement text extraction and re-insertion for a concrete
    document format; ``process_document`` drives the
    extract -> translate -> apply pipeline using the supplied translator.
    """

    def __init__(self, translator: BaseTranslator):
        """Store the translator used by ``process_document``.

        Args:
            translator: Object whose ``translate_text(text, source_lang,
                target_lang)`` method is called for every non-blank element.
        """
        self.translator = translator

    @abstractmethod
    def extract_text_elements(
        self, file_path: Path
    ) -> Generator[Tuple[str, dict], None, None]:
        """Extract text elements from a document.

        Args:
            file_path: Path to the document.

        Yields:
            Tuple of (text_content, metadata) for each translatable element.
        """
        pass

    @abstractmethod
    def apply_translations(
        self, file_path: Path, translations: List[Tuple[str, dict]]
    ) -> Path:
        """Apply translations back to the document.

        Args:
            file_path: Path to the original document.
            translations: List of (translated_text, metadata) tuples, in the
                same order the elements were extracted.

        Returns:
            Path to the translated document.
        """
        pass

    def process_document(
        self,
        file_path: Path,
        source_lang: str,
        target_lang: str,
        progress_callback: Optional[Callable[[float, str], object]] = None,
    ) -> Tuple[Path, str]:
        """Translate every text element of a document and write the result.

        Args:
            file_path: Path to the document.
            source_lang: Source language, passed through to the translator.
            target_lang: Target language, passed through to the translator.
            progress_callback: Optional callable invoked after each element
                with ``(fraction_done, message)``; its return value is ignored.

        Returns:
            Tuple of (output_file_path, summary_text), where summary_text is
            every translated element followed by a newline, in document order.

        Raises:
            ProcessorError: If no text elements are extracted, or if any step
                fails. A ProcessorError raised inside the pipeline propagates
                unchanged; any other exception is wrapped and chained as the
                cause.
        """
        try:
            # Materialise the generator so the total is known for progress.
            text_elements = list(self.extract_text_elements(file_path))
            if not text_elements:
                raise ProcessorError("No translatable text found in document")
            total_elements = len(text_elements)

            translations = []
            summary_parts = []
            for position, (text, metadata) in enumerate(text_elements, start=1):
                if text.strip():
                    # Only non-blank text is sent to the translator.
                    translated = self.translator.translate_text(
                        text, source_lang, target_lang
                    )
                    translations.append((translated, metadata))
                    summary_parts.append(translated + "\n")
                else:
                    # Blank elements are kept as-is so positions stay aligned
                    # with the extracted elements.
                    translations.append((text, metadata))

                if progress_callback:
                    progress_callback(
                        position / total_elements,
                        f"Translating element {position}/{total_elements}",
                    )

            output_path = self.apply_translations(file_path, translations)
            return output_path, "".join(summary_parts)
        except ProcessorError:
            # Already a domain error with a specific message; do not re-wrap.
            raise
        except Exception as e:
            raise ProcessorError(f"Document processing failed: {str(e)}") from e

    def generate_output_path(
        self, original_path: Path, suffix: str = "translated"
    ) -> Path:
        """Generate the output file path next to the original file.

        Args:
            original_path: Original file path.
            suffix: Suffix appended to the file stem, separated by "_".

        Returns:
            Path in the same directory named ``<stem>_<suffix><extension>``.
        """
        stem = original_path.stem
        extension = original_path.suffix
        directory = original_path.parent
        return directory / f"{stem}_{suffix}{extension}"

    @property
    @abstractmethod
    def supported_extensions(self) -> List[str]:
        """Return list of supported file extensions."""
        pass