Spaces:
Sleeping
Sleeping
import os
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable, Generator, List, Optional, Tuple

from core.base_translator import BaseTranslator
from core.exceptions import ProcessorError
class DocumentProcessor(ABC):
    """Abstract base class for document processors.

    Subclasses provide the format-specific hooks (``extract_text_elements``,
    ``apply_translations``, ``supported_extensions``); ``process_document``
    drives the extract -> translate -> re-apply workflow on top of them.
    """

    def __init__(self, translator: "BaseTranslator"):
        """
        Store the translator used by ``process_document``.

        Args:
            translator: Object whose ``translate_text(text, source_lang,
                target_lang)`` method is called for every non-blank element
        """
        self.translator = translator

    @abstractmethod
    def extract_text_elements(self, file_path: Path) -> Generator[Tuple[str, dict], None, None]:
        """
        Extract text elements from document

        Args:
            file_path: Path to the document

        Yields:
            Tuple of (text_content, metadata) for each translatable element
        """

    @abstractmethod
    def apply_translations(self, file_path: Path, translations: List[Tuple[str, dict]]) -> Path:
        """
        Apply translations back to the document

        Args:
            file_path: Path to original document
            translations: List of (translated_text, metadata) tuples

        Returns:
            Path to the translated document
        """

    def process_document(
        self,
        file_path: Path,
        source_lang: str,
        target_lang: str,
        progress_callback: Optional[Callable[[float, str], object]] = None
    ) -> Tuple[Path, str]:
        """
        Process entire document translation

        Args:
            file_path: Path to document
            source_lang: Source language
            target_lang: Target language
            progress_callback: Optional callable invoked after every element
                (blank ones included) with the completed fraction and a
                status message; its return value is ignored

        Returns:
            Tuple of (output_file_path, summary_text), where summary_text is
            every translated element followed by a newline

        Raises:
            ProcessorError: If the document yields no elements, or if any
                step fails. A ProcessorError raised by a step propagates
                unchanged; any other exception is wrapped, with the original
                attached as ``__cause__``.
        """
        try:
            # Materialise the generator: the total count is needed up front
            # to report progress as a fraction.
            text_elements = list(self.extract_text_elements(file_path))
            total_elements = len(text_elements)
            if not text_elements:
                raise ProcessorError("No translatable text found in document")

            translations = []
            translated_chunks = []
            for index, (text, metadata) in enumerate(text_elements, start=1):
                if text.strip():  # Only translate non-empty text
                    translated = self.translator.translate_text(text, source_lang, target_lang)
                    translations.append((translated, metadata))
                    translated_chunks.append(translated)
                else:
                    # Keep blank elements as-is so positions still line up
                    # when the translations are applied.
                    translations.append((text, metadata))

                if progress_callback:
                    progress_callback(
                        index / total_elements,
                        f"Translating element {index}/{total_elements}",
                    )

            output_path = self.apply_translations(file_path, translations)
            # Build the summary once instead of growing a string in the loop.
            summary_text = "".join(chunk + "\n" for chunk in translated_chunks)
            return output_path, summary_text
        except ProcessorError:
            # Already a domain error: do not wrap it a second time.
            raise
        except Exception as e:
            raise ProcessorError(f"Document processing failed: {e}") from e

    def generate_output_path(self, original_path: Path, suffix: str = "translated") -> Path:
        """
        Generate output file path

        Args:
            original_path: Original file path
            suffix: Suffix to add to filename

        Returns:
            Path in the same directory named ``<stem>_<suffix><extension>``
        """
        stem = original_path.stem
        extension = original_path.suffix
        directory = original_path.parent
        return directory / f"{stem}_{suffix}{extension}"

    @abstractmethod
    def supported_extensions(self) -> List[str]:
        """Return list of supported file extensions"""