from typing import List, Tuple, Generator
from pathlib import Path

from pptx import Presentation

from core.base_processor import DocumentProcessor
from core.exceptions import ProcessorError


class PPTXProcessor(DocumentProcessor):
    """PowerPoint presentation processor.

    Extracts the text of each slide shape together with its position
    (slide index, shape index) and later writes translated text back to
    the shapes found at those same positions.
    """

    def extract_text_elements(
        self, file_path: Path
    ) -> Generator[Tuple[str, dict], None, None]:
        """Extract text from PowerPoint slides.

        Args:
            file_path: Location of the presentation to read.

        Yields:
            ``(text, metadata)`` for every shape that exposes a ``text``
            attribute with non-whitespace content. ``metadata`` carries
            ``slide_index``, ``shape_index``, ``shape_type`` and
            ``original_text``.

        Raises:
            ProcessorError: If loading or walking the presentation fails.
        """
        try:
            prs = Presentation(file_path)
            for slide_idx, slide in enumerate(prs.slides):
                for shape_idx, shape in enumerate(slide.shapes):
                    # Read the property once; shapes without a ``text``
                    # attribute are skipped.
                    text = getattr(shape, "text", None)
                    if not text or not text.strip():
                        continue
                    # The indices are the key apply_translations() uses to
                    # find this shape again, so they must come from the same
                    # enumeration order.
                    metadata = {
                        'slide_index': slide_idx,
                        'shape_index': shape_idx,
                        'shape_type': str(type(shape)),
                        'original_text': text,
                    }
                    yield text, metadata
        except Exception as e:
            # Boundary handler: surface any failure as the project's own
            # error type while keeping the original exception as the cause.
            raise ProcessorError(
                f"Failed to extract text from PowerPoint: {e}"
            ) from e

    def apply_translations(
        self, file_path: Path, translations: List[Tuple[str, dict]]
    ) -> Path:
        """Apply translations to PowerPoint presentation.

        Args:
            file_path: Location of the original presentation.
            translations: ``(translated_text, metadata)`` pairs; each
                ``metadata`` must contain ``slide_index`` and
                ``shape_index`` as produced by ``extract_text_elements``.

        Returns:
            The path the translated presentation was saved to, as returned
            by ``self.generate_output_path(file_path, "translated")``.

        Raises:
            ProcessorError: If loading, updating or saving fails (including
                metadata entries that lack the index keys).
        """
        try:
            # Load the original presentation
            prs = Presentation(file_path)

            # slide index -> {shape index -> translated text}; a later entry
            # for the same position overwrites an earlier one.
            translation_map = {}
            for translated_text, metadata in translations:
                slide_idx = metadata['slide_index']
                shape_idx = metadata['shape_index']
                translation_map.setdefault(slide_idx, {})[shape_idx] = (
                    translated_text
                )

            # Apply translations
            for slide_idx, slide in enumerate(prs.slides):
                slide_translations = translation_map.get(slide_idx)
                if not slide_translations:
                    continue
                for shape_idx, shape in enumerate(slide.shapes):
                    if shape_idx in slide_translations and hasattr(shape, "text"):
                        # NOTE(review): assigning ``shape.text`` replaces the
                        # shape's whole text content; run-level formatting is
                        # presumably not preserved - confirm this is acceptable.
                        shape.text = slide_translations[shape_idx]

            # Save translated presentation
            output_path = self.generate_output_path(file_path, "translated")
            prs.save(output_path)
            return output_path
        except Exception as e:
            raise ProcessorError(
                f"Failed to apply translations to PowerPoint: {e}"
            ) from e

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions handled by this processor."""
        return ['.pptx']