Spaces:
Sleeping
Sleeping
| from typing import List, Tuple, Generator | |
| from pathlib import Path | |
| from pptx import Presentation | |
| from core.base_processor import DocumentProcessor | |
| from core.exceptions import ProcessorError | |
class PPTXProcessor(DocumentProcessor):
    """Document processor for PowerPoint (.pptx) presentations.

    Text is read from, and written back to, shapes addressed by their
    positional ``(slide index, shape index)`` pair within the presentation.
    """

    def extract_text_elements(self, file_path: Path) -> Generator[Tuple[str, dict], None, None]:
        """Yield the text of every shape that carries non-blank text.

        Args:
            file_path: Location of the presentation to read.

        Yields:
            ``(text, metadata)`` pairs, where ``metadata`` contains
            ``slide_index``, ``shape_index``, ``shape_type`` (the ``str()``
            of the shape's class) and ``original_text``.

        Raises:
            ProcessorError: If opening or walking the presentation fails.
        """
        try:
            prs = Presentation(file_path)
            for slide_idx, slide in enumerate(prs.slides):
                for shape_idx, shape in enumerate(slide.shapes):
                    # Read the property once; shapes that expose no ``text``
                    # attribute give None here and are skipped below.
                    text = getattr(shape, "text", None)
                    if not text or not text.strip():
                        continue
                    metadata = {
                        'slide_index': slide_idx,
                        'shape_index': shape_idx,
                        'shape_type': str(type(shape)),
                        'original_text': text,
                    }
                    yield text, metadata
        except Exception as e:
            # Chain explicitly so the underlying failure stays attached as
            # the cause of the domain error.
            raise ProcessorError(f"Failed to extract text from PowerPoint: {e}") from e

    def apply_translations(self, file_path: Path, translations: List[Tuple[str, dict]]) -> Path:
        """Write translated text into a copy of the presentation.

        Args:
            file_path: Location of the original presentation.
            translations: ``(translated_text, metadata)`` pairs; each
                ``metadata`` must contain the ``slide_index`` and
                ``shape_index`` keys produced by ``extract_text_elements``.

        Returns:
            The path the translated presentation was saved to, as returned
            by ``self.generate_output_path(file_path, "translated")``.

        Raises:
            ProcessorError: If loading, updating or saving fails, including
                when a metadata dict lacks one of the index keys.
        """
        try:
            prs = Presentation(file_path)

            # slide index -> {shape index -> translated text}
            translation_map = {}
            for translated_text, metadata in translations:
                per_slide = translation_map.setdefault(metadata['slide_index'], {})
                per_slide[metadata['shape_index']] = translated_text

            for slide_idx, slide in enumerate(prs.slides):
                slide_translations = translation_map.get(slide_idx)
                if not slide_translations:
                    continue
                for shape_idx, shape in enumerate(slide.shapes):
                    if shape_idx in slide_translations and hasattr(shape, "text"):
                        # NOTE(review): assigning ``.text`` replaces the
                        # shape's whole text content; run-level formatting is
                        # presumably not preserved - confirm against the
                        # python-pptx documentation.
                        shape.text = slide_translations[shape_idx]

            output_path = self.generate_output_path(file_path, "translated")
            prs.save(output_path)
            return output_path
        except Exception as e:
            raise ProcessorError(f"Failed to apply translations to PowerPoint: {e}") from e

    def supported_extensions(self) -> List[str]:
        """Return the file extensions this processor handles."""
        return ['.pptx']