Spaces:
Sleeping
Sleeping
File size: 2,775 Bytes
1df1e0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
from typing import List, Tuple, Generator
from pathlib import Path
from pptx import Presentation
from core.base_processor import DocumentProcessor
from core.exceptions import ProcessorError
class PPTXProcessor(DocumentProcessor):
    """PowerPoint presentation processor.

    Extracts the text of every text-bearing shape on every slide and can
    write translated text back into the same shapes, addressing each shape
    by its ``(slide_index, shape_index)`` position.
    """

    def extract_text_elements(self, file_path: Path) -> Generator[Tuple[str, dict], None, None]:
        """Yield the text of each non-blank shape in the presentation.

        Args:
            file_path: Location of the presentation to read.

        Yields:
            ``(text, metadata)`` pairs, where ``metadata`` holds the keys
            ``slide_index``, ``shape_index``, ``shape_type`` and
            ``original_text``. The indices are the ``enumerate`` positions of
            the slide within ``prs.slides`` and of the shape within
            ``slide.shapes``.

        Raises:
            ProcessorError: If any exception is raised while loading or
                walking the presentation; the original exception is attached
                as ``__cause__``.
        """
        try:
            prs = Presentation(file_path)
            for slide_idx, slide in enumerate(prs.slides):
                for shape_idx, shape in enumerate(slide.shapes):
                    # Shapes without a ``text`` attribute are skipped.
                    if not hasattr(shape, "text"):
                        continue
                    # Read the property once instead of once per use.
                    text = shape.text
                    if not text.strip():
                        continue
                    metadata = {
                        'slide_index': slide_idx,
                        'shape_index': shape_idx,
                        'shape_type': str(type(shape)),
                        'original_text': text,
                    }
                    yield text, metadata
        except Exception as e:
            # Chain explicitly so the root cause survives in the traceback.
            raise ProcessorError(f"Failed to extract text from PowerPoint: {e}") from e

    def apply_translations(self, file_path: Path, translations: List[Tuple[str, dict]]) -> Path:
        """Write translated text into a copy of the presentation.

        Args:
            file_path: Location of the original presentation.
            translations: ``(translated_text, metadata)`` pairs; each
                ``metadata`` must contain ``slide_index`` and ``shape_index``
                (the same keys produced by ``extract_text_elements``).

        Returns:
            The path the translated presentation was saved to, as produced by
            ``self.generate_output_path(file_path, "translated")``.

        Raises:
            ProcessorError: If any exception is raised while loading,
                updating or saving the presentation (including a missing
                metadata key); the original exception is attached as
                ``__cause__``.
        """
        try:
            # Load the original presentation
            prs = Presentation(file_path)

            # Group translations as {slide_index: {shape_index: text}}.
            # A later entry for the same position overwrites an earlier one.
            translation_map = {}
            for translated_text, metadata in translations:
                slide_idx = metadata['slide_index']
                shape_idx = metadata['shape_index']
                translation_map.setdefault(slide_idx, {})[shape_idx] = translated_text

            # Apply translations
            for slide_idx, slide in enumerate(prs.slides):
                slide_translations = translation_map.get(slide_idx)
                if not slide_translations:
                    continue
                for shape_idx, shape in enumerate(slide.shapes):
                    if shape_idx in slide_translations and hasattr(shape, "text"):
                        # NOTE(review): assigning ``shape.text`` replaces the
                        # shape's whole text content; run-level formatting is
                        # presumably not preserved - confirm against the
                        # python-pptx documentation.
                        shape.text = slide_translations[shape_idx]

            # Save translated presentation
            output_path = self.generate_output_path(file_path, "translated")
            prs.save(output_path)
            return output_path
        except Exception as e:
            # Chain explicitly so the root cause survives in the traceback.
            raise ProcessorError(f"Failed to apply translations to PowerPoint: {e}") from e

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions this processor handles."""
        return ['.pptx']