File size: 2,775 Bytes
1df1e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from typing import List, Tuple, Generator
from pathlib import Path
from pptx import Presentation
from core.base_processor import DocumentProcessor
from core.exceptions import ProcessorError

class PPTXProcessor(DocumentProcessor):
    """Document processor for PowerPoint (.pptx) presentations.

    Text is addressed by position: every extracted element carries the
    index of its slide and the index of its shape within that slide, and
    ``apply_translations`` uses the same two indices to write text back.
    """

    def extract_text_elements(self, file_path: Path) -> Generator[Tuple[str, dict], None, None]:
        """Yield the text of every non-empty text-bearing shape.

        Args:
            file_path: Location of the presentation to read.

        Yields:
            ``(text, metadata)`` pairs, one per shape that exposes a
            ``text`` attribute whose value is not blank. ``metadata`` holds
            ``slide_index``, ``shape_index``, ``shape_type`` (the string
            form of the shape's Python type) and ``original_text``.

        Raises:
            ProcessorError: If anything fails while opening or walking the
                presentation; the underlying exception is chained as the
                cause.
        """
        try:
            prs = Presentation(file_path)

            for slide_idx, slide in enumerate(prs.slides):
                for shape_idx, shape in enumerate(slide.shapes):
                    # Not every shape exposes text; skip those that do not.
                    if not hasattr(shape, "text"):
                        continue

                    # Read the property once and reuse it below.
                    text = shape.text
                    if not text.strip():
                        continue

                    metadata = {
                        'slide_index': slide_idx,
                        'shape_index': shape_idx,
                        'shape_type': str(type(shape)),
                        'original_text': text,
                    }
                    yield text, metadata

        except Exception as e:
            # Chain explicitly so the root cause stays attached to the
            # domain error instead of showing up as an incidental context.
            raise ProcessorError(f"Failed to extract text from PowerPoint: {e}") from e

    def apply_translations(self, file_path: Path, translations: List[Tuple[str, dict]]) -> Path:
        """Write translated text into a copy of the presentation.

        Args:
            file_path: Location of the original presentation.
            translations: ``(translated_text, metadata)`` pairs. Each
                ``metadata`` must contain ``slide_index`` and
                ``shape_index``; when two entries target the same shape the
                later one wins.

        Returns:
            The path the translated presentation was saved to, as produced
            by ``self.generate_output_path(file_path, "translated")``
            (defined outside this class).

        Raises:
            ProcessorError: If loading, updating or saving fails, including
                when a metadata dict lacks a required index key; the
                underlying exception is chained as the cause.
        """
        try:
            # Load the original presentation
            prs = Presentation(file_path)

            # Group translations as {slide_index: {shape_index: text}}
            translation_map = {}
            for translated_text, metadata in translations:
                slide_idx = metadata['slide_index']
                shape_idx = metadata['shape_index']
                translation_map.setdefault(slide_idx, {})[shape_idx] = translated_text

            # Apply translations to the shapes addressed by the map
            for slide_idx, slide in enumerate(prs.slides):
                slide_translations = translation_map.get(slide_idx)
                if slide_translations is None:
                    continue

                for shape_idx, shape in enumerate(slide.shapes):
                    if shape_idx not in slide_translations:
                        continue
                    if not hasattr(shape, "text"):
                        continue
                    # NOTE(review): this replaces the shape's whole text in
                    # one assignment; run-level formatting is presumably
                    # not carried over - confirm against python-pptx docs.
                    shape.text = slide_translations[shape_idx]

            # Save translated presentation
            output_path = self.generate_output_path(file_path, "translated")
            prs.save(output_path)

            return output_path

        except Exception as e:
            # Chain explicitly so the root cause stays attached to the
            # domain error instead of showing up as an incidental context.
            raise ProcessorError(f"Failed to apply translations to PowerPoint: {e}") from e

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions this processor handles (``.pptx`` only)."""
        return ['.pptx']