Ilia Tambovtsev committed on
Commit
42b8733
·
1 Parent(s): 80be769

feat: add page2image chain

Browse files
Files changed (1) hide show
  1. src/pdf_utils/chains.py +45 -0
src/pdf_utils/chains.py CHANGED
@@ -23,6 +23,51 @@ from .pdf2image import page2image
23
 
24
  logger = logging.getLogger(__name__)
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  class Pdf2ImageChain(Chain):
28
  """Chain for converting PDF pages to PIL Images using PyMuPDF"""
 
23
 
24
  logger = logging.getLogger(__name__)
25
 
26
class Page2ImageChain(Chain):
    """Chain that turns one PyMuPDF page into a PIL Image."""

    def __init__(self, default_dpi: int = 72, **kwargs):
        """Create the page-to-image chain.

        Args:
            default_dpi: Rendering resolution passed as the fallback when
                the "dpi" value is looked up in the inputs.
            **kwargs: Forwarded unchanged to the base ``Chain`` initializer.
        """
        super().__init__(**kwargs)
        self._default_dpi = default_dpi

    @property
    def input_keys(self) -> List[str]:
        """Names of the inputs this chain requires."""
        required = ["page"]
        return required

    @property
    def output_keys(self) -> List[str]:
        """Names of the outputs this chain produces."""
        produced = ["image"]
        return produced

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None
    ) -> Dict[str, Any]:
        """Render the supplied PyMuPDF page as a PIL Image.

        Args:
            inputs: Mapping that holds:
                - page: PyMuPDF page object (required)
                - dpi: optional rendering resolution; resolved through
                  ``get_param_or_default`` with the instance default as
                  the fallback argument
            run_manager: Callback manager; not used by this chain.

        Returns:
            Dictionary with the rendered image stored under "image".
        """
        # Read the mandatory key first so a missing page fails before
        # any DPI lookup happens.
        source_page = inputs["page"]
        resolution = get_param_or_default(inputs, "dpi", self._default_dpi)

        return {"image": page2image(source_page, resolution)}
70
+
71
 
72
  class Pdf2ImageChain(Chain):
73
  """Chain for converting PDF pages to PIL Images using PyMuPDF"""