File size: 869 Bytes
ec3d86e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# loader.py
import fitz
from PIL import Image
from typing import List
from tqdm.asyncio import tqdm
class ImagePDFLoader:
    """Render every page of a PDF file into a PIL image.

    Attributes are plain data: ``file_path`` is handed to ``fitz.open`` and
    ``dpi`` controls the rendering resolution.
    """

    def __init__(self, file_path: str, dpi: int = 250):
        """Store the PDF location and the target rendering resolution.

        Args:
            file_path: Path passed to ``fitz.open`` when ``load`` is called.
            dpi: Resolution used to rasterize each page.
        """
        self.file_path = file_path
        self.dpi = dpi

    def load(self) -> List[Image.Image]:
        """Rasterize all pages of the PDF and return them as RGB images.

        Pages whose rendered pixmap has a zero width or height are skipped.

        Returns:
            One ``PIL.Image.Image`` per rendered page, in page order. If any
            error occurs while opening or rendering the document, the error
            is reported through ``tqdm.write`` and an empty list is returned
            (images rendered before the failure are discarded).
        """
        images = []
        try:
            # PyMuPDF's base resolution is 72 dpi, so dpi / 72 is the zoom
            # factor. The matrix is the same for every page; build it once.
            # It is created inside the ``try`` so an invalid ``dpi`` is still
            # reported the same way as any other processing error.
            scale = self.dpi / 72
            zoom_matrix = fitz.Matrix(scale, scale)

            # The context manager closes the document even when rendering a
            # page raises; previously the handle leaked on that path.
            with fitz.open(self.file_path) as doc:
                for page in doc:
                    pix = page.get_pixmap(matrix=zoom_matrix, alpha=False)
                    if pix.width <= 0 or pix.height <= 0:
                        continue
                    # alpha=False above means the samples are plain RGB.
                    images.append(
                        Image.frombytes(
                            "RGB", (pix.width, pix.height), pix.samples
                        )
                    )
        except Exception as e:
            # Deliberate best-effort boundary: callers get [] on any failure.
            tqdm.write(f"Error during PDF processing: {e}")
            return []
        return images