Spaces:
Runtime error
Runtime error
| import fitz | |
| import numpy as np | |
| import enum | |
| from pydantic import BaseModel, Field | |
| from PIL import Image | |
class SupportedPdfParseMethod(enum.Enum):
    """Identifiers for the PDF parse methods this module names.

    Two members are defined: ``OCR`` (value ``'ocr'``) and ``TXT``
    (value ``'txt'``).
    """

    OCR = "ocr"
    TXT = "txt"
class PageInfo(BaseModel):
    """Pydantic model holding the dimensions of a single page.

    Both fields are required floats; the unit is not specified here.
    """

    # Page width.
    w: float = Field(description="the width of page")
    # Page height.
    h: float = Field(description="the height of page")
def fitz_doc_to_image(doc, target_dpi=200, origin_dpi=None) -> 'Image.Image':
    """Render a PyMuPDF page to an RGB PIL image.

    The page is rasterised with a zoom factor of ``target_dpi / 72``. If
    either side of the resulting pixmap is larger than 4500 pixels, the
    page is rendered again with a zoom factor of 1 (72 dpi) and that
    smaller pixmap is used instead.

    Args:
        doc: Despite the name, a single page object (anything exposing
            ``get_pixmap(matrix=..., alpha=...)``), e.g. ``document[i]``.
        target_dpi (int, optional): Resolution to render at. Defaults to 200.
        origin_dpi: Unused. Kept only so existing callers that pass it
            keep working.

    Returns:
        PIL.Image.Image: The rendered page in ``'RGB'`` mode. Note that this
        is a PIL image, not a dict or a numpy array.
    """
    zoom = target_dpi / 72
    pm = doc.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)

    # Oversized render: fall back to an unscaled (zoom 1, i.e. 72 dpi) pixmap
    # rather than keeping a very large image.
    if pm.width > 4500 or pm.height > 4500:
        pm = doc.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)

    # alpha=False above means the samples are plain 3-channel RGB bytes.
    return Image.frombytes('RGB', (pm.width, pm.height), pm.samples)
def load_images_from_pdf(pdf_file, dpi=200, start_page_id=0, end_page_id=None) -> list:
    """Render a range of pages from a PDF into images.

    Args:
        pdf_file: Passed straight to ``fitz.open``.
        dpi (int, optional): Forwarded to ``fitz_doc_to_image`` as
            ``target_dpi``. Defaults to 200.
        start_page_id (int, optional): Zero-based index of the first page to
            render. Defaults to 0.
        end_page_id (int, optional): Zero-based index of the last page to
            render (inclusive). ``None`` or a negative value means the last
            page of the document. A value past the last page is clamped to
            the last page and a message is printed.

    Returns:
        list: One entry per rendered page, in page order, each being whatever
        ``fitz_doc_to_image`` returns for that page.
    """
    with fitz.open(pdf_file) as doc:
        last_page = doc.page_count - 1

        # Resolve the inclusive upper bound of the page range.
        if end_page_id is not None and end_page_id >= 0:
            final_page = end_page_id
        else:
            final_page = last_page

        if final_page > last_page:
            print('end_page_id is out of range, use images length')
            final_page = last_page

        rendered = [
            fitz_doc_to_image(doc[page_no], target_dpi=dpi)
            for page_no in range(0, doc.page_count)
            if start_page_id <= page_no <= final_page
        ]

    return rendered