Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
# Currently opened document, shared between ready() (which sets it) and
# convertToMD() (which reads it). None until a file has been loaded.
doc = None
def render_page(page):
    """Render one document page to an image.

    Args:
        page: A page object exposing ``get_pixmap`` (``ready`` passes pages
            taken from the opened PyMuPDF document).

    Returns:
        An ``Image`` in "RGB" mode built from the rendered pixel buffer.
    """
    # Render the page at a resolution of 150 DPI.
    pix = page.get_pixmap(dpi=150)
    # NOTE(review): "RGB" assumes three samples per pixel (no alpha channel,
    # RGB colorspace) -- confirm this still holds if get_pixmap options change.
    return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
def ready(file, page_num: int):
    """Open the uploaded document and render its leading pages as images.

    The opened document is stored in the module-level ``doc`` so that
    ``convertToMD`` can reuse it afterwards.

    Args:
        file: Value handed to ``pymupdf.open`` (the uploaded document).
            ``None`` is treated as "nothing uploaded".
        page_num: Maximum number of pages to render, counted from the first
            page. Values larger than the document are clamped to its page
            count; non-positive values render nothing.

    Returns:
        A list with one rendered image per page, in document order. Empty
        when no file was supplied.
    """
    global doc

    if file is None:
        # pymupdf.open(None) would create a brand-new empty PDF, which would
        # leave ``doc`` non-None and defeat the "Please upload a PDF" check in
        # convertToMD. Reset the shared state instead.
        doc = None
        return []

    # Use PyMuPDF to open the uploaded document.
    doc = pymupdf.open(file)

    # int() guards against float input (range() rejects floats); the clamp
    # keeps the page indices inside the document.
    page_total = max(0, min(int(page_num), doc.page_count))
    return [render_page(doc[index]) for index in range(page_total)]
def convertToMD(page_num: int, checkboxes: str = None, radios: str = None):
    """Convert the leading pages of the currently loaded document to Markdown.

    Args:
        page_num: Maximum number of pages to convert, counted from the first
            page. Clamped to the range ``0..doc.page_count``.
        checkboxes: Selected option labels, or ``None``. The value is iterated
            item by item; the labels "Separate pages" and "Embed images" are
            recognised.
        radios: Selected table-strategy choice, or ``None``.

    Returns:
        A tuple ``(md, md, version)``: the Markdown text (returned twice) and
        a string naming the PyMuPDF / PyMuPDF4LLM versions used.

    Raises:
        gr.Error: If no document has been loaded yet (``doc`` is ``None``).
    """
    # Fail fast: nothing below is meaningful without a loaded document.
    if doc is None:
        raise gr.Error(message="Please upload a PDF")

    version = f"Processed using: {pymupdf.version=}, {pymupdf4llm.version=}"

    # NOTE(review): the three choices below are parsed from the UI inputs but
    # are not forwarded to pymupdf4llm.to_markdown, so they currently have no
    # effect on the output. Confirm the intended keyword arguments (and the
    # values ``radios`` can take) before wiring them through.
    selected_labels = set(checkboxes) if checkboxes is not None else set()
    choice_table_strategy = radios
    choice_page_separators = "Separate pages" in selected_labels
    choice_embed_images = "Embed images" in selected_labels

    # int() guards against float input (range() rejects floats); the clamp
    # keeps the requested pages inside the document.
    page_num = max(0, min(int(page_num), doc.page_count))
    print(f"page num={page_num}")

    md = pymupdf4llm.to_markdown(doc, pages=range(page_num))
    return md, md, version
def convertComplete():
    """Print a message to stdout noting that a conversion has finished."""
    print("conversion complete")