pymupdfextraction / my_io.py
JamieLemon's picture
Prints out the version info
2ae1039
# Document most recently opened by ready(); convertToMD() reads it.
# Stays None until ready() has been called.
doc = None
def render_page(page):
    """Rasterise one document page at 150 DPI and return it as an RGB image.

    Args:
        page: Page object providing ``get_pixmap``.

    Returns:
        The image built from the pixmap's raw sample bytes.
    """
    pixmap = page.get_pixmap(dpi=150)
    dimensions = [pixmap.width, pixmap.height]
    # The pixmap's samples are interpreted as packed RGB pixel data.
    return Image.frombytes("RGB", dimensions, pixmap.samples)
def ready(file, page_num:int):
    """Open ``file`` with PyMuPDF and render its leading pages as images.

    The opened document is stored in the module-level ``doc`` so that a
    later ``convertToMD`` call works on the same document.

    Args:
        file: Passed unchanged to ``pymupdf.open``.
        page_num: Number of pages to render, counted from the first page.
            Values above the document's page count are clamped to it.

    Returns:
        A list with one rendered image per page, in page order.
    """
    global doc
    # Open the new document before touching the old one, so a failed open
    # leaves the previously loaded document in place.
    opened = pymupdf.open(file)
    previous, doc = doc, opened
    # Release the document from the previous call; without this every new
    # upload leaves another open document behind.
    if previous is not None and not previous.is_closed:
        previous.close()

    page_num = min(page_num, opened.page_count)
    return [render_page(opened[index]) for index in range(page_num)]
def convertToMD(page_num:int, checkboxes:str = None, radios:str = None):
    """Convert the leading pages of the loaded document to Markdown.

    Args:
        page_num: Number of pages to convert, counted from the first page.
            Values above the document's page count are clamped to it.
        checkboxes: Selected option labels, examined item by item, or
            ``None`` when nothing is selected.
        radios: Selected table-strategy choice, or ``None``.

    Returns:
        A tuple ``(md, md, version)``: the Markdown text twice, followed by
        a string naming the pymupdf and pymupdf4llm versions.

    Raises:
        gr.Error: If no document has been opened yet.
    """
    # Guard clause: nothing below makes sense without a loaded document.
    if doc is None:
        raise gr.Error(message="Please upload a PDF")

    version = f"Processed using: {pymupdf.version=}, {pymupdf4llm.version=}"

    # NOTE(review): the three choices below are derived from the inputs but
    # are not passed to to_markdown(), so they currently have no effect on
    # the output. Confirm the intended keyword arguments before wiring them.
    labels = list(checkboxes) if checkboxes is not None else []
    choice_table_strategy = radios
    choice_page_separators = "Separate pages" in labels
    choice_embed_images = "Embed images" in labels

    page_num = min(page_num, doc.page_count)
    print(f"page num={page_num}")

    md = pymupdf4llm.to_markdown(doc, pages=range(page_num))
    return md, md, version
def convertComplete():
    """Write a fixed completion message to standard output."""
    message = "conversion complete"
    print(message)