Spaces:

artifex-software
/

pymupdfextraction

Running on CPU Upgrade

Prints out the version info

2ae1039 5 months ago

1.47 kB

	# Currently opened document, shared by the functions below: ready() assigns it
	# and convertToMD() reads it. Stays None until a document has been opened.
	doc = None

	def render_page(page):
	    """Rasterize *page* at 150 DPI and return it as an RGB image.

	    Args:
	        page: Object exposing ``get_pixmap(dpi=...)``; presumably a PyMuPDF
	            page -- confirm against the caller.

	    Returns:
	        The object built by ``Image.frombytes`` from the pixmap's raw samples.
	    """
	    pixmap = page.get_pixmap(dpi=150)
	    # The raw sample buffer is interpreted as tightly packed RGB triples.
	    dimensions = [pixmap.width, pixmap.height]
	    return Image.frombytes("RGB", dimensions, pixmap.samples)

	def ready(file, page_num:int):
	    """Open the uploaded document and render its leading pages as images.

	    The opened document is stored in the module-level ``doc`` so that a later
	    conversion call can reuse it.

	    Args:
	        file: Value handed to ``pymupdf.open`` (presumably the path of the
	            uploaded file -- confirm against the caller), or ``None`` when
	            nothing is uploaded.
	        page_num: Maximum number of pages to render, counted from the first
	            page. Values above the document's page count are clamped to it.

	    Returns:
	        A list with one rendered image per page, in page order. The list is
	        empty when ``file`` is ``None``.
	    """
	    global doc

	    if file is None:
	        # PyMuPDF treats a missing filename as a request for a brand-new,
	        # empty PDF. Storing that would make the "no document" check in the
	        # conversion step pass for a document that was never uploaded, so
	        # reset the shared state instead.
	        doc = None
	        return []

	    # Use PyMuPDF to render the pages of the uploaded document
	    doc = pymupdf.open(file)

	    page_total = min(page_num, doc.page_count)
	    return [render_page(doc[index]) for index in range(page_total)]

	def convertToMD(page_num:int, checkboxes:str = None, radios:str = None):
	    """Convert the leading pages of the currently opened document to Markdown.

	    Args:
	        page_num: Maximum number of pages to convert, counted from the first
	            page. Values above the document's page count are clamped to it.
	        checkboxes: Optional collection of selected option labels (the
	            recognised labels are "Separate pages" and "Embed images").
	        radios: Optional table-strategy choice.

	    Returns:
	        A 3-tuple ``(markdown, markdown, version_text)``: the Markdown text
	        twice, followed by a line naming the library versions used.

	    Raises:
	        gr.Error: If no document has been opened yet.

	    NOTE(review): ``checkboxes`` and ``radios`` are accepted for caller
	    compatibility but are not forwarded to ``pymupdf4llm.to_markdown``, so
	    they currently have no effect on the output. Confirm which keyword
	    arguments the installed pymupdf4llm version supports before wiring
	    them in.
	    """
	    # Fail fast, before doing any other work, when nothing has been opened.
	    if doc is None:
	        raise gr.Error(message="Please upload a PDF")

	    page_num = min(page_num, doc.page_count)
	    print(f"page num={page_num}")

	    md = pymupdf4llm.to_markdown(doc, pages=range(page_num))
	    version = f"Processed using: {pymupdf.version=}, {pymupdf4llm.version=}"
	    return md, md, version

	def convertComplete():
	    """Write a completion notice for the conversion to standard output."""
	    status = "conversion complete"
	    print(status)