Spaces:
Running
Running
| from typing import List | |
| import cv2 | |
| import numpy as np | |
| import math | |
| import pypdfium2 | |
| from PIL import Image, ImageOps, ImageDraw | |
| import torch | |
| from surya.settings import settings | |
def convert_if_not_rgb(images: List[Image.Image]) -> List[Image.Image]:
    """Return a list in which every image is in RGB mode.

    Images already in RGB mode are passed through unchanged (not copied);
    any other mode is converted with ``Image.convert("RGB")``.
    """
    return [img if img.mode == "RGB" else img.convert("RGB") for img in images]
def get_total_splits(image_size, processor):
    """Return how many vertical chunks ``split_image`` would produce.

    ``image_size`` is a (width, height) pair; images at or below the
    configured chunk height are processed whole (one split).
    """
    img_height = list(image_size)[1]
    if img_height <= settings.DETECTOR_IMAGE_CHUNK_HEIGHT:
        return 1
    return math.ceil(img_height / processor.size["height"])
def split_image(img, processor):
    """Split a tall image into processor-height vertical chunks.

    Never modifies ``img``: a short image is returned as a single copy,
    a tall one is cut into crops. The last, possibly partial, chunk is
    bottom-padded to the processor height with color value 255 so all
    chunks share the same size.

    Returns:
        (chunks, chunk_heights) — chunk_heights holds each chunk's
        un-padded pixel height.
    """
    img_width, img_height = img.size
    processor_height = processor.size["height"]

    if img_height <= settings.DETECTOR_IMAGE_CHUNK_HEIGHT:
        return [img.copy()], [img_height]

    splits = []
    split_heights = []
    for idx in range(math.ceil(img_height / processor_height)):
        top = idx * processor_height
        bottom = min(top + processor_height, img_height)
        chunk = img.crop((0, top, img_width, bottom))
        chunk_height = bottom - top
        if chunk_height < processor_height:
            # Pad the trailing partial chunk so every split is uniform.
            chunk = ImageOps.pad(chunk, (img_width, processor_height), color=255, centering=(0, 0))
        splits.append(chunk)
        split_heights.append(chunk_height)
    return splits, split_heights
def prepare_image_detection(img, processor):
    """Resize and preprocess a PIL image into a detection-model input tensor.

    Args:
        img: source PIL image; left unmodified (a copy is resized instead).
        processor: image processor exposing ``size`` ("width"/"height") and
            callable pixel-value conversion.

    Returns:
        torch.Tensor of pixel values for the detection model.
    """
    new_size = (processor.size["width"], processor.size["height"])
    # BUGFIX: Image.thumbnail resizes in place, so the original code mutated
    # the caller's image — inconsistent with split_image's no-mutation
    # contract. Work on a copy; the returned tensor is unchanged.
    img = img.copy()
    # This double resize (aspect-preserving thumbnail, then stretch) is
    # actually necessary for downstream accuracy.
    img.thumbnail(new_size, Image.Resampling.LANCZOS)
    img = img.resize(new_size, Image.Resampling.LANCZOS)  # Stretch smaller dimension to fit new size
    img = np.asarray(img, dtype=np.uint8)
    img = processor(img)["pixel_values"][0]
    return torch.from_numpy(img)
def open_pdf(pdf_filepath):
    """Open ``pdf_filepath`` with pypdfium2 and return the PdfDocument handle."""
    document = pypdfium2.PdfDocument(pdf_filepath)
    return document
def get_page_images(doc, indices: List, dpi=settings.IMAGE_DPI):
    """Render the given page indices of a pypdfium2 document to RGB PIL images.

    Args:
        doc: an open ``pypdfium2.PdfDocument``.
        indices: page indices to render.
        dpi: target resolution; pdfium's base scale is 72 dpi.

    Returns:
        List of RGB ``PIL.Image`` objects, one per index, in order.
    """
    rendered = doc.render(
        pypdfium2.PdfBitmap.to_pil,
        page_indices=indices,
        scale=dpi / 72,  # pdfium renders at 72 dpi for scale 1.0
    )
    return [page.convert("RGB") for page in rendered]
def slice_bboxes_from_image(image: Image.Image, bboxes):
    """Crop each axis-aligned bounding box out of ``image``.

    Args:
        image: source PIL image.
        bboxes: iterable of [x1, y1, x2, y2] sequences.

    Returns:
        List of cropped line images, one per bbox, order preserved.
    """
    lines = []
    for bbox in bboxes:
        line = image.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
        # BUGFIX: warn on any degenerate crop — the original only checked
        # width, so zero-height boxes produced empty lines silently.
        if line.size[0] == 0 or line.size[1] == 0:
            print(f"Warning: found an empty line with bbox {bbox}")
        lines.append(line)
    return lines
def slice_polys_from_image(image: Image.Image, polys):
    """Slice each polygonal region out of ``image``.

    Converts the image to a uint8 array once, then delegates per-polygon
    crop-and-mask work to ``slice_and_pad_poly``.

    Returns:
        List of PIL images, one per polygon, order preserved.
    """
    image_array = np.array(image, dtype=np.uint8)
    # Idiom fix: the original looped with enumerate but never used the index.
    return [slice_and_pad_poly(image_array, poly) for poly in polys]
def slice_and_pad_poly(image_array: np.ndarray, coordinates) -> Image.Image:
    """Crop a polygon's bounding box from ``image_array`` and mask out every
    pixel outside the polygon with ``settings.RECOGNITION_PAD_VALUE``.

    Args:
        image_array: HxWx3 uint8 image array (built from an RGB PIL image
            upstream — the 3-channel mask stacking below relies on this).
        coordinates: polygon corners as (x, y) pairs; assumed to be integer
            pixel positions inside the image — TODO confirm with callers.

    Returns:
        PIL image of the bounding-box crop with out-of-polygon pixels padded.
    """
    # Draw polygon onto mask
    coordinates = [(corner[0], corner[1]) for corner in coordinates]
    # Axis-aligned bounding box: [min_x, min_y, max_x, max_y].
    bbox = [min([x[0] for x in coordinates]), min([x[1] for x in coordinates]), max([x[0] for x in coordinates]), max([x[1] for x in coordinates])]

    # We mask out anything not in the polygon
    # .copy() so the pad writes below don't touch the caller's array.
    cropped_polygon = image_array[bbox[1]:bbox[3], bbox[0]:bbox[2]].copy()
    # Shift polygon corners into the cropped frame.
    coordinates = [(x - bbox[0], y - bbox[1]) for x, y in coordinates]

    # Pad the area outside the polygon with the pad value
    mask = np.zeros(cropped_polygon.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, [np.int32(coordinates)], 1)
    # Replicate the 1-channel mask across the 3 color channels.
    mask = np.stack([mask] * 3, axis=-1)
    cropped_polygon[mask == 0] = settings.RECOGNITION_PAD_VALUE

    rectangle_image = Image.fromarray(cropped_polygon)
    return rectangle_image