Spaces:

tomofi
/

MaskTextSpotterV3-OCR

Runtime error

App Files Files Community

MaskTextSpotterV3-OCR / maskrcnn_benchmark /data /transforms /transforms.py

3v324v23

add

c310e19 almost 4 years ago

raw

history blame contribute delete

12.6 kB

	# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
	import random

	import cv2
	import numpy as np
	from PIL import Image
	from shapely import affinity
	from shapely.geometry import Polygon
	from torchvision.transforms import functional as F


	class Compose(object):
	    """Apply a sequence of paired ``(image, target)`` transforms in order."""

	    def __init__(self, transforms):
	        # Each entry is invoked as ``t(image, target)`` and must hand back
	        # the updated ``(image, target)`` pair.
	        self.transforms = transforms

	    def __call__(self, image, target):
	        """Feed ``image`` and ``target`` through every transform, first to last."""
	        for transform in self.transforms:
	            image, target = transform(image, target)
	        return image, target

	    def __repr__(self):
	        """Return the class name followed by one line per contained transform."""
	        header = self.__class__.__name__ + "("
	        entries = ["\n" + " {0}".format(step) for step in self.transforms]
	        return header + "".join(entries) + "\n)"


	class Resize(object):
	    """Rescale an image (and its target) so the shorter side hits a target size.

	    Args:
	        min_size: Desired length of the shorter side. Either a single number
	            or a list/tuple of candidates; with several candidates one is
	            picked at random on every call.
	        max_size: Upper bound for the longer side, or ``None`` for no bound.
	        strict_resize: When true, both output sides are rounded down to a
	            multiple of 32 (but never below 32).
	    """

	    # In strict mode every output side is snapped to a multiple of this value.
	    _STRIDE = 32

	    def __init__(self, min_size, max_size, strict_resize):
	        self.min_size = min_size
	        self.max_size = max_size
	        self.strict_resize = strict_resize

	    def _snap(self, value):
	        """Round ``value`` down to the stride in strict mode, else return it."""
	        if not self.strict_resize:
	            return value
	        # Flooring a side shorter than the stride would give 0, which is not
	        # a usable image dimension, so keep at least one full stride.
	        return max(self._STRIDE, (value // self._STRIDE) * self._STRIDE)

	    # modified from torchvision to add support for max size
	    def get_size(self, image_size):
	        """Compute the output size for an input of ``image_size``.

	        Args:
	            image_size: ``(width, height)`` of the input image.

	        Returns:
	            ``(height, width)`` of the resized image.
	        """
	        w, h = image_size

	        # Lists are accepted alongside tuples so that sizes coming straight
	        # from a config list do not get mistaken for a scalar.
	        if isinstance(self.min_size, (list, tuple)):
	            if len(self.min_size) == 1:
	                size = self.min_size[0]
	            else:
	                size = random.choice(self.min_size)
	        else:
	            size = self.min_size

	        max_size = self.max_size
	        if max_size is not None:
	            min_original_size = float(min((w, h)))
	            max_original_size = float(max((w, h)))
	            # Shrink the target if scaling the short side to ``size`` would
	            # push the long side past ``max_size``.
	            if max_original_size / min_original_size * size > max_size:
	                size = int(round(max_size * min_original_size / max_original_size))

	        # The short side already has the requested length: keep the
	        # dimensions (only snapping them in strict mode).
	        if (w <= h and w == size) or (h <= w and h == size):
	            return (self._snap(h), self._snap(w))

	        if w < h:
	            ow = size
	            oh = int(size * h / w)
	        else:
	            oh = size
	            ow = int(size * w / h)

	        return (self._snap(oh), self._snap(ow))

	    def __call__(self, image, target):
	        """Resize ``image`` and, when given, resize ``target`` to match.

	        ``image.size`` is read as ``(width, height)`` -- presumably a PIL
	        image; confirm against the dataset. ``target`` may be ``None``.
	        """
	        size = self.get_size(image.size)
	        image = F.resize(image, size)
	        if target is not None:
	            target = target.resize(image.size)
	        return image, target


	class RandomCrop(object):
	    """Randomly crop the image to a window that fully contains at least one box.

	    The crop borders are only ever placed on rows/columns that no box covers,
	    so a box is never cut by a border.

	    Args:
	        prob: Probability of attempting a crop on a given call.
	        crop_min_size: Smallest allowed crop extent along each axis.
	        crop_max_size: Largest allowed crop extent along each axis.
	        max_trys: Number of random windows to try before giving up and
	            returning the input unchanged.
	    """

	    def __init__(self, prob, crop_min_size=500, crop_max_size=1000, max_trys=50):
	        self.min_size = crop_min_size
	        self.max_size = crop_max_size
	        self.max_trys = max_trys
	        self.prob = prob

	    @staticmethod
	    def _free_coordinates(boxes, width, height):
	        """Return the row and column indices not covered by any box.

	        ``boxes`` is assumed to be an (N, 4) array of
	        ``[xmin, ymin, xmax, ymax]`` rows -- TODO confirm against BoxList.
	        """
	        occupied_rows = np.zeros(height, dtype=bool)
	        occupied_cols = np.zeros(width, dtype=bool)
	        rounded = np.round(boxes, decimals=0).astype(np.int32)
	        # Clamp at zero: a negative slice start would wrap around to the far
	        # end of the axis and mark the wrong pixels as occupied.
	        rounded = np.maximum(rounded, 0)
	        for box in rounded:
	            occupied_cols[box[0]:box[2]] = True
	            occupied_rows[box[1]:box[3]] = True
	        return np.where(~occupied_rows)[0], np.where(~occupied_cols)[0]

	    def _extent_allowed(self, extent):
	        """Tell whether a crop extent lies within the configured size range."""
	        return self.min_size <= extent <= self.max_size

	    def __call__(self, image, target):
	        """Return a cropped ``(image, target)`` pair, or the inputs unchanged.

	        The inputs are returned as-is when the random draw says so, when
	        ``target`` is ``None`` or has no boxes, when no box-free row or
	        column exists, or when no valid window is found in ``max_trys``
	        attempts.
	        """
	        apply_crop = random.random() < self.prob
	        # Without a target there are no boxes to steer the crop.
	        if not apply_crop or target is None:
	            return image, target

	        boxes = target.bbox.numpy()
	        if len(boxes) == 0:
	            return image, target

	        w, h = image.size
	        h_axis, w_axis = self._free_coordinates(boxes, w, h)
	        if len(h_axis) == 0 or len(w_axis) == 0:
	            return image, target

	        for _ in range(self.max_trys):
	            xx = np.random.choice(w_axis, size=2)
	            xmin, xmax = xx.min(), xx.max()
	            if not self._extent_allowed(xmax - xmin):
	                continue
	            yy = np.random.choice(h_axis, size=2)
	            ymin, ymax = yy.min(), yy.max()
	            if not self._extent_allowed(ymax - ymin):
	                continue
	            box_in_area = (
	                (boxes[:, 0] >= xmin)
	                & (boxes[:, 1] >= ymin)
	                & (boxes[:, 2] <= xmax)
	                & (boxes[:, 3] <= ymax)
	            )
	            # A window that contains no complete box is useless.
	            if not box_in_area.any():
	                continue
	            # Convert to an array only now, so the early exits above never
	            # pay for the copy.
	            cropped = np.array(image)[ymin:ymax, xmin:xmax]
	            target = target.crop([xmin, ymin, xmax, ymax])
	            return Image.fromarray(cropped), target
	        return image, target


	# class RandomCropFixSize(object):
	# def __init__(self, prob, crop_size=512, max_trys=50):
	# self.crop_size = crop_size
	# self.max_trys = max_trys
	# self.prob = prob

	# def __call__(self, image, target):
	# if random.random() < self.prob:
	# im = np.array(image)
	# w, h = image.size
	# h_array = np.zeros((h), dtype=np.int32)
	# w_array = np.zeros((w), dtype=np.int32)
	# boxes = target.bbox.numpy()
	# if len(boxes) == 0:
	# return image, target
	# for box in boxes:
	# box = np.round(box, decimals=0).astype(np.int32)
	# minx = box[0]
	# maxx = box[2]
	# w_array[minx:maxx] = 1
	# miny = box[1]
	# maxy = box[3]
	# h_array[miny:maxy] = 1
	# h_axis = np.where(h_array == 0)[0]
	# w_axis = np.where(w_array == 0)[0]
	# if len(h_axis) == 0 or len(w_axis) == 0:
	# return image, target
	# for _ in range(self.max_trys):
	# xx = np.random.choice(w_axis, size=2)
	# xmin = min(xx)
	# xmax = max(xx)
	# x_size = xmax - xmin
	# if x_size > self.max_size or x_size < self.min_size:
	# continue
	# yy = np.random.choice(h_axis, size=2)
	# ymin = min(yy)
	# ymax = max(yy)
	# y_size = ymax - ymin
	# if y_size > self.max_size or y_size < self.min_size:
	# continue
	# box_in_area = (
	# (boxes[:, 0] >= xmin)
	# & (boxes[:, 1] >= ymin)
	# & (boxes[:, 2] <= xmax)
	# & (boxes[:, 3] <= ymax)
	# )
	# if len(np.where(box_in_area)[0]) == 0:
	# continue
	# im = im[ymin:ymax, xmin:xmax]
	# target = target.crop([xmin, ymin, xmax, ymax])
	# return Image.fromarray(im), target
	# return image, target
	# else:
	# return image, target


	class RandomHorizontalFlip(object):
	    """Mirror the image left-to-right with probability ``prob``.

	    Args:
	        prob: Probability of flipping on a given call.
	    """

	    def __init__(self, prob=0.5):
	        self.prob = prob

	    def __call__(self, image, target):
	        """Return the (possibly flipped) image and its matching target.

	        ``target`` may be ``None``; in that case only the image is flipped.
	        """
	        if random.random() < self.prob:
	            image = F.hflip(image)
	            # Other transforms in this file accept a missing target, so do
	            # not dereference it blindly here either.
	            if target is not None:
	                # 0 is presumably the target's left-right flip code --
	                # TODO confirm against the target class.
	                target = target.transpose(0)
	        return image, target


	class ToTensor(object):
	    """Convert the image with ``F.to_tensor``; the target passes through untouched."""

	    def __call__(self, image, target):
	        tensor = F.to_tensor(image)
	        return tensor, target


	class Normalize(object):
	    """Normalize an image with ``mean``/``std``, optionally as BGR in 0-255.

	    Args:
	        mean: Means forwarded to ``F.normalize``.
	        std: Standard deviations forwarded to ``F.normalize``.
	        to_bgr255: When true, the first axis is reordered ``[2, 1, 0]`` and
	            the values are multiplied by 255 before normalizing.
	    """

	    def __init__(self, mean, std, to_bgr255=True):
	        self.mean = mean
	        self.std = std
	        self.to_bgr255 = to_bgr255

	    def __call__(self, image, target):
	        """Return the normalized image together with the unchanged target."""
	        # Presumably ``image`` is a channel-first RGB tensor in [0, 1]
	        # (e.g. the output of ToTensor) -- TODO confirm.
	        prepared = image[[2, 1, 0]] * 255 if self.to_bgr255 else image
	        normalized = F.normalize(prepared, mean=self.mean, std=self.std)
	        return normalized, target


	class RandomBrightness(object):
	    """With probability ``prob``, scale brightness by a factor from [0.5, 2]."""

	    def __init__(self, prob=0.5):
	        self.prob = prob

	    def __call__(self, image, target):
	        """Return the (possibly adjusted) image; the target is never changed."""
	        triggered = random.random() < self.prob
	        if not triggered:
	            return image, target
	        factor = random.uniform(0.5, 2)
	        return F.adjust_brightness(image, factor), target


	class RandomContrast(object):
	    """With probability ``prob``, scale contrast by a factor from [0.5, 2]."""

	    def __init__(self, prob=0.5):
	        self.prob = prob

	    def __call__(self, image, target):
	        """Return the (possibly adjusted) image; the target is never changed."""
	        triggered = random.random() < self.prob
	        if not triggered:
	            return image, target
	        factor = random.uniform(0.5, 2)
	        return F.adjust_contrast(image, factor), target


	class RandomHue(object):
	    """With probability ``prob``, shift hue by a factor from [-0.25, 0.25]."""

	    def __init__(self, prob=0.5):
	        self.prob = prob

	    def __call__(self, image, target):
	        """Return the (possibly adjusted) image; the target is never changed."""
	        triggered = random.random() < self.prob
	        if not triggered:
	            return image, target
	        shift = random.uniform(-0.25, 0.25)
	        return F.adjust_hue(image, shift), target


	class RandomSaturation(object):
	    """With probability ``prob``, scale saturation by a factor from [0.5, 2]."""

	    def __init__(self, prob=0.5):
	        self.prob = prob

	    def __call__(self, image, target):
	        """Return the (possibly adjusted) image; the target is never changed."""
	        triggered = random.random() < self.prob
	        if not triggered:
	            return image, target
	        factor = random.uniform(0.5, 2)
	        return F.adjust_saturation(image, factor), target


	class RandomGamma(object):
	    """With probability ``prob``, apply gamma correction with gamma from [0.5, 2]."""

	    def __init__(self, prob=0.5):
	        self.prob = prob

	    def __call__(self, image, target):
	        """Return the (possibly adjusted) image; the target is never changed."""
	        triggered = random.random() < self.prob
	        if not triggered:
	            return image, target
	        gamma = random.uniform(0.5, 2)
	        return F.adjust_gamma(image, gamma), target


	class RandomRotate(object):
	    """Rotate image and target together on a canvas padded to fit the result.

	    Args:
	        prob: Probability of rotating on a given call.
	        max_theta: Random angles are drawn uniformly from
	            ``[-max_theta, max_theta]``.
	        fix_rotate: When true, always use an angle of 30 instead of a random
	            one.
	    """

	    def __init__(self, prob, max_theta=30, fix_rotate=False):
	        self.prob = prob
	        self.max_theta = max_theta
	        self.fix_rotate = fix_rotate

	    def __call__(self, image, target):
	        """Return the rotated ``(image, target)``, or the inputs unchanged.

	        Nothing happens when the random draw fails or ``target`` is ``None``.
	        """
	        if not (random.random() < self.prob and target is not None):
	            return image, target

	        delta = (
	            30
	            if self.fix_rotate
	            else random.uniform(-1 * self.max_theta, self.max_theta)
	        )
	        width, height = image.size

	        # Axis-aligned rectangle that encloses the rotated image outline.
	        corners = [[[0, 0], [width, 0], [width, height], [0, height]]]
	        rotated_corners = _rotate_polygons(corners, delta, (width / 2, height / 2))
	        left, top, right, bottom = _quad2minrect(rotated_corners)[0]

	        # The canvas is always at least one pixel larger than the input in
	        # each direction.
	        r_height = max(int(abs(bottom - top)), height + 1)
	        r_width = max(int(abs(right - left)), width + 1)

	        # Paste the original image into the middle of a black canvas.
	        canvas = np.zeros((r_height, r_width, 3))
	        start_h = (r_height - height) // 2
	        start_w = (r_width - width) // 2
	        canvas[start_h:start_h + height, start_w:start_w + width, :] = image

	        center = (r_width / 2, r_height / 2)
	        matrix = cv2.getRotationMatrix2D(center, delta, 1)
	        warped = cv2.warpAffine(canvas, matrix, (r_width, r_height))
	        rotated_image = Image.fromarray(warped.astype(np.uint8))

	        # The target is rotated by the negated angle about the same centre,
	        # after being told the padding offsets.
	        target = target.rotate(-delta, center, start_h, start_w)
	        return rotated_image, target


	def _quad2minrect(boxes):
	    """Convert quads (N x 8) to the minimal axis-aligned rectangles (N x 4).

	    Even columns are read as x coordinates and odd columns as y coordinates;
	    each output row is ``[min_x, min_y, max_x, max_y]``.
	    """
	    xs = boxes[:, ::2]
	    ys = boxes[:, 1::2]
	    return np.column_stack(
	        (xs.min(axis=1), ys.min(axis=1), xs.max(axis=1), ys.max(axis=1))
	    )


	def _boxlist2quads(boxlist):
	    """Flatten a list of four-point boxes into an (N, 8) float array.

	    Each box is indexed as ``box[k][0]`` / ``box[k][1]`` for ``k`` in 0..3;
	    the output row is ``[x0, y0, x1, y1, x2, y2, x3, y3]``.
	    """
	    rows = [
	        [box[corner][axis] for corner in range(4) for axis in (0, 1)]
	        for box in boxlist
	    ]
	    # The explicit reshape keeps the (0, 8) shape for an empty input.
	    return np.array(rows, dtype=np.float64).reshape(-1, 8)


	def _rotate_polygons(polygons, angle, r_c):
	    """Rotate each polygon by ``angle`` about ``r_c`` and return them as quads.

	    Args:
	        polygons: Iterable of point sequences; each is turned into a shapely
	            ``Polygon``.
	        angle: Rotation angle handed to ``shapely.affinity.rotate``.
	        r_c: Rotation centre handed to ``shapely.affinity.rotate`` as its
	            origin, e.g. an ``(x, y)`` pair.

	    Returns:
	        The rotated outlines packed by ``_boxlist2quads`` (one row of eight
	        coordinates per polygon).
	    """
	    rotated_rings = []
	    for vertices in polygons:
	        rotated = affinity.rotate(Polygon(vertices), angle, r_c)
	        # A closed quadrilateral ring has 5 coordinates (first == last);
	        # anything shorter is reported for debugging but still processed.
	        ring_length = len(list(rotated.exterior.coords))
	        if ring_length < 5:
	            print("img_box_ori:", vertices)
	            print("img_box_rotated:", rotated)
	        # Drop the closing coordinate that repeats the first vertex.
	        rotated_rings.append(rotated.boundary.coords[:-1])
	    return _boxlist2quads(rotated_rings)