MMOCR

Runtime error

App Files Files Community

MMOCR / mmocr /datasets /pipelines /transforms.py

tomofi

Add application file

2366e36 almost 4 years ago

raw

history blame contribute delete

37.5 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import math

	import cv2
	import mmcv
	import numpy as np
	import torchvision.transforms as transforms
	from mmdet.core import BitmapMasks, PolygonMasks
	from mmdet.datasets.builder import PIPELINES
	from mmdet.datasets.pipelines.transforms import Resize
	from PIL import Image
	from shapely.geometry import Polygon as plg

	import mmocr.core.evaluation.utils as eval_utils
	from mmocr.utils import check_argument


	@PIPELINES.register_module()
	class RandomCropInstances:
	    """Randomly crop images and make sure to contain text instances.

	    Args:
	        target_size (tuple or int): (height, width). An int is expanded to
	            the square ``(target_size, target_size)``.
	        instance_key (str): Key in ``results`` whose masks define the
	            positive (text) region.
	        mask_type (str): 'inx0' uses only the first mask as the positive
	            region; 'union_all' uses the union of all masks.
	        positive_sample_ratio (float): The probability of sampling regions
	            that go through positive regions.
	    """

	    def __init__(
	            self,
	            target_size,
	            instance_key,
	            mask_type='inx0',  # 'inx0' or 'union_all'
	            positive_sample_ratio=5.0 / 8.0):

	        assert mask_type in ['inx0', 'union_all']

	        self.mask_type = mask_type
	        self.instance_key = instance_key
	        self.positive_sample_ratio = positive_sample_ratio
	        self.target_size = target_size if (target_size is None or isinstance(
	            target_size, tuple)) else (target_size, target_size)

	    def sample_offset(self, img_gt, img_size):
	        """Sample the top-left corner of the crop window.

	        Args:
	            img_gt (ndarray | None): Positive-region mask, or None when there
	                is no instance.
	            img_size (tuple): Image size as (h, w).

	        Returns:
	            tuple: The (row, col) offset of the crop window.
	        """
	        h, w = img_size
	        t_h, t_w = self.target_size

	        # target size is bigger than origin size
	        t_h = t_h if t_h < h else h
	        t_w = t_w if t_w < w else w
	        if (img_gt is not None
	                and np.random.random_sample() < self.positive_sample_ratio
	                and np.max(img_gt) > 0):

	            # make sure to crop the positive region
	            positive = np.where(img_gt > 0)

	            # the minimum top left to crop positive region (h,w)
	            tl = np.min(positive, axis=1) - (t_h, t_w)
	            tl[tl < 0] = 0
	            # the maximum top left to crop positive region
	            br = np.max(positive, axis=1) - (t_h, t_w)
	            br[br < 0] = 0
	            # if br is too big so that crop the outside region of img
	            br[0] = min(br[0], h - t_h)
	            br[1] = min(br[1], w - t_w)

	            h = np.random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
	            w = np.random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
	        else:
	            # make sure not to crop outside of img

	            h = np.random.randint(0, h - t_h) if h - t_h > 0 else 0
	            w = np.random.randint(0, w - t_w) if w - t_w > 0 else 0

	        return (h, w)

	    @staticmethod
	    def crop_img(img, offset, target_size):
	        """Crop ``img`` at ``offset`` (row, col), clipped to the image bounds.

	        Returns:
	            tuple: The cropped image and the crop box as the array
	            ``[x1, y1, x2, y2]``.
	        """
	        h, w = img.shape[:2]
	        br = np.min(
	            np.stack((np.array(offset) + np.array(target_size), np.array(
	                (h, w)))),
	            axis=0)
	        return img[offset[0]:br[0], offset[1]:br[1]], np.array(
	            [offset[1], offset[0], br[1], br[0]])

	    def crop_bboxes(self, bboxes, canvas_bbox):
	        """Clip boxes to the crop canvas and shift them to canvas coordinates.

	        Returns:
	            tuple: A float32 array of the surviving boxes with shape (k, 4)
	            and the list of their indices in ``bboxes`` (ascending).
	        """
	        kept_bboxes = []
	        kept_inx = []
	        canvas_poly = eval_utils.box2polygon(canvas_bbox)
	        tl = canvas_bbox[0:2]

	        for idx, bbox in enumerate(bboxes):
	            poly = eval_utils.box2polygon(bbox)
	            area, inters = eval_utils.poly_intersection(
	                poly, canvas_poly, return_poly=True)
	            if area == 0:
	                continue
	            xmin, ymin, xmax, ymax = inters.bounds
	            kept_bboxes.append(
	                np.array(
	                    [xmin - tl[0], ymin - tl[1], xmax - tl[0], ymax - tl[1]],
	                    dtype=np.float32))
	            kept_inx.append(idx)

	        if len(kept_inx) == 0:
	            return np.array([]).astype(np.float32).reshape(0, 4), kept_inx

	        return np.stack(kept_bboxes), kept_inx

	    @staticmethod
	    def generate_mask(gt_mask, type):
	        """Build the positive-region mask according to ``type``."""

	        if type == 'inx0':
	            return gt_mask.masks[0]
	        if type == 'union_all':
	            mask = gt_mask.masks[0].copy()
	            for idx in range(1, len(gt_mask.masks)):
	                mask = np.logical_or(mask, gt_mask.masks[idx])
	            return mask

	        raise NotImplementedError

	    def __call__(self, results):
	        """Crop the image, its masks and its boxes in ``results``."""

	        gt_mask = results[self.instance_key]
	        mask = None
	        if len(gt_mask.masks) > 0:
	            mask = self.generate_mask(gt_mask, self.mask_type)
	        results['crop_offset'] = self.sample_offset(mask,
	                                                    results['img'].shape[:2])

	        # crop img. bbox = [x1,y1,x2,y2]
	        img, bbox = self.crop_img(results['img'], results['crop_offset'],
	                                  self.target_size)
	        results['img'] = img
	        img_shape = img.shape
	        results['img_shape'] = img_shape

	        # crop masks
	        for key in results.get('mask_fields', []):
	            results[key] = results[key].crop(bbox)

	        # for mask rcnn
	        for key in results.get('bbox_fields', []):
	            results[key], kept_inx = self.crop_bboxes(results[key], bbox)
	            if key == 'gt_bboxes':
	                # ignore gt_labels accordingly
	                if 'gt_labels' in results:
	                    ori_labels = results['gt_labels']
	                    results['gt_labels'] = [
	                        ori_labels[idx] for idx in kept_inx
	                    ]
	                # ignore gt_masks accordingly; this branch no longer relies
	                # on a variable that only exists when gt_labels is present
	                if 'gt_masks' in results:
	                    ori_mask = results['gt_masks'].masks
	                    kept_mask = [ori_mask[idx] for idx in kept_inx]
	                    target_h, target_w = bbox[3] - bbox[1], bbox[2] - bbox[0]
	                    if len(kept_inx) > 0:
	                        kept_mask = np.stack(kept_mask)
	                    else:
	                        kept_mask = np.empty((0, target_h, target_w),
	                                             dtype=np.float32)
	                    results['gt_masks'] = BitmapMasks(kept_mask, target_h,
	                                                      target_w)

	        return results

	    def __repr__(self):
	        repr_str = self.__class__.__name__
	        return repr_str


	@PIPELINES.register_module()
	class RandomRotateTextDet:
	    """Rotate the image and its bitmap masks by a random angle.

	    Args:
	        rotate_ratio (float): Probability of applying the rotation.
	        max_angle (int): The angle is drawn from [-max_angle, max_angle).
	    """

	    def __init__(self, rotate_ratio=1.0, max_angle=10):
	        self.rotate_ratio = rotate_ratio
	        self.max_angle = max_angle

	    @staticmethod
	    def sample_angle(max_angle):
	        """Draw a rotation angle uniformly from [-max_angle, max_angle)."""
	        return np.random.random_sample() * 2 * max_angle - max_angle

	    @staticmethod
	    def rotate_img(img, angle):
	        """Rotate ``img`` about its centre, keeping the original size."""
	        height, width = img.shape[:2]
	        centre = (width / 2, height / 2)
	        affine = cv2.getRotationMatrix2D(centre, angle, 1)
	        rotated = cv2.warpAffine(
	            img, affine, (width, height), flags=cv2.INTER_NEAREST)
	        assert rotated.shape == img.shape
	        return rotated

	    def __call__(self, results):
	        """Rotate ``results['img']`` and every mask field with one angle."""
	        if np.random.random_sample() >= self.rotate_ratio:
	            return results

	        angle = self.sample_angle(self.max_angle)
	        results['rotated_angle'] = angle

	        rotated_img = self.rotate_img(results['img'], angle)
	        results['img'] = rotated_img
	        results['img_shape'] = rotated_img.shape
	        out_h, out_w = rotated_img.shape[:2]

	        # every mask is rotated with the same angle as the image
	        for key in results.get('mask_fields', []):
	            rotated_masks = [
	                self.rotate_img(single_mask, angle)
	                for single_mask in results[key].masks
	            ]
	            results[key] = BitmapMasks(rotated_masks, out_h, out_w)

	        return results

	    def __repr__(self):
	        return self.__class__.__name__


	@PIPELINES.register_module()
	class ColorJitter:
	    """Wrap torchvision's ``ColorJitter`` for use as a pipeline transform.

	    All keyword arguments are forwarded to
	    ``torchvision.transforms.ColorJitter``.
	    """

	    def __init__(self, **kwargs):
	        self.transform = transforms.ColorJitter(**kwargs)

	    def __call__(self, results):
	        """Jitter ``results['img']``; the channel order is reversed before
	        and after the PIL round trip (the image is treated as BGR)."""
	        rgb = results['img'][..., ::-1]
	        jittered = np.asarray(self.transform(Image.fromarray(rgb)))
	        results['img'] = jittered[..., ::-1]
	        return results

	    def __repr__(self):
	        return self.__class__.__name__


	@PIPELINES.register_module()
	class ScaleAspectJitter(Resize):
	    """Resize image and segmentation mask encoded by coordinates.

	    Allowed resize types are `around_min_img_scale`, `long_short_bound`, and
	    `indep_sample_in_range`.

	    For `indep_sample_in_range` only ``scale_range`` is used and the target
	    width and height are sampled independently from it. For the other two
	    types ``ratio_range`` and ``aspect_ratio_range`` are required tuples, and
	    `long_short_bound` additionally needs ``long_size_bound`` and
	    ``short_size_bound``.
	    """

	    def __init__(self,
	                 img_scale=None,
	                 multiscale_mode='range',
	                 ratio_range=None,
	                 keep_ratio=False,
	                 resize_type='around_min_img_scale',
	                 aspect_ratio_range=None,
	                 long_size_bound=None,
	                 short_size_bound=None,
	                 scale_range=None):
	        super().__init__(
	            img_scale=img_scale,
	            multiscale_mode=multiscale_mode,
	            ratio_range=ratio_range,
	            keep_ratio=keep_ratio)
	        assert not keep_ratio
	        assert resize_type in [
	            'around_min_img_scale', 'long_short_bound', 'indep_sample_in_range'
	        ]
	        self.resize_type = resize_type

	        if resize_type == 'indep_sample_in_range':
	            # this mode is driven by scale_range alone
	            assert ratio_range is None
	            assert aspect_ratio_range is None
	            assert short_size_bound is None
	            assert long_size_bound is None
	            assert scale_range is not None
	        else:
	            assert scale_range is None
	            assert isinstance(ratio_range, tuple)
	            assert isinstance(aspect_ratio_range, tuple)
	            assert check_argument.equal_len(ratio_range, aspect_ratio_range)

	            if resize_type in ['long_short_bound']:
	                assert short_size_bound is not None
	                assert long_size_bound is not None

	        self.aspect_ratio_range = aspect_ratio_range
	        self.long_size_bound = long_size_bound
	        self.short_size_bound = short_size_bound
	        self.scale_range = scale_range

	    @staticmethod
	    def sample_from_range(range):
	        """Draw a value uniformly between the two entries of ``range``."""
	        assert len(range) == 2
	        low, high = min(range), max(range)
	        return np.random.random_sample() * (high - low) + low

	    def _random_scale(self, results):
	        """Write the sampled target size into ``results['scale']`` as (w, h)
	        and reset ``results['scale_idx']`` to None."""

	        if self.resize_type == 'indep_sample_in_range':
	            new_w = self.sample_from_range(self.scale_range)
	            new_h = self.sample_from_range(self.scale_range)
	            results['scale'] = (int(new_w), int(new_h))  # (w,h)
	            results['scale_idx'] = None
	            return

	        img_h, img_w = results['img'].shape[0:2]
	        short_side = min(img_h, img_w)
	        long_side = max(img_h, img_w)

	        if self.resize_type == 'long_short_bound':
	            # shrink so the long side fits the bound, then jitter
	            base = 1
	            if long_side > self.long_size_bound:
	                base = self.long_size_bound / long_side
	            jitter = self.sample_from_range(self.ratio_range)
	            scale = base * jitter
	            # keep the short side above its lower bound
	            if short_side * scale <= self.short_size_bound:
	                scale = (self.short_size_bound + 10) * 1.0 / short_side
	        elif self.resize_type == 'around_min_img_scale':
	            target_short = min(self.img_scale[0])
	            jitter = self.sample_from_range(self.ratio_range)
	            scale = (jitter * target_short) / short_side
	        else:
	            raise NotImplementedError

	        aspect = self.sample_from_range(self.aspect_ratio_range)
	        root = math.sqrt(aspect)
	        scale_h = scale * root
	        scale_w = scale / root
	        results['scale'] = (int(img_w * scale_w), int(img_h * scale_h))
	        results['scale_idx'] = None


	@PIPELINES.register_module()
	class AffineJitter:
	    """Wrap torchvision's ``RandomAffine`` for use as a pipeline transform.

	    All arguments are forwarded unchanged to
	    ``torchvision.transforms.RandomAffine``.
	    """

	    def __init__(self,
	                 degrees=4,
	                 translate=(0.02, 0.04),
	                 scale=(0.9, 1.1),
	                 shear=None,
	                 resample=False,
	                 fillcolor=0):
	        # NOTE(review): `resample` / `fillcolor` look like the legacy
	        # torchvision keyword names (newer releases use `interpolation` /
	        # `fill`) - confirm against the pinned torchvision version.
	        affine_kwargs = dict(
	            degrees=degrees,
	            translate=translate,
	            scale=scale,
	            shear=shear,
	            resample=resample,
	            fillcolor=fillcolor)
	        self.transform = transforms.RandomAffine(**affine_kwargs)

	    def __call__(self, results):
	        """Apply the affine jitter to ``results['img']``; the channel order
	        is reversed before and after the PIL round trip (the image is
	        treated as BGR)."""
	        rgb = results['img'][..., ::-1]
	        warped = np.asarray(self.transform(Image.fromarray(rgb)))
	        results['img'] = warped[..., ::-1]
	        return results

	    def __repr__(self):
	        return self.__class__.__name__


	@PIPELINES.register_module()
	class RandomCropPolyInstances:
	    """Randomly crop images and make sure to contain at least one intact
	    instance.

	    Args:
	        instance_key (str): Key in ``results`` holding the polygon masks
	            from which the instance to keep intact is picked.
	        crop_ratio (float): Probability of applying the crop.
	        min_side_ratio (float): Minimum crop side length as a fraction of
	            the corresponding image side.
	    """

	    def __init__(self,
	                 instance_key='gt_masks',
	                 crop_ratio=5.0 / 8.0,
	                 min_side_ratio=0.4):
	        super().__init__()
	        self.instance_key = instance_key
	        self.crop_ratio = crop_ratio
	        self.min_side_ratio = min_side_ratio

	    def sample_valid_start_end(self, valid_array, min_len, max_start, min_end):
	        """Sample a (start, end) pair along one axis.

	        Args:
	            valid_array (ndarray): 1 where a cut may be placed, 0 elsewhere.
	            min_len (int): Minimum distance between start and end.
	            max_start (int): The start is sampled below this position.
	            min_end (int): The end is sampled at or after this position.

	        Returns:
	            tuple(int): The sampled start and end positions.
	        """

	        assert isinstance(min_len, int)
	        assert len(valid_array) > min_len

	        start_array = valid_array.copy()
	        max_start = min(len(start_array) - min_len, max_start)
	        start_array[max_start:] = 0
	        # position 0 is always allowed so at least one start region exists
	        start_array[0] = 1
	        # a negative diff marks where a run of ones begins, a positive diff
	        # marks the position right after it ends
	        diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0])
	        region_starts = np.where(diff_array < 0)[0]
	        region_ends = np.where(diff_array > 0)[0]
	        region_ind = np.random.randint(0, len(region_starts))
	        start = np.random.randint(region_starts[region_ind],
	                                  region_ends[region_ind])

	        end_array = valid_array.copy()
	        min_end = max(start + min_len, min_end)
	        end_array[:min_end] = 0
	        # the last position is always allowed so an end region exists
	        end_array[-1] = 1
	        diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0])
	        region_starts = np.where(diff_array < 0)[0]
	        region_ends = np.where(diff_array > 0)[0]
	        region_ind = np.random.randint(0, len(region_starts))
	        end = np.random.randint(region_starts[region_ind],
	                                region_ends[region_ind])
	        return start, end

	    def sample_crop_box(self, img_size, results):
	        """Generate crop box and make sure not to crop the polygon instances.

	        Args:
	            img_size (tuple(int)): The image size (h, w).
	            results (dict): The results dict.

	        Returns:
	            ndarray: The crop box ``[x1, y1, x2, y2]``.
	        """

	        assert isinstance(img_size, tuple)
	        h, w = img_size[:2]

	        key_masks = results[self.instance_key].masks
	        x_valid_array = np.ones(w, dtype=np.int32)
	        y_valid_array = np.ones(h, dtype=np.int32)

	        # the randomly picked instance must end up inside the crop box
	        selected_mask = key_masks[np.random.randint(0, len(key_masks))]
	        selected_mask = selected_mask[0].reshape((-1, 2)).astype(np.int32)
	        max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0)
	        min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1)
	        max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0)
	        min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1)

	        for key in results.get('mask_fields', []):
	            if len(results[key].masks) == 0:
	                continue
	            masks = results[key].masks
	            for mask in masks:
	                assert len(mask) == 1
	                mask = mask[0].reshape((-1, 2)).astype(np.int32)
	                clip_x = np.clip(mask[:, 0], 0, w - 1)
	                clip_y = np.clip(mask[:, 1], 0, h - 1)
	                min_x, max_x = np.min(clip_x), np.max(clip_x)
	                min_y, max_y = np.min(clip_y), np.max(clip_y)

	                # Clamp the slice start at 0: a negative start would be
	                # read as an offset from the end of the array, leaving
	                # polygons near the left/top border unprotected.
	                x_valid_array[max(min_x - 2, 0):max_x + 3] = 0
	                y_valid_array[max(min_y - 2, 0):max_y + 3] = 0

	        min_w = int(w * self.min_side_ratio)
	        min_h = int(h * self.min_side_ratio)

	        x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start,
	                                             min_x_end)
	        y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start,
	                                             min_y_end)

	        return np.array([x1, y1, x2, y2])

	    def crop_img(self, img, bbox):
	        """Return the region of ``img`` inside ``bbox`` = [x1, y1, x2, y2]."""
	        assert img.ndim == 3
	        h, w, _ = img.shape
	        assert 0 <= bbox[1] < bbox[3] <= h
	        assert 0 <= bbox[0] < bbox[2] <= w
	        return img[bbox[1]:bbox[3], bbox[0]:bbox[2]]

	    def __call__(self, results):
	        """Crop the image and polygon masks, dropping polygons that fall
	        outside the crop box together with their labels."""
	        if len(results[self.instance_key].masks) < 1:
	            return results
	        if np.random.random_sample() < self.crop_ratio:
	            crop_box = self.sample_crop_box(results['img'].shape, results)
	            results['crop_region'] = crop_box
	            img = self.crop_img(results['img'], crop_box)
	            results['img'] = img
	            results['img_shape'] = img.shape

	            # crop and filter masks
	            x1, y1, x2, y2 = crop_box
	            w = max(x2 - x1, 1)
	            h = max(y2 - y1, 1)
	            labels = results['gt_labels']
	            valid_labels = []
	            for key in results.get('mask_fields', []):
	                if len(results[key].masks) == 0:
	                    continue
	                results[key] = results[key].crop(crop_box)
	                # filter out polygons beyond crop box.
	                masks = results[key].masks
	                valid_masks_list = []

	                for ind, mask in enumerate(masks):
	                    assert len(mask) == 1
	                    polygon = mask[0].reshape((-1, 2))
	                    # keep polygons lying within a 4 px tolerance of the box
	                    inside_x = (polygon[:, 0] > -4).all() and (
	                        polygon[:, 0] < w + 4).all()
	                    inside_y = (polygon[:, 1] > -4).all() and (
	                        polygon[:, 1] < h + 4).all()
	                    if inside_x and inside_y:
	                        mask[0][::2] = np.clip(mask[0][::2], 0, w)
	                        mask[0][1::2] = np.clip(mask[0][1::2], 0, h)
	                        if key == self.instance_key:
	                            valid_labels.append(labels[ind])
	                        valid_masks_list.append(mask)

	                results[key] = PolygonMasks(valid_masks_list, h, w)
	            results['gt_labels'] = np.array(valid_labels)

	        return results

	    def __repr__(self):
	        repr_str = self.__class__.__name__
	        return repr_str


	@PIPELINES.register_module()
	class RandomRotatePolyInstances:
	    """Randomly rotate images and polygon masks."""

	    def __init__(self,
	                 rotate_ratio=0.5,
	                 max_angle=10,
	                 pad_with_fixed_color=False,
	                 pad_value=(0, 0, 0)):
	        """Randomly rotate images and polygon masks.

	        Args:
	            rotate_ratio (float): The ratio of samples to operate rotation.
	            max_angle (int): The maximum rotation angle.
	            pad_with_fixed_color (bool): The flag for whether to pad rotated
	                image with fixed value. If set to False, the rotated image
	                will be padded onto cropped image.
	            pad_value (tuple(int)): The color value for padding rotated image.
	        """
	        self.rotate_ratio = rotate_ratio
	        self.max_angle = max_angle
	        self.pad_with_fixed_color = pad_with_fixed_color
	        self.pad_value = pad_value

	    def rotate(self, center, points, theta, center_shift=(0, 0)):
	        """Rotate flat ``[x0, y0, x1, y1, ...]`` points about ``center``.

	        ``points`` is modified in place and also returned. ``theta`` is in
	        degrees and ``center_shift`` is an (x, y) translation added after the
	        rotation.
	        """
	        # rotate points.
	        (center_x, center_y) = center
	        # y is negated before rotating and negated back afterwards
	        center_y = -center_y
	        x, y = points[::2], points[1::2]
	        y = -y

	        theta = theta / 180 * math.pi
	        cos = math.cos(theta)
	        sin = math.sin(theta)

	        x = (x - center_x)
	        y = (y - center_y)

	        _x = center_x + x * cos - y * sin + center_shift[0]
	        _y = -(center_y + x * sin + y * cos) + center_shift[1]

	        points[::2], points[1::2] = _x, _y
	        return points

	    def cal_canvas_size(self, ori_size, degree):
	        """Return the (h, w) of the canvas bounding the rotated image."""
	        assert isinstance(ori_size, tuple)
	        angle = degree * math.pi / 180.0
	        h, w = ori_size[:2]

	        cos = math.cos(angle)
	        sin = math.sin(angle)
	        canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos))
	        canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin))

	        canvas_size = (canvas_h, canvas_w)
	        return canvas_size

	    def sample_angle(self, max_angle):
	        """Draw an angle uniformly from [-max_angle, max_angle)."""
	        angle = np.random.random_sample() * 2 * max_angle - max_angle
	        return angle

	    def rotate_img(self, img, angle, canvas_size):
	        """Rotate ``img`` onto a canvas of ``canvas_size`` (h, w).

	        The uncovered border is filled with ``pad_value`` when
	        ``pad_with_fixed_color`` is set, otherwise with a resized random
	        patch cut from the image itself.
	        """
	        h, w = img.shape[:2]
	        rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
	        # shift so the rotated image is centred on the larger canvas
	        rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2)
	        rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2)

	        if self.pad_with_fixed_color:
	            target_img = cv2.warpAffine(
	                img,
	                rotation_matrix, (canvas_size[1], canvas_size[0]),
	                flags=cv2.INTER_NEAREST,
	                borderValue=self.pad_value)
	        else:
	            mask = np.zeros_like(img)
	            # max(..., 1) keeps the sampling range and the patch non-empty
	            # for images with a side shorter than 9 px; larger images are
	            # unaffected.
	            (h_ind, w_ind) = (np.random.randint(0, max(h * 7 // 8, 1)),
	                              np.random.randint(0, max(w * 7 // 8, 1)))
	            img_cut = img[h_ind:(h_ind + max(h // 9, 1)),
	                          w_ind:(w_ind + max(w // 9, 1))]
	            img_cut = mmcv.imresize(img_cut, (canvas_size[1], canvas_size[0]))
	            # mask is 1 on the border left uncovered by the rotated image
	            mask = cv2.warpAffine(
	                mask,
	                rotation_matrix, (canvas_size[1], canvas_size[0]),
	                borderValue=[1, 1, 1])
	            target_img = cv2.warpAffine(
	                img,
	                rotation_matrix, (canvas_size[1], canvas_size[0]),
	                borderValue=[0, 0, 0])
	            target_img = target_img + img_cut * mask

	        return target_img

	    def __call__(self, results):
	        """Rotate the image and all polygon mask fields by one angle."""
	        if np.random.random_sample() < self.rotate_ratio:
	            img = results['img']
	            h, w = img.shape[:2]
	            angle = self.sample_angle(self.max_angle)
	            canvas_size = self.cal_canvas_size((h, w), angle)
	            center_shift = (int(
	                (canvas_size[1] - w) / 2), int((canvas_size[0] - h) / 2))

	            # rotate image
	            results['rotated_poly_angle'] = angle
	            img = self.rotate_img(img, angle, canvas_size)
	            results['img'] = img
	            img_shape = img.shape
	            results['img_shape'] = img_shape

	            # rotate polygons
	            for key in results.get('mask_fields', []):
	                if len(results[key].masks) == 0:
	                    continue
	                masks = results[key].masks
	                rotated_masks = []
	                for mask in masks:
	                    rotated_mask = self.rotate((w / 2, h / 2), mask[0], angle,
	                                               center_shift)
	                    rotated_masks.append([rotated_mask])

	                results[key] = PolygonMasks(rotated_masks, *(img_shape[:2]))

	        return results

	    def __repr__(self):
	        repr_str = self.__class__.__name__
	        return repr_str


	@PIPELINES.register_module()
	class SquareResizePad:
	    """Resize or pad images to be square shape."""

	    def __init__(self,
	                 target_size,
	                 pad_ratio=0.6,
	                 pad_with_fixed_color=False,
	                 pad_value=(0, 0, 0)):
	        """Resize or pad images to be square shape.

	        Args:
	            target_size (int): The target size of square shaped image.
	            pad_ratio (float): Probability of the aspect-preserving resize
	                followed by padding; otherwise the image is resized straight
	                to a square.
	            pad_with_fixed_color (bool): The flag for whether to pad rotated
	                image with fixed value. If set to False, the rescales image
	                will be padded onto cropped image.
	            pad_value (tuple(int)): The color value for padding rotated image.
	        """
	        assert isinstance(target_size, int)
	        assert isinstance(pad_ratio, float)
	        assert isinstance(pad_with_fixed_color, bool)
	        assert isinstance(pad_value, tuple)

	        self.target_size = target_size
	        self.pad_ratio = pad_ratio
	        self.pad_with_fixed_color = pad_with_fixed_color
	        self.pad_value = pad_value

	    def resize_img(self, img, keep_ratio=True):
	        """Resize ``img`` so its long side (or both sides when
	        ``keep_ratio`` is False) equals ``target_size``.

	        Returns:
	            tuple: The resized image and its size as (h, w).
	        """
	        h, w, _ = img.shape
	        if keep_ratio:
	            # max(..., 1) keeps the short side at least 1 px for very
	            # elongated images, where the truncation would give 0.
	            t_h = self.target_size if h >= w else max(
	                int(h * self.target_size / w), 1)
	            t_w = self.target_size if h <= w else max(
	                int(w * self.target_size / h), 1)
	        else:
	            t_h = t_w = self.target_size
	        img = mmcv.imresize(img, (t_w, t_h))
	        return img, (t_h, t_w)

	    def square_pad(self, img):
	        """Pad ``img`` to a square, centring it on the short axis.

	        Returns:
	            tuple: The padded image and the (x, y) offset of the original
	            image inside it.
	        """
	        h, w = img.shape[:2]
	        if h == w:
	            return img, (0, 0)
	        pad_size = max(h, w)
	        if self.pad_with_fixed_color:
	            expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8)
	            expand_img[:] = self.pad_value
	        else:
	            # The background is a resized random patch of the image.
	            # max(..., 1) keeps the sampling range and the patch non-empty
	            # when a side is shorter than 9 px; larger images are
	            # unaffected.
	            (h_ind, w_ind) = (np.random.randint(0, max(h * 7 // 8, 1)),
	                              np.random.randint(0, max(w * 7 // 8, 1)))
	            img_cut = img[h_ind:(h_ind + max(h // 9, 1)),
	                          w_ind:(w_ind + max(w // 9, 1))]
	            expand_img = mmcv.imresize(img_cut, (pad_size, pad_size))
	        if h > w:
	            y0, x0 = 0, (h - w) // 2
	        else:
	            y0, x0 = (w - h) // 2, 0
	        expand_img[y0:y0 + h, x0:x0 + w] = img
	        offset = (x0, y0)

	        return expand_img, offset

	    def square_pad_mask(self, points, offset):
	        """Return a copy of flat ``[x0, y0, x1, y1, ...]`` points shifted by
	        ``offset`` = (x, y)."""
	        x0, y0 = offset
	        pad_points = points.copy()
	        pad_points[::2] = pad_points[::2] + x0
	        pad_points[1::2] = pad_points[1::2] + y0
	        return pad_points

	    def __call__(self, results):
	        """Make ``results['img']`` square and update the polygon masks."""
	        img = results['img']

	        if np.random.random_sample() < self.pad_ratio:
	            img, out_size = self.resize_img(img, keep_ratio=True)
	            img, offset = self.square_pad(img)
	        else:
	            img, out_size = self.resize_img(img, keep_ratio=False)
	            offset = (0, 0)

	        results['img'] = img
	        results['img_shape'] = img.shape

	        for key in results.get('mask_fields', []):
	            if len(results[key].masks) == 0:
	                continue
	            # scale the polygons first, then shift them by the pad offset
	            results[key] = results[key].resize(out_size)
	            masks = results[key].masks
	            processed_masks = []
	            for mask in masks:
	                square_pad_mask = self.square_pad_mask(mask[0], offset)
	                processed_masks.append([square_pad_mask])

	            results[key] = PolygonMasks(processed_masks, *(img.shape[:2]))

	        return results

	    def __repr__(self):
	        repr_str = self.__class__.__name__
	        return repr_str


	@PIPELINES.register_module()
	class RandomScaling:
	    """Randomly scale the image while keeping its aspect ratio."""

	    def __init__(self, size=800, scale=(3. / 4, 5. / 2)):
	        """Random scale the image while keeping aspect.

	        Args:
	            size (int) : Base size before scaling.
	            scale (tuple(float)) : The range of scaling. A single float
	                ``s`` is expanded to ``(1 - s, 1 + s)``.
	        """
	        assert isinstance(size, int)
	        assert isinstance(scale, float) or isinstance(scale, tuple)
	        self.size = size
	        if isinstance(scale, tuple):
	            self.scale = scale
	        else:
	            self.scale = (1 - scale, 1 + scale)

	    def __call__(self, results):
	        """Resize ``results['img']`` and all non-empty mask fields by one
	        randomly drawn factor."""
	        image = results['img']
	        h, w, _ = results['img_shape']

	        low, high = min(self.scale), max(self.scale)
	        jitter = np.random.uniform(low, high)
	        # the long side is first mapped to self.size, then jittered
	        factor = self.size * 1.0 / max(h, w) * jitter
	        new_h, new_w = int(h * factor), int(w * factor)
	        out_size = (new_h, new_w)

	        # mmcv.imresize takes the size as (w, h)
	        resized = mmcv.imresize(image, (new_w, new_h))
	        results['img'] = resized
	        results['img_shape'] = resized.shape

	        for key in results.get('mask_fields', []):
	            if len(results[key].masks) != 0:
	                results[key] = results[key].resize(out_size)

	        return results


	@PIPELINES.register_module()
	class RandomCropFlip:
	    """Randomly flip a patch of the image in place."""

	    def __init__(self,
	                 pad_ratio=0.1,
	                 crop_ratio=0.5,
	                 iter_num=1,
	                 min_area_ratio=0.2):
	        """Random crop and flip a patch of the image.

	        Args:
	            pad_ratio (float): Width of the virtual border added around the
	                image when sampling patch corners, as a fraction of the
	                image side.
	            crop_ratio (float): The ratio of cropping.
	            iter_num (int): Number of operations.
	            min_area_ratio (float): Minimal area ratio between cropped patch
	                and original image.
	        """
	        assert isinstance(crop_ratio, float)
	        assert isinstance(iter_num, int)
	        assert isinstance(min_area_ratio, float)

	        self.pad_ratio = pad_ratio
	        # tolerance used when comparing polygon areas
	        self.epsilon = 1e-2
	        self.crop_ratio = crop_ratio
	        self.iter_num = iter_num
	        self.min_area_ratio = min_area_ratio

	    def __call__(self, results):
	        for _ in range(self.iter_num):
	            results = self.random_crop_flip(results)
	        return results

	    def _split_polygons(self, polygons, crop_poly):
	        """Split polygons into those outside and those inside ``crop_poly``.

	        Returns:
	            tuple(list, list) | None: ``(outside, inside)``, or None as soon
	            as one polygon is only partially covered by ``crop_poly``.
	        """
	        outside, inside = [], []
	        for polygon in polygons:
	            poly = plg(polygon[0].reshape(-1, 2))
	            inter_area = eval_utils.poly_intersection(poly, crop_poly)
	            area_gap = np.abs(inter_area - float(poly.area))
	            if area_gap > self.epsilon and np.abs(inter_area) > self.epsilon:
	                # the patch border would cut through this polygon
	                return None
	            if area_gap < self.epsilon:
	                inside.append(polygon)
	            else:
	                outside.append(polygon)
	        return outside, inside

	    @staticmethod
	    def _flip_polygons(polygons, flip_x, flip_y, x_mirror, y_mirror):
	        """Mirror polygons via ``x -> x_mirror - x`` and/or
	        ``y -> y_mirror - y``; the coordinate arrays are updated in place."""
	        flipped = []
	        for polygon in polygons:
	            poly = polygon[0].reshape(-1, 2)
	            if flip_x:
	                poly[:, 0] = x_mirror - poly[:, 0]
	            if flip_y:
	                poly[:, 1] = y_mirror - poly[:, 1]
	            flipped.append([poly.reshape(-1, )])
	        return flipped

	    def random_crop_flip(self, results):
	        """Flip one random patch that does not cut through any polygon.

	        ``results`` is returned unchanged when there is no polygon, when the
	        random draw skips the operation, or when no valid patch is found
	        within 10 attempts.
	        """
	        image = results['img']
	        polygons = results['gt_masks'].masks
	        ignore_polygons = results['gt_masks_ignore'].masks
	        all_polygons = polygons + ignore_polygons
	        if len(polygons) == 0:
	            return results

	        if np.random.random() >= self.crop_ratio:
	            return results

	        h, w, _ = results['img_shape']
	        area = h * w
	        pad_h = int(h * self.pad_ratio)
	        pad_w = int(w * self.pad_ratio)
	        h_axis, w_axis = self.generate_crop_target(image, all_polygons, pad_h,
	                                                   pad_w)
	        if len(h_axis) == 0 or len(w_axis) == 0:
	            return results

	        for _ in range(10):
	            xx = np.random.choice(w_axis, size=2)
	            xmin = np.clip(np.min(xx) - pad_w, 0, w - 1)
	            xmax = np.clip(np.max(xx) - pad_w, 0, w - 1)
	            yy = np.random.choice(h_axis, size=2)
	            ymin = np.clip(np.min(yy) - pad_h, 0, h - 1)
	            ymax = np.clip(np.max(yy) - pad_h, 0, h - 1)
	            if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio:
	                # area too small
	                continue

	            pts = np.stack([[xmin, xmax, xmax, xmin],
	                            [ymin, ymin, ymax, ymax]]).T.astype(np.int32)
	            crop_poly = plg(pts)
	            split = self._split_polygons(polygons, crop_poly)
	            if split is None:
	                continue
	            ign_split = self._split_polygons(ignore_polygons, crop_poly)
	            if ign_split is None:
	                continue
	            polys_keep, polys_new = split
	            ign_polys_keep, ign_polys_new = ign_split
	            break
	        else:
	            # Every attempt was rejected. Flipping the last rejected patch
	            # would cut through instances or drop annotations, so leave the
	            # sample untouched.
	            return results

	        cropped = image[ymin:ymax, xmin:xmax, :]
	        # 0: horizontal flip, 1: vertical flip, 2: both
	        select_type = np.random.randint(3)
	        if select_type == 0:
	            img = np.ascontiguousarray(cropped[:, ::-1])
	        elif select_type == 1:
	            img = np.ascontiguousarray(cropped[::-1, :])
	        else:
	            img = np.ascontiguousarray(cropped[::-1, ::-1])
	        image[ymin:ymax, xmin:xmax, :] = img
	        results['img'] = image

	        if len(polys_new) + len(ign_polys_new) != 0:
	            height, width, _ = cropped.shape
	            flip_x = select_type != 1
	            flip_y = select_type != 0
	            x_mirror = width + 2 * xmin
	            y_mirror = height + 2 * ymin
	            polys_new = self._flip_polygons(polys_new, flip_x, flip_y,
	                                            x_mirror, y_mirror)
	            ign_polys_new = self._flip_polygons(ign_polys_new, flip_x, flip_y,
	                                                x_mirror, y_mirror)
	            polygons = polys_keep + polys_new
	            ignore_polygons = ign_polys_keep + ign_polys_new
	            results['gt_masks'] = PolygonMasks(polygons, *(image.shape[:2]))
	            results['gt_masks_ignore'] = PolygonMasks(ignore_polygons,
	                                                      *(image.shape[:2]))

	        return results

	    def generate_crop_target(self, image, all_polys, pad_h, pad_w):
	        """Generate crop target and make sure not to crop the polygon
	        instances.

	        Args:
	            image (ndarray): The image waited to be crop.
	            all_polys (list[list[ndarray]]): All polygons including ground
	                truth polygons and ground truth ignored polygons.
	            pad_h (int): Padding length of height.
	            pad_w (int): Padding length of width.
	        Returns:
	            h_axis (ndarray): Vertical cropping range.
	            w_axis (ndarray): Horizontal cropping range.
	        """
	        h, w, _ = image.shape
	        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
	        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)

	        text_polys = []
	        for polygon in all_polys:
	            rect = cv2.minAreaRect(polygon[0].astype(np.int32).reshape(-1, 2))
	            box = cv2.boxPoints(rect)
	            # np.int0 was an alias of np.intp and no longer exists in
	            # NumPy 2.0
	            box = box.astype(np.intp)
	            text_polys.append([box[0], box[1], box[2], box[3]])

	        polys = np.array(text_polys, dtype=np.int32)
	        for poly in polys:
	            poly = np.round(poly, decimals=0).astype(np.int32)
	            # mark the extent of each polygon's box as occupied
	            minx = np.min(poly[:, 0])
	            maxx = np.max(poly[:, 0])
	            w_array[minx + pad_w:maxx + pad_w] = 1
	            miny = np.min(poly[:, 1])
	            maxy = np.max(poly[:, 1])
	            h_array[miny + pad_h:maxy + pad_h] = 1

	        h_axis = np.where(h_array == 0)[0]
	        w_axis = np.where(w_array == 0)[0]
	        return h_axis, w_axis


	@PIPELINES.register_module()
	class PyramidRescale:
	    """Resize the image to the base shape, downsample it with gaussian pyramid,
	    and rescale it back to original size.

	    Adapted from https://github.com/FangShancheng/ABINet.

	    Args:
	        factor (int): The decay factor from base size, or the number of
	            downsampling operations from the base layer.
	        base_shape (tuple(int)): The shape of the base layer of the pyramid.
	        randomize_factor (bool): If True, the final factor would be a random
	            integer in [0, factor].

	    :Required Keys:
	        - | ``img`` (ndarray): The input image.

	    :Affected Keys:
	        :Modified:
	            - | ``img`` (ndarray): The modified image.
	    """

	    def __init__(self, factor=4, base_shape=(128, 512), randomize_factor=True):
	        assert isinstance(factor, int)
	        assert isinstance(base_shape, list) or isinstance(base_shape, tuple)
	        assert len(base_shape) == 2
	        assert isinstance(randomize_factor, bool)
	        # NOTE: the random factor is drawn once here, at construction time,
	        # so every call on this instance uses the same value.
	        self.factor = factor if not randomize_factor else np.random.randint(
	            0, factor + 1)
	        self.base_w, self.base_h = base_shape

	    def __call__(self, results):
	        """Degrade ``results['img']`` by ``self.factor`` pyramid levels and
	        resize it back to its original size; a factor of 0 is a no-op."""
	        assert 'img' in results
	        if self.factor == 0:
	            return results
	        img = results['img']
	        src_h, src_w = img.shape[:2]
	        scale_img = mmcv.imresize(img, (self.base_w, self.base_h))
	        for _ in range(self.factor):
	            scale_img = cv2.pyrDown(scale_img)
	        scale_img = mmcv.imresize(scale_img, (src_w, src_h))
	        results['img'] = scale_img
	        return results

	    def __repr__(self):
	        repr_str = self.__class__.__name__
	        repr_str += f'(factor={self.factor}, '
	        # the attributes are named base_w / base_h (set in __init__)
	        repr_str += f'basew={self.base_w}, baseh={self.base_h})'
	        return repr_str