Spaces:

adirathor07
/

AutoEval

Sleeping

App Files Files Community

AutoEval / doctr /transforms /functional /pytorch.py

adirathor07

added doctr folder

153628e over 1 year ago

raw

history blame contribute delete

5.01 kB

	# Copyright (C) 2021-2024, Mindee.

	# This program is licensed under the Apache License 2.0.
	# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

	from copy import deepcopy
	from typing import Tuple

	import numpy as np
	import torch
	from torchvision.transforms import functional as F

	from doctr.utils.geometry import rotate_abs_geoms

	from .base import create_shadow_mask, crop_boxes

	# Public API of this module: the names exported by a star-import.
	__all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow"]


	def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
	    """Invert the colors of an image after applying a random per-channel shift

	    The image is converted to a 3-channel grayscale image, every channel is scaled by a
	    random factor, and the result is inverted (against 255 for uint8 tensors, against 1
	    for any other dtype).

	    Args:
	    ----
	        img : torch.Tensor, the image to invert. A 4-dimensional tensor is treated as a batch
	            and gets an independent shift per sample.
	        min_val : minimum value of the random shift

	    Returns:
	    -------
	        the inverted image
	    """
	    out = F.rgb_to_grayscale(img, num_output_channels=3)
	    # Random RGB shift: one factor per channel (and per sample for batched inputs).
	    # It is sampled on the image's device, otherwise a CUDA image would be multiplied
	    # with a CPU tensor and raise a device-mismatch error.
	    shift_shape = [img.shape[0], 3, 1, 1] if img.ndim == 4 else [3, 1, 1]
	    rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape, device=out.device)
	    is_uint8 = out.dtype == torch.uint8
	    # Apply the shift
	    if is_uint8:
	        # Do the multiplication in floating point, then go back to uint8
	        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
	    else:
	        out = out * rgb_shift.to(dtype=out.dtype)
	    # Inverse the color
	    return 255 - out if is_uint8 else 1 - out


	def rotate_sample(
	    img: torch.Tensor,
	    geoms: np.ndarray,
	    angle: float,
	    expand: bool = False,
	) -> Tuple[torch.Tensor, np.ndarray]:
	    """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)

	    Args:
	    ----
	        img: image to rotate
	        geoms: array of geometries of shape (N, 4) or (N, 4, 2). Coordinates are considered
	            relative when their maximum is at most 1, absolute otherwise. The array is not modified.
	        angle: angle in degrees. +: counter-clockwise, -: clockwise
	        expand: whether the image should be padded before the rotation

	    Returns:
	    -------
	        A tuple of rotated img (tensor), rotated geometries of shape (N, 4, 2) in relative
	        coordinates clipped to [0, 1]

	    Raises:
	    ------
	        AssertionError: if `geoms` is neither of shape (N, 4) nor (N, 4, 2)
	    """
	    rotated_img = F.rotate(img, angle=angle, fill=0, expand=expand)  # Interpolation NEAREST by default
	    rotated_img = rotated_img[:3]  # when expand=True, it expands to RGBA channels

	    if geoms.shape[1:] not in ((4,), (4, 2)):
	        raise AssertionError("invalid format for arg `geoms`")

	    # Nothing to rotate: `np.max` has no identity on an empty array and would raise,
	    # so return an empty set of polygons right away.
	    if geoms.shape[0] == 0:
	        return rotated_img, np.zeros((0, 4, 2), dtype=np.float32)

	    # Get absolute coords (work on a copy so that the caller's array is left untouched)
	    _geoms = deepcopy(geoms)
	    if np.max(_geoms) <= 1:
	        if _geoms.ndim == 2:
	            # Straight boxes: xmin, ymin, xmax, ymax
	            _geoms[:, [0, 2]] *= img.shape[-1]
	            _geoms[:, [1, 3]] *= img.shape[-2]
	        else:
	            # Polygons: (x, y) pairs
	            _geoms[..., 0] *= img.shape[-1]
	            _geoms[..., 1] *= img.shape[-2]

	    # Rotate the boxes: xmin, ymin, xmax, ymax or polygons --> (4, 2) polygon
	    rotated_geoms: np.ndarray = rotate_abs_geoms(
	        _geoms,
	        angle,
	        img.shape[1:],  # type: ignore[arg-type]
	        expand,
	    ).astype(np.float32)

	    # Always return relative boxes to avoid label confusions when resizing is performed afterwards
	    rotated_geoms[..., 0] = rotated_geoms[..., 0] / rotated_img.shape[2]
	    rotated_geoms[..., 1] = rotated_geoms[..., 1] / rotated_img.shape[1]

	    return rotated_img, np.clip(rotated_geoms, 0, 1)


	def crop_detection(
	    img: torch.Tensor, boxes: np.ndarray, crop_box: Tuple[float, float, float, float]
	) -> Tuple[torch.Tensor, np.ndarray]:
	    """Crop an image and its associated bboxes

	    Args:
	    ----
	        img: image to crop
	        boxes: array of boxes to clip, absolute (int) or relative (float). Boxes are considered
	            relative when their maximum is at most 1.
	        crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.

	    Returns:
	    -------
	        A tuple of cropped image, cropped boxes, where the image is not resized.

	    Raises:
	    ------
	        AssertionError: if a coordinate of `crop_box` lies outside [0, 1]
	    """
	    if any(val < 0 or val > 1 for val in crop_box):
	        raise AssertionError("coordinates of arg `crop_box` should be relative")
	    h, w = img.shape[-2:]
	    # Convert the relative crop box to pixel indices
	    xmin, ymin = int(round(crop_box[0] * (w - 1))), int(round(crop_box[1] * (h - 1)))
	    xmax, ymax = int(round(crop_box[2] * (w - 1))), int(round(crop_box[3] * (h - 1)))
	    cropped_img = F.crop(img, ymin, xmin, ymax - ymin, xmax - xmin)

	    # No boxes to crop: `boxes.max()` would raise on an empty array, so hand the
	    # (empty) boxes back untouched.
	    if boxes.size == 0:
	        return cropped_img, boxes

	    # Crop the boxes, using the crop box in the same coordinate system (relative / absolute) as the boxes
	    boxes = crop_boxes(boxes, crop_box if boxes.max() <= 1 else (xmin, ymin, xmax, ymax))

	    return cropped_img, boxes


	def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwargs) -> torch.Tensor:
	    """Apply a random, blurred shadow to an image

	    Args:
	    ----
	        img: image to modify (its dimensions after the first one are passed to
	            `create_shadow_mask` as the target shape)
	        opacity_range: the minimum and maximum desired opacity of the shadow
	        **kwargs: additional arguments to pass to `create_shadow_mask`

	    Returns:
	    -------
	        shaded image
	    """
	    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)  # type: ignore[arg-type]

	    opacity = float(np.random.uniform(*opacity_range))
	    # Build the mask on the image's device so that CUDA images don't hit a device mismatch
	    shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...]).to(device=img.device)

	    # Add some blur to make it believable
	    # Odd kernel size picked among 7, 9, 11, 13. `np.random.rand()` returns a scalar, which
	    # avoids the deprecated conversion of a 1-element array to `int`.
	    k = 7 + 2 * int(4 * np.random.rand())
	    sigma = np.random.uniform(0.5, 5.0)
	    shadow_tensor = F.gaussian_blur(shadow_tensor, k, sigma=[sigma, sigma])

	    # Blend the shaded image with the original one according to the opacity
	    return opacity * shadow_tensor * img + (1 - opacity) * img