# File size: 4,841 Bytes (revision 77f8d5f)
"""
Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023
Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora
GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
Project Website: https://abdur75648.github.io/UTRNet/
Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial
4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/)
"""
from functools import partial
import random as rnd
import imgaug.augmenters as iaa
import numpy as np
from PIL import ImageFilter, Image
from timm.data import auto_augment
_OP_CACHE = {}
def _get_op(key, factory):
try:
op = _OP_CACHE[key]
except KeyError:
op = factory()
_OP_CACHE[key] = op
return op
def _get_param(level, img, max_dim_factor, min_level=1):
max_level = max(min_level, max_dim_factor * max(img.size))
return round(min(level, max_level))
def gaussian_blur(img, radius, **__):
    """Blur ``img`` with a cached ``ImageFilter.GaussianBlur`` filter.

    Args:
        img: Image object providing ``size`` and ``filter``.
        radius: Requested blur radius; limited via ``_get_param`` with a
            factor of 0.02 of the largest image dimension, then rounded.
        **__: Extra keyword arguments are accepted and ignored.

    Returns:
        The result of ``img.filter`` with the blur filter.
    """
    blur_radius = _get_param(radius, img, 0.02)
    blur_filter = _get_op(
        f'gaussian_blur_{blur_radius}',
        lambda: ImageFilter.GaussianBlur(blur_radius),
    )
    return img.filter(blur_filter)
def motion_blur(img, k, **__):
    """Apply a cached ``iaa.MotionBlur`` augmenter to ``img``.

    Args:
        img: Image object providing ``size`` and convertible with ``np.asarray``.
        k: Requested value for ``iaa.MotionBlur``; limited via ``_get_param``
            (factor 0.08, cap floor 3) and then forced to an odd integer.
        **__: Extra keyword arguments are accepted and ignored.

    Returns:
        A new PIL image built from the augmented array.
    """
    kernel = _get_param(k, img, 0.08, 3) | 1  # setting the lowest bit bins to odd values
    augmenter = _get_op(f'motion_blur_{kernel}', lambda: iaa.MotionBlur(kernel))
    pixels = np.asarray(img)
    return Image.fromarray(augmenter(image=pixels))
def gaussian_noise(img, scale, **_):
    """Add noise to ``img`` using a cached ``iaa.AdditiveGaussianNoise`` augmenter.

    Args:
        img: Image object providing ``size`` and convertible with ``np.asarray``.
        scale: Requested noise scale; limited via ``_get_param`` (factor 0.25)
            and then forced to an odd integer.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        A new PIL image built from the augmented array.
    """
    noise_scale = _get_param(scale, img, 0.25) | 1  # setting the lowest bit bins to odd values
    augmenter = _get_op(
        f'gaussian_noise_{noise_scale}',
        lambda: iaa.AdditiveGaussianNoise(scale=noise_scale),
    )
    pixels = np.asarray(img)
    return Image.fromarray(augmenter(image=pixels))
def poisson_noise(img, lam, **_):
    """Add noise to ``img`` using a cached ``iaa.AdditivePoissonNoise`` augmenter.

    Args:
        img: Image object providing ``size`` and convertible with ``np.asarray``.
        lam: Requested value for ``iaa.AdditivePoissonNoise``; limited via
            ``_get_param`` (factor 0.2) and then forced to an odd integer.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        A new PIL image built from the augmented array.
    """
    noise_lam = _get_param(lam, img, 0.2) | 1  # setting the lowest bit bins to odd values
    augmenter = _get_op(
        f'poisson_noise_{noise_lam}',
        lambda: iaa.AdditivePoissonNoise(noise_lam),
    )
    pixels = np.asarray(img)
    return Image.fromarray(augmenter(image=pixels))
def salt_and_pepper_noise(image, prob=0.05):
if prob <= 0:
return image
arr = np.asarray(image)
original_dtype = arr.dtype
intensity_levels = 2 ** (arr[0, 0].nbytes * 8)
min_intensity = 0
max_intensity = intensity_levels - 1
random_image_arr = np.random.choice([min_intensity, 1, np.nan], p=[prob / 2, 1 - prob, prob / 2], size=arr.shape)
salt_and_peppered_arr = arr.astype(np.float) * random_image_arr
salt_and_peppered_arr = np.nan_to_num(salt_and_peppered_arr, nan=max_intensity).astype(original_dtype)
return Image.fromarray(salt_and_peppered_arr)
def random_border_crop(image):
    """Crop a random thin margin off every side of ``image``.

    The left and top edges are placed within the first 2.5% of the width and
    7.5% of the height; the right and bottom edges within the last 2.5% and
    7.5% respectively. Random draws happen in left, top, right, bottom order.

    Args:
        image: Image object providing ``size`` and ``crop``.

    Returns:
        The result of ``image.crop`` for the sampled box.
    """
    width, height = image.size
    # (extent, lowest fraction, highest fraction) for left, top, right, bottom
    edge_specs = (
        (width, 0.0, 0.025),
        (height, 0.0, 0.075),
        (width, 0.975, 1.0),
        (height, 0.925, 1.0),
    )
    box = tuple(int(extent * rnd.uniform(low, high)) for extent, low, high in edge_specs)
    return image.crop(box)
def random_resize(image):
    """Resize ``image`` to a randomly scaled, randomly reduced size.

    Each side is first drawn uniformly between 0.5x and 1.5x of its original
    length, then both sides are divided by a common integer factor in [1, 4].
    Every resulting side is clamped to at least 1 pixel, because small inputs
    could otherwise truncate to 0 and make ``resize`` fail.

    Args:
        image: Image object providing ``size`` and ``resize``.

    Returns:
        The result of ``image.resize`` for the sampled size.
    """
    width, height = image.size
    # Draw order (width, height, reduce factor) is kept so seeded runs are unchanged.
    scaled_width = rnd.randint(int(0.5 * width), int(1.5 * width))
    scaled_height = rnd.randint(int(0.5 * height), int(1.5 * height))
    reduce_factor = rnd.randint(1, 4)
    new_size = tuple(
        max(1, int(side / reduce_factor)) for side in (scaled_width, scaled_height)
    )
    return image.resize(new_size)
def _level_to_arg(level, _hparams, max):
    """Scale a RandAugment level into a one-element argument tuple.

    Args:
        level: Magnitude level supplied by the caller.
        _hparams: Unused.
        max: Value that ``level == auto_augment._LEVEL_DENOM`` maps to. The
            name shadows the builtin but is bound by keyword through
            ``partial``, so it has to stay.

    Returns:
        A 1-tuple holding ``max * level / auto_augment._LEVEL_DENOM``.
    """
    scaled = (max * level) / auto_augment._LEVEL_DENOM
    return (scaled,)
_RAND_TRANSFORMS = [
'AutoContrast',
'Equalize',
'Invert',
# 'Rotate',
'Posterize',
'Solarize',
'SolarizeAdd',
'Color',
'Contrast',
'Brightness',
'Sharpness',
'ShearX',
]
#_RAND_TRANSFORMS.remove('SharpnessIncreasing') # remove, interferes with *blur ops
_RAND_TRANSFORMS.extend([
'GaussianBlur',
'GaussianNoise',
'PoissonNoise'
])
# Register the level-to-argument converters of the blur/noise ops; ``max`` is
# the argument value reached when the level equals auto_augment._LEVEL_DENOM.
auto_augment.LEVEL_TO_ARG.update(
    GaussianBlur=partial(_level_to_arg, max=4),
    MotionBlur=partial(_level_to_arg, max=20),
    GaussianNoise=partial(_level_to_arg, max=0.1 * 255),
    PoissonNoise=partial(_level_to_arg, max=40),
)

# Map the same op names to the functions that implement them.
auto_augment.NAME_TO_OP.update(
    GaussianBlur=gaussian_blur,
    MotionBlur=motion_blur,
    GaussianNoise=gaussian_noise,
    PoissonNoise=poisson_noise,
)
def rand_augment_transform(magnitude=5, num_layers=3):
    """Build an ``auto_augment.RandAugment`` over the ops in ``_RAND_TRANSFORMS``.

    Args:
        magnitude: Magnitude passed to ``auto_augment.rand_augment_ops``.
        num_layers: Layer count passed to ``auto_augment.RandAugment``.

    Returns:
        The configured ``auto_augment.RandAugment`` instance.
    """
    # These are tuned for magnitude=5, which means that effective magnitudes are half of these values.
    # ('rotate_deg': 5 is currently disabled.)
    hparams = dict(
        img_mean=128,
        shear_x_pct=0.9,
        shear_y_pct=0.0,
    )
    ops = auto_augment.rand_augment_ops(magnitude, hparams, transforms=_RAND_TRANSFORMS)
    # Supply weights to disable replacement in random selection (i.e. avoid applying the same op twice)
    uniform_weights = [1. / len(ops) for _ in ops]
    return auto_augment.RandAugment(ops, num_layers, uniform_weights)
|