File size: 3,493 Bytes
f71ac1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""Random erasing data augmentation."""

import numpy as np

from vis4d.common.typing import NDArrayNumber
from vis4d.data.const import CommonKeys as K

from .base import Transform


@Transform(in_keys=K.images, out_keys=K.images)
class RandomErasing:
    """Randomly erase a rectangular region in an image tensor.

    For every image, a rectangle with random area and aspect ratio is sampled
    and filled with a constant value (``mean``). See
    `https://arxiv.org/abs/1708.04896`.
    """

    def __init__(
        self,
        min_area: float = 0.02,
        max_area: float = 0.4,
        min_aspect_ratio: float = 0.3,
        max_aspect_ratio: float = 1 / 0.3,
        mean: tuple[float, float, float] = (0.0, 0.0, 0.0),
        num_attempt: int = 10,
    ):
        """Creates an instance of RandomErasing.

        Recommended to use this transform after normalization. The erased
        region will be filled with the mean value. See
        `https://arxiv.org/abs/1708.04896`.

        Args:
            min_area (float, optional): Minimum area of the erased region, as
                a fraction of the image area. Defaults to 0.02.
            max_area (float, optional): Maximum area of the erased region, as
                a fraction of the image area. Defaults to 0.4.
            min_aspect_ratio (float, optional): Minimum aspect ratio
                (height / width) of the erased region. Defaults to 0.3.
            max_aspect_ratio (float, optional): Maximum aspect ratio
                (height / width) of the erased region. Defaults to 1 / 0.3.
            mean (tuple[float, float, float], optional): Per-channel value
                written into the erased region. Defaults to (0.0, 0.0, 0.0).
            num_attempt (int, optional): Number of maximum attempts to find a
                valid erased region. This is used to avoid infinite attempts of
                resampling the region, though such cases are very unlikely to
                happen. Defaults to 10.
        """
        self.min_area = min_area
        self.max_area = max_area
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        self.mean = mean
        self.num_attempt = num_attempt

    def do_erasing(self, images: NDArrayNumber) -> NDArrayNumber:
        """Erase one random rectangle in each image of a batch.

        The input array is modified in place and also returned. If no
        rectangle that fits strictly inside an image is found within
        ``num_attempt`` tries, that image is left unchanged.

        Args:
            images (NDArrayNumber): Batch of images indexed as [N, H, W, C].
                The last axis must be broadcast-compatible with ``mean``.

        Returns:
            NDArrayNumber: The same array, with the sampled regions filled
                with ``mean``.
        """
        fill = np.array(self.mean)
        # Iterating over the first axis yields views, so writing into
        # ``image`` updates ``images`` directly.
        for image in images:
            h, w = image.shape[0:2]
            area = h * w

            for _ in range(self.num_attempt):
                target_area = (
                    np.random.uniform(self.min_area, self.max_area) * area
                )
                aspect_ratio = np.random.uniform(
                    self.min_aspect_ratio, self.max_aspect_ratio
                )
                h_erase = int(round(np.sqrt(target_area * aspect_ratio)))
                w_erase = int(round(np.sqrt(target_area / aspect_ratio)))
                if w_erase < w and h_erase < h:
                    # The upper bound of np.random.randint is exclusive, so
                    # add 1 to make the largest valid offset reachable;
                    # otherwise the erased region could never touch the right
                    # or bottom border of the image.
                    x_erase = np.random.randint(0, w - w_erase + 1)
                    y_erase = np.random.randint(0, h - h_erase + 1)
                    image[
                        y_erase : y_erase + h_erase,
                        x_erase : x_erase + w_erase,
                        :,
                    ] = fill
                    break
        return images

    def __call__(
        self, images_list: list[NDArrayNumber]
    ) -> list[NDArrayNumber]:
        """Apply random erasing to every image batch in the list.

        Args:
            images_list (list[NDArrayNumber]): Image batches, each indexed as
                [N, H, W, C].

        Returns:
            list[NDArrayNumber]: The same list object, with each entry
                replaced by the result of ``do_erasing``.
        """
        for i, images in enumerate(images_list):
            images_list[i] = self.do_erasing(images)
        return images_list