File size: 9,404 Bytes
01a7a8b
 
 
 
 
 
 
 
b2214a3
01a7a8b
 
 
 
 
 
 
 
9a0312e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a7a8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2214a3
01a7a8b
 
 
9a0312e
 
01a7a8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2214a3
01a7a8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a0312e
 
01a7a8b
 
 
 
 
 
b2214a3
9a0312e
 
01a7a8b
 
 
 
 
 
 
 
9a0312e
 
 
 
 
01a7a8b
 
 
9a0312e
 
01a7a8b
 
 
 
 
 
9a0312e
 
01a7a8b
 
 
 
 
 
 
 
 
 
 
9a0312e
 
 
 
01a7a8b
9a0312e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a7a8b
 
 
9a0312e
 
01a7a8b
 
9a0312e
 
 
 
01a7a8b
 
 
 
 
 
 
 
 
 
9a0312e
 
01a7a8b
 
 
 
 
 
9a0312e
01a7a8b
 
9a0312e
01a7a8b
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import os
from typing import List, Literal, Optional, Union

import cv2
import datasets as ds
import evaluate
import numpy as np
import numpy.typing as npt
from evaluate.utils.file_utils import add_start_docstrings
from PIL import Image
from PIL.Image import Image as PilImage

_DESCRIPTION = r"""\
Computes the non-flatness of regions that text elements are solely put on, referring to CGL-GAN.
"""

_KWARGS_DESCRIPTION = """\
Args:
    predictions (`list` of `list` of `float`): A list of lists of floats representing normalized `ltrb`-format bounding boxes.
    gold_labels (`list` of `list` of `int`): A list of lists of integers representing class labels.
    image_canvases (`list` of `str`): A list of file paths to canvas images (background images).
    canvas_width (`int`, *optional*): Width of the canvas in pixels. Can be provided at initialization or during computation.
    canvas_height (`int`, *optional*): Height of the canvas in pixels. Can be provided at initialization or during computation.
    text_label_index (`int`, *optional*, defaults to 1): The label index for text elements.
    decoration_label_index (`int`, *optional*, defaults to 3): The label index for decoration (underlay) elements.

Returns:
    float: The unreadability score measuring the non-flatness of regions where text elements are placed. Computed using gradient analysis (Sobel operator) on the canvas image. Lower values indicate better readability (text on flatter/cleaner backgrounds).

Examples:
    >>> import evaluate
    >>> metric = evaluate.load("creative-graphic-design/layout-unreadability")
    >>> predictions = [[[0.1, 0.1, 0.5, 0.3], [0.6, 0.6, 0.9, 0.8]]]
    >>> gold_labels = [[1, 2]]  # 1 is text, 2 is other element
    >>> image_canvases = ["/path/to/canvas.png"]
    >>> result = metric.compute(
    ...     predictions=predictions,
    ...     gold_labels=gold_labels,
    ...     image_canvases=image_canvases,
    ...     canvas_width=512,
    ...     canvas_height=512
    ... )
    >>> print(f"Unreadability score: {result:.4f}")
"""

_CITATION = """\
@inproceedings{hsu2023posterlayout,
  title={Posterlayout: A new benchmark and approach for content-aware visual-textual presentation layout},
  author={Hsu, Hsiao Yuan and He, Xiangteng and Peng, Yuxin and Kong, Hao and Zhang, Qing},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={6018--6026},
  year={2023}
}
"""

ReqType = Literal["pil2cv", "cv2pil"]


# Metric that scores how "busy" the canvas is underneath text elements: the
# mean normalized Sobel gradient inside regions covered by text boxes but not
# by decoration (underlay) boxes, averaged over all samples.
# Documentation lives in comments rather than a class docstring so that the
# `__doc__` produced by `add_start_docstrings` is left unchanged.
@add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class LayoutUnreadability(evaluate.Metric):
    def __init__(
        self,
        canvas_width: int | None = None,
        canvas_height: int | None = None,
        text_label_index: int = 1,
        decoration_label_index: int = 3,
        **kwargs,
    ) -> None:
        """Store default canvas geometry and label indices.

        Args:
            canvas_width: Default canvas width in pixels. May be overridden
                per call in ``_compute``.
            canvas_height: Default canvas height in pixels. May be overridden
                per call in ``_compute``.
            text_label_index: Label value that marks text elements.
            decoration_label_index: Label value that marks decoration
                (underlay) elements.
            **kwargs: Forwarded unchanged to the parent ``__init__``.
        """
        super().__init__(**kwargs)
        self.canvas_width = canvas_width
        self.canvas_height = canvas_height

        self.text_label_index = text_label_index
        self.decoration_label_index = decoration_label_index

    def _info(self) -> evaluate.EvaluationModuleInfo:
        """Return the metric metadata (description, citation, input schema)."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=ds.Features(
                {
                    "predictions": ds.Sequence(ds.Sequence(ds.Value("float64"))),
                    "gold_labels": ds.Sequence(ds.Sequence(ds.Value("int64"))),
                    "image_canvases": ds.Sequence(ds.Value("string")),
                }
            ),
            codebase_urls=[
                "https://github.com/PKU-ICST-MIPL/PosterLayout-CVPR2023/blob/main/eval.py#L144-L171"
            ],
        )

    def cvt_pilcv(
        self,
        img: Union[PilImage, npt.NDArray[np.float64]],
        req: ReqType = "pil2cv",
        color_code: Optional[int] = None,
    ) -> Union[PilImage, npt.NDArray[np.float64]]:
        """Convert between a PIL image and an OpenCV array.

        Args:
            img: A PIL image when ``req == "pil2cv"``, a NumPy array when
                ``req == "cv2pil"``.
            req: Conversion direction, ``"pil2cv"`` or ``"cv2pil"``.
            color_code: OpenCV color-conversion code passed to
                ``cv2.cvtColor``. When ``None``, RGB->BGR is used for
                ``"pil2cv"`` and BGR->RGB for ``"cv2pil"``.

        Returns:
            The converted image: an array for ``"pil2cv"``, a PIL image for
            ``"cv2pil"``.

        Raises:
            ValueError: If ``req`` is neither ``"pil2cv"`` nor ``"cv2pil"``.
            AssertionError: If ``img`` does not have the type expected for
                the requested direction.
        """
        if req == "pil2cv":
            assert isinstance(img, PilImage)
            # Test against None explicitly: 0 is itself a valid OpenCV
            # conversion code and must not be replaced by the default.
            if color_code is None:
                color_code = cv2.COLOR_RGB2BGR
            return cv2.cvtColor(np.asarray(img), color_code)  # type: ignore

        if req == "cv2pil":
            assert isinstance(img, np.ndarray)
            if color_code is None:
                color_code = cv2.COLOR_BGR2RGB
            return Image.fromarray(cv2.cvtColor(img, color_code))

        raise ValueError("req should be 'pil2cv' or 'cv2pil'")

    def img_to_g_xy(self, img: PilImage) -> PilImage:
        """Build a gradient-magnitude map of ``img`` with the Sobel operator.

        Args:
            img: RGB PIL image (it is converted with ``COLOR_RGB2GRAY``).

        Returns:
            An 8-bit grayscale (``"L"``) PIL image in which the combined x/y
            gradient has been rescaled so that its maximum maps to 255.
        """
        img_cv_gs = self.cvt_pilcv(img, req="pil2cv", color_code=cv2.COLOR_RGB2GRAY)
        assert isinstance(img_cv_gs, np.ndarray)
        img_cv_gs = np.uint8(img_cv_gs)

        # Sobel(src, ddepth, dx, dy); ddepth=-1 keeps the source depth (uint8).
        # NOTE(review): because the gradients stay uint8, the squares below
        # are also computed in uint8 and can wrap around. This is kept
        # unchanged on purpose so scores remain comparable with existing
        # results -- confirm before "fixing" it.
        grad_x = cv2.Sobel(img_cv_gs, -1, 1, 0)
        grad_y = cv2.Sobel(img_cv_gs, -1, 0, 1)
        grad_xy = ((grad_x**2 + grad_y**2) / 2) ** 0.5

        # A perfectly flat image has an all-zero gradient; skip the rescale
        # in that case to avoid a 0/0 division that would fill the map with NaN.
        peak = np.max(grad_xy)
        if peak > 0:
            grad_xy = grad_xy / peak * 255

        return Image.fromarray(grad_xy).convert("L")

    def load_image_canvas(
        self,
        filepath: Union[os.PathLike, List[os.PathLike]],
        canvas_width: int,
        canvas_height: int,
    ) -> npt.NDArray[np.float64]:
        """Load a canvas image and return its gradient map scaled to [0, 1].

        Args:
            filepath: Path to the canvas image, or a one-element list
                containing that path.
            canvas_width: Target width in pixels; the image is resized if its
                size differs.
            canvas_height: Target height in pixels.

        Returns:
            A 2-D float array (the ``"L"`` gradient image divided by 255).

        Raises:
            AssertionError: If ``filepath`` is a list whose length is not 1.
        """
        if isinstance(filepath, list):
            assert len(filepath) == 1, filepath
            filepath = filepath[0]

        # `convert` returns an independent, fully loaded image, so the file
        # can be closed as soon as the conversion is done.
        with Image.open(filepath) as raw_image:  # type: ignore
            canvas_pil = raw_image.convert("RGB")

        target_size = (canvas_width, canvas_height)
        if canvas_pil.size != target_size:
            canvas_pil = canvas_pil.resize(target_size)  # type: ignore

        gradient_pil = self.img_to_g_xy(canvas_pil)
        assert isinstance(gradient_pil, PilImage)

        return np.array(gradient_pil) / 255.0

    def get_rid_of_invalid(
        self,
        predictions: npt.NDArray[np.float64],
        gold_labels: npt.NDArray[np.int64],
        canvas_width: int,
        canvas_height: int,
    ) -> npt.NDArray[np.int64]:
        """Zero out the labels of boxes that are too small to count.

        Each box is clipped to the canvas for the area computation only
        (``predictions`` is not modified). A box whose clipped area is below
        ``(canvas_width / 100) * (canvas_height / 100) * 10`` gets its label
        set to 0.

        Args:
            predictions: Boxes in pixel ``ltrb`` format, indexed as
                ``[sample, element, coordinate]``.
            gold_labels: Labels indexed as ``[sample, element]``. Modified in
                place.
            canvas_width: Canvas width in pixels.
            canvas_height: Canvas height in pixels.

        Returns:
            The same ``gold_labels`` array, after the in-place update.

        Raises:
            AssertionError: If the number of samples differs between
                ``predictions`` and ``gold_labels``.
        """
        assert len(predictions) == len(gold_labels)

        w = canvas_width / 100
        h = canvas_height / 100

        boxes = np.asarray(predictions)
        # fmax/fmin ignore NaN operands, which mirrors what the built-in
        # max(0, x) / min(limit, x) calls did in the original per-box loop.
        xl = np.fmax(boxes[..., 0], 0)
        yl = np.fmax(boxes[..., 1], 0)
        xr = np.fmin(boxes[..., 2], canvas_width)
        yr = np.fmin(boxes[..., 3], canvas_height)

        too_small = np.abs((xr - xl) * (yr - yl)) < w * h * 10
        gold_labels[too_small] = 0
        return gold_labels

    def _compute(
        self,
        *,
        predictions: Union[npt.NDArray[np.float64], List[List[List[float]]]],
        gold_labels: Union[npt.NDArray[np.int64], List[List[int]]],
        image_canvases: List[os.PathLike],
        canvas_width: int | None = None,
        canvas_height: int | None = None,
        text_label_index: int | None = None,
        decoration_label_index: int | None = None,
    ):
        """Compute the mean unreadability score over all samples.

        For every sample, a mask is set to 1 inside text boxes and reset to 0
        inside decoration boxes; the sample score is the sum of the canvas
        gradient inside the mask divided by the mask area. Samples with an
        empty mask or zero gradient contribute 0.

        Args:
            predictions: Normalized ``ltrb`` boxes indexed as
                ``[sample, element, coordinate]``.
            gold_labels: Class labels indexed as ``[sample, element]``.
            image_canvases: One canvas image path per sample.
            canvas_width: Canvas width in pixels; falls back to the value
                given at initialization.
            canvas_height: Canvas height in pixels; falls back to the value
                given at initialization.
            text_label_index: Label of text elements; falls back to the value
                given at initialization.
            decoration_label_index: Label of decoration elements; falls back
                to the value given at initialization.

        Returns:
            The sum of per-sample scores divided by the number of samples.

        Raises:
            ValueError: If the canvas size is provided neither here nor at
                initialization.
            AssertionError: If the inputs do not all have the same number of
                samples.
        """
        # Resolve parameter precedence: call-time arguments override the
        # values stored at initialization.
        if canvas_width is None:
            canvas_width = self.canvas_width
        if canvas_height is None:
            canvas_height = self.canvas_height
        if text_label_index is None:
            text_label_index = self.text_label_index
        if decoration_label_index is None:
            decoration_label_index = self.decoration_label_index

        if canvas_width is None or canvas_height is None:
            raise ValueError(
                "canvas_width and canvas_height must be provided either "
                "at initialization or during computation"
            )

        # Always work on float copies: the caller's data is never mutated and
        # integer-typed input is not scaled in integer arithmetic.
        predictions = np.array(predictions, dtype=np.float64)
        gold_labels = np.array(gold_labels)

        # Fail fast, before any image is loaded.
        assert len(predictions) == len(gold_labels) == len(image_canvases)
        num_predictions = len(predictions)

        # Normalized -> pixel coordinates (x at even, y at odd positions).
        predictions[:, :, ::2] *= canvas_width
        predictions[:, :, 1::2] *= canvas_height

        gold_labels = self.get_rid_of_invalid(
            predictions=predictions,
            gold_labels=gold_labels,
            canvas_width=canvas_width,
            canvas_height=canvas_height,
        )

        score = 0.0
        samples = zip(predictions, gold_labels, image_canvases)
        for sample_boxes, sample_labels, image_canvas in samples:
            canvas_arr = self.load_image_canvas(
                image_canvas,
                canvas_width,
                canvas_height,
            )
            cal_mask = np.zeros_like(canvas_arr)

            boxes = sample_boxes.astype(int)
            # Clamp to the canvas: a negative slice bound would otherwise
            # wrap around and mark (or miss) pixels at the opposite edge.
            boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, canvas_width)
            boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, canvas_height)
            labels = sample_labels.astype(int).reshape(-1)

            # Text regions count; decoration underlays drawn afterwards
            # remove the area they cover from the mask.
            for xl, yl, xr, yr in boxes[labels == text_label_index]:
                cal_mask[yl:yr, xl:xr] = 1
            for xl, yl, xr, yr in boxes[labels == decoration_label_index]:
                cal_mask[yl:yr, xl:xr] = 0

            total_area = np.sum(cal_mask)
            total_grad = np.sum(canvas_arr[cal_mask == 1])
            if total_area and total_grad:
                score += total_grad / total_area

        return score / num_predictions