File size: 7,530 Bytes
47c6f77
 
 
 
 
 
 
e6ddeda
47c6f77
 
 
 
 
 
 
080abbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47c6f77
 
 
 
 
 
 
 
 
 
 
 
 
e6ddeda
47c6f77
 
 
080abbb
 
47c6f77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
080abbb
 
47c6f77
 
 
 
 
 
e6ddeda
47c6f77
080abbb
 
47c6f77
 
 
 
 
 
080abbb
 
 
 
 
47c6f77
 
 
080abbb
 
47c6f77
 
 
 
 
 
080abbb
 
47c6f77
 
 
 
 
 
 
 
 
 
 
 
080abbb
 
47c6f77
080abbb
 
 
 
 
 
 
 
 
 
 
 
47c6f77
 
 
080abbb
 
47c6f77
 
080abbb
 
 
 
47c6f77
 
e6ddeda
47c6f77
 
 
 
 
 
 
 
 
 
080abbb
 
47c6f77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6ddeda
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import os
from typing import List, Union

import datasets as ds
import evaluate
import numpy as np
import numpy.typing as npt
from evaluate.utils.file_utils import add_start_docstrings
from PIL import Image

_DESCRIPTION = r"""\
Computes the utilization rate of space suitable for arranging elements, implemented by the negative image S' of the compounded saliency map S.
"""

# Argument / return-value / usage documentation for the metric. It is passed
# to `add_start_docstrings` on the metric class and used as
# `inputs_description` in the metric info. The text is emitted at runtime, so
# treat it as user-facing content rather than as a comment.
_KWARGS_DESCRIPTION = """\
Args:
    predictions (`list` of `list` of `float`): A list of lists of floats representing normalized `ltrb`-format bounding boxes.
    gold_labels (`list` of `list` of `int`): A list of lists of integers representing class labels.
    saliency_maps_1 (`list` of `str`): A list of file paths to the first set of saliency maps (grayscale images).
    saliency_maps_2 (`list` of `str`): A list of file paths to the second set of saliency maps (grayscale images).
    canvas_width (`int`, *optional*): Width of the canvas in pixels. Can be provided at initialization or during computation.
    canvas_height (`int`, *optional*): Height of the canvas in pixels. Can be provided at initialization or during computation.

Returns:
    float: The utilization rate of space suitable for arranging elements. Computed as the ratio of elements placed in non-salient regions (the inverse of the saliency map). Higher values indicate better utilization of appropriate space.

Examples:
    >>> import evaluate
    >>> metric = evaluate.load("creative-graphic-design/layout-utility")
    >>> predictions = [[[0.1, 0.1, 0.3, 0.3], [0.6, 0.6, 0.9, 0.9]]]
    >>> gold_labels = [[1, 2]]
    >>> saliency_maps_1 = ["/path/to/saliency_map1.png"]
    >>> saliency_maps_2 = ["/path/to/saliency_map2.png"]
    >>> result = metric.compute(
    ...     predictions=predictions,
    ...     gold_labels=gold_labels,
    ...     saliency_maps_1=saliency_maps_1,
    ...     saliency_maps_2=saliency_maps_2,
    ...     canvas_width=512,
    ...     canvas_height=512
    ... )
    >>> print(f"Utility score: {result:.4f}")
"""

# BibTeX entry for the PosterLayout paper; used as `citation` in the metric
# info returned by the metric class.
_CITATION = """\
@inproceedings{hsu2023posterlayout,
  title={Posterlayout: A new benchmark and approach for content-aware visual-textual presentation layout},
  author={Hsu, Hsiao Yuan and He, Xiangteng and Peng, Yuxin and Kong, Hao and Zhang, Qing},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={6018--6026},
  year={2023}
}
"""


@add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class LayoutUtility(evaluate.Metric):
    # Layout "utility" metric: for each sample, the share of non-salient
    # canvas area (1 - max of two saliency maps) that is covered by the
    # predicted, validly-labelled boxes.
    #
    # No literal class docstring is written here on purpose:
    # `add_start_docstrings` is applied with the module-level description
    # strings (presumably to build the class docstring from them).

    def __init__(
        self,
        canvas_width: int | None = None,
        canvas_height: int | None = None,
        **kwargs,
    ) -> None:
        """Store optional default canvas dimensions.

        Args:
            canvas_width: Default canvas width in pixels, used when
                `_compute` is not given one explicitly.
            canvas_height: Default canvas height in pixels, used when
                `_compute` is not given one explicitly.
            **kwargs: Forwarded unchanged to `evaluate.Metric.__init__`.
        """
        super().__init__(**kwargs)
        self.canvas_width = canvas_width
        self.canvas_height = canvas_height

    def _info(self) -> evaluate.EvaluationModuleInfo:
        """Return the metric metadata: description, citation and input features."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=ds.Features(
                {
                    "predictions": ds.Sequence(ds.Sequence(ds.Value("float64"))),
                    "gold_labels": ds.Sequence(ds.Sequence(ds.Value("int64"))),
                    "saliency_maps_1": ds.Sequence(ds.Value("string")),
                    "saliency_maps_2": ds.Sequence(ds.Value("string")),
                }
            ),
            codebase_urls=[
                "https://github.com/PKU-ICST-MIPL/PosterLayout-CVPR2023/blob/main/eval.py#L144-L171"
            ],
        )

    def load_saliency_map(
        self,
        filepath: Union[os.PathLike, List[os.PathLike]],
        canvas_width: int,
        canvas_height: int,
    ) -> npt.NDArray[np.float64]:
        """Load a saliency map as a grayscale array scaled by 1/255.

        Args:
            filepath: Path to the image, or a one-element list holding it.
            canvas_width: Target width in pixels; the image is resized if
                its size differs from `(canvas_width, canvas_height)`.
            canvas_height: Target height in pixels.

        Returns:
            The 8-bit grayscale ("L" mode) pixel values divided by 255.0.

        Raises:
            AssertionError: If `filepath` is a list whose length is not 1.
        """
        if isinstance(filepath, list):
            assert len(filepath) == 1, filepath
            filepath = filepath[0]

        # The context manager guarantees the underlying file handle is
        # released even if conversion or resizing raises.
        with Image.open(filepath) as image:  # type: ignore
            grayscale = image.convert("L")
            if grayscale.size != (canvas_width, canvas_height):
                grayscale = grayscale.resize((canvas_width, canvas_height))
            pixels = np.array(grayscale)

        return pixels / 255.0

    def get_rid_of_invalid(
        self,
        predictions: npt.NDArray[np.float64],
        gold_labels: npt.NDArray[np.int64],
        canvas_width: int,
        canvas_height: int,
    ) -> npt.NDArray[np.int64]:
        """Zero out the labels of boxes that are too small to count.

        A box is considered invalid when its area, after clamping `xl`/`yl`
        to 0 and `xr`/`yr` to the canvas size, is below 0.1% of the canvas
        area (`(canvas_width / 100) * (canvas_height / 100) * 10`).

        Args:
            predictions: Boxes as `(xl, yl, xr, yr)` per element, indexed
                `[sample][element]`. `_compute` passes them already scaled
                to pixel units.
            gold_labels: Labels indexed `[sample, element]`. Modified in
                place.
            canvas_width: Canvas width in pixels.
            canvas_height: Canvas height in pixels.

        Returns:
            The same `gold_labels` array, with labels of invalid boxes set
            to 0.

        Raises:
            AssertionError: If `predictions` and `gold_labels` differ in
                length.
        """
        assert len(predictions) == len(gold_labels)

        # Minimum area a box must have to be kept: 0.1% of the canvas.
        min_area = (canvas_width / 100) * (canvas_height / 100) * 10

        for i, prediction in enumerate(predictions):
            for j, box in enumerate(prediction):
                xl, yl, xr, yr = box
                # Clamp only for the area computation; `predictions` itself
                # is left untouched.
                xl = max(0, xl)
                yl = max(0, yl)
                xr = min(canvas_width, xr)
                yr = min(canvas_height, yr)
                if abs((xr - xl) * (yr - yl)) < min_area:
                    if gold_labels[i, j]:
                        gold_labels[i, j] = 0
        return gold_labels

    def _compute(
        self,
        *,
        predictions: Union[npt.NDArray[np.float64], List[List[float]]],
        gold_labels: Union[npt.NDArray[np.int64], List[List[int]]],
        saliency_maps_1: List[os.PathLike],
        saliency_maps_2: List[os.PathLike],
        canvas_width: int | None = None,
        canvas_height: int | None = None,
    ) -> float:
        """Compute the mean utilization of non-salient space.

        Args:
            predictions: Normalized `(xl, yl, xr, yr)` boxes indexed
                `[sample][element]`; they are scaled to pixels on a copy.
            gold_labels: Class labels per element; elements with label
                `<= 0` are ignored.
            saliency_maps_1: Per-sample path to the first saliency map.
            saliency_maps_2: Per-sample path to the second saliency map.
            canvas_width: Canvas width in pixels; overrides the value given
                at initialization.
            canvas_height: Canvas height in pixels; overrides the value
                given at initialization.

        Returns:
            The mean, over samples with a non-zero result, of
            `sum(non_salient * box_mask) / sum(non_salient)`; `0.0` when no
            sample contributes.

        Raises:
            ValueError: If the canvas size is known neither from the call
                nor from initialization.
            AssertionError: If the four inputs differ in length.
        """
        # Parameter precedence: call-time values override the ones given at
        # initialization.
        canvas_width = canvas_width if canvas_width is not None else self.canvas_width
        canvas_height = (
            canvas_height if canvas_height is not None else self.canvas_height
        )

        if canvas_width is None or canvas_height is None:
            raise ValueError(
                "canvas_width and canvas_height must be provided either "
                "at initialization or during computation"
            )

        # Fail on mismatched inputs before doing any work.
        assert (
            len(predictions)
            == len(gold_labels)
            == len(saliency_maps_1)
            == len(saliency_maps_2)
        )

        # `np.array` copies, so the in-place scaling and label filtering
        # below never touch the caller's data.
        predictions = np.array(predictions)
        gold_labels = np.array(gold_labels)

        # Normalized -> pixel coordinates (x at even, y at odd positions).
        predictions[:, :, ::2] *= canvas_width
        predictions[:, :, 1::2] *= canvas_height

        gold_labels = self.get_rid_of_invalid(
            predictions=predictions,
            gold_labels=gold_labels,
            canvas_width=canvas_width,
            canvas_height=canvas_height,
        )

        scores = []
        samples = zip(predictions, gold_labels, saliency_maps_1, saliency_maps_2)

        for prediction, gold_label, smap_1, smap_2 in samples:
            smap_arr_1 = self.load_saliency_map(smap_1, canvas_width, canvas_height)
            smap_arr_2 = self.load_saliency_map(smap_2, canvas_width, canvas_height)

            # Compound saliency is the pixel-wise max of both maps; its
            # complement marks the space suitable for placing elements.
            smap_arr = np.maximum(smap_arr_1, smap_arr_2)
            c_smap = np.ones_like(smap_arr) - smap_arr

            cal_mask = np.zeros_like(smap_arr)

            # Negative coordinates would be interpreted as "count from the
            # end" by slicing and paint the wrong region, so clamp them to 0.
            # Values beyond the canvas are already truncated by slicing.
            boxes = np.maximum(np.array(prediction, dtype=int), 0)
            labels = np.array(gold_label, dtype=int)

            valid = (labels > 0).reshape(-1)

            for xl, yl, xr, yr in boxes[valid]:
                cal_mask[yl:yr, xl:xr] = 1

            total_not_sal = np.sum(c_smap)
            total_utils = np.sum(c_smap * cal_mask)

            # NOTE(review): samples with zero usable or zero used area are
            # skipped instead of being counted as 0, so they do not lower the
            # mean. Earlier commented-out code here divided by the number of
            # predictions - confirm which behavior is intended.
            if total_not_sal and total_utils:
                scores.append(total_utils / total_not_sal)

        # `np.mean([])` would yield nan plus a RuntimeWarning.
        if not scores:
            return 0.0
        return float(np.mean(scores))