shunk031 committed on
Commit
1ca1180
·
verified ·
1 Parent(s): f95a9d3

Delete layout_average_iou.py

Browse files
Files changed (1) hide show
  1. layout_average_iou.py +0 -221
layout_average_iou.py DELETED
@@ -1,221 +0,0 @@
1
- from typing import Dict, List, Tuple, TypedDict
2
-
3
- import datasets as ds
4
- import evaluate
5
- import numpy as np
6
- import numpy.typing as npt
7
-
8
# One-line summary of the metric; passed as `description` to the module info.
_DESCRIPTION = """\
Computes some average IoU metrics that are different to each other in previous works.
"""

# Argument/return documentation and usage examples; passed as
# `inputs_description` to the module info.
_KWARGS_DESCRIPTION = """\
Args:
    layouts (`list` of `dict`): A list of dictionaries representing layouts including `list` of `bboxes` (float) and `list` of `categories` (int).

Returns:
    dictionary: A set of average IoU scores.

Examples:

    Example 1: Single processing
        >>> metric = evaluate.load("pytorch-layout-generation/layout-average-iou")
        >>> num_samples, num_categories = 24, 4
        >>> layout = {
        >>>     "bboxes": np.random.rand(num_samples, num_categories),
        >>>     "categories": np.random.randint(0, num_categories, size=(num_samples,)),
        >>> }
        >>> metric.add(layouts=layout)
        >>> print(metric.compute())

    Example 2: Batch processing
        >>> metric = evaluate.load("pytorch-layout-generation/layout-average-iou")
        >>> batch_size, num_samples, num_categories = 512, 24, 4
        >>> layouts = [
        >>>     {
        >>>         "bboxes": np.random.rand(num_samples, num_categories),
        >>>         "categories": np.random.randint(0, num_categories, size=(num_samples,)),
        >>>     }
        >>>     for _ in range(batch_size)
        >>> ]
        >>> metric.add_batch(layouts=layouts)
        >>> print(metric.compute())
"""

# BibTeX entries for the two works whose average-IoU variants are reported
# (VTN: Arroyo et al. 2021, BLT: Kong et al. 2022).
_CITATION = """\
@inproceedings{arroyo2021variational,
  title={Variational transformer networks for layout generation},
  author={Arroyo, Diego Martin and Postels, Janis and Tombari, Federico},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={13642--13652},
  year={2021}
}

@inproceedings{kong2022blt,
  title={BLT: bidirectional layout transformer for controllable layout generation},
  author={Kong, Xiang and Jiang, Lu and Chang, Huiwen and Zhang, Han and Hao, Yuan and Gong, Haifeng and Essa, Irfan},
  booktitle={European Conference on Computer Vision},
  pages={474--490},
  year={2022},
  organization={Springer}
}
"""
63
-
64
-
65
def convert_xywh_to_ltrb(
    batch_bbox: npt.NDArray[np.float64],
) -> Tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
]:
    """Convert center-format boxes to corner format.

    Args:
        batch_bbox: Boxes unpacked along the first axis into
            ``(xc, yc, w, h)``, i.e. center coordinates, width and height.
            Callers holding an ``(N, 4)`` array pass its transpose.

    Returns:
        The tuple ``(x1, y1, x2, y2)`` where ``(x1, y1)`` is the center minus
        half the size and ``(x2, y2)`` is the center plus half the size.
    """
    xc, yc, w, h = batch_bbox
    # Halve the extents once; each is used for both opposite edges.
    half_w, half_h = w / 2, h / 2
    return (xc - half_w, yc - half_h, xc + half_w, yc + half_h)
79
-
80
-
81
class Layout(TypedDict):
    """A single layout: its element boxes and their category ids."""

    # Element boxes; the IoU helpers in this file treat this as an (N, 4)
    # array of (xc, yc, w, h) rows.
    bboxes: npt.NDArray[np.float64]
    # Integer category id per element.
    categories: npt.NDArray[np.int64]
84
-
85
-
86
def compute_iou(
    bbox1: npt.NDArray[np.float64],
    bbox2: npt.NDArray[np.float64],
    generalized: bool = False,
) -> npt.NDArray[np.float64]:
    """Compute the row-wise IoU (or generalized IoU) of two box arrays.

    Args:
        bbox1: Array of shape ``(N, 4)`` with ``(xc, yc, w, h)`` rows.
        bbox2: Array of shape ``(N, 4)`` with ``(xc, yc, w, h)`` rows; row
            ``k`` is compared against row ``k`` of ``bbox1``.
        generalized: When true, return the generalized IoU
            ``iou - (enclosing_area - union_area) / enclosing_area``.

    Returns:
        Array of shape ``(N,)`` with one score per row pair. A pair whose
        union area is zero produces ``nan`` from the division.

    Raises:
        AssertionError: If the row counts differ or either input does not
            have exactly four columns.
    """
    # shape: bbox1 (N, 4), bbox2 (N, 4)
    assert bbox1.shape[0] == bbox2.shape[0]
    # Both inputs must have four columns (check bbox2 as well as bbox1).
    assert bbox1.shape[1] == bbox2.shape[1] == 4

    l1, t1, r1, b1 = convert_xywh_to_ltrb(bbox1.T)
    l2, t2, r2, b2 = convert_xywh_to_ltrb(bbox2.T)
    a1, a2 = (r1 - l1) * (b1 - t1), (r2 - l2) * (b2 - t2)

    # intersection
    l_max = np.maximum(l1, l2)
    r_min = np.minimum(r1, r2)
    t_max = np.maximum(t1, t2)
    b_min = np.minimum(b1, b2)
    cond = (l_max < r_min) & (t_max < b_min)
    # A scalar fill value keeps this valid for N == 0 (no a1[0] to index).
    ai = np.where(cond, (r_min - l_max) * (b_min - t_max), 0.0)

    au = a1 + a2 - ai
    iou = ai / au

    if not generalized:
        return iou

    # outer region: smallest axis-aligned box enclosing both boxes
    l_min = np.minimum(l1, l2)
    r_max = np.maximum(r1, r2)
    t_min = np.minimum(t1, t2)
    b_max = np.maximum(b1, b2)
    ac = (r_max - l_min) * (b_max - t_min)

    return iou - (ac - au) / ac
123
-
124
-
125
def compute_perceptual_iou(
    bbox1: npt.NDArray[np.float64],
    bbox2: npt.NDArray[np.float64],
    N: int = 32,
) -> npt.NDArray[np.float64]:
    """
    Computes 'Perceptual' IoU [Kong+, BLT'22]

    The row-wise intersection area of ``bbox1`` and ``bbox2`` is divided by a
    single global denominator: the fraction of an ``N x N`` canvas covered
    by the unique boxes of ``bbox1`` after rasterization.

    Args:
        bbox1: Array of shape ``(N_pairs, 4)`` with ``(xc, yc, w, h)`` rows.
        bbox2: Array of shape ``(N_pairs, 4)`` with ``(xc, yc, w, h)`` rows.
        N: Side length of the rasterization canvas in cells. Coordinates are
            scaled by ``N`` and clipped to ``[0, N]``, so they are presumably
            normalized to ``[0, 1]`` (confirm against callers).

    Returns:
        Array with one score per row pair, or ``np.zeros((1,))`` when the
        rasterized boxes cover no canvas cell.

    Raises:
        AssertionError: If the row counts differ or either input does not
            have exactly four columns.
    """
    # shape: bbox1 (N, 4), bbox2 (N, 4)
    assert bbox1.shape[0] == bbox2.shape[0]
    # Both inputs must have four columns (check bbox2 as well as bbox1).
    assert bbox1.shape[1] == bbox2.shape[1] == 4

    l1, t1, r1, b1 = convert_xywh_to_ltrb(bbox1.T)
    l2, t2, r2, b2 = convert_xywh_to_ltrb(bbox2.T)

    # intersection
    l_max = np.maximum(l1, l2)
    r_min = np.minimum(r1, r2)
    t_max = np.maximum(t1, t2)
    b_min = np.minimum(b1, b2)
    cond = (l_max < r_min) & (t_max < b_min)
    # A scalar fill value keeps this valid for empty inputs.
    ai = np.where(cond, (r_min - l_max) * (b_min - t_max), 0.0)

    # Rasterize each distinct box of bbox1 once onto the canvas.
    unique_box_1 = np.unique(bbox1, axis=0)
    px_left, px_top, px_right, px_bottom = [
        (x * N).round().astype(np.int32).clip(0, N)
        for x in convert_xywh_to_ltrb(unique_box_1.T)
    ]
    canvas = np.zeros((N, N))
    for left, top, right, bottom in zip(px_left, px_top, px_right, px_bottom):
        canvas[top:bottom, left:right] = 1
    global_area_union = canvas.sum() / (N**2)

    if global_area_union > 0.0:
        return ai / global_area_union
    # Nothing was drawn on the canvas: report no overlap.
    return np.zeros((1,))
162
-
163
-
164
def compute_average_iou(layout: Layout, perceptual: bool) -> float:
    """Average the pairwise IoU over all overlapping element pairs of a layout.

    Args:
        layout: Mapping whose ``"bboxes"`` entry holds the layout's boxes.
        perceptual: Use ``compute_perceptual_iou`` when true, otherwise
            ``compute_iou``.

    Returns:
        The mean score over ordered pairs ``(i, j)``, ``i != j``, whose score
        exceeds float32 machine epsilon; ``0.0`` when the layout has fewer
        than two boxes or no pair exceeds that threshold.
    """
    bboxes = np.asarray(layout["bboxes"])

    num_boxes = len(bboxes)
    if num_boxes < 2:
        return 0.0  # no overlap in principle

    # Enumerate every ordered index pair, then drop the diagonal.
    ii, jj = np.meshgrid(range(num_boxes), range(num_boxes))
    ii, jj = ii.flatten(), jj.flatten()
    is_non_diag = ii != jj  # IoU for diag is always 1.0
    ii, jj = ii[is_non_diag], jj[is_non_diag]

    pairwise_iou = compute_perceptual_iou if perceptual else compute_iou
    iou = pairwise_iou(bboxes[ii], bboxes[jj])

    # pick all pairs of overlapped objects
    # (threshold at float32 eps to avoid very-small nonzero values)
    overlapped = iou[iou > np.finfo(np.float32).eps]
    return overlapped.mean().item() if overlapped.size > 0 else 0.0
186
-
187
-
188
class LayoutAverageIoU(evaluate.Metric):
    """Metric reporting two average-IoU variants (BLT and VTN) over layouts."""

    def _info(self) -> evaluate.EvaluationModuleInfo:
        """Describe the metric: documentation strings, input schema, code URL."""
        return evaluate.EvaluationModuleInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=ds.Features(
                {
                    "layouts": {
                        # Nested sequence of floats: a list of boxes per layout.
                        "bboxes": ds.Sequence(ds.Sequence(ds.Value("float64"))),
                        "categories": ds.Sequence(ds.Value("int64")),
                    }
                }
            ),
            codebase_urls=[
                "https://github.com/CyberAgentAILab/layout-dm/blob/main/src/trainer/trainer/helpers/metric.py#L399-L431",
            ],
        )

    def _compute(self, *, layouts: List[Layout]) -> Dict[str, float]:
        """Average the per-layout scores of both IoU variants.

        Args:
            layouts: Layouts to score; each is passed to
                ``compute_average_iou`` once per variant.

        Returns:
            ``{"average-iou_BLT": ..., "average-iou_VTN": ...}`` where the
            BLT entry uses the perceptual IoU and the VTN entry the plain IoU.
        """
        scores_blt = [
            compute_average_iou(layout, perceptual=True) for layout in layouts
        ]
        scores_vtn = [
            compute_average_iou(layout, perceptual=False) for layout in layouts
        ]

        return {
            "average-iou_BLT": np.mean(scores_blt).item(),
            "average-iou_VTN": np.mean(scores_vtn).item(),
        }