Delete layout_average_iou.py
Browse files- layout_average_iou.py +0 -221
layout_average_iou.py
DELETED
|
@@ -1,221 +0,0 @@
|
|
| 1 |
-
from typing import Dict, List, Tuple, TypedDict
|
| 2 |
-
|
| 3 |
-
import datasets as ds
|
| 4 |
-
import evaluate
|
| 5 |
-
import numpy as np
|
| 6 |
-
import numpy.typing as npt
|
| 7 |
-
|
| 8 |
-
_DESCRIPTION = """\
|
| 9 |
-
Computes some average IoU metrics that are different to each other in previous works.
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
# Usage documentation for the metric inputs/outputs; passed as
# `inputs_description` to `evaluate.EvaluationModuleInfo` in
# `LayoutAverageIoU._info`. Examples are written in doctest format, so
# continuation lines of a multi-line statement use the `...` prompt.
_KWARGS_DESCRIPTION = """\
Args:
    layouts (`list` of `dict`): A list of dictionaries representing layouts including `list` of `bboxes` (float) and `list` of `categories` (int).

Returns:
    dictionary: A set of average IoU scores.

Examples:

    Example 1: Single processing
        >>> metric = evaluate.load("pytorch-layout-generation/layout-average-iou")
        >>> num_samples, num_categories = 24, 4
        >>> layout = {
        ...     "bboxes": np.random.rand(num_samples, 4),
        ...     "categories": np.random.randint(0, num_categories, size=(num_samples,)),
        ... }
        >>> metric.add(layouts=layout)
        >>> print(metric.compute())

    Example 2: Batch processing
        >>> metric = evaluate.load("pytorch-layout-generation/layout-average-iou")
        >>> batch_size, num_samples, num_categories = 512, 24, 4
        >>> layouts = [
        ...     {
        ...         "bboxes": np.random.rand(num_samples, 4),
        ...         "categories": np.random.randint(0, num_categories, size=(num_samples,)),
        ...     }
        ...     for _ in range(batch_size)
        ... ]
        >>> metric.add_batch(layouts=layouts)
        >>> print(metric.compute())
"""
|
| 44 |
-
|
| 45 |
-
_CITATION = """\
|
| 46 |
-
@inproceedings{arroyo2021variational,
|
| 47 |
-
title={Variational transformer networks for layout generation},
|
| 48 |
-
author={Arroyo, Diego Martin and Postels, Janis and Tombari, Federico},
|
| 49 |
-
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
|
| 50 |
-
pages={13642--13652},
|
| 51 |
-
year={2021}
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
@inproceedings{kong2022blt,
|
| 55 |
-
title={BLT: bidirectional layout transformer for controllable layout generation},
|
| 56 |
-
author={Kong, Xiang and Jiang, Lu and Chang, Huiwen and Zhang, Han and Hao, Yuan and Gong, Haifeng and Essa, Irfan},
|
| 57 |
-
booktitle={European Conference on Computer Vision},
|
| 58 |
-
pages={474--490},
|
| 59 |
-
year={2022},
|
| 60 |
-
organization={Springer}
|
| 61 |
-
}
|
| 62 |
-
"""
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
def convert_xywh_to_ltrb(
    batch_bbox: npt.NDArray[np.float64],
) -> Tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
]:
    """Convert center-format boxes to corner-format coordinates.

    Args:
        batch_bbox: Array whose first axis has length 4 and holds, in order,
            the center x, center y, width and height of the boxes.

    Returns:
        A tuple ``(left, top, right, bottom)`` where each entry has the shape
        of one row of ``batch_bbox``.
    """
    center_x, center_y, width, height = batch_bbox
    half_width = width / 2
    half_height = height / 2
    left, right = center_x - half_width, center_x + half_width
    top, bottom = center_y - half_height, center_y + half_height
    return (left, top, right, bottom)
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
class Layout(TypedDict):
    """Typed dictionary describing a single layout handed to the metric."""

    # Bounding boxes of the layout. `compute_iou` asserts four columns and
    # `convert_xywh_to_ltrb` unpacks them as (xc, yc, w, h).
    bboxes: npt.NDArray[np.float64]
    # Integer category ids. Not read by the IoU computations in this file;
    # presumably one id per box — confirm against callers.
    categories: npt.NDArray[np.int64]
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
def compute_iou(
    bbox1: npt.NDArray[np.float64],
    bbox2: npt.NDArray[np.float64],
    generalized: bool = False,
) -> npt.NDArray[np.float64]:
    """Compute the row-wise IoU (optionally generalized IoU) of paired boxes.

    Row ``i`` of ``bbox1`` is compared with row ``i`` of ``bbox2``. Boxes are
    given as ``(xc, yc, w, h)`` and converted to corners with
    ``convert_xywh_to_ltrb``.

    Args:
        bbox1: Array of shape (N, 4).
        bbox2: Array of shape (N, 4).
        generalized: When True, return the generalized IoU, i.e. the IoU minus
            the fraction of the smallest enclosing box not covered by the
            union.

    Returns:
        Array of shape (N,) with one score per pair. The divisions are not
        guarded: a pair whose union area (or enclosing area, for GIoU) is zero
        yields a non-finite entry.

    Raises:
        AssertionError: If the row counts differ or either input does not
            have exactly four columns.
    """
    # shape: bbox1 (N, 4), bbox2 (N, 4)
    assert bbox1.shape[0] == bbox2.shape[0]
    # Both inputs must have four columns (not just bbox1).
    assert bbox1.shape[1] == bbox2.shape[1] == 4

    l1, t1, r1, b1 = convert_xywh_to_ltrb(bbox1.T)
    l2, t2, r2, b2 = convert_xywh_to_ltrb(bbox2.T)
    a1, a2 = (r1 - l1) * (b1 - t1), (r2 - l2) * (b2 - t2)

    # intersection
    l_max = np.maximum(l1, l2)
    r_min = np.minimum(r1, r2)
    t_max = np.maximum(t1, t2)
    b_min = np.minimum(b1, b2)
    cond = (l_max < r_min) & (t_max < b_min)
    # Use a full-size zero array rather than indexing an element, so that
    # empty (0, 4) inputs do not raise IndexError.
    ai = np.where(cond, (r_min - l_max) * (b_min - t_max), np.zeros_like(a1))

    au = a1 + a2 - ai
    iou = ai / au

    if not generalized:
        return iou

    # outer region: smallest axis-aligned box enclosing both boxes
    l_min = np.minimum(l1, l2)
    r_max = np.maximum(r1, r2)
    t_min = np.minimum(t1, t2)
    b_max = np.maximum(b1, b2)
    ac = (r_max - l_min) * (b_max - t_min)

    giou = iou - (ac - au) / ac

    return giou
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
def compute_perceptual_iou(
    bbox1: npt.NDArray[np.float64],
    bbox2: npt.NDArray[np.float64],
    N: int = 32,
) -> npt.NDArray[np.float64]:
    """Compute the 'Perceptual' IoU [Kong+, BLT'22] of paired boxes.

    The intersection area of each pair (row ``i`` of ``bbox1`` with row ``i``
    of ``bbox2``) is divided by a single global union area: the fraction of an
    ``N`` x ``N`` canvas covered by the unique boxes of ``bbox1`` after their
    corners are scaled by ``N``, rounded and clipped to ``[0, N]``.

    Args:
        bbox1: Array of shape (N_pairs, 4) holding ``(xc, yc, w, h)`` boxes.
        bbox2: Array of shape (N_pairs, 4) holding ``(xc, yc, w, h)`` boxes.
        N: Side length, in cells, of the rasterization canvas.

    Returns:
        Array with one score per pair, or a zero array of shape (1,) when the
        rasterized union covers no canvas cell.

    Raises:
        AssertionError: If the row counts differ or either input does not
            have exactly four columns.
    """

    # shape: bbox1 (N, 4), bbox2 (N, 4)
    assert bbox1.shape[0] == bbox2.shape[0]
    # Both inputs must have four columns (not just bbox1).
    assert bbox1.shape[1] == bbox2.shape[1] == 4

    l1, t1, r1, b1 = convert_xywh_to_ltrb(bbox1.T)
    l2, t2, r2, b2 = convert_xywh_to_ltrb(bbox2.T)

    # intersection
    l_max = np.maximum(l1, l2)
    r_min = np.minimum(r1, r2)
    t_max = np.maximum(t1, t2)
    b_min = np.minimum(b1, b2)
    cond = (l_max < r_min) & (t_max < b_min)
    overlap = (r_min - l_max) * (b_min - t_max)
    # Use a full-size zero array rather than indexing an element, so that
    # empty inputs do not raise IndexError here.
    ai = np.where(cond, overlap, np.zeros_like(overlap))

    # Rasterize each distinct box of bbox1 once onto the canvas.
    unique_box_1 = np.unique(bbox1, axis=0)

    grid_l, grid_t, grid_r, grid_b = [
        (x * N).round().astype(np.int32).clip(0, N)
        for x in convert_xywh_to_ltrb(unique_box_1.T)
    ]
    canvas = np.zeros((N, N))
    for left, top, right, bottom in zip(grid_l, grid_t, grid_r, grid_b):
        canvas[top:bottom, left:right] = 1
    global_area_union = canvas.sum() / (N**2)

    return ai / global_area_union if global_area_union > 0.0 else np.zeros((1,))
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
def compute_average_iou(layout: Layout, perceptual: bool) -> float:
    """Average the pairwise IoU over the overlapping boxes of one layout.

    Every ordered pair of distinct boxes in ``layout["bboxes"]`` is scored
    with ``compute_perceptual_iou`` (when ``perceptual`` is true) or
    ``compute_iou``; only scores above float32 machine epsilon contribute to
    the mean.

    Args:
        layout: Layout whose ``"bboxes"`` entry holds the boxes to compare.
        perceptual: Selects the perceptual IoU variant instead of plain IoU.

    Returns:
        The mean of the retained scores, or 0.0 when the layout has fewer
        than two boxes or no pair exceeds the threshold.
    """
    boxes = np.asarray(layout["bboxes"])
    num_boxes = len(boxes)

    # With zero or one box there is nothing to overlap with.
    if num_boxes == 0 or num_boxes == 1:
        return 0.0

    # Enumerate all (first, second) index pairs in row-major order and drop
    # the diagonal, whose IoU would always be 1.0.
    second_idx, first_idx = np.indices((num_boxes, num_boxes))
    first_idx = first_idx.reshape(-1)
    second_idx = second_idx.reshape(-1)
    off_diagonal = first_idx != second_idx
    first_idx = first_idx[off_diagonal]
    second_idx = second_idx[off_diagonal]

    if perceptual:
        pair_scores = compute_perceptual_iou(boxes[first_idx], boxes[second_idx])
    else:
        pair_scores = compute_iou(boxes[first_idx], boxes[second_idx])

    # Keep only genuinely overlapping pairs; the epsilon threshold filters
    # out very small non-zero values.
    overlapping = pair_scores[pair_scores > np.finfo(np.float32).eps]
    if len(overlapping) > 0:
        return overlapping.mean().item()
    return 0.0
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
class LayoutAverageIoU(evaluate.Metric):
    """`evaluate` metric reporting two average-IoU variants per batch of layouts."""

    def _info(self) -> evaluate.EvaluationModuleInfo:
        """Describe the metric: documentation strings, input schema and reference code."""
        layout_schema = {
            "bboxes": ds.Sequence(ds.Sequence((ds.Value("float64")))),
            "categories": ds.Sequence(ds.Value("int64")),
        }
        reference_urls = [
            "https://github.com/CyberAgentAILab/layout-dm/blob/main/src/trainer/trainer/helpers/metric.py#L399-L431",
        ]
        return evaluate.EvaluationModuleInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=ds.Features({"layouts": layout_schema}),
            codebase_urls=reference_urls,
        )

    def _compute(self, *, layouts: List[Layout]) -> Dict[str, float]:
        """Average `compute_average_iou` over all layouts for both IoU variants.

        Args:
            layouts: The layouts to score.

        Returns:
            A dict with the mean perceptual score under ``"average-iou_BLT"``
            and the mean plain-IoU score under ``"average-iou_VTN"``.
        """
        # Result key -> whether the perceptual variant is used; the BLT
        # scores are computed first, then the VTN scores.
        variants = (
            ("average-iou_BLT", True),
            ("average-iou_VTN", False),
        )
        results = {}
        for key, use_perceptual in variants:
            per_layout = [
                compute_average_iou(layout, perceptual=use_perceptual)
                for layout in layouts
            ]
            results[key] = np.mean(per_layout).item()
        return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|