File size: 10,909 Bytes
aceb1b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
"""
CV Export Utilities

Shared helper functions for computer vision export formats (COCO, YOLO, VOC).
"""

from typing import Dict, List, Tuple, Any, Optional
import logging

logger = logging.getLogger(__name__)


def build_category_mapping(annotations: List[dict], schemas: List[dict]) -> Dict[str, int]:
    """
    Build a mapping from label names to integer category IDs.

    Labels declared in image_annotation schemas come first (preserving config
    order); labels that only appear in the annotation data are appended in
    the order they are first encountered.

    Malformed entries are skipped instead of raising: a missing or None
    ``image_annotations`` field, a per-schema value that is not a list, an
    annotation object that is not a dict, or a label definition that is
    neither a string nor a dict.

    Args:
        annotations: List of annotation records
        schemas: List of annotation_scheme config dicts

    Returns:
        Dict mapping label name -> 0-based integer ID. A format that needs
        1-based IDs has to apply the offset itself.
    """
    labels: List[str] = []
    seen = set()

    def _register(name: Any) -> None:
        # Empty/missing names are ignored; first occurrence fixes the ID.
        if name and name not in seen:
            labels.append(name)
            seen.add(name)

    # First, collect labels from schema configs (preserves defined order)
    for schema in schemas:
        if schema.get("annotation_type") != "image_annotation":
            continue
        for label_def in schema.get("labels") or []:
            if isinstance(label_def, str):
                _register(label_def)
            elif isinstance(label_def, dict):
                _register(label_def.get("name", ""))

    # Then discover any labels in annotation data not already in config
    for ann in annotations:
        per_schema = ann.get("image_annotations") or {}
        if not isinstance(per_schema, dict):
            continue
        for img_annotations in per_schema.values():
            if not isinstance(img_annotations, list):
                continue
            for obj in img_annotations:
                if isinstance(obj, dict):
                    _register(obj.get("label", ""))

    return {name: idx for idx, name in enumerate(labels)}


def polygon_to_bbox(points: List[List[float]]) -> Tuple[float, float, float, float]:
    """
    Return the axis-aligned bounding box that encloses a polygon.

    Only the first two entries of each point are read (x then y).

    Args:
        points: List of [x, y] coordinate pairs

    Returns:
        Tuple of (x_min, y_min, width, height); (0, 0, 0, 0) when
        ``points`` is empty
    """
    if not points:
        return (0, 0, 0, 0)

    x_coords = [pt[0] for pt in points]
    y_coords = [pt[1] for pt in points]
    left, right = min(x_coords), max(x_coords)
    top, bottom = min(y_coords), max(y_coords)
    return (left, top, right - left, bottom - top)


def polygon_area(points: List[List[float]]) -> float:
    """
    Compute a polygon's area with the shoelace formula.

    The polygon is treated as closed: the last vertex is paired with the
    first one.

    Args:
        points: List of [x, y] coordinate pairs

    Returns:
        Absolute area of the polygon; 0.0 when fewer than three points
        are given
    """
    vertex_count = len(points)
    if vertex_count < 3:
        return 0.0

    twice_signed_area = 0.0
    for idx, current in enumerate(points):
        # Wrap around so the final vertex connects back to the first.
        following = points[(idx + 1) % vertex_count]
        twice_signed_area += current[0] * following[1]
        twice_signed_area -= following[0] * current[1]
    return abs(twice_signed_area) / 2.0


def normalize_bbox(x: float, y: float, w: float, h: float,
                   img_w: float, img_h: float) -> Tuple[float, float, float, float]:
    """
    Convert a top-left/size box into a normalized center/size box.

    Each returned component is divided by the matching image dimension and
    then clamped to [0, 1].

    Args:
        x, y: Top-left corner coordinates
        w, h: Width and height
        img_w, img_h: Image dimensions

    Returns:
        Tuple of (center_x, center_y, width, height) normalized to [0, 1];
        (0, 0, 0, 0) when either image dimension is not positive
    """
    if img_w <= 0 or img_h <= 0:
        return (0, 0, 0, 0)

    def _clamp_unit(value: float) -> float:
        return max(0.0, min(1.0, value))

    center_x = (x + w / 2) / img_w
    center_y = (y + h / 2) / img_h
    return (
        _clamp_unit(center_x),
        _clamp_unit(center_y),
        _clamp_unit(w / img_w),
        _clamp_unit(h / img_h),
    )


def flatten_polygon(points: List[List[float]]) -> List[float]:
    """
    Turn [[x1, y1], [x2, y2], ...] into [x1, y1, x2, y2, ...].

    This is the flat layout used by COCO segmentation. At most the first
    two entries of every point are kept.

    Args:
        points: List of [x, y] coordinate pairs

    Returns:
        Flat list of coordinates
    """
    return [coord for point in points for coord in point[:2]]


def extract_image_annotations(annotation: dict) -> List[Tuple[str, List[dict]]]:
    """
    Collect the non-empty image annotation lists of an annotation record.

    Entries of the ``image_annotations`` field whose value is not a list,
    or is an empty list, are left out.

    Args:
        annotation: Single annotation record with image_annotations field

    Returns:
        List of (schema_name, annotation_objects) tuples
    """
    per_schema = annotation.get("image_annotations", {})
    return [
        (name, objs)
        for name, objs in per_schema.items()
        if isinstance(objs, list) and objs
    ]


def _first_int_field(item: dict, keys: Tuple[str, ...], default: int) -> int:
    """Return the first value among ``keys`` in ``item`` that converts to int, else ``default``."""
    for key in keys:
        if key not in item:
            continue
        try:
            return int(item[key])
        except (ValueError, TypeError, OverflowError):
            # Unusable value (None, "n/a", inf, ...): try the next candidate
            # key instead of giving up on the whole lookup.
            continue
    return default


def get_image_dimensions(item: dict, default_width: int = 0,
                         default_height: int = 0) -> Tuple[int, int]:
    """
    Extract image dimensions from item metadata.

    Checks common field names for image width/height, in priority order.
    A field whose value cannot be converted to ``int`` is skipped and the
    next candidate field is tried; the default is used only when no
    candidate yields a usable value.

    Args:
        item: Item data dict
        default_width: Fallback width
        default_height: Fallback height

    Returns:
        Tuple of (width, height)
    """
    width = _first_int_field(
        item, ("image_width", "width", "img_width", "w"), default_width)
    height = _first_int_field(
        item, ("image_height", "height", "img_height", "h"), default_height)
    return (width, height)


def get_image_filename(item: dict) -> Optional[str]:
    """
    Look up the image filename/path stored on an item.

    Candidate fields are tried in a fixed order and the first one holding a
    truthy value wins; that value is converted with ``str``.

    Args:
        item: Item data dict

    Returns:
        Image filename/path string or None
    """
    candidate_fields = ("image", "image_path", "image_url", "file_name", "filename", "img")
    for field in candidate_fields:
        value = item.get(field)
        if value:
            return str(value)
    return None


# ---------------------------------------------------------------------------
# RLE mask utilities (Potato RLE <-> COCO RLE conversion)
# ---------------------------------------------------------------------------


def decode_rle(rle: dict, width: int, height: int) -> List[int]:
    """
    Decode Potato RLE-encoded mask to a flat binary array (row-major order).

    Potato RLE stores counts alternating between 0-pixels and 1-pixels,
    starting with 0s, in row-major (left-to-right, top-to-bottom) order.

    The output always has ``width * height`` entries: runs that extend past
    the end of the mask are truncated, and pixels not covered by any run
    stay 0. Only the ``counts`` entry of ``rle`` is read; the mask size
    comes from the ``width``/``height`` arguments. A missing or None
    ``counts`` yields an all-zero mask.

    Args:
        rle: Dict with 'counts' (list of ints) and 'size' [height, width]
        width: Image width
        height: Image height

    Returns:
        Flat list of 0/1 values in row-major order
    """
    counts = rle.get("counts") or []
    total = max(0, width * height)
    mask = [0] * total
    pos = 0
    val = 0
    for count in counts:
        if pos >= total:
            # Mask is full; remaining runs cannot change the result.
            break
        if count > 0:
            end = min(pos + count, total)
            if val:
                # Fill the whole run at once instead of pixel by pixel, so
                # an oversized run costs no more than the mask size.
                mask[pos:end] = [1] * (end - pos)
            pos = end
        val = 1 - val
    return mask


def rle_bbox(mask: List[int], width: int, height: int) -> List[float]:
    """
    Find the tight bounding box of the foreground pixels in a flat mask.

    A flat index ``i`` is interpreted as row ``i // width`` and column
    ``i % width``.

    Args:
        mask: Flat list of 0/1 values (row-major)
        width: Image width
        height: Image height

    Returns:
        [x_min, y_min, bbox_width, bbox_height] or [0, 0, 0, 0] if empty
    """
    left, top = width, height
    right, bottom = -1, -1
    for index, pixel in enumerate(mask):
        if not pixel:
            continue
        row, col = divmod(index, width)
        left = min(left, col)
        right = max(right, col)
        top = min(top, row)
        bottom = max(bottom, row)

    # right is still -1 only when no foreground pixel was seen.
    if right < 0:
        return [0, 0, 0, 0]
    return [
        float(left),
        float(top),
        float(right - left + 1),
        float(bottom - top + 1),
    ]


def rle_area(mask: List[int]) -> int:
    """
    Compute mask area by adding up the mask values.

    For a 0/1 mask this equals the number of foreground pixels.

    Args:
        mask: Flat list of 0/1 values

    Returns:
        Number of 1-pixels
    """
    foreground = 0
    for pixel in mask:
        foreground += pixel
    return foreground


def _column_major_rle_counts(mask_2d: List[List[int]], height: int,
                              width: int) -> List[int]:
    """
    Read a 2D mask in column-major order and compute RLE counts.

    Counts alternate between 0-pixels and 1-pixels, starting with 0s.

    Args:
        mask_2d: 2D list [height][width] of 0/1 values
        height: Image height
        width: Image width

    Returns:
        List of integer run counts in column-major order
    """
    counts: List[int] = []
    current_val = 0
    current_run = 0

    for x in range(width):
        for y in range(height):
            pixel = mask_2d[y][x]
            if pixel == current_val:
                current_run += 1
            else:
                counts.append(current_run)
                current_val = 1 - current_val
                current_run = 1
    counts.append(current_run)
    return counts


def _encode_coco_rle_string(counts: List[int]) -> str:
    """
    Encode RLE integer counts as a COCO compressed ASCII string.

    Implements the exact algorithm from pycocotools maskApi.c rleToString():
    - Delta encoding for i > 2: x = counts[i] - counts[i-2]
    - Each value encoded as 6-bit groups (5 data bits + 1 continuation bit)
    - Each group offset by 48 to produce printable ASCII
    - Signed values supported via arithmetic right shift

    Args:
        counts: List of integer run counts

    Returns:
        Encoded ASCII string
    """
    chars = []
    for i, cnt in enumerate(counts):
        # Delta encoding: for i > 2, encode difference from counts[i-2]
        x = cnt - counts[i - 2] if i > 2 else cnt
        while True:
            c = x & 0x1F
            x >>= 5
            # If bit 4 set, sign bit is 1 → more groups unless x is all-ones (-1)
            # If bit 4 clear, sign bit is 0 → more groups unless x is all-zeros (0)
            if c & 0x10:
                more = (x != -1)
            else:
                more = (x != 0)
            if more:
                c |= 0x20
            chars.append(chr(c + 48))
            if not more:
                break
    return "".join(chars)


def rle_to_coco_rle(rle: dict, width: int, height: int) -> Dict[str, Any]:
    """
    Re-encode a Potato RLE mask as COCO RLE.

    Potato RLE runs are row-major, while COCO RLE runs are column-major and
    stored as a compressed ASCII string, so the mask is decoded, reshaped
    to rows, and then re-encoded column by column.

    Args:
        rle: Potato RLE dict with 'counts' and 'size'
        width: Image width
        height: Image height

    Returns:
        COCO RLE dict {"counts": "encoded_string", "size": [height, width]}
    """
    row_major = decode_rle(rle, width, height)

    # Cut the flat mask into `height` rows of `width` pixels each.
    grid = [
        row_major[row * width:(row + 1) * width]
        for row in range(height)
    ]

    column_runs = _column_major_rle_counts(grid, height, width)
    return {
        "counts": _encode_coco_rle_string(column_runs),
        "size": [height, width],
    }