File size: 4,573 Bytes
47cb9bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""
export.py — COCO JSON exporter.

Reads all per-image labeled JSON files from the labeled/ directory and
assembles a valid COCO-format JSON file.  No pycocotools dependency — the
format is built from scratch.

COCO format reference:
  https://cocodataset.org/#format-data

Output structure:
{
  "info": {...},
  "licenses": [],
  "categories": [{"id": 1, "name": "cup", "supercategory": "object"}, ...],
  "images": [{"id": 1, "file_name": "img.jpg", "width": W, "height": H}, ...],
  "annotations": [
    {
      "id": 1,
      "image_id": 1,
      "category_id": 2,
      "bbox": [x, y, w, h],          # COCO uses [x_min, y_min, width, height]
      "area": w * h,
      "iscrowd": 0,
      "segmentation": [...]          # taken from the detection; [] when absent
    },
    ...
  ]
}
"""

from __future__ import annotations

import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

from autolabel.config import settings as default_settings, Settings
from autolabel.utils import load_json, save_json

logger = logging.getLogger(__name__)


def _xyxy_to_xywh(box: list[float]) -> list[float]:
    """Convert [x1, y1, x2, y2] → [x, y, width, height] (COCO format)."""
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1, y2 - y1]


def build_coco(labeled_dir: Path) -> dict:
    """
    Read all JSON files in *labeled_dir* and build a COCO-format dict.

    Every ``*.json`` file directly inside *labeled_dir* (except
    ``coco_export.json``) is loaded with ``load_json`` and treated as one
    image record.  Files are processed in sorted path order; image ids,
    annotation ids and category ids are 1-based and handed out in encounter
    order.

    Args:
        labeled_dir: Directory containing the per-image labeled JSON files.

    Returns:
        The COCO dict ready for serialisation, or an empty dict when no
        labeled JSON file is found.

    Raises:
        KeyError: If a loaded record is a dict lacking ``image_path``,
            ``image_width`` or ``image_height``, or one of its detections
            lacks ``label`` or ``box_xyxy``.
    """
    # A previous export living in the same directory must not be read back in.
    json_files = sorted(
        f for f in labeled_dir.glob("*.json") if f.name != "coco_export.json"
    )

    if not json_files:
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    logger.info("Building COCO export from %d file(s)…", len(json_files))

    # Category names are collected in encounter order, deduplicated; the id of
    # a new name is its 1-based position in that order.
    category_index: dict[str, int] = {}  # name → category_id
    images_list: list[dict] = []
    annotations_list: list[dict] = []

    ann_id = 1

    for img_id, json_path in enumerate(json_files, start=1):
        data = load_json(json_path)

        image_path = Path(data["image_path"])
        images_list.append(
            {
                "id": img_id,
                # Only the base name is exported, not the stored directory part
                "file_name": image_path.name,
                "width": data["image_width"],
                "height": data["image_height"],
            }
        )

        for det in data.get("detections", []):
            label: str = det["label"]
            if label not in category_index:
                category_index[label] = len(category_index) + 1

            cat_id = category_index[label]
            xywh = _xyxy_to_xywh(det["box_xyxy"])
            # Area is taken from the unrounded width/height, then rounded
            area = round(xywh[2] * xywh[3], 2)

            annotations_list.append(
                {
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": cat_id,
                    "bbox": [round(v, 1) for v in xywh],
                    "area": area,
                    "iscrowd": 0,
                    "segmentation": det.get("segmentation", []),
                }
            )
            ann_id += 1

    # Ids were assigned in insertion order and dicts preserve that order, so
    # iterating the mapping directly already yields categories sorted by id.
    categories = [
        {"id": cat_id, "name": name, "supercategory": "object"}
        for name, cat_id in category_index.items()
    ]

    # Take one timestamp so "year" and "date_created" can never disagree
    # (two separate now() calls could straddle a year boundary).
    created_at = datetime.now(tz=timezone.utc)

    coco = {
        "info": {
            "description": "autolabel — OWLv2 household object dataset",
            "version": "1.0",
            "year": created_at.year,
            "date_created": created_at.isoformat(),
        },
        "licenses": [],
        "categories": categories,
        "images": images_list,
        "annotations": annotations_list,
    }

    logger.info(
        "COCO export: %d image(s), %d annotation(s), %d categor(ies)",
        len(images_list),
        len(annotations_list),
        len(categories),
    )
    return coco


def run_export(
    labeled_dir: Path,
    output_path: Path,
    cfg: Optional[Settings] = None,
) -> None:
    """
    Assemble the COCO dict for *labeled_dir* and save it at *output_path*.

    When ``build_coco`` yields an empty result, an error is logged and no
    file is written.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.
        output_path: Destination path for the COCO JSON file.
        cfg:         Settings instance (module default if None).  Resolved
                     here but not consulted yet.
    """
    # Placeholder: the effective settings are resolved but currently unused.
    _ = cfg if cfg else default_settings

    payload = build_coco(labeled_dir)
    if payload:
        save_json(payload, output_path)
        logger.info("COCO JSON written → %s", output_path)
    else:
        logger.error("Nothing to export.")