"""
export.py — COCO JSON exporter.

Reads all per-image labeled JSON files from the labeled/ directory and
assembles a valid COCO-format JSON file.  No pycocotools dependency — the
format is built from scratch.

COCO format reference: https://cocodataset.org/#format-data

Output structure:
{
  "info": {...},
  "licenses": [],
  "categories": [{"id": 1, "name": "cup", "supercategory": "object"}, ...],
  "images": [{"id": 1, "file_name": "img.jpg", "width": W, "height": H}, ...],
  "annotations": [
    {
      "id": 1,
      "image_id": 1,
      "category_id": 2,
      "bbox": [x, y, w, h],   # COCO uses [x_min, y_min, width, height]
      "area": w * h,
      "iscrowd": 0,
      "segmentation": [...]   # copied from the detection, [] when absent
    },
    ...
  ]
}
"""

from __future__ import annotations

import logging
from collections.abc import Iterable
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

from autolabel.config import settings as default_settings, Settings
from autolabel.utils import load_json, save_json

logger = logging.getLogger(__name__)

# Conventional name of the aggregate export; it is never read back as a
# per-image labeled file.
_DEFAULT_EXPORT_NAME = "coco_export.json"


def _xyxy_to_xywh(box: list[float]) -> list[float]:
    """Convert [x1, y1, x2, y2] → [x, y, width, height] (COCO format).

    The corners are normalised first, so a box whose corners arrive swapped
    (x2 < x1 or y2 < y1) still yields a non-negative width and height.  For a
    well-ordered box the result is exactly ``[x1, y1, x2 - x1, y2 - y1]``.
    """
    x1, y1, x2, y2 = box
    left, right = min(x1, x2), max(x1, x2)
    top, bottom = min(y1, y2), max(y1, y2)
    return [left, top, right - left, bottom - top]


def build_coco(
    labeled_dir: Path,
    exclude: Optional[Iterable[Path]] = None,
) -> dict:
    """
    Read all JSON files in *labeled_dir* and build a COCO-format dict.

    Each file is loaded with ``load_json`` and must provide the keys
    ``image_path``, ``image_width`` and ``image_height``; the optional
    ``detections`` list holds items with ``label`` and ``box_xyxy`` (plus an
    optional ``segmentation``).  A missing required key is not handled here
    and surfaces as a ``KeyError`` (assuming ``load_json`` returns a dict).

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.
        exclude: Extra paths that must not be treated as labeled files,
            e.g. the export destination when it lives inside *labeled_dir*.
            ``coco_export.json`` is always skipped regardless.

    Returns:
        The COCO dict ready for serialisation, or ``{}`` when no labeled
        files are found.
    """
    # Compare resolved paths so relative/absolute spellings of the same file
    # are recognised as equal.
    skip_paths = {Path(p).resolve() for p in (exclude or ())}

    # Sorted for deterministic image ids; drop the aggregate export itself so
    # a previous run is never fed back in as an input.
    json_files = [
        path
        for path in sorted(labeled_dir.glob("*.json"))
        if path.name != _DEFAULT_EXPORT_NAME
        and not (skip_paths and path.resolve() in skip_paths)
    ]

    if not json_files:
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    logger.info("Building COCO export from %d file(s)…", len(json_files))

    # Collect all category names in encounter order, deduplicating
    category_index: dict[str, int] = {}  # name → category_id
    images_list: list[dict] = []
    annotations_list: list[dict] = []
    ann_id = 1  # annotation ids are unique across the whole dataset

    for img_id, json_path in enumerate(json_files, start=1):
        data = load_json(json_path)
        image_path = Path(data["image_path"])

        images_list.append(
            {
                "id": img_id,
                "file_name": image_path.name,
                "width": data["image_width"],
                "height": data["image_height"],
            }
        )

        for det in data.get("detections", []):
            label: str = det["label"]
            if label not in category_index:
                # Category ids are 1-based and assigned on first sight.
                category_index[label] = len(category_index) + 1
            cat_id = category_index[label]

            xywh = _xyxy_to_xywh(det["box_xyxy"])
            # Area is taken from the unrounded extents, then rounded itself.
            area = round(xywh[2] * xywh[3], 2)

            annotations_list.append(
                {
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": cat_id,
                    "bbox": [round(v, 1) for v in xywh],
                    "area": area,
                    "iscrowd": 0,
                    "segmentation": det.get("segmentation", []),
                }
            )
            ann_id += 1

    categories = [
        {"id": cat_id, "name": name, "supercategory": "object"}
        for name, cat_id in sorted(category_index.items(), key=lambda x: x[1])
    ]

    # Read the clock once so "year" and "date_created" can never disagree.
    created_at = datetime.now(tz=timezone.utc)

    coco = {
        "info": {
            "description": "autolabel — OWLv2 household object dataset",
            "version": "1.0",
            "year": created_at.year,
            "date_created": created_at.isoformat(),
        },
        "licenses": [],
        "categories": categories,
        "images": images_list,
        "annotations": annotations_list,
    }

    logger.info(
        "COCO export: %d image(s), %d annotation(s), %d categor(ies)",
        len(images_list),
        len(annotations_list),
        len(categories),
    )
    return coco


def run_export(
    labeled_dir: Path,
    output_path: Path,
    cfg: Optional[Settings] = None,
) -> None:
    """
    Build COCO JSON from *labeled_dir* and write to *output_path*.

    Logs an error and writes nothing when ``build_coco`` returns an empty
    result.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.
        output_path: Destination path for the COCO JSON file.
        cfg: Settings instance (module default if None).
    """
    _ = cfg or default_settings  # reserved for future use

    # Exclude the destination explicitly: if it sits inside labeled_dir under
    # a name other than coco_export.json, a previous export would otherwise
    # be picked up as a labeled file on the next run.
    coco = build_coco(labeled_dir, exclude=[output_path])
    if not coco:
        logger.error("Nothing to export.")
        return

    save_json(coco, output_path)
    logger.info("COCO JSON written → %s", output_path)