"""
export.py — COCO JSON exporter.
Reads all per-image labeled JSON files from the labeled/ directory and
assembles a valid COCO-format JSON file. No pycocotools dependency — the
format is built from scratch.
COCO format reference:
https://cocodataset.org/#format-data
Output structure:
{
"info": {...},
"licenses": [],
"categories": [{"id": 1, "name": "cup", "supercategory": "object"}, ...],
"images": [{"id": 1, "file_name": "img.jpg", "width": W, "height": H}, ...],
"annotations": [
{
"id": 1,
"image_id": 1,
"category_id": 2,
"bbox": [x, y, w, h], # COCO uses [x_min, y_min, width, height]
"area": w * h,
"iscrowd": 0,
"segmentation": [...] # copied from the detection when present, else []
},
...
]
}
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from autolabel.config import settings as default_settings, Settings
from autolabel.utils import load_json, save_json
logger = logging.getLogger(__name__)
def _xyxy_to_xywh(box: list[float]) -> list[float]:
"""Convert [x1, y1, x2, y2] → [x, y, width, height] (COCO format)."""
x1, y1, x2, y2 = box
return [x1, y1, x2 - x1, y2 - y1]
def build_coco(labeled_dir: Path) -> dict:
    """
    Assemble a COCO-format dict from the per-image JSON files in *labeled_dir*.

    Every ``*.json`` file directly inside *labeled_dir*, except a previous
    ``coco_export.json``, is loaded with ``load_json``. Each file must provide
    ``image_path``, ``image_width`` and ``image_height``; its optional
    ``detections`` entries must provide ``label`` and ``box_xyxy`` and may
    provide ``segmentation``.

    Image ids follow the sorted file order, annotation ids run sequentially
    across the whole export, and category ids are handed out in the order
    labels are first encountered. All three start at 1.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.

    Returns:
        The COCO dict ready for serialisation, or an empty dict when no
        labeled files are found.

    Raises:
        KeyError: Presumably, when a loaded file or detection lacks one of the
            required keys (this assumes ``load_json`` returns plain dicts).
    """
    # A previous export living in the same directory is not a per-image file.
    json_files = sorted(
        path
        for path in labeled_dir.glob("*.json")
        if path.name != "coco_export.json"
    )
    if not json_files:
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    logger.info("Building COCO export from %d file(s)…", len(json_files))

    category_index: dict[str, int] = {}  # name → category_id, first-seen order
    images_list: list[dict] = []
    annotations_list: list[dict] = []

    for img_id, json_path in enumerate(json_files, start=1):
        data = load_json(json_path)
        images_list.append(
            {
                "id": img_id,
                # Only the base name is exported, not the source directory.
                "file_name": Path(data["image_path"]).name,
                "width": data["image_width"],
                "height": data["image_height"],
            }
        )

        # `or []` also covers a file that stores "detections": null.
        for det in data.get("detections") or []:
            label: str = det["label"]
            # setdefault assigns the next id only the first time a label is seen.
            cat_id = category_index.setdefault(label, len(category_index) + 1)
            xywh = _xyxy_to_xywh(det["box_xyxy"])
            annotations_list.append(
                {
                    # Sequential across all images, starting at 1.
                    "id": len(annotations_list) + 1,
                    "image_id": img_id,
                    "category_id": cat_id,
                    "bbox": [round(v, 1) for v in xywh],
                    # Area uses the unrounded width and height.
                    "area": round(xywh[2] * xywh[3], 2),
                    "iscrowd": 0,
                    "segmentation": det.get("segmentation", []),
                }
            )

    # Dicts keep insertion order and ids only grow, so this is already id-sorted.
    categories = [
        {"id": cat_id, "name": name, "supercategory": "object"}
        for name, cat_id in category_index.items()
    ]

    # Read the clock once so "year" and "date_created" can never disagree.
    created = datetime.now(tz=timezone.utc)
    coco = {
        "info": {
            "description": "autolabel — OWLv2 household object dataset",
            "version": "1.0",
            "year": created.year,
            "date_created": created.isoformat(),
        },
        "licenses": [],
        "categories": categories,
        "images": images_list,
        "annotations": annotations_list,
    }
    logger.info(
        "COCO export: %d image(s), %d annotation(s), %d categor(ies)",
        len(images_list),
        len(annotations_list),
        len(categories),
    )
    return coco
def run_export(
    labeled_dir: Path,
    output_path: Path,
    cfg: Optional[Settings] = None,
) -> None:
    """
    Export the labeled files in *labeled_dir* as a single COCO JSON file.

    The COCO dict is produced by ``build_coco`` and written with ``save_json``.
    When ``build_coco`` returns an empty result, an error is logged and nothing
    is written.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.
        output_path: Destination path for the COCO JSON file.
        cfg: Settings instance (module default if None).
    """
    # Resolved but not consulted yet; the parameter is reserved for future use.
    _resolved_cfg = default_settings if not cfg else cfg

    payload = build_coco(labeled_dir)
    if payload:
        save_json(payload, output_path)
        logger.info("COCO JSON written → %s", output_path)
        return

    logger.error("Nothing to export.")