Erick
Upload folder using huggingface_hub
47cb9bd verified
"""
export.py — COCO JSON exporter.
Reads all per-image labeled JSON files from the labeled/ directory and
assembles a valid COCO-format JSON file. No pycocotools dependency — the
format is built from scratch.
COCO format reference:
https://cocodataset.org/#format-data
Output structure:
{
"info": {...},
"licenses": [],
"categories": [{"id": 1, "name": "cup", "supercategory": "object"}, ...],
"images": [{"id": 1, "file_name": "img.jpg", "width": W, "height": H}, ...],
"annotations": [
{
"id": 1,
"image_id": 1,
"category_id": 2,
"bbox": [x, y, w, h], # COCO uses [x_min, y_min, width, height]
"area": w * h,
"iscrowd": 0,
"segmentation": [...] # taken from the detection; [] when it has none
},
...
]
}
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from autolabel.config import settings as default_settings, Settings
from autolabel.utils import load_json, save_json
# Module-level logger, named after this module via __name__; used by
# build_coco() and run_export() for progress, warning and error messages.
logger = logging.getLogger(__name__)
def _xyxy_to_xywh(box: list[float]) -> list[float]:
    """Turn a corner-style box into a COCO-style box.

    Takes ``[x1, y1, x2, y2]`` and returns ``[x, y, width, height]``, where
    the origin is the first corner and the extents are the differences
    between the two corners. The values are not reordered or clamped.
    """
    left, top, right, bottom = box
    width = right - left
    height = bottom - top
    return [left, top, width, height]
def build_coco(labeled_dir: Path) -> dict:
    """
    Read all JSON files in *labeled_dir* and build a COCO-format dict.

    Every ``*.json`` file (visited in sorted path order) is loaded with
    ``load_json`` and must provide ``image_path``, ``image_width`` and
    ``image_height``. Its optional ``detections`` list supplies the
    annotations; each detection needs ``label`` and ``box_xyxy`` and may
    carry ``segmentation``.

    Image and annotation ids start at 1 and increase in processing order.
    Category ids start at 1 and are assigned in order of first appearance
    of each label.

    Files that are themselves COCO exports are skipped: the default
    ``coco_export.json`` by name, and any other file by content (top-level
    ``images`` and ``annotations`` keys and no ``image_path``). This keeps a
    previous export that was saved into *labeled_dir* under a different name
    from being re-ingested.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.

    Returns:
        The COCO dict ready for serialisation, or an empty dict when no
        per-image label file was found.

    Raises:
        KeyError: If a per-image file or detection lacks a required key.
    """
    json_files = sorted(
        f for f in labeled_dir.glob("*.json") if f.name != "coco_export.json"
    )
    if not json_files:
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    logger.info("Building COCO export from %d file(s)…", len(json_files))

    # Insertion order doubles as id order: each new label gets the next id.
    category_index: dict[str, int] = {}  # name -> category_id
    images_list: list[dict] = []
    annotations_list: list[dict] = []

    for json_path in json_files:
        data = load_json(json_path)

        # A previous export stored here under a non-default name has no
        # per-image keys; skip it instead of failing on data["image_path"].
        if (
            isinstance(data, dict)
            and "image_path" not in data
            and "images" in data
            and "annotations" in data
        ):
            logger.warning("Skipping COCO-format file %s", json_path)
            continue

        # Ids stay contiguous even when a file was skipped above.
        img_id = len(images_list) + 1
        images_list.append(
            {
                "id": img_id,
                "file_name": Path(data["image_path"]).name,
                "width": data["image_width"],
                "height": data["image_height"],
            }
        )

        # `or []` also covers an explicit null "detections" value.
        for det in data.get("detections") or []:
            label: str = det["label"]
            cat_id = category_index.setdefault(label, len(category_index) + 1)

            xywh = _xyxy_to_xywh(det["box_xyxy"])
            annotations_list.append(
                {
                    "id": len(annotations_list) + 1,
                    "image_id": img_id,
                    "category_id": cat_id,
                    "bbox": [round(v, 1) for v in xywh],
                    # Area is computed from the unrounded width/height.
                    "area": round(xywh[2] * xywh[3], 2),
                    "iscrowd": 0,
                    "segmentation": det.get("segmentation", []),
                }
            )

    if not images_list:
        # Every candidate file turned out to be a COCO export.
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    # Dicts preserve insertion order, so this is already ascending by id.
    categories = [
        {"id": cat_id, "name": name, "supercategory": "object"}
        for name, cat_id in category_index.items()
    ]

    # Single timestamp so "year" and "date_created" can never disagree.
    now = datetime.now(tz=timezone.utc)
    coco = {
        "info": {
            "description": "autolabel — OWLv2 household object dataset",
            "version": "1.0",
            "year": now.year,
            "date_created": now.isoformat(),
        },
        "licenses": [],
        "categories": categories,
        "images": images_list,
        "annotations": annotations_list,
    }

    logger.info(
        "COCO export: %d image(s), %d annotation(s), %d categor(ies)",
        len(images_list),
        len(annotations_list),
        len(categories),
    )
    return coco
def run_export(
    labeled_dir: Path,
    output_path: Path,
    cfg: Optional[Settings] = None,
) -> None:
    """
    Build COCO JSON from *labeled_dir* and write to *output_path*.

    When ``build_coco`` returns an empty dict, an error is logged and no
    file is written.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.
        output_path: Destination path for the COCO JSON file.
        cfg: Settings instance (module default if None). Currently unused.
    """
    _ = cfg or default_settings  # reserved for future use

    coco = build_coco(labeled_dir)
    if not coco:
        logger.error("Nothing to export.")
        return

    # save_json is not visible here, so make sure the destination directory
    # exists before writing; this is a no-op when it already does.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    save_json(coco, output_path)
    logger.info("COCO JSON written → %s", output_path)