Spaces:
Sleeping
Sleeping
| """ | |
| export.py — COCO JSON exporter. | |
| Reads all per-image labeled JSON files from the labeled/ directory and | |
| assembles a valid COCO-format JSON file. No pycocotools dependency — the | |
| format is built from scratch. | |
| COCO format reference: | |
| https://cocodataset.org/#format-data | |
| Output structure: | |
| { | |
| "info": {...}, | |
| "licenses": [], | |
| "categories": [{"id": 1, "name": "cup", "supercategory": "object"}, ...], | |
| "images": [{"id": 1, "file_name": "img.jpg", "width": W, "height": H}, ...], | |
| "annotations": [ | |
| { | |
| "id": 1, | |
| "image_id": 1, | |
| "category_id": 2, | |
| "bbox": [x, y, w, h], # COCO uses [x_min, y_min, width, height] | |
| "area": w * h, | |
| "iscrowd": 0 | |
| }, | |
| ... | |
| ] | |
| } | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from typing import Optional | |
| from autolabel.config import settings as default_settings, Settings | |
| from autolabel.utils import load_json, save_json | |
| logger = logging.getLogger(__name__) | |
| def _xyxy_to_xywh(box: list[float]) -> list[float]: | |
| """Convert [x1, y1, x2, y2] β [x, y, width, height] (COCO format).""" | |
| x1, y1, x2, y2 = box | |
| return [x1, y1, x2 - x1, y2 - y1] | |
def build_coco(labeled_dir: Path) -> dict:
    """
    Read all JSON files in *labeled_dir* and build a COCO-format dict.

    Files are processed in sorted path order. Image ids and annotation ids
    are consecutive integers starting at 1; category ids start at 1 and are
    handed out in the order each label is first encountered.

    A file named ``coco_export.json`` is always ignored. Any other JSON file
    whose top level is not an object containing an ``"image_path"`` key is
    skipped with a warning rather than aborting the export, so a stray file
    (for example an earlier export saved under a different name) cannot
    break the run. Skipped files do not consume an image id.

    Args:
        labeled_dir: Directory containing the per-image labeled JSON files.

    Returns:
        The COCO dict ready for serialisation, or an empty dict when no
        usable labeled file is found in *labeled_dir*.

    Raises:
        KeyError: If a labeled file lacks ``"image_width"`` or
            ``"image_height"``, or a detection lacks ``"label"`` or
            ``"box_xyxy"``.
    """
    # Exclude any existing coco_export.json to avoid self-inclusion
    json_files = sorted(
        path
        for path in labeled_dir.glob("*.json")
        if path.name != "coco_export.json"
    )
    if not json_files:
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    logger.info("Building COCO export from %d file(s)…", len(json_files))

    # Category names in encounter order, deduplicated
    category_index: dict[str, int] = {}  # name → category_id
    images_list: list[dict] = []
    annotations_list: list[dict] = []

    for json_path in json_files:
        data = load_json(json_path)

        # The glob matches every *.json in the directory, not only label
        # files; skip anything that is clearly not a per-image record.
        if not isinstance(data, dict) or "image_path" not in data:
            logger.warning(
                "Skipping %s: not a per-image labeled JSON file", json_path
            )
            continue

        # Number images by accepted files so ids stay consecutive.
        img_id = len(images_list) + 1
        images_list.append(
            {
                "id": img_id,
                "file_name": Path(data["image_path"]).name,
                "width": data["image_width"],
                "height": data["image_height"],
            }
        )

        for det in data.get("detections", []):
            label: str = det["label"]
            # setdefault only stores the new id when the label is unseen.
            cat_id = category_index.setdefault(label, len(category_index) + 1)
            xywh = _xyxy_to_xywh(det["box_xyxy"])
            annotations_list.append(
                {
                    "id": len(annotations_list) + 1,
                    "image_id": img_id,
                    "category_id": cat_id,
                    "bbox": [round(v, 1) for v in xywh],
                    # Area is computed from the unrounded width and height.
                    "area": round(xywh[2] * xywh[3], 2),
                    "iscrowd": 0,
                    "segmentation": det.get("segmentation", []),
                }
            )

    if not images_list:
        logger.warning("No labeled JSON files found in %s", labeled_dir)
        return {}

    # Dict insertion order already matches ascending category id.
    categories = [
        {"id": cat_id, "name": name, "supercategory": "object"}
        for name, cat_id in category_index.items()
    ]

    # One timestamp so "year" and "date_created" can never disagree.
    created = datetime.now(tz=timezone.utc)
    coco = {
        "info": {
            "description": "autolabel — OWLv2 household object dataset",
            "version": "1.0",
            "year": created.year,
            "date_created": created.isoformat(),
        },
        "licenses": [],
        "categories": categories,
        "images": images_list,
        "annotations": annotations_list,
    }

    logger.info(
        "COCO export: %d image(s), %d annotation(s), %d categor(ies)",
        len(images_list),
        len(annotations_list),
        len(categories),
    )
    return coco
def run_export(
    labeled_dir: Path,
    output_path: Path,
    cfg: Optional[Settings] = None,
) -> None:
    """
    Build COCO JSON from *labeled_dir* and write to *output_path*.

    Nothing is written when ``build_coco`` returns an empty dict; an error
    is logged instead. A warning is logged when *output_path* is a
    ``.json`` file inside *labeled_dir* under a name other than
    ``coco_export.json``, because the next export would then try to read
    that file as labeled input.

    Args:
        labeled_dir: Directory containing per-image labeled JSON files.
        output_path: Destination path for the COCO JSON file.
        cfg: Settings instance (module default if None). Currently unused.
    """
    _ = cfg or default_settings  # reserved for future use

    coco = build_coco(labeled_dir)
    if not coco:
        logger.error("Nothing to export.")
        return

    # build_coco globs *.json in labeled_dir and only excludes the literal
    # name coco_export.json, so flag any other export written into it.
    writes_into_input_dir = (
        output_path.suffix == ".json"
        and output_path.name != "coco_export.json"
        and output_path.resolve().parent == labeled_dir.resolve()
    )
    if writes_into_input_dir:
        logger.warning(
            "Output %s is inside the labeled directory; later exports will "
            "try to read it as a labeled file",
            output_path,
        )

    save_json(coco, output_path)
    logger.info("COCO JSON written → %s", output_path)