Spaces:

Blablablab
/

codebook

Paused

App Files Files Community

codebook / potato /export /coco_exporter.py

davidjurgens

Deploy: Potato — Codebook Annotation

aceb1b2 verified 11 days ago

Raw

History Blame Contribute Delete

6.7 kB

	"""
	COCO JSON Exporter

	Exports image annotations to COCO format with images[], annotations[],
	and categories[] arrays. Supports bbox, polygon/freeform segmentation,
	and RLE masks; landmark annotations are skipped with a warning.
	"""

	import json
	import os
	import logging
	from typing import Optional, Tuple

	from .base import BaseExporter, ExportContext, ExportResult
	from .cv_utils import (
	build_category_mapping,
	polygon_to_bbox,
	polygon_area,
	flatten_polygon,
	extract_image_annotations,
	get_image_dimensions,
	get_image_filename,
	decode_rle,
	rle_to_coco_rle,
	rle_bbox,
	rle_area,
	)

	logger = logging.getLogger(__name__)


	class COCOExporter(BaseExporter):
	    """Exporter that writes image annotations as a single COCO JSON file.

	    The output document contains three arrays: ``images``, ``annotations``
	    and ``categories``. Category ids are the indices returned by
	    ``build_category_mapping`` shifted by one, so they start at 1.

	    Per annotated object type:

	    * ``bbox``               -> ``bbox`` = [x, y, width, height], ``area`` =
	      width * height, empty ``segmentation``.
	    * ``polygon``/``freeform`` -> one flattened polygon in ``segmentation``
	      plus the bbox and area computed from the points.
	    * ``mask``               -> RLE ``segmentation`` with ``iscrowd`` = 1.
	    * ``landmark`` and any other type are skipped and reported as warnings.
	    """

	    format_name = "coco"
	    description = "COCO JSON format for object detection and segmentation"
	    file_extensions = [".json"]

	    def can_export(self, context: ExportContext) -> Tuple[bool, str]:
	        """Report whether the project defines an image-annotation schema.

	        Args:
	            context: Export context; only ``context.schemas`` is inspected.

	        Returns:
	            ``(True, "")`` if any schema has
	            ``annotation_type == "image_annotation"``, otherwise ``False``
	            together with a human-readable reason.
	        """
	        for schema in context.schemas:
	            if schema.get("annotation_type") == "image_annotation":
	                return True, ""
	        return False, "No image_annotation schema found in config"

	    def export(self, context: ExportContext, output_path: str,
	               options: Optional[dict] = None) -> ExportResult:
	        """Write ``annotations.json`` in COCO format under ``output_path``.

	        Args:
	            context: Export context providing ``annotations``, ``schemas``
	                and ``items`` (looked up by ``instance_id``).
	            output_path: Directory to write into; created if missing.
	            options: Accepted for interface compatibility; no option is
	                read by this exporter.

	        Returns:
	            An ``ExportResult`` listing the written file, the warnings
	            collected for skipped objects, and image/annotation/category
	            counts.

	        Raises:
	            OSError: Propagated from ``os.makedirs``/``open`` when the
	                output directory or file cannot be created.
	        """
	        warnings = []

	        category_map = build_category_mapping(context.annotations, context.schemas)
	        # COCO uses 1-indexed category IDs
	        coco_categories = [
	            {"id": idx + 1, "name": name, "supercategory": ""}
	            for name, idx in sorted(category_map.items(), key=lambda kv: kv[1])
	        ]

	        coco_images = []
	        coco_annotations = []
	        # instance_id -> (image_id, width, height). The dimensions are cached
	        # with the id so that a later annotation record for an already-seen
	        # image falls back to *its own* size, not the size of whichever
	        # image happened to be registered most recently.
	        image_info = {}
	        annotation_id_counter = 1

	        for ann in context.annotations:
	            instance_id = ann.get("instance_id", "")
	            img_anns = extract_image_annotations(ann)
	            if not img_anns:
	                continue

	            # Register each image once (deduplicated by instance_id).
	            if instance_id not in image_info:
	                item = context.items.get(instance_id, {})
	                width, height = get_image_dimensions(item)
	                image_id = len(image_info) + 1
	                image_info[instance_id] = (image_id, width, height)
	                coco_images.append({
	                    "id": image_id,
	                    "file_name": get_image_filename(item) or instance_id,
	                    "width": width,
	                    "height": height,
	                })
	            else:
	                image_id, width, height = image_info[instance_id]

	            for _schema_name, objects in img_anns:
	                for obj in objects:
	                    label = obj.get("label", "")
	                    if label not in category_map:
	                        warnings.append(
	                            f"Unknown label '{label}' in {instance_id}, skipping"
	                        )
	                        continue

	                    # The id is consumed before the geometry is validated,
	                    # so an object skipped below leaves a gap in the id
	                    # sequence; ids of exported annotations stay unique.
	                    annotation_id = annotation_id_counter
	                    annotation_id_counter += 1

	                    geometry = self._geometry_fields(
	                        obj, instance_id, width, height, warnings
	                    )
	                    if geometry is None:
	                        continue

	                    coco_annotations.append({
	                        "id": annotation_id,
	                        "image_id": image_id,
	                        "category_id": category_map[label] + 1,  # 1-indexed
	                        "iscrowd": 0,  # overridden to 1 for masks
	                        **geometry,
	                    })

	        coco_output = {
	            "images": coco_images,
	            "annotations": coco_annotations,
	            "categories": coco_categories,
	        }

	        os.makedirs(output_path, exist_ok=True)
	        out_file = os.path.join(output_path, "annotations.json")
	        # Explicit encoding keeps the output independent of the platform
	        # default locale.
	        with open(out_file, "w", encoding="utf-8") as f:
	            json.dump(coco_output, f, indent=2)

	        return ExportResult(
	            success=True,
	            format_name=self.format_name,
	            files_written=[out_file],
	            warnings=warnings,
	            stats={
	                "num_images": len(coco_images),
	                "num_annotations": len(coco_annotations),
	                "num_categories": len(coco_categories),
	            },
	        )

	    @staticmethod
	    def _geometry_fields(obj: dict, instance_id: str, width, height,
	                         warnings: list) -> Optional[dict]:
	        """Build the shape-specific COCO fields for one annotated object.

	        Args:
	            obj: The annotated object; its ``type`` key selects the branch.
	            instance_id: Used only in warning messages.
	            width: Width of the owning image, used when a mask has no
	                usable ``size``.
	            height: Height of the owning image, used the same way.
	            warnings: List that receives a message when the object is
	                skipped (mutated in place).

	        Returns:
	            A dict with ``bbox``, ``area`` and ``segmentation`` (plus
	            ``iscrowd`` for masks), or ``None`` if the object is skipped.
	        """
	        obj_type = obj.get("type", "")

	        if obj_type == "bbox":
	            x = obj.get("x", 0)
	            y = obj.get("y", 0)
	            w = obj.get("width", 0)
	            h = obj.get("height", 0)
	            return {"bbox": [x, y, w, h], "area": w * h, "segmentation": []}

	        if obj_type in ("polygon", "freeform"):
	            points = obj.get("points", [])
	            if not points:
	                warnings.append(
	                    f"Empty points for {obj_type} in {instance_id}"
	                )
	                return None
	            flat = flatten_polygon(points)
	            bx, by, bw, bh = polygon_to_bbox(points)
	            return {
	                "segmentation": [flat],
	                "bbox": [bx, by, bw, bh],
	                "area": polygon_area(points),
	            }

	        if obj_type == "mask":
	            # ``or`` also covers an explicit ``None`` stored under the key.
	            rle = obj.get("rle") or {}
	            if not rle.get("counts"):
	                warnings.append(
	                    f"Empty RLE mask in {instance_id}"
	                )
	                return None
	            size = rle.get("size") or []
	            if len(size) >= 2:
	                # size is read as [height, width]
	                mask_h, mask_w = size[0], size[1]
	            else:
	                mask_h, mask_w = height, width
	            coco_rle = rle_to_coco_rle(rle, mask_w, mask_h)
	            decoded = decode_rle(rle, mask_w, mask_h)
	            return {
	                "segmentation": coco_rle,
	                "bbox": rle_bbox(decoded, mask_w, mask_h),
	                "area": rle_area(decoded),
	                "iscrowd": 1,
	            }

	        if obj_type == "landmark":
	            warnings.append(
	                f"Landmark annotation in {instance_id} skipped "
	                f"(not standard in COCO detection format)"
	            )
	            return None

	        warnings.append(
	            f"Unknown annotation type '{obj_type}' in {instance_id}"
	        )
	        return None