Spaces:
Sleeping
Sleeping
| """Convert YOLO-format car_data dataset to COCO format for RF-DETR training.""" | |
| import json | |
| from pathlib import Path | |
| from PIL import Image | |
| def yolo_to_coco( | |
| data_root: Path, split: str, class_names: list[str] | None = None | |
| ) -> dict: | |
| """Convert a YOLO-format split (train/test) to COCO format. | |
| Args: | |
| data_root: Path to mydata/ directory containing images/ and labels/. | |
| split: Either "train" or "test". | |
| class_names: Category names (defaults to ["car"]). | |
| Returns: | |
| COCO-format annotation dictionary. | |
| """ | |
| if class_names is None: | |
| class_names = ["car"] | |
| images_dir = data_root / "images" / split | |
| labels_dir = data_root / "labels" / split | |
| coco: dict = { | |
| "images": [], | |
| "annotations": [], | |
| "categories": [ | |
| {"id": i, "name": n, "supercategory": "vehicle"} | |
| for i, n in enumerate(class_names) | |
| ], | |
| } | |
| annotation_id = 0 | |
| for image_id, image_path in enumerate(sorted(images_dir.glob("*.jpg"))): | |
| img = Image.open(image_path) | |
| w, h = img.size | |
| coco["images"].append({ | |
| "id": image_id, | |
| "file_name": image_path.name, | |
| "width": w, | |
| "height": h, | |
| }) | |
| label_path = labels_dir / image_path.with_suffix(".txt").name | |
| if not label_path.exists(): | |
| continue | |
| for line in label_path.read_text().strip().splitlines(): | |
| parts = line.strip().split() | |
| if len(parts) != 5: | |
| continue | |
| class_id = int(parts[0]) | |
| cx, cy, bw, bh = (float(v) for v in parts[1:]) | |
| # YOLO (normalized center x, y, w, h) -> COCO (absolute x, y, w, h) | |
| abs_w = bw * w | |
| abs_h = bh * h | |
| abs_x = (cx * w) - (abs_w / 2) | |
| abs_y = (cy * h) - (abs_h / 2) | |
| coco["annotations"].append({ | |
| "id": annotation_id, | |
| "image_id": image_id, | |
| "category_id": class_id, | |
| "bbox": [abs_x, abs_y, abs_w, abs_h], | |
| "area": abs_w * abs_h, | |
| "iscrowd": 0, | |
| }) | |
| annotation_id += 1 | |
| return coco | |
def convert_dataset(
    data_root: Path,
    output_dir: Path,
    splits: list[str] | None = None,
    class_names: list[str] | None = None,
) -> None:
    """Convert YOLO dataset to COCO format and symlink images.

    For each split this writes ``<output_dir>/<split>/_annotations.coco.json``
    and creates a symlink in that directory for every ``*.jpg`` of the split.
    Existing files and valid links are left alone; dangling links are
    replaced. A one-line summary per split is printed.

    Args:
        data_root: Path to YOLO dataset root (contains images/ and labels/).
        output_dir: Path to write COCO-format output.
        splits: Split names to convert (defaults to ["train", "test"]).
        class_names: Category names (defaults to ["car"]).
    """
    if splits is None:
        splits = ["train", "test"]

    for split in splits:
        coco = yolo_to_coco(data_root, split, class_names)

        split_dir = output_dir / split
        split_dir.mkdir(parents=True, exist_ok=True)

        annotations_path = split_dir / "_annotations.coco.json"
        annotations_path.write_text(json.dumps(coco, indent=2))

        # Symlink images into the split directory so RF-DETR can find them
        images_src = data_root / "images" / split
        for img in images_src.glob("*.jpg"):
            dest = split_dir / img.name
            # exists() follows symlinks, so a dangling link left by an earlier
            # run reports False and symlink_to() would then fail with
            # FileExistsError. Drop the stale link before re-creating it.
            if dest.is_symlink() and not dest.exists():
                dest.unlink()
            if not dest.exists():
                dest.symlink_to(img.resolve())

        n_images = len(coco["images"])
        n_annotations = len(coco["annotations"])
        print(f"{split}: {n_images} images, {n_annotations} annotations -> {annotations_path}")
def main() -> None:
    """Convert the car_data dataset located next to this script to COCO format."""
    # Input and output both live under the car_data/ directory beside this file.
    dataset_dir = Path(__file__).resolve().parent / "car_data"
    source_root = dataset_dir / "mydata"
    coco_root = dataset_dir / "coco"
    convert_dataset(data_root=source_root, output_dir=coco_root)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()