| """ | |
| Dataset loader for training RF-DETR models. | |
| Supports COCO format annotations. | |
| """ | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple | |
def load_coco_annotations(annotation_path: str) -> Dict:
    """Load a COCO format annotation file and return the parsed JSON.

    Args:
        annotation_path: Path to the JSON annotation file.

    Returns:
        The decoded JSON document (a dict for a well-formed COCO file).

    Raises:
        FileNotFoundError: If ``annotation_path`` does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    if not os.path.exists(annotation_path):
        raise FileNotFoundError(f"Annotation file not found: {annotation_path}")

    # JSON text is UTF-8 by specification. Without an explicit encoding,
    # open() falls back to the platform locale, which corrupts or rejects
    # non-ASCII category names on systems whose default is not UTF-8.
    with open(annotation_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def get_image_paths(images_dir: str) -> List[str]:
    """Return the sorted paths of all image files directly inside a directory.

    An entry counts as an image when it is a file whose name ends with
    ``.jpg``, ``.jpeg``, ``.png`` or ``.bmp``. The comparison ignores case,
    so ``.JPG`` and ``.Jpg`` are both accepted. Sub-directories are not
    searched.

    Args:
        images_dir: Directory to scan.

    Returns:
        Sorted list of path strings, each file listed once. Empty when
        ``images_dir`` is not an existing directory.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
    root = Path(images_dir)
    if not root.is_dir():
        return []

    # A single directory pass with a case-insensitive suffix test replaces
    # globbing every extension in lower and upper case: that approach misses
    # mixed-case suffixes and yields each file twice wherever glob matching
    # is itself case-insensitive (e.g. Windows paths).
    return sorted(
        str(entry)
        for entry in root.iterdir()
        if entry.is_file() and entry.name.lower().endswith(image_extensions)
    )
def validate_dataset(dataset_path: str) -> Tuple[bool, List[str]]:
    """Validate the dataset layout and COCO annotation keys.

    Expected layout::

        <dataset_path>/images/
        <dataset_path>/annotations/annotations.json

    The annotations file must hold a JSON object with the top-level keys
    ``images``, ``annotations`` and ``categories``. An unreadable,
    undecodable or malformed annotations file is reported as an issue
    rather than raised.

    Args:
        dataset_path: Root directory of the dataset.

    Returns:
        ``(is_valid, issues)``: ``issues`` holds one human-readable message
        per problem found, and ``is_valid`` is True only when it is empty.
    """
    issues: List[str] = []
    root = Path(dataset_path)

    if not root.exists():
        issues.append(f"Dataset directory does not exist: {root}")
        return False, issues

    images_dir = root / "images"
    annotations_file = root / "annotations" / "annotations.json"

    # A regular file named "images" is not a usable images directory.
    if not images_dir.is_dir():
        issues.append(f"Images directory missing: {images_dir}")

    # Likewise a directory named "annotations.json" cannot be parsed.
    if not annotations_file.is_file():
        issues.append(f"Annotations file missing: {annotations_file}")
        return False, issues

    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        coco_data = json.loads(annotations_file.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        issues.append(f"Invalid JSON in annotations: {e}")
    except (OSError, UnicodeDecodeError) as e:
        issues.append(f"Could not read annotations file: {e}")
    else:
        if not isinstance(coco_data, dict):
            # Membership tests on a list would misreport every key as
            # missing, and on a number or null they would raise TypeError.
            issues.append(
                "COCO format: top-level JSON value must be an object, "
                f"got {type(coco_data).__name__}"
            )
        else:
            for key in ('images', 'annotations', 'categories'):
                if key not in coco_data:
                    issues.append(f"COCO format: missing '{key}' key")

    return not issues, issues
def get_dataset_info(dataset_path: str) -> Dict:
    """Collect summary statistics for a dataset directory.

    The returned dict always carries ``path``, ``images_count``,
    ``annotations_count``, ``categories_count`` and ``categories``. Counts
    stay at zero (and ``categories`` empty) when the matching directory or
    file is absent. If reading the annotations fails, the exception text is
    stored under ``error`` and the fields filled in so far are kept.

    Args:
        dataset_path: Root directory of the dataset.

    Returns:
        The summary dict described above.
    """
    root = Path(dataset_path)
    summary = dict(
        path=str(root),
        images_count=0,
        annotations_count=0,
        categories_count=0,
        categories=[],
    )

    image_folder = root / "images"
    if image_folder.exists():
        found_images = get_image_paths(str(image_folder))
        summary['images_count'] = len(found_images)

    coco_file = root / "annotations" / "annotations.json"
    if not coco_file.exists():
        return summary

    try:
        coco = load_coco_annotations(str(coco_file))
        summary['annotations_count'] = len(coco.get('annotations', []))
        category_entries = coco.get('categories', [])
        summary['categories_count'] = len(category_entries)
        # Build the full name list before storing it, so a malformed entry
        # leaves the 'categories' field at its empty default.
        names = []
        for entry in category_entries:
            names.append(entry['name'])
        summary['categories'] = names
    except Exception as exc:
        summary['error'] = str(exc)

    return summary