import csv
import pickle as pkl
from pathlib import Path
from collections import defaultdict


def create_annotations(dataset_path, dataset_split='train'):
    """Build and cache the cleaned segmentation annotations for one split.

    Reads ``<dataset_path>/<dataset_split>/<dataset_split>-annotations-object-segmentation.csv``
    and keeps only the rows whose image
    (``<split>/images/<ImageID>.jpg``) and mask (``<split>/masks/<MaskPath>``)
    both exist on disk. The result is pickled next to the CSV as
    ``<dataset_split>-annotations-object-segmentation_clean.pkl`` and also
    returned.

    Args:
        dataset_path: Root directory of the dataset (``str`` or ``Path``).
        dataset_split: Name of the split sub-directory; also used as the
            prefix of the CSV and pickle file names.

    Returns:
        A dict with two entries:
            ``'image_id_to_masks'``: ``defaultdict(list)`` mapping each image
                id to the list of its mask paths, in CSV row order.
            ``'dataset_samples'``: list of the image ids that have at least
                one valid mask, in order of first appearance.

    Raises:
        FileNotFoundError: If the annotation CSV does not exist.
        KeyError: If a row lacks the ``ImageID`` or ``MaskPath`` column.
    """
    dataset_path = Path(dataset_path)
    _split_path = dataset_path / dataset_split
    _images_path = _split_path / 'images'
    _masks_path = _split_path / 'masks'
    csv_path = _split_path / f'{dataset_split}-annotations-object-segmentation.csv'
    clean_anno_path = _split_path / f'{dataset_split}-annotations-object-segmentation_clean.pkl'

    annotations = {
        'image_id_to_masks': defaultdict(list),  # mapping from image_id to a list of masks
        'dataset_samples': [],  # list of unique image ids
    }
    image_id_to_masks = annotations['image_id_to_masks']

    # Several rows usually share one image id; remember the result of the
    # image existence check so each image file is probed only once.
    image_exists = {}

    # newline='' is what the csv module requires: without it, universal-newline
    # translation rewrites '\r' / '\r\n' inside quoted fields before the
    # reader sees them.
    with open(csv_path, 'r', newline='') as f:
        reader = csv.DictReader(f, delimiter=',')
        for row in reader:
            image_id = row['ImageID']
            mask_path = row['MaskPath']

            if image_id not in image_exists:
                image_exists[image_id] = (_images_path / f'{image_id}.jpg').is_file()

            if image_exists[image_id] and (_masks_path / mask_path).is_file():
                image_id_to_masks[image_id].append(mask_path)

    annotations['dataset_samples'] = list(image_id_to_masks)

    with clean_anno_path.open('wb') as f:
        pkl.dump(annotations, f)

    return annotations