Spaces:
Build error
Build error
| import os | |
| import glob | |
| from pathlib import Path | |
| from datetime import date | |
| from collections import defaultdict | |
| from warnings import warn | |
| from path_utils import * | |
def merge_cats_get_id(cats, this_cat):
    """Return the merged id for ``this_cat``, registering it if it is new.

    Categories are matched by their ``'name'``.

    Args:
        cats: List of already merged category dicts (each with ``'name'`` and
            ``'id'``). A new category is appended to this list in place.
        this_cat: Category dict to merge. When it is new, its ``'id'`` is
            overwritten with the next free merged id before it is appended.

    Returns:
        The id of the category inside ``cats``: the id of the existing entry
        with the same name, or the freshly assigned ``len(cats) + 1``.
    """
    # A category seen before must map to the id it received in the merged
    # list, not to the id it happens to carry in the incoming dataset.
    for known_cat in cats:
        if known_cat['name'] == this_cat['name']:
            return known_cat['id']

    this_cat['id'] = len(cats) + 1
    cats.append(this_cat)
    return this_cat['id']
def filter_images(images, annotations):
    """Keep only the images that are referenced by at least one annotation.

    Args:
        images: List of image dicts, each with an ``'id'`` key.
        annotations: List of annotation dicts, each with an ``'image_id'`` key.

    Returns:
        A new list with the image dicts whose ``'id'`` appears as the
        ``'image_id'`` of some annotation, in their original order.
    """
    # Build the lookup once; a set makes each membership test O(1) instead of
    # scanning the whole annotation id list for every image.
    annotated_img_ids = {ann['image_id'] for ann in annotations}
    return [
        img_info for img_info in images if img_info['id'] in annotated_img_ids
    ]
def merge(jsons, img_roots, output_dir, output_nm="merged", verbose=True):
    """Merge several COCO-style annotation files into a single dataset.

    For every (annotation file, image directory) pair the categories are
    folded into one shared list, the images that have at least one annotation
    are passed to ``assure_copy`` with a destination under
    ``<output_dir>/images`` and a dataset-prefixed file name, and image and
    annotation ids are renumbered consecutively starting from 1. The merged
    dictionary is finally handed to ``write_json`` for
    ``<output_dir>/<output_nm>.json``.

    Args:
        jsons: Paths of the annotation JSON files to merge.
        img_roots: Image directories, one per entry of ``jsons`` (same order).
        output_dir: Directory receiving the merged JSON and ``images`` folder.
        output_nm: File name (without extension) of the merged JSON.
        verbose: When true, print the merged image and annotation counts.

    Raises:
        AssertionError: If ``jsons`` and ``img_roots`` differ in length.
        KeyError: If an annotation references an image id or category id that
            is missing from the images / categories of its own file.
    """
    assert len(jsons) == len(img_roots), (
        "jsons and img_roots must have the same number of entries"
    )

    out_dir_path = Path(output_dir)
    out_imgs_dir_path = out_dir_path / "images"
    # Create the output directory up front so that writing the JSON does not
    # depend on an image having been copied first (e.g. with empty inputs).
    out_dir_path.mkdir(parents=True, exist_ok=True)

    merged_img_id_state = 1
    merged_ann_id_state = 1
    merged_names = []
    merged_dict = {
        # NOTE(review): COCO tooling usually names this key "date_created";
        # the original spelling is kept so existing readers are unaffected.
        "info": {"description": "", "data_created": f"{date.today():%Y/%m/%d}"},
        "annotations": [],
        "categories": [],
        "images": [],
    }

    for json_path, imgs_dir_path in zip(jsons, img_roots):
        coco_dict = read_coco_json(json_path)
        dataset_name = get_setname(json_path)
        merged_names.append(dataset_name)

        # categories: map this file's category ids onto the merged ids
        cat_id_old2new = {}
        for cat in coco_dict['categories']:
            # Read the old id first: merge_cats_get_id may overwrite cat['id'].
            old_cat_id = cat['id']
            new_cat_id = merge_cats_get_id(merged_dict['categories'], cat)
            cat_id_old2new[old_cat_id] = new_cat_id

        # images: drop images without annotations, renumber and copy the rest
        coco_dict['images'] = filter_images(
            coco_dict['images'], coco_dict['annotations']
        )
        img_id_old2new = {}
        for img in coco_dict['images']:
            img_id_old2new[img["id"]] = merged_img_id_state
            img["id"] = merged_img_id_state
            old_img_path = Path(imgs_dir_path) / img['file_name']
            # Prefix with the dataset name so equally named files coming from
            # different datasets do not collide in the shared images folder.
            img['file_name'] = dataset_name + "_" + img['file_name']
            new_img_path = out_imgs_dir_path / img['file_name']
            assure_copy(old_img_path, new_img_path)
            merged_img_id_state += 1
            merged_dict['images'].append(img)

        # annotations: renumber and re-point at the new image/category ids
        for ann in coco_dict['annotations']:
            ann['id'] = merged_ann_id_state
            ann['image_id'] = img_id_old2new[ann['image_id']]
            ann['category_id'] = cat_id_old2new[ann['category_id']]
            merged_ann_id_state += 1
            merged_dict['annotations'].append(ann)

    merged_dict["info"]["description"] = "+".join(merged_names)
    out_json = out_dir_path / f"{output_nm}.json"
    write_json(out_json, merged_dict)

    if verbose:
        print(f"Number of images: {len(merged_dict['images'])}")
        print(f"Number of annotations: {len(merged_dict['annotations'])}")
if __name__ == '__main__':
    # Merge every "dataset_*" directory found in the current working
    # directory. glob returns matches in arbitrary order, so they are sorted
    # to keep the merged ids and the description string reproducible between
    # runs; non-directory matches (e.g. archives) are skipped.
    dataset_dirs = sorted(
        dataset for dataset in glob.glob("dataset_*") if os.path.isdir(dataset)
    )
    paths2images = [os.path.join(dataset, "images") for dataset in dataset_dirs]
    paths2json = [
        os.path.join(dataset, "annotations/instances_default.json")
        for dataset in dataset_dirs
    ]
    merge(paths2json, paths2images, './merged_cocos', 'merged', verbose=True)