File size: 3,253 Bytes
# Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
import json
from typing import Any, List, NamedTuple, Optional, Tuple
class Annotation(NamedTuple):
    """A single annotated object attached to a datapoint.

    This is a plain immutable record; no validation is performed on the
    values stored in it.
    """

    area: float
    # Integer flag named after the COCO "iscrowd" field (presumably 0/1 -- TODO confirm).
    iscrowd: int
    category_id: int
    # Box coordinates as a list of floats; the coordinate convention is not
    # visible from this declaration -- verify against the producer.
    bbox: List[float]
    # Alternative box representation; presumably a layout suited to GIoU
    # computation -- TODO confirm against the code that fills it.
    giou_friendly_bbox: List[float]
    # (int, int) pairs; presumably spans in the caption that refer to this
    # object -- TODO confirm.
    tokens_positive: List[Tuple[int, int]]
class Datapoint(NamedTuple):
    """One training example: a caption for an image plus its annotations.

    This is a plain immutable record; no validation is performed on the
    values stored in it.
    """

    image_id: int
    dataset_name: str
    # (int, int) pairs; presumably caption spans that refer to no annotated
    # object -- TODO confirm.
    tokens_negative: List[Tuple[int, int]]
    original_id: int
    caption: str
    # Quoted forward reference so this declaration does not depend on where
    # ``Annotation`` is defined relative to this class.
    annotations: List["Annotation"]
def _append_datapoints(
    datapoints,
    imgid2imginfo,
    data_source,
    file_name_of,
    images,
    annotations,
    next_img_id,
    next_id,
):
    """Append one image record per datapoint and one record per annotation.

    ``images`` and ``annotations`` are extended in place. ``file_name_of`` maps
    an original image id to the file name stored in the image record.
    Returns the advanced ``(next_img_id, next_id)`` counters.
    """
    for datapoint in datapoints:
        img_id = datapoint.image_id
        file_name = file_name_of(img_id)
        img_info = imgid2imginfo[img_id]
        images.append(
            {
                "file_name": file_name,
                "height": img_info["height"],
                "width": img_info["width"],
                "id": next_img_id,
                # The source dataset's own image id is kept next to the new running id.
                "original_id": img_id,
                "caption": datapoint.caption,
                "tokens_negative": datapoint.tokens_negative,
                "data_source": data_source,
                "dataset_name": datapoint.dataset_name,
            }
        )
        for ann in datapoint.annotations:
            annotations.append(
                {
                    "area": float(ann.area),
                    "iscrowd": ann.iscrowd,
                    "image_id": next_img_id,
                    "category_id": ann.category_id,
                    "id": next_id,
                    "bbox": ann.bbox,
                    "tokens_positive": ann.tokens_positive,
                }
            )
            next_id += 1
        next_img_id += 1
    return next_img_id, next_id


def convert2dataset_combined(
    datapoint_list_coco: List["Datapoint"],
    datapoint_list_vg: List["Datapoint"],
    imgid2imginfo_coco,
    imgid2imginfo_vg,
    output_path,
) -> Tuple[int, int]:
    """Merge COCO and VG datapoints into one COCO-style JSON file.

    Every datapoint becomes one entry in ``images`` (so the same original
    image appears once per caption) and every annotation becomes one entry in
    ``annotations``. Image ids and annotation ids are renumbered from 0, COCO
    datapoints first, then VG datapoints. The original image id is kept under
    ``"original_id"``. ``Datapoint.original_id`` and
    ``Annotation.giou_friendly_bbox`` are not written to the output.

    Args:
        datapoint_list_coco: Datapoints whose images are described by
            ``imgid2imginfo_coco``; tagged ``"data_source": "coco"``.
        datapoint_list_vg: Datapoints whose images are described by
            ``imgid2imginfo_vg``; tagged ``"data_source": "vg"``.
        imgid2imginfo_coco: Mapping from image id to a dict providing
            ``"file_name"``, ``"height"`` and ``"width"``.
        imgid2imginfo_vg: Mapping from image id to a dict providing
            ``"height"`` and ``"width"``; the file name is ``"<image_id>.jpg"``.
        output_path: Directory (``str`` or path-like) in which
            ``final_mixed_train.json`` is written.

    Returns:
        ``(number_of_images_written, number_of_annotations_written)``.

    Raises:
        KeyError: If a datapoint's image id, or a required key, is missing
            from the corresponding image-info mapping.
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level imports do not include pathlib.
    from pathlib import Path

    print("Dumping combined coco and vg images related all training examples...")

    images = []
    annotations = []

    next_img_id, next_id = _append_datapoints(
        datapoint_list_coco,
        imgid2imginfo_coco,
        "coco",
        lambda img_id: imgid2imginfo_coco[img_id]["file_name"],
        images,
        annotations,
        0,
        0,
    )
    # VG ids continue where COCO stopped so ids stay unique across both sources.
    next_img_id, next_id = _append_datapoints(
        datapoint_list_vg,
        imgid2imginfo_vg,
        "vg",
        lambda img_id: f"{img_id}.jpg",
        images,
        annotations,
        next_img_id,
        next_id,
    )

    ds = {"info": [], "licenses": [], "images": images, "annotations": annotations, "categories": []}
    # Path(...) accepts both str and Path, so plain string directories work too.
    with open(Path(output_path) / "final_mixed_train.json", "w", encoding="utf-8") as j_file:
        json.dump(ds, j_file)
    return next_img_id, next_id
|