|
|
|
|
|
import json |
|
|
from typing import Any, List, NamedTuple, Optional, Tuple |
|
|
|
|
|
|
|
|
class Annotation(NamedTuple): |
|
|
area: float |
|
|
iscrowd: int |
|
|
category_id: int |
|
|
bbox: List[float] |
|
|
giou_friendly_bbox: List[float] |
|
|
tokens_positive: List[Tuple[int, int]] |
|
|
|
|
|
|
|
|
class Datapoint(NamedTuple): |
|
|
image_id: int |
|
|
dataset_name: str |
|
|
tokens_negative: List[Tuple[int, int]] |
|
|
original_id: int |
|
|
caption: str |
|
|
annotations: List[Annotation] |
|
|
|
|
|
|
|
|
def convert2dataset_combined( |
|
|
datapoint_list_coco: List[Datapoint], |
|
|
datapoint_list_vg: List[Datapoint], |
|
|
imgid2imginfo_coco, |
|
|
imgid2imginfo_vg, |
|
|
output_path, |
|
|
): |
|
|
"""""" |
|
|
print(f"Dumping combined coco and vg images related all training examples...") |
|
|
next_img_id = 0 |
|
|
next_id = 0 |
|
|
|
|
|
annotations = [] |
|
|
images = [] |
|
|
|
|
|
for datapoint in datapoint_list_coco: |
|
|
img_id = datapoint.image_id |
|
|
filename = imgid2imginfo_coco[img_id]["file_name"] |
|
|
cur_img = { |
|
|
"file_name": filename, |
|
|
"height": imgid2imginfo_coco[img_id]["height"], |
|
|
"width": imgid2imginfo_coco[img_id]["width"], |
|
|
"id": next_img_id, |
|
|
"original_id": img_id, |
|
|
"caption": datapoint.caption, |
|
|
"tokens_negative": datapoint.tokens_negative, |
|
|
"data_source": "coco", |
|
|
"dataset_name": datapoint.dataset_name, |
|
|
} |
|
|
|
|
|
for anns in datapoint.annotations: |
|
|
cur_obj = { |
|
|
"area": float(anns.area), |
|
|
"iscrowd": anns.iscrowd, |
|
|
"image_id": next_img_id, |
|
|
"category_id": anns.category_id, |
|
|
"id": next_id, |
|
|
"bbox": anns.bbox, |
|
|
"tokens_positive": anns.tokens_positive, |
|
|
} |
|
|
next_id += 1 |
|
|
annotations.append(cur_obj) |
|
|
|
|
|
next_img_id += 1 |
|
|
images.append(cur_img) |
|
|
|
|
|
for datapoint in datapoint_list_vg: |
|
|
img_id = datapoint.image_id |
|
|
filename = f"{img_id}.jpg" |
|
|
cur_img = { |
|
|
"file_name": filename, |
|
|
"height": imgid2imginfo_vg[img_id]["height"], |
|
|
"width": imgid2imginfo_vg[img_id]["width"], |
|
|
"id": next_img_id, |
|
|
"original_id": img_id, |
|
|
"caption": datapoint.caption, |
|
|
"tokens_negative": datapoint.tokens_negative, |
|
|
"data_source": "vg", |
|
|
"dataset_name": datapoint.dataset_name, |
|
|
} |
|
|
|
|
|
for anns in datapoint.annotations: |
|
|
cur_obj = { |
|
|
"area": float(anns.area), |
|
|
"iscrowd": anns.iscrowd, |
|
|
"image_id": next_img_id, |
|
|
"category_id": anns.category_id, |
|
|
"id": next_id, |
|
|
"bbox": anns.bbox, |
|
|
"tokens_positive": anns.tokens_positive, |
|
|
} |
|
|
next_id += 1 |
|
|
annotations.append(cur_obj) |
|
|
|
|
|
next_img_id += 1 |
|
|
images.append(cur_img) |
|
|
|
|
|
ds = {"info": [], "licenses": [], "images": images, "annotations": annotations, "categories": []} |
|
|
with open(output_path / f"final_mixed_train.json", "w") as j_file: |
|
|
json.dump(ds, j_file) |
|
|
return next_img_id, next_id |
|
|
|