zhouyik
/

DenseLabelDev

Model card Files Files and versions

Metrics Training metrics Community

DenseLabelDev / third_parts /APE /datasets /tools /phrasecut2coco /convert.py

zhouyik's picture

Upload folder using huggingface_hub

032e687 verified about 1 year ago

history blame contribute delete

4.47 kB

	# Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
	import argparse
	import json
	import os
	import pickle
	import re
	from collections import defaultdict
	from pathlib import Path

	from tqdm import tqdm

	from detectron2.data.detection_utils import read_image


	def parse_args():
	    """Parse the command-line options of the PhraseCut -> COCO conversion script.

	    Returns:
	        argparse.Namespace with ``data_path`` and ``img_path`` (required
	        strings) and ``out_path`` (string, ``""`` when omitted).
	    """
	    parser = argparse.ArgumentParser("Conversion script")

	    parser.add_argument(
	        "--data_path",
	        required=True,
	        type=str,
	        help="Path to the PhraseCut dataset (holds refer_<split>.json, image_data_split.json and images/)",
	    )
	    # NOTE(review): this option is required, but nothing in this file reads
	    # args.img_path; images are loaded from <data_path>/images instead.
	    parser.add_argument(
	        "--img_path",
	        required=True,
	        type=str,
	        help="Path to the gqa image dataset",
	    )

	    # The default "" becomes Path(""), i.e. the current working directory,
	    # so the help text must not promise a fallback to --data_path.
	    parser.add_argument(
	        "--out_path",
	        default="",
	        type=str,
	        help="Path where to export the resulting dataset. Defaults to the current working directory",
	    )
	    return parser.parse_args()


	def _flatten_instance_polygons(instance_polygons):
	    """Turn one instance's polygons into COCO-style flat coordinate lists.

	    Args:
	        instance_polygons: iterable of polygons, each an iterable of
	            coordinate pairs.

	    Returns:
	        A list holding one flat ``[x0, y0, x1, y1, ...]`` list per polygon.
	    """
	    return [[coord for xy in polygon for coord in xy] for polygon in instance_polygons]


	def convert(split, data_path, output_path, imid2data):
	    """Convert one PhraseCut split into a COCO-style json file.

	    Reads ``<data_path>/refer_<split>.json``, groups its entries by
	    ``image_id`` and writes ``<output_path>/phrasecut_<split>.json`` with
	    ``images``, ``annotations`` and a single ``object`` category. Every
	    instance box of every phrase becomes one annotation carrying the phrase.

	    Args:
	        split: split name used in the input and output file names.
	        data_path: path object (supports ``/``) of the directory holding the
	            refer json and the ``images`` folder.
	        output_path: path object (supports ``/``) of the output directory.
	        imid2data: mapping from integer image id to a record providing
	            ``"height"`` and ``"width"``.

	    Returns:
	        Tuple ``(next_img_id, next_id)``: the image id and annotation id that
	        would be assigned next, i.e. the number of emitted images and
	        annotations plus one.
	    """
	    with open(data_path / f"refer_{split}.json", "r", encoding="utf-8") as f:
	        data = json.load(f)

	    img2ann = defaultdict(list)
	    for datapoint in data:
	        img2ann[datapoint["image_id"]].append(datapoint)

	    print(f"Dumping {split}...")
	    next_img_id = 1
	    next_id = 1

	    categories = [{"supercategory": "object", "id": 1, "name": "object"}]
	    annotations = []
	    images = []

	    for image_id, annotation_list in tqdm(img2ann.items()):
	        filename = f"{image_id}.jpg"
	        image_meta = imid2data[int(image_id)]

	        cur_img = {
	            "file_name": filename,
	            "height": image_meta["height"],
	            "width": image_meta["width"],
	            "id": next_img_id,
	            "original_id": image_id,
	        }

	        # Trust the decoded image over the metadata when the sizes disagree.
	        image = read_image(data_path / "images" / filename, format="BGR")
	        if image.shape[1] != cur_img["width"] or image.shape[0] != cur_img["height"]:
	            print("before exif correction: ", cur_img)
	            cur_img["width"], cur_img["height"] = image.shape[1], image.shape[0]
	            print("after exif correction: ", cur_img)

	        for annotation in annotation_list:
	            phrase = annotation["phrase"]
	            instance_boxes = annotation["instance_boxes"]
	            instance_polygon_groups = annotation["Polygons"]

	            # One group of polygons is expected per instance box.
	            assert len(instance_polygon_groups) == len(
	                instance_boxes
	            ), "Number of polygon groups must be equal to the number of boxes"

	            for target_bbox, instance_polygons in zip(instance_boxes, instance_polygon_groups):
	                _, _, w, h = target_bbox
	                cur_obj = {
	                    "area": h * w,
	                    "iscrowd": 0,
	                    "category_id": 1,
	                    "bbox": target_bbox,
	                    # Keep every polygon of the instance: an instance made of
	                    # several polygons stays a single annotation aligned with
	                    # its box instead of breaking the box/polygon pairing.
	                    "segmentation": _flatten_instance_polygons(instance_polygons),
	                    "image_id": next_img_id,
	                    "id": next_id,
	                    "phrase": phrase,
	                }

	                next_id += 1
	                annotations.append(cur_obj)

	        next_img_id += 1
	        images.append(cur_img)

	    ds = {"info": [], "licenses": [], "images": images, "annotations": annotations, "categories": categories}
	    with open(output_path / f"phrasecut_{split}.json", "w", encoding="utf-8") as j_file:
	        json.dump(ds, j_file)
	    return next_img_id, next_id


	def main(args):
	    """Run the conversion for every PhraseCut split.

	    Loads ``image_data_split.json`` from ``args.data_path``, indexes its
	    records by ``image_id``, creates ``args.out_path`` if needed and calls
	    ``convert`` once per split.

	    Args:
	        args: namespace providing the ``data_path`` and ``out_path`` strings.
	    """
	    data_path = Path(args.data_path)
	    output_path = Path(args.out_path)

	    with open(data_path / "image_data_split.json", "r") as meta_file:
	        image_records = json.load(meta_file)

	    # Index the image records by id for direct lookup during conversion.
	    imid2data = {}
	    for record in image_records:
	        imid2data[record["image_id"]] = record

	    os.makedirs(str(output_path), exist_ok=True)

	    # Phrasecut has 4 splits: train val miniv and test
	    all_splits = ("miniv", "train", "val", "test")
	    for split_name in all_splits:
	        convert(split_name, data_path, output_path, imid2data)


	if __name__ == "__main__":
	    # Script entry point: read the CLI options, then run the conversion.
	    cli_args = parse_args()
	    main(cli_args)