|
|
import argparse |
|
|
import json |
|
|
import os |
|
|
from pathlib import Path |
|
|
|
|
|
from tqdm import tqdm |
|
|
|
|
|
from detectron2.data.detection_utils import read_image |
|
|
|
|
|
|
|
|
def parse_args(): |
|
|
""" |
|
|
Parse input arguments |
|
|
""" |
|
|
parser = argparse.ArgumentParser( |
|
|
description="Convert Objects365 annotations into MS Coco format" |
|
|
) |
|
|
parser.add_argument("--root_dir", dest="root_dir", help="path to objects365 data", type=str) |
|
|
parser.add_argument( |
|
|
"--apply_exif", |
|
|
dest="apply_exif", |
|
|
action="store_true", |
|
|
help="apply the exif orientation correctly", |
|
|
) |
|
|
parser.add_argument( |
|
|
"--subsets", |
|
|
type=str, |
|
|
nargs="+", |
|
|
default=["val", "train"], |
|
|
choices=["train", "val", "test", "minival"], |
|
|
help="subsets to convert", |
|
|
) |
|
|
parser.add_argument("--image_info_path", type=str, help="image_info_path") |
|
|
args = parser.parse_args() |
|
|
return args |
|
|
|
|
|
|
|
|
args = parse_args() |
|
|
root_dir = args.root_dir |
|
|
|
|
|
if args.apply_exif: |
|
|
print("-" * 60) |
|
|
print("We will apply exif orientation...") |
|
|
print("-" * 60) |
|
|
|
|
|
if not isinstance(args.subsets, list): |
|
|
args.subsets = [args.subsets] |
|
|
|
|
|
|
|
|
for subset in args.subsets: |
|
|
|
|
|
print("converting {} data".format(subset)) |
|
|
|
|
|
|
|
|
if subset == "train": |
|
|
json_name = "zhiyuan_objv2_train.json" |
|
|
elif subset == "val": |
|
|
json_name = "zhiyuan_objv2_val.json" |
|
|
elif subset == "minival": |
|
|
json_name = "zhiyuan_objv2_val.json" |
|
|
|
|
|
|
|
|
print("loading original annotations ...") |
|
|
json_path = os.path.join(root_dir, "annotations", json_name) |
|
|
json_data = json.load(open(json_path, "r")) |
|
|
print("loading original annotations ... Done") |
|
|
|
|
|
print(json_data.keys()) |
|
|
oi = {} |
|
|
|
|
|
|
|
|
print("adding basic dataset info") |
|
|
|
|
|
|
|
|
print("adding basic license info") |
|
|
oi["licenses"] = json_data["licenses"] |
|
|
|
|
|
|
|
|
print("converting category info") |
|
|
oi["categories"] = json_data["categories"] |
|
|
|
|
|
|
|
|
print("converting image info ...") |
|
|
images = json_data["images"] |
|
|
if subset == "minival": |
|
|
images = images[:5000] |
|
|
print(f"{len(images)} images get") |
|
|
rm_image_ids = [] |
|
|
|
|
|
if args.apply_exif: |
|
|
image_info = {} |
|
|
with open(args.image_info_path, "r") as f: |
|
|
for line in f.readlines(): |
|
|
line = line.strip().split() |
|
|
image_id, file_name, height, width, channel = line |
|
|
|
|
|
image_id = int(image_id) |
|
|
height = int(height) |
|
|
width = int(width) |
|
|
|
|
|
image_info[image_id] = [file_name, height, width] |
|
|
|
|
|
print(f"{len(image_info)} image_info get") |
|
|
|
|
|
new_images = [] |
|
|
for img in tqdm(images): |
|
|
image_id = img["id"] |
|
|
|
|
|
if image_id not in image_info.keys(): |
|
|
rm_image_ids.append(image_id) |
|
|
print("removing", img) |
|
|
continue |
|
|
|
|
|
file_name, height, width = image_info[image_id] |
|
|
|
|
|
assert file_name == img["file_name"] |
|
|
|
|
|
if width != img["width"] or height != img["height"]: |
|
|
print("before exif correction: ", img) |
|
|
img["width"], img["height"] = width, height |
|
|
print("after exif correction: ", img) |
|
|
|
|
|
new_images.append(img) |
|
|
images = new_images |
|
|
|
|
|
oi["images"] = images |
|
|
print(f"{len(images)} images keep") |
|
|
|
|
|
|
|
|
print("converting annotations ...") |
|
|
annotations = json_data["annotations"] |
|
|
print(f"{len(annotations)} annotations get") |
|
|
|
|
|
annotations = [ann for ann in annotations if ann["image_id"] not in rm_image_ids] |
|
|
if subset == "minival": |
|
|
keep_image_ids = [img["id"] for img in images] |
|
|
annotations = [ann for ann in annotations if ann["image_id"] in keep_image_ids] |
|
|
|
|
|
oi["annotations"] = annotations |
|
|
print(f"{len(annotations)} annotations keep") |
|
|
|
|
|
|
|
|
json_path = os.path.join(root_dir, "annotations/", "objects365_{}.json".format(subset)) |
|
|
print("writing output to {}".format(json_path)) |
|
|
json.dump(oi, open(json_path, "w")) |
|
|
print("Done") |
|
|
|