# anas
# Initial deployment of Raster2Seq floor plan vectorization API
# fadb92b
import glob
import json
import os
import shutil
from pathlib import Path
def combine_json_files(input_pattern, data_path, split_type, output_file, output_image_dir, start_image_id=0):
    """
    Combines multiple COCO-style JSON annotation files into a single file.

    Images are renumbered consecutively starting at ``start_image_id`` and
    renamed to ``<new id, zero-padded to 6 digits>.png``; annotations are
    renumbered from 0 and re-pointed at the new image ids. For every accepted
    image, the PNG named after the JSON file (``<json stem>.png`` inside
    ``<data_path>/<split_type>/``) is copied into ``output_image_dir`` under
    the new name, if that source file exists.

    Besides ``output_file`` two side files are written next to it, both
    prefixed with the output file name up to its first dot:
    ``<prefix>_image_id_mapping.json`` (original id -> new id) and, only when
    something was skipped, ``<prefix>_skipped.txt`` (one original id per line).

    Args:
        input_pattern: Glob pattern to match the input JSON files (e.g., "annotations/*.json")
        data_path: Root directory that contains the per-split image folders
        split_type: Name of the image folder under ``data_path`` (e.g., "train")
        output_file: Path to the output combined JSON file
        output_image_dir: Directory that receives the renamed image copies
        start_image_id: First image id to assign

    Returns:
        A tuple ``(combined_data, image_id_mapping_list)`` where
        ``combined_data`` is the merged COCO dict and
        ``image_id_mapping_list`` holds one single-element list
        ``["<original id> <new id>"]`` per accepted image.

    Raises:
        KeyError: If an annotation references an image id that was not
            declared by any file processed so far.
    """
    os.makedirs(output_image_dir, exist_ok=True)

    # Initialize combined data structure
    combined_data = {"images": [], "annotations": [], "categories": []}

    next_image_id = start_image_id
    next_annotation_id = 0
    skip_file_list = []
    image_id_mapping = {}

    # Find all matching JSON files (sorted so that id assignment is reproducible)
    json_files = sorted(glob.glob(input_pattern))
    print(f"Found {len(json_files)} JSON files to combine")

    # Process each file
    for i, json_file in enumerate(json_files):
        print(f"Processing file {i + 1}/{len(json_files)}: {json_file}")
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Take the categories from the first file that actually declares some,
        # rather than only from the file that happens to sort first.
        if not combined_data["categories"] and data.get("categories"):
            combined_data["categories"] = data["categories"]

        images = data.get("images") or []
        annotations = data.get("annotations") or []

        # Files without annotations are skipped; remember their first image id
        # (when there is one) so the skip shows up in the report.
        if not annotations:
            if images:
                skip_file_list.append(images[0]["id"])
            continue

        # The source picture is named after the JSON file, not after the
        # "file_name" stored inside it.
        org_file_name = os.path.basename(json_file).replace(".json", ".png")
        source_image = Path(data_path) / split_type / org_file_name

        # Original ids of the images this particular file contributed.
        accepted_ids = set()

        # Process images
        for image in images:
            original_id = image["id"]
            if original_id in image_id_mapping:
                # Duplicate of an image that was already merged.
                skip_file_list.append(original_id)
                continue

            image_id_mapping[original_id] = next_image_id
            accepted_ids.add(original_id)
            image["id"] = next_image_id
            next_image_id += 1
            image["file_name"] = str(image["id"]).zfill(6) + ".png"

            # Compare full paths, not bare file names: a source that already
            # carries the target name must still be copied when the output
            # directory differs, and must not be copied onto itself when it
            # does not.
            target_image = Path(output_image_dir) / image["file_name"]
            if source_image.exists() and source_image.resolve() != target_image.resolve():
                shutil.copy(source_image, target_image)

            combined_data["images"].append(image)

        # Process annotations
        for annotation in annotations:
            source_image_id = annotation["image_id"]
            # Deliberately a hard lookup: a dangling image reference is an
            # input error and should not be dropped silently.
            remapped_image_id = image_id_mapping[source_image_id]
            if source_image_id not in accepted_ids:
                # The image was skipped as a duplicate; adding its annotations
                # would attach a second copy to the first occurrence.
                continue
            annotation["id"] = next_annotation_id
            next_annotation_id += 1
            annotation["image_id"] = remapped_image_id
            combined_data["annotations"].append(annotation)

    # Write combined data to output file
    output_path = Path(output_file)
    output_path.parent.mkdir(exist_ok=True, parents=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, indent=2)

    side_file_prefix = output_path.name.split(".")[0]
    with open(output_path.parent / f"{side_file_prefix}_image_id_mapping.json", "w", encoding="utf-8") as f:
        json.dump(image_id_mapping, f, indent=2)

    if skip_file_list:
        with open(output_path.parent / f"{side_file_prefix}_skipped.txt", "w", encoding="utf-8") as f:
            f.write("\n".join(str(x) for x in skip_file_list))

    print(f"Combined data written to {output_file}")
    print(f"Total images: {len(combined_data['images'])}")
    print(f"Total annotations: {len(combined_data['annotations'])}")
    print(f"Total categories: {len(combined_data['categories'])}")
    print(f"Skipped images: {len(skip_file_list)}")

    # One "<original id> <new id>" entry per accepted image, in insertion order
    image_id_mapping_list = [[f"{k} {v}"] for k, v in image_id_mapping.items()]
    return combined_data, image_id_mapping_list
if __name__ == "__main__":
    # Command-line entry point: merges the per-image JSON files of the train,
    # val and test splits found under --input into one annotation file per
    # split under --output.
    import argparse

    parser = argparse.ArgumentParser(description="Combine multiple COCO-style JSON annotation files")
    # Both options are root directories: the per-split glob pattern and the
    # output file names are derived from them below.
    parser.add_argument(
        "--input",
        required=True,
        help="Input root directory containing '<split>_jsons/*.json' annotation files and '<split>/*.png' images",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Output root directory; receives 'annotations/<split>.json' and the renamed images in '<split>/'",
    )
    args = parser.parse_args()

    splits = ["train", "val", "test"]
    start_image_id = 0
    for i, split in enumerate(splits):
        if i > 0:
            # Offset this split's image ids by the number of PNG files in the
            # previous split's input folder, accumulated over all earlier splits.
            previous_split_dir = Path(f"{args.input}/{splits[i - 1]}")
            start_image_id += sum(1 for _ in previous_split_dir.glob("*.png"))

        combine_json_files(
            f"{args.input}/{split}_jsons/*.json",
            args.input,
            split,
            f"{args.output}/annotations/{split}.json",
            output_image_dir=f"{args.output}/{split}",
            start_image_id=start_image_id,
        )