Spaces:
Running
Running
| import json | |
| import os | |
# Build a JSON index that maps every image id referenced in the relationship
# subset to the path of its "<image_id>.jpg" file on disk.

INPUT_PATH = "data/relationship_dataset/subset.json"
OUTPUT_PATH = "data/relationship_dataset/image_paths.json"

# Directories searched for each image, in priority order: the first directory
# that contains the file wins.
IMAGE_DIR_1 = "data/visual_genome/images/VG_100K"
IMAGE_DIR_2 = "data/visual_genome/images2/VG_100K_2"

# JSON text is UTF-8; pass the encoding explicitly so the result does not
# depend on the platform's locale default.
with open(INPUT_PATH, encoding="utf-8") as f:
    data = json.load(f)

# Several records can reference the same image, so collect the distinct ids
# and probe the filesystem only once per image.
image_ids = {item["image_id"] for item in data}

image_map = {}
for img_id in image_ids:
    filename = f"{img_id}.jpg"
    for image_dir in (IMAGE_DIR_1, IMAGE_DIR_2):
        candidate = os.path.join(image_dir, filename)
        if os.path.exists(candidate):
            image_map[img_id] = candidate
            break
    # Ids whose file is in neither directory are simply left out of the map.

# NOTE: json.dump writes object keys as strings, so non-string ids (e.g. ints)
# come back as strings when this file is loaded again.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(image_map, f)

print("Total images found:", len(image_map))