SceneGraphNet / src /prepare_images.py
Kalp Kanungo
Initial commit - Multimodal AI project
c858478
raw
history blame contribute delete
755 Bytes
import json
import os
INPUT_PATH = "data/relationship_dataset/subset.json"
OUTPUT_PATH = "data/relationship_dataset/image_paths.json"
IMAGE_DIR_1 = "data/visual_genome/images/VG_100K"
IMAGE_DIR_2 = "data/visual_genome/images2/VG_100K_2"
with open(INPUT_PATH) as f:
data = json.load(f)
image_ids = set([item["image_id"] for item in data])
image_map = {}
for img_id in image_ids:
filename = f"{img_id}.jpg"
path1 = os.path.join(IMAGE_DIR_1, filename)
path2 = os.path.join(IMAGE_DIR_2, filename)
if os.path.exists(path1):
image_map[img_id] = path1
elif os.path.exists(path2):
image_map[img_id] = path2
with open(OUTPUT_PATH, "w") as f:
json.dump(image_map, f)
print("Total images found:", len(image_map))