"""
Preprocess scenes by sorting images and generating image/video collections.

This script processes scenes in parallel using a thread pool, updating each scene's
metadata with sorted images, trajectories, and intrinsics, and generating pair,
image-collection, and video-collection data. The processed metadata is saved to a
new file in each scene directory.

Usage:
    python generate_set_arkitscenes.py --root /path/to/data --splits Training Test --max_interval 5.0 --num_workers 8
"""

import os
import os.path as osp
import argparse
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
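
# Expected on-disk layout (inferred from the paths used below):
#   <root>/<split>/all_metadata.npz            lists the scenes of a split ("scenes")
#   <root>/<split>/<scene>/scene_metadata.npz  per-scene metadata read by process_scene()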


def get_timestamp(img_name):
    """
    Extract the timestamp from an image filename.
    Assumes the timestamp is the last underscore-separated token in the name (before the file extension).

    Args:
        img_name (str): The image filename.

    Returns:
        float: The extracted timestamp.
    """
    # Strip the 4-character extension (e.g. ".png") and parse the token after the last "_".
    return float(img_name[:-4].split("_")[-1])
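
# Example (hypothetical filename, assuming the "<prefix>_<timestamp>.png" pattern above):
#   get_timestamp("42445781_3051.238.png") -> 3051.238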


def process_scene(root, split, scene, max_interval):
    """
    Process a single scene by sorting its images by timestamp, updating trajectories,
    intrinsics, and pairings, and generating image/video collections.

    Args:
        root (str): Root directory of the dataset.
        split (str): The dataset split (e.g., 'Training', 'Test').
        scene (str): The scene identifier.
        max_interval (float): Maximum allowed time interval (in seconds) between images
            to consider them part of the same video collection.
    """
    scene_dir = osp.join(root, split, scene)
    metadata_path = osp.join(scene_dir, "scene_metadata.npz")
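
    # Fields this script expects in scene_metadata.npz (inferred from how they are
    # used below; exact shapes and dtypes depend on the upstream preprocessing):
    #   images       : array of image filenames
    #   trajectories : per-image camera poses, aligned with `images`
    #   intrinsics   : per-image camera intrinsics, aligned with `images`
    #   pairs        : (id1, id2, score) triplets describing overlapping image pairs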
    with np.load(metadata_path) as data:
        images = data["images"]
        trajectories = data["trajectories"]
        intrinsics = data["intrinsics"]
        pairs = data["pairs"]

    # Sort images by filename and keep the permutation so the other per-image
    # arrays and the pair indices can be remapped to the sorted order.
    imgs_with_indices = sorted(enumerate(images), key=lambda x: x[1])
    indices, images = zip(*imgs_with_indices)
    indices = np.array(indices)
    index2sorted = {index: i for i, index in enumerate(indices)}

    trajectories = trajectories[indices]
    intrinsics = intrinsics[indices]

    # Remap pair indices from the original ordering to the sorted ordering.
    pairs = [(index2sorted[id1], index2sorted[id2], score) for id1, id2, score in pairs]

    # For each image, collect the images it is paired with along with the pair score.
    image_collection = {}
    for id1, id2, score in pairs:
        image_collection.setdefault(id1, []).append((id2, score))

    # For each image, collect the indices of the subsequent images whose timestamps
    # lie within max_interval seconds of it.
    video_collection = {}
    for i, image in enumerate(images):
        j = i + 1
        while j < len(images) and get_timestamp(images[j]) - get_timestamp(image) <= max_interval:
            j += 1
        video_collection[i] = list(range(i + 1, j))

    # Save the updated metadata next to the original file. The two collection dicts
    # are stored as pickled object arrays, so downstream loaders need
    # np.load(..., allow_pickle=True) to read them back.
    output_path = osp.join(scene_dir, "new_scene_metadata.npz")
    np.savez(
        output_path,
        images=images,
        trajectories=trajectories,
        intrinsics=intrinsics,
        pairs=pairs,
        image_collection=image_collection,
        video_collection=video_collection,
    )
    print(f"Processed scene: {scene}")


def main(args):
    """
    Main function to process scenes across the specified dataset splits in parallel.
    """
    root = args.root
    splits = args.splits
    max_interval = args.max_interval
    num_workers = args.num_workers

    futures = []
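
    # Submit one task per scene across all splits, then drain the results with
    # as_completed so any exception raised inside process_scene is re-raised here.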
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        for split in splits:
            all_meta_path = osp.join(root, split, "all_metadata.npz")
            with np.load(all_meta_path) as data:
                scenes = data["scenes"]

            for scene in scenes:
                futures.append(
                    executor.submit(process_scene, root, split, scene, max_interval)
                )

        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Processing scenes"
        ):
            future.result()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Preprocess scene data to update metadata with sorted images and collections."
    )
    parser.add_argument(
        "--root",
        type=str,
        default="",
        help="Root directory containing the dataset splits.",
    )
    parser.add_argument(
        "--splits",
        type=str,
        nargs="+",
        default=["Training", "Test"],
        help="List of dataset splits to process (e.g., Training Test).",
    )
    parser.add_argument(
        "--max_interval",
        type=float,
        default=5.0,
        help="Maximum time interval (in seconds) between images to consider them in the same video sequence.",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=8,
        help="Number of worker threads for parallel processing.",
    )
    args = parser.parse_args()
    main(args)
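
# Example of reading the output in downstream code (a sketch; `scene_dir` is a
# placeholder path, and allow_pickle=True is needed because the two collection
# fields are stored as pickled Python dicts):
#
#   data = np.load(osp.join(scene_dir, "new_scene_metadata.npz"), allow_pickle=True)
#   images = data["images"]
#   pairs = data["pairs"]
#   image_collection = data["image_collection"].item()  # back to a Python dict
#   video_collection = data["video_collection"].item()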