""" This script verifies that the scenes in the chunk files match the scenes in the colmap directory. """ import json import torch from tqdm import tqdm from optgs.misc.io import CustomPath from optgs.scripts.dl3dv_hf_download import validate_sfm_structure if __name__ == '__main__': chunk_dir = CustomPath("datasets/dl3dv-480p-chunks/train") colmap_dir = CustomPath("datasets/dl3dv-colmap-sfm") assert chunk_dir.is_dir(), f"Chunk directory {chunk_dir:link}" assert colmap_dir.is_dir(), f"Colmap directory {colmap_dir:link}" # First check if we have already saved the chunk scene names to a text file chunk_scene_names_file = chunk_dir / "dl3dv_chunk_scenes.txt" if chunk_scene_names_file.is_file(): with chunk_scene_names_file.open("r") as f: chunk_scene_names = set(line.strip() for line in f) print(f"Loaded {len(chunk_scene_names)} scene names from {chunk_scene_names_file}") else: # Collect scene names from chunk files chunk_scene_names = set() for i, chunk_path in tqdm(enumerate(chunk_dir.glob("*.torch"))): chunk = torch.load(chunk_path) for scene in chunk: scene_name = scene["key"] scene_name = scene_name.replace("dl3dv_", "") chunk_scene_names.add(scene_name) if (i + 1) % 10 == 0: print(f"Processed {i + 1} chunk files, collected {len(chunk_scene_names)} unique scene names so far...") print(f"Scenes in chunk files: {len(chunk_scene_names)}") # Save chunk scene names to a text file for reuse with open(chunk_scene_names_file, "w") as f: for scene_name in sorted(chunk_scene_names): f.write(f"{scene_name}\n") # Collect scene names from colmap directory colmap_scene_names = set() unsucc_count = 0 for scene in colmap_dir.iterdir(): # Verify dir structure: should be # scene_name/ # - transforms.json (for now, we don't have this) # - sparse/ # - 0/ # - cameras.bin # - images.bin # - points3D.bin if not validate_sfm_structure(scene, unsucc_count=unsucc_count): unsucc_count += 1 continue # if not scene.is_dir(): # print(f"Warning: {scene:link} is not a directory, skipping...") # continue # # if not (scene / "sparse").is_dir(): # print(f"Warning: {scene:link} does not contain a 'sparse' directory, skipping...") # continue # # if not (scene / "sparse" / "0").is_dir(): # print(f"Warning: {scene:link} does not contain a 'sparse/0' directory, skipping...") # continue # for file in ["cameras.bin", "images.bin", "points3D.bin"]: # if not (scene / "sparse" / "0" / file).is_file(): # print(f"Warning: {scene:link} does not contain a 'sparse/0/{file}' file, skipping...") # continue colmap_scene_names.add(scene.name) # Compare the two sets in_chunk_not_colmap = chunk_scene_names - colmap_scene_names in_colmap_not_chunk = colmap_scene_names - chunk_scene_names print(f"Scenes in chunk but not in colmap: {len(in_chunk_not_colmap)}") for scene_name in sorted(in_chunk_not_colmap): print(f"- {scene_name}") print(f"\nScenes in colmap but not in chunk: {len(in_colmap_not_chunk)}") # for scene_name in sorted(in_colmap_not_chunk): # print(f"- {scene_name}") # Generate index_colmap.json target_train_path = CustomPath("datasets/dl3dv-480p-chunks/train/index_colmap.json") target_test_path = CustomPath("datasets/dl3dv-480p-chunks/test/index_colmap.json") full_train_index_path = CustomPath("datasets/dl3dv-480p-chunks/train/index.json") full_test_index_path = CustomPath("datasets/dl3dv-480p-chunks/test/index.json") # Load the full index files with open(full_train_index_path, "r") as f: full_train_index = json.load(f) # with "dl3dv_" prefix in scene names with open(full_test_index_path, "r") as f: full_test_index = json.load(f) # without "dl3dv_" prefix in scene names # Filter the full index to only include scenes that has colmap data filtered_train_index = {scene_name: data for scene_name, data in full_train_index.items() if scene_name.replace("dl3dv_", "") in colmap_scene_names} filtered_test_index = {scene_name: data for scene_name, data in full_test_index.items() if scene_name in colmap_scene_names} # Save the filtered index files target_train_path.parent.mkdir(parents=True, exist_ok=True) target_test_path.parent.mkdir(parents=True, exist_ok=True) with target_train_path.open("w") as f: json.dump(filtered_train_index, f, indent=4) with target_test_path.open("w") as f: json.dump(filtered_test_index, f, indent=4) print(f"Saved filtered train index with {len(filtered_train_index)} scenes to {target_train_path.resolve()}") print(f"Saved filtered test index with {len(filtered_test_index)} scenes to {target_test_path.resolve()}")