| |
| |
|
|
| """ |
ScanNet Dataset Checker
Checks the dataset for invalid scenes whose point count is zero or negative.
| Author: Assistant |
| """ |
|
|
| import os |
| import numpy as np |
| import argparse |
|
|
def check_scene(scene_path):
    """Validate a single scene by counting the points stored in its coord.npy.

    A status line (OK / INVALID / MISSING / ERROR) is printed for the scene.

    Args:
        scene_path (str): Full path of the scene directory,
            e.g. ".../train/scene0007_00".

    Returns:
        tuple: (is_valid, point_count)
            is_valid (bool): True only when the first dimension of the loaded
                array is greater than zero.
            point_count (int): Size of the first dimension; 0 when coord.npy
                is missing or could not be read.
    """
    npy_path = os.path.join(scene_path, "coord.npy")

    if os.path.exists(npy_path):
        try:
            n_points = np.load(npy_path).shape[0]
            has_points = n_points > 0
            if has_points:
                print(f"✅ [OK] {scene_path} (Count: {n_points})")
            else:
                print(f"❌ [INVALID] Point count <= 0: {scene_path} (Count: {n_points})")
            return has_points, n_points
        except Exception as err:
            # Any failure while reading (corrupt file, unexpected array shape, ...)
            # is reported and treated as an invalid scene so the scan continues.
            print(f"❌ [ERROR] Failed to load {npy_path}: {err}")
            return False, 0

    print(f"❌ [MISSING] coord.npy not found: {scene_path}")
    return False, 0
|
|
def check_split_file(split_file_path, data_root):
    """Check every scene listed in a split file (e.g. clean_train.txt).

    Each non-blank line of the split file is taken as a scene name, joined
    onto ``data_root`` and passed to ``check_scene``. Progress is printed
    every 10 scenes and after the last one, followed by a summary that lists
    each invalid scene.

    Args:
        split_file_path (str): Path of the split file.
        data_root (str): Data root directory; scene names from the split file
            are joined onto this directory.
    """
    print(f"\n🔍 Checking split file: {split_file_path}")
    print(f"📁 Data root: {data_root}\n")

    if not os.path.exists(split_file_path):
        print(f"❌ Split file not found: {split_file_path}")
        return

    # Strip each line and drop the ones that end up empty.
    with open(split_file_path, 'r') as handle:
        listed = [entry for entry in map(str.strip, handle) if entry]

    n_listed = len(listed)
    failures = []

    for position, name in enumerate(listed, start=1):
        scene_dir = os.path.join(data_root, name)
        ok, n_points = check_scene(scene_dir)

        if not ok:
            failures.append({'name': name, 'path': scene_dir, 'count': n_points})

        # Report progress every 10 scenes and once more on the final scene.
        if position == n_listed or position % 10 == 0:
            print(f" Progress: {position}/{n_listed}")

    n_bad = len(failures)
    print(f"\n📊 Summary for {os.path.basename(split_file_path)}:")
    print(f" Total Scenes: {n_listed}")
    print(f" Invalid Scenes: {n_bad}")

    if not failures:
        print(" ✅ All scenes are valid.")
        return

    print(f"\n❗ Found {n_bad} INVALID scenes:")
    for entry in failures:
        print(f" - {entry['path']} (Point Count: {entry['count']})")
|
|
def main():
    """Parse the command-line options and check the train and validation splits.

    All four options (--train_split, --val_split, --train_root, --val_root)
    are required. The train split is checked first, then the validation split.
    """
    parser = argparse.ArgumentParser(
        description="Check ScanNet dataset for invalid scenes with zero or negative points."
    )

    # Every option is a required string, so register them from one table.
    option_help = (
        ("--train_split", "Path to the train split file (e.g., clean_train.txt)"),
        ("--val_split", "Path to the validation split file (e.g., clean_val.txt)"),
        ("--train_root", "Root directory for training data (e.g., .../processed/train)"),
        ("--val_root", "Root directory for validation data (e.g., .../processed/val)"),
    )
    for flag, help_text in option_help:
        parser.add_argument(flag, type=str, required=True, help=help_text)

    args = parser.parse_args()

    banner = "============================================"
    print(banner)
    print(" ScanNet Dataset Integrity Checker")
    print(banner)

    jobs = (
        (args.train_split, args.train_root),
        (args.val_split, args.val_root),
    )
    for split_path, root_dir in jobs:
        check_split_file(split_path, root_dir)

    print("\n" + banner)
    print(" Check Finished")
    print(banner)
|
|
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()