#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ScanNet Dataset Checker

Checks a processed ScanNet dataset for invalid scenes, i.e. scenes whose
``coord.npy`` is missing, cannot be loaded, or contains zero (or fewer)
points.

Author: Assistant
"""

import argparse
import os
from typing import Dict, List, Optional, Tuple

import numpy as np


def check_scene(scene_path: str) -> Tuple[bool, int]:
    """Check the point data of a single scene.

    Args:
        scene_path: Full path of the scene directory,
            e.g. ".../train/scene0007_00". The directory is expected to
            contain a ``coord.npy`` file.

    Returns:
        A ``(is_valid, point_count)`` tuple. ``is_valid`` is True only when
        ``coord.npy`` exists, loads, and has more than zero rows.
        ``point_count`` is the size of the array's first dimension, or 0
        when the file is missing or could not be read.
    """
    coord_file = os.path.join(scene_path, "coord.npy")

    if not os.path.exists(coord_file):
        print(f"❌ [MISSING] coord.npy not found: {scene_path}")
        return False, 0

    try:
        coord = np.load(coord_file)
        point_count = coord.shape[0]
    except Exception as e:
        # Deliberately broad: a corrupt, truncated, or oddly shaped file must
        # be reported as an invalid scene, not abort the whole dataset scan.
        print(f"❌ [ERROR] Failed to load {coord_file}: {e}")
        return False, 0

    if point_count <= 0:
        print(f"❌ [INVALID] Point count <= 0: {scene_path} (Count: {point_count})")
        return False, point_count

    # Printed for every healthy scene; comment this line out to keep the
    # output limited to problems.
    print(f"✅ [OK] {scene_path} (Count: {point_count})")
    return True, point_count


def check_split_file(split_file_path: str, data_root: str) -> Optional[List[Dict[str, object]]]:
    """Check every scene listed in a split file (e.g. clean_train.txt).

    The split file holds one scene name per line; blank lines are ignored
    and surrounding whitespace is stripped. Progress and a summary are
    printed to stdout.

    Args:
        split_file_path: Path of the split file.
        data_root: Data root directory; each scene name from the split file
            is joined onto this directory.

    Returns:
        A list with one ``{'name', 'path', 'count'}`` dict per invalid scene
        (empty when every scene is valid), or None when the split file does
        not exist.
    """
    print(f"\n🔍 Checking split file: {split_file_path}")
    print(f"📁 Data root: {data_root}\n")

    if not os.path.exists(split_file_path):
        print(f"❌ Split file not found: {split_file_path}")
        return None

    with open(split_file_path, 'r', encoding="utf-8") as f:
        scene_names = [line.strip() for line in f if line.strip()]

    invalid_scenes = []
    total_scenes = len(scene_names)

    for i, scene_name in enumerate(scene_names, 1):
        scene_path = os.path.join(data_root, scene_name)
        is_valid, point_count = check_scene(scene_path)

        if not is_valid:
            invalid_scenes.append({
                'name': scene_name,
                'path': scene_path,
                'count': point_count
            })

        # Report progress every 10 scenes and once more on the last scene.
        if i % 10 == 0 or i == total_scenes:
            print(f" Progress: {i}/{total_scenes}")

    print(f"\n📊 Summary for {os.path.basename(split_file_path)}:")
    print(f" Total Scenes: {total_scenes}")
    print(f" Invalid Scenes: {len(invalid_scenes)}")

    if invalid_scenes:
        print(f"\n❗ Found {len(invalid_scenes)} INVALID scenes:")
        for scene in invalid_scenes:
            print(f" - {scene['path']} (Point Count: {scene['count']})")
    else:
        print(" ✅ All scenes are valid.")

    return invalid_scenes


def main():
    """Parse command-line arguments and check the train and validation splits."""
    parser = argparse.ArgumentParser(description="Check ScanNet dataset for invalid scenes with zero or negative points.")
    parser.add_argument("--train_split", type=str, required=True, help="Path to the train split file (e.g., clean_train.txt)")
    parser.add_argument("--val_split", type=str, required=True, help="Path to the validation split file (e.g., clean_val.txt)")
    parser.add_argument("--train_root", type=str, required=True, help="Root directory for training data (e.g., .../processed/train)")
    parser.add_argument("--val_root", type=str, required=True, help="Root directory for validation data (e.g., .../processed/val)")

    args = parser.parse_args()

    print("============================================")
    print(" ScanNet Dataset Integrity Checker")
    print("============================================")

    # Check the training set.
    check_split_file(args.train_split, args.train_root)

    # Check the validation set.
    check_split_file(args.val_split, args.val_root)

    print("\n============================================")
    print(" Check Finished")
    print("============================================")


if __name__ == "__main__":
    main()