File size: 4,154 Bytes

7b95dc2

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
ScanNet Dataset Checker
用于检查数据集中是否存在点数为0或负数的无效场景。
Author: Assistant
"""

import os
import numpy as np
import argparse

def check_scene(scene_path):
    """
    Check that a single scene directory holds a non-empty ``coord.npy``.

    One status line is printed for every outcome (missing file, load error,
    empty array, or OK).

    Args:
        scene_path (str): Full path of the scene directory,
            e.g. ".../train/scene0007_00".

    Returns:
        tuple: (is_valid, point_count)
            is_valid (bool): True only when the array has at least one point.
            point_count (int): Length of the array's first axis; 0 when the
                file is missing, cannot be loaded, or the array has no axes.
    """
    coord_file = os.path.join(scene_path, "coord.npy")

    if not os.path.exists(coord_file):
        print(f"❌ [MISSING] coord.npy not found: {scene_path}")
        return False, 0

    # This is a best-effort checker: any failure while reading one scene is
    # reported and the scene is marked invalid, rather than aborting the run.
    try:
        coord = np.load(coord_file)
        # A 0-d array has no point axis, so shape[0] would raise IndexError
        # and be misreported as a load failure; count it as zero points.
        point_count = coord.shape[0] if coord.ndim > 0 else 0
    except Exception as e:
        print(f"❌ [ERROR] Failed to load {coord_file}: {e}")
        return False, 0

    if point_count <= 0:
        print(f"❌ [INVALID] Point count <= 0: {scene_path} (Count: {point_count})")
        return False, point_count

    # Per-scene success line; comment this out to report only problems.
    print(f"✅ [OK] {scene_path} (Count: {point_count})")
    return True, point_count

def check_split_file(split_file_path, data_root):
    """
    Check every scene listed in a split file (e.g. clean_train.txt).

    Each non-blank line of the split file is taken as a scene name, joined
    onto ``data_root`` and passed to ``check_scene``. Progress and a final
    summary are printed.

    Args:
        split_file_path (str): Path of the split file.
        data_root (str): Data root directory; scene names from the split
            file are joined onto this directory.

    Returns:
        list[dict] | None: One dict per invalid scene with the keys
            'name', 'path' and 'count' (empty when every scene is valid),
            or None when the split file does not exist.
    """
    print(f"\n🔍 Checking split file: {split_file_path}")
    print(f"📁 Data root: {data_root}\n")

    if not os.path.exists(split_file_path):
        print(f"❌ Split file not found: {split_file_path}")
        return None

    # Explicit encoding keeps parsing independent of the platform locale.
    with open(split_file_path, 'r', encoding="utf-8") as f:
        scene_names = [name for name in (line.strip() for line in f) if name]

    invalid_scenes = []
    total_scenes = len(scene_names)

    for i, scene_name in enumerate(scene_names, 1):
        scene_path = os.path.join(data_root, scene_name)
        is_valid, point_count = check_scene(scene_path)

        if not is_valid:
            invalid_scenes.append({
                'name': scene_name,
                'path': scene_path,
                'count': point_count
            })

        # Print progress every 10 scenes and once more at the end.
        if i % 10 == 0 or i == total_scenes:
            print(f"  Progress: {i}/{total_scenes}")

    print(f"\n📊 Summary for {os.path.basename(split_file_path)}:")
    print(f"   Total Scenes: {total_scenes}")
    print(f"   Invalid Scenes: {len(invalid_scenes)}")

    if invalid_scenes:
        print(f"\n❗ Found {len(invalid_scenes)} INVALID scenes:")
        for scene in invalid_scenes:
            print(f"   - {scene['path']} (Point Count: {scene['count']})")
    else:
        print("   ✅ All scenes are valid.")

    # Hand the findings back so callers can act on them (e.g. set an exit code).
    return invalid_scenes

def main():
    """Parse the command-line options, then check the train and validation splits."""
    cli = argparse.ArgumentParser(description="Check ScanNet dataset for invalid scenes with zero or negative points.")

    # (flag, help text) pairs; every option is a required string.
    options = (
        ("--train_split", "Path to the train split file (e.g., clean_train.txt)"),
        ("--val_split", "Path to the validation split file (e.g., clean_val.txt)"),
        ("--train_root", "Root directory for training data (e.g., .../processed/train)"),
        ("--val_root", "Root directory for validation data (e.g., .../processed/val)"),
    )
    for flag, help_text in options:
        cli.add_argument(flag, type=str, required=True, help=help_text)

    args = cli.parse_args()

    rule = "============================================"

    print(rule)
    print("      ScanNet Dataset Integrity Checker")
    print(rule)

    # Training split first, then the validation split.
    targets = (
        (args.train_split, args.train_root),
        (args.val_split, args.val_root),
    )
    for split_file, root_dir in targets:
        check_split_file(split_file, root_dir)

    print(f"\n{rule}")
    print("                Check Finished")
    print(rule)

# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()