biptv3 / code /pointcept_framework /check_scannet_data.py
YYYYYYUUU's picture
Add core reproduction code (binarization layers, PTv3, superpoint ops, min-repro pack)
7b95dc2 verified
Raw
History Blame Contribute Delete
4.15 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ScanNet Dataset Checker
用于检查数据集中是否存在点数为0或负数的无效场景。
Author: Assistant
"""
import os
import numpy as np
import argparse
def check_scene(scene_path):
    """
    Validate one scene directory by inspecting its ``coord.npy`` file.

    A status line is printed for every outcome: missing file, load failure,
    empty array, or a valid scene.

    Args:
        scene_path (str): Full path of the scene directory,
            e.g. ".../train/scene0007_00".

    Returns:
        tuple: (is_valid, point_count)
            is_valid (bool): True only when the first axis of the loaded
                array has a size greater than zero.
            point_count (int): Size of the first axis of the loaded array;
                0 when the file is missing or could not be processed.
    """
    coord_file = os.path.join(scene_path, "coord.npy")

    # Guard clause: without the coordinate file there is nothing to inspect.
    if not os.path.exists(coord_file):
        print(f"❌ [MISSING] coord.npy not found: {scene_path}")
        return False, 0

    try:
        num_points = np.load(coord_file).shape[0]
        has_points = num_points > 0
        if has_points:
            print(f"✅ [OK] {scene_path} (Count: {num_points})")
        else:
            print(f"❌ [INVALID] Point count <= 0: {scene_path} (Count: {num_points})")
        return has_points, num_points
    except Exception as err:
        # Any failure while loading or inspecting the array marks the scene
        # as invalid instead of aborting the whole run.
        print(f"❌ [ERROR] Failed to load {coord_file}: {err}")
        return False, 0
def check_split_file(split_file_path, data_root):
    """
    Run the per-scene check over every scene named in a split file.

    The split file (e.g. clean_train.txt) is read line by line; blank lines
    are ignored and surrounding whitespace is stripped. Progress is printed
    after every tenth scene and after the last one, followed by a summary
    that lists each invalid scene.

    Args:
        split_file_path (str): Path of the split file.
        data_root (str): Data root directory; each scene name from the split
            file is joined onto it to form the scene path.

    Returns:
        None. Results are reported on standard output only. The function
        returns early when the split file does not exist.
    """
    print(f"\n🔍 Checking split file: {split_file_path}")
    print(f"📁 Data root: {data_root}\n")

    if not os.path.exists(split_file_path):
        print(f"❌ Split file not found: {split_file_path}")
        return

    with open(split_file_path, 'r') as handle:
        stripped_lines = [raw.strip() for raw in handle]
    scene_names = [entry for entry in stripped_lines if entry]

    total_scenes = len(scene_names)
    failures = []
    for index, scene_name in enumerate(scene_names, start=1):
        scene_path = os.path.join(data_root, scene_name)
        is_valid, point_count = check_scene(scene_path)
        if not is_valid:
            failures.append(dict(name=scene_name, path=scene_path, count=point_count))

        # Report progress after every 10 scenes and once more at the end.
        reached_end = index == total_scenes
        if reached_end or index % 10 == 0:
            print(f" Progress: {index}/{total_scenes}")

    num_invalid = len(failures)
    print(f"\n📊 Summary for {os.path.basename(split_file_path)}:")
    print(f" Total Scenes: {total_scenes}")
    print(f" Invalid Scenes: {num_invalid}")

    if not failures:
        print(" ✅ All scenes are valid.")
        return

    print(f"\n❗ Found {num_invalid} INVALID scenes:")
    for entry in failures:
        print(f" - {entry['path']} (Point Count: {entry['count']})")
def main():
    """
    Command-line entry point.

    Parses the four required path options, then checks the training split
    followed by the validation split, framed by start and end banners.
    """
    parser = argparse.ArgumentParser(description="Check ScanNet dataset for invalid scenes with zero or negative points.")

    # All options are required string paths; register them in a fixed order.
    required_options = (
        ("--train_split", "Path to the train split file (e.g., clean_train.txt)"),
        ("--val_split", "Path to the validation split file (e.g., clean_val.txt)"),
        ("--train_root", "Root directory for training data (e.g., .../processed/train)"),
        ("--val_root", "Root directory for validation data (e.g., .../processed/val)"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, type=str, required=True, help=help_text)
    args = parser.parse_args()

    banner = "============================================"
    print(banner)
    print(" ScanNet Dataset Integrity Checker")
    print(banner)

    # Check the training set first, then the validation set.
    split_jobs = (
        (args.train_split, args.train_root),
        (args.val_split, args.val_root),
    )
    for split_file, root_dir in split_jobs:
        check_split_file(split_file, root_dir)

    print(f"\n{banner}")
    print(" Check Finished")
    print(banner)
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()