biptv3 / code /pointcept_framework /check_scannet_data.py

Add core reproduction code (binarization layers, PTv3, superpoint ops, min-repro pack)

7b95dc2 verified 14 days ago

4.15 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	"""
	ScanNet Dataset Checker
	Checks whether the dataset contains invalid scenes whose point count is zero or negative.
	Author: Assistant
	"""

	import os
	import numpy as np
	import argparse

	def check_scene(scene_path):
	    """Validate a single scene directory by inspecting its ``coord.npy``.

	    Args:
	        scene_path (str): Full path of the scene directory,
	            e.g. ".../train/scene0007_00".

	    Returns:
	        tuple: ``(is_valid, point_count)`` where ``is_valid`` is True only
	        when the array's first dimension is greater than zero, and
	        ``point_count`` is that first-dimension size. ``(False, 0)`` is
	        returned when the file is missing or cannot be processed.
	    """
	    npy_path = os.path.join(scene_path, "coord.npy")

	    if os.path.exists(npy_path):
	        try:
	            n_points = np.load(npy_path).shape[0]
	            has_points = n_points > 0
	            if has_points:
	                # Emitted for every scene that passes the check.
	                print(f"✅ [OK] {scene_path} (Count: {n_points})")
	            else:
	                print(f"❌ [INVALID] Point count <= 0: {scene_path} (Count: {n_points})")
	            return has_points, n_points
	        except Exception as err:
	            # Any failure while loading or inspecting the array is reported
	            # and the scene is treated as invalid rather than aborting the run.
	            print(f"❌ [ERROR] Failed to load {npy_path}: {err}")
	            return False, 0

	    print(f"❌ [MISSING] coord.npy not found: {scene_path}")
	    return False, 0

	def check_split_file(split_file_path, data_root):
	    """Check every scene listed in a split file (e.g. ``clean_train.txt``).

	    Each non-blank line of the split file is treated as a scene name, joined
	    onto ``data_root`` and passed to ``check_scene``. Progress, a summary and
	    the list of invalid scenes are printed to stdout; nothing is returned.

	    Args:
	        split_file_path (str): Path to the split file.
	        data_root (str): Data root directory; scene names from the split
	            file are joined onto this directory.
	    """
	    print(f"\n🔍 Checking split file: {split_file_path}")
	    print(f"📁 Data root: {data_root}\n")

	    # isfile rather than exists: a directory with this name would pass an
	    # existence check and then crash inside open().
	    if not os.path.isfile(split_file_path):
	        print(f"❌ Split file not found: {split_file_path}")
	        return

	    # Explicit encoding so the result does not depend on the platform locale.
	    with open(split_file_path, "r", encoding="utf-8") as f:
	        scene_names = [name for name in (line.strip() for line in f) if name]

	    invalid_scenes = []
	    total_scenes = len(scene_names)

	    for i, scene_name in enumerate(scene_names, 1):
	        scene_path = os.path.join(data_root, scene_name)
	        is_valid, point_count = check_scene(scene_path)

	        if not is_valid:
	            invalid_scenes.append({
	                'name': scene_name,
	                'path': scene_path,
	                'count': point_count,
	            })

	        # Report progress every 10 scenes and once more at the end.
	        if i % 10 == 0 or i == total_scenes:
	            print(f" Progress: {i}/{total_scenes}")

	    print(f"\n📊 Summary for {os.path.basename(split_file_path)}:")
	    print(f" Total Scenes: {total_scenes}")
	    print(f" Invalid Scenes: {len(invalid_scenes)}")

	    if invalid_scenes:
	        print(f"\n❗ Found {len(invalid_scenes)} INVALID scenes:")
	        for scene in invalid_scenes:
	            print(f" - {scene['path']} (Point Count: {scene['count']})")
	    elif total_scenes == 0:
	        # An empty split means nothing was verified; do not claim success.
	        print(" ⚠️ Split file lists no scenes; nothing was checked.")
	    else:
	        print(" ✅ All scenes are valid.")

	def main():
	    """Parse the command-line options and check the train and validation splits.

	    All four options are required. The train split is checked first, then
	    the validation split, with a banner printed before and after.
	    """
	    cli = argparse.ArgumentParser(description="Check ScanNet dataset for invalid scenes with zero or negative points.")

	    # (flag, help text) pairs, registered in the order they appear in --help.
	    options = (
	        ("--train_split", "Path to the train split file (e.g., clean_train.txt)"),
	        ("--val_split", "Path to the validation split file (e.g., clean_val.txt)"),
	        ("--train_root", "Root directory for training data (e.g., .../processed/train)"),
	        ("--val_root", "Root directory for validation data (e.g., .../processed/val)"),
	    )
	    for flag, help_text in options:
	        cli.add_argument(flag, type=str, required=True, help=help_text)

	    args = cli.parse_args()

	    rule = "============================================"
	    print(rule)
	    print(" ScanNet Dataset Integrity Checker")
	    print(rule)

	    # Training split first, then the validation split.
	    jobs = (
	        (args.train_split, args.train_root),
	        (args.val_split, args.val_root),
	    )
	    for split_file, root_dir in jobs:
	        check_split_file(split_file, root_dir)

	    print("\n" + rule)
	    print(" Check Finished")
	    print(rule)

	# Run the checker only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()