Spaces:

bulatko
/

zoo3d

Paused

App Files Files Community

zoo3d / MaskClustering /proc_masks_fixed.py

bulatko

adding real MK

55e58d1 3 days ago

raw

history blame

8.45 kB

	import numpy as np
	import os
	import cv2
	from pathlib import Path
	import trimesh as tm
	from sklearn.neighbors import KDTree

	def unproject_depth_to_world(depth_x, depth_y, depths, intrinsics, extrinsics):
	    """Back-project pixels with known depth into world coordinates.

	    Each pixel ``(depth_x[i], depth_y[i])`` is turned into a camera ray with
	    ``K^-1 @ [x, y, 1]^T``, scaled by ``depths[i]`` to get a point in the
	    camera frame, and then moved to the world frame with the inverse of
	    ``extrinsics``.

	    Args:
	        depth_x: pixel x coordinates (first component of the homogeneous
	            pixel vector). Any array-like; flattened to 1-D.
	        depth_y: pixel y coordinates (second component). Any array-like;
	            flattened to 1-D.
	        depths: depth value per pixel (the original author documents these
	            as metres). Any array-like; flattened to 1-D. A single value
	            broadcasts over all pixels.
	        intrinsics: 3x3 camera intrinsic matrix.
	        extrinsics: 4x4 world-to-camera matrix; it is inverted here to
	            obtain the camera-to-world transform.

	    Returns:
	        ``(N, 3)`` float array of world-space points, one row per pixel.
	        Empty inputs yield an array of shape ``(0, 3)``.

	    Raises:
	        ValueError: if the coordinate/depth arrays have incompatible
	            lengths (raised by NumPy while stacking or broadcasting).
	        numpy.linalg.LinAlgError: if either matrix is singular.
	    """
	    # Normalise to flat float arrays so lists, tuples and scalars work too;
	    # the original indexed `depths[np.newaxis, :]`, which fails on a list.
	    us = np.asarray(depth_x, dtype=np.float64).ravel()
	    vs = np.asarray(depth_y, dtype=np.float64).ravel()
	    zs = np.asarray(depths, dtype=np.float64).ravel()
	    ones = np.ones(us.shape[0])

	    # Homogeneous pixel coordinates, one column per pixel: shape (3, N).
	    pixels_h = np.vstack([us, vs, ones])

	    # K^-1 @ [u, v, 1]^T gives rays with unit z in the camera frame.
	    rays = np.linalg.inv(intrinsics) @ pixels_h

	    # Scaling a unit-z ray by the depth gives the 3-D camera-frame point.
	    cam_points = rays * zs[np.newaxis, :]

	    # Append w = 1 so the 4x4 transform can be applied.
	    cam_points_h = np.vstack([cam_points, ones])

	    # extrinsics is world-to-camera, so its inverse maps camera -> world.
	    world_h = np.linalg.inv(extrinsics) @ cam_points_h

	    # De-homogenise; w stays 1 for a rigid transform, the division keeps
	    # the result correct for any invertible 4x4 matrix.
	    world_points = world_h[:3, :] / world_h[3, :]

	    return world_points.T

	def process_mask_with_morphology(mask, kernel_size=5,
	                                 post_process_erosion=True,
	                                 post_process_dilation=True,
	                                 post_process_component=True,
	                                 post_process_component_num=1):
	    """Clean up a 2-D mask with erosion, dilation and component filtering.

	    Args:
	        mask: 2-D array-like; every non-zero element counts as foreground.
	        kernel_size: half-width of the square structuring element; the
	            kernel actually used is ``(2 * kernel_size + 1)`` pixels wide.
	        post_process_erosion: if true, erode the mask once with the kernel.
	        post_process_dilation: if true, dilate the (possibly eroded) mask
	            once with the same kernel.
	        post_process_component: if true, keep only the largest connected
	            components of the result.
	        post_process_component_num: how many of the largest components to
	            keep when ``post_process_component`` is set.

	    Returns:
	        Boolean array with the same height and width as ``mask``.
	    """
	    # Force a strict 0/255 uint8 image. The original `np.uint8(mask) * 255`
	    # wrapped around for masks that were not already 0/1 (e.g. 256 -> 0).
	    img = np.asarray(mask, dtype=bool).astype(np.uint8) * 255

	    # Square structuring element shared by both morphological steps.
	    side = kernel_size * 2 + 1
	    kernel = np.ones((side, side), np.uint8)

	    # Erosion followed by dilation with the same kernel is a morphological
	    # opening: it removes specks and thin bridges smaller than the kernel.
	    if post_process_erosion:
	        img = cv2.erode(img, kernel, iterations=1)
	    if post_process_dilation:
	        img = cv2.dilate(img, kernel, iterations=1)

	    # Label 0 is the background; foreground components are 1..num_labels-1.
	    num_labels, labels_im = cv2.connectedComponents(img)

	    if post_process_component and num_labels > 1:
	        # One pass over the label image gives every component's area,
	        # instead of one full-image comparison per label.
	        areas = np.bincount(labels_im.ravel(), minlength=num_labels)[1:]
	        # Stable sort on the negated areas = descending by area with ties
	        # kept in ascending label order (same order the list sort produced).
	        ranked_labels = np.argsort(-areas, kind="stable") + 1
	        largest_components = ranked_labels[:post_process_component_num]
	        img = np.isin(labels_im, largest_components).astype(np.uint8)

	    return img.astype(bool)

	if __name__ == "__main__":
	    # Debug driver: takes the first object listed in infos.npy, back-projects
	    # its mask from the object's first frame into world space and reports how
	    # many mesh vertices of the scan lie close to the resulting points.
	    scene_id = "scene0011_00"
	    path = Path(f"/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/data/prediction/scannet/test/{scene_id}.npz")
	    base_path = Path(f"/home/jovyan/users/lemeshko/scripts/gsam_result/yolo/{scene_id}")
	    source_path = Path(f"/home/jovyan/users/kolodiazhnyi/data/scannet/posed_images/{scene_id}")
	    scan_path = Path(f"data/scannet/processed/{scene_id}/{scene_id}_vh_clean_2.ply")
	    info_path = base_path / "infos.npy"

	    # Fail early with a clear message when a required input is missing.
	    if not scan_path.exists():
	        raise FileNotFoundError(f"Mesh file not found: {scan_path}")
	    if not info_path.exists():
	        raise FileNotFoundError(f"Info file not found: {info_path}")

	    # Load the scene mesh and the per-object metadata.
	    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only use
	    # it on files from a trusted source.
	    mesh = tm.load(scan_path)
	    vertices = mesh.vertices
	    # NOTE(review): `data` is not used anywhere below in this script.
	    data = np.load(path, allow_pickle=True)
	    info_data = np.load(info_path, allow_pickle=True).item()

	    # Only the first object in the dictionary is processed.
	    if not info_data:
	        raise ValueError(f"No objects listed in {info_path}")
	    key, item = next(iter(info_data.items()))
	    print(f"Processing object {key}")
	    print(f"Object info: {item}")

	    frames = item['frames']
	    if len(frames) == 0:
	        raise ValueError(f"Object {key} has no frames in {info_path}")
	    intrinsics = np.loadtxt(source_path / 'intrinsic.txt')

	    # Only the first frame of that object is processed.
	    frame = frames[0]
	    frame_id = str(frame['frame_id']).zfill(5)
	    mask_path = frame['mask_path']
	    mask_path = base_path / mask_path

	    # Per-frame inputs: depth image, camera pose and the label mask.
	    depth_file = source_path / f'{frame_id}.png'
	    extrinsics_file = source_path / f'{frame_id}.txt'

	    if not depth_file.exists():
	        raise FileNotFoundError(f"Depth file not found: {depth_file}")
	    if not extrinsics_file.exists():
	        raise FileNotFoundError(f"Extrinsics file not found: {extrinsics_file}")
	    if not mask_path.exists():
	        raise FileNotFoundError(f"Mask file not found: {mask_path}")

	    # The mask file stores a label image; select the pixels of this object.
	    mask = np.load(mask_path, allow_pickle=True)
	    mask = mask == key

	    # cv2.imread does not raise on failure, it returns None; check that
	    # explicitly instead of crashing in the division below.
	    depth_raw = cv2.imread(str(depth_file), cv2.IMREAD_UNCHANGED)
	    if depth_raw is None:
	        raise IOError(f"Failed to read depth image: {depth_file}")
	    depth = depth_raw / 1000.0  # convert to metres
	    extrinsics = np.loadtxt(extrinsics_file)

	    print(f"Original mask shape: {mask.shape}")
	    print(f"Depth shape: {depth.shape}")
	    print(f"Mask pixels count: {mask.sum()}")

	    # Clean the mask: erosion + dilation, then keep the largest component.
	    processed_mask = process_mask_with_morphology(
	        mask,
	        kernel_size=5,
	        post_process_erosion=True,
	        post_process_dilation=True,
	        post_process_component=True,
	        post_process_component_num=1
	    )

	    # Resize the mask to the depth map resolution. cv2.resize takes
	    # (width, height), hence the reversed shape; nearest-neighbour keeps
	    # the mask binary.
	    final_mask = cv2.resize(
	        processed_mask.astype(np.uint8),
	        depth.shape[::-1],
	        interpolation=cv2.INTER_NEAREST_EXACT
	    ).astype(bool)

	    print(f"Final mask shape: {final_mask.shape}")
	    print(f"Final mask pixels count: {final_mask.sum()}")

	    # Pixel coordinates (row = y, column = x) and depths under the mask.
	    depth_y, depth_x = np.where(final_mask)
	    depths = depth[final_mask]

	    # Drop pixels with missing (zero) or implausibly large depth.
	    valid_depth = (depths > 0) & (depths < 10.0)
	    depth_x = depth_x[valid_depth]
	    depth_y = depth_y[valid_depth]
	    depths = depths[valid_depth]

	    print(f"Valid depth points: {len(depths)}")

	    if len(depths) == 0:
	        print("No valid depth points found!")
	    else:
	        # Back-project the masked pixels into world coordinates.
	        world_points = unproject_depth_to_world(depth_x, depth_y, depths, intrinsics, extrinsics)

	        print(f"World points shape: {world_points.shape}")
	        print("World points range:")
	        print(f" X: [{world_points[:, 0].min():.3f}, {world_points[:, 0].max():.3f}]")
	        print(f" Y: [{world_points[:, 1].min():.3f}, {world_points[:, 1].max():.3f}]")
	        print(f" Z: [{world_points[:, 2].min():.3f}, {world_points[:, 2].max():.3f}]")

	        # Nearest mesh vertex for every back-projected point.
	        tree = KDTree(vertices)
	        distances, indices = tree.query(world_points, k=1)

	        # Keep only matches closer than max_distance (in the mesh's units).
	        max_distance = 0.05
	        valid_matches = distances.flatten() < max_distance

	        print(f"Points within {max_distance}m: {valid_matches.sum()}/{len(valid_matches)}")

	        # Per-vertex boolean mask of the vertices hit by the object.
	        point_mask = np.zeros(len(vertices), dtype=bool)
	        if valid_matches.any():
	            point_mask[indices.flatten()[valid_matches]] = True

	        print(f"Final point mask sum: {point_mask.sum()}")
	        print(f"Mesh vertices total: {len(vertices)}")
	        print(f"Coverage: {point_mask.sum()/len(vertices)*100:.2f}%")