# Spaces: bulatko / zoo3d (Paused)
# File size: 8,448 Bytes
# 55e58d1

import numpy as np
import os
import cv2
from pathlib import Path
import trimesh as tm
from sklearn.neighbors import KDTree

def unproject_depth_to_world(depth_x, depth_y, depths, intrinsics, extrinsics):
    """
    Back-project pixels with known depth into world coordinates.

    Args:
        depth_x, depth_y: pixel coordinates (u and v), 1-D array-likes of
            equal length N. Plain lists/tuples are accepted.
        depths: depth of each pixel, 1-D array-like of length N. The result
            is expressed in the same length unit as these values.
        intrinsics: camera intrinsic matrix (3x3). A larger homogeneous
            matrix (e.g. 4x4) is also accepted; only its upper-left 3x3
            block is used.
        extrinsics: camera extrinsic matrix (4x4, world-to-camera). It is
            inverted internally to obtain the camera-to-world transform.

    Returns:
        points: world coordinates of the points, array of shape (N, 3).
    """
    # Coerce to float arrays so that lists/tuples work with the slicing below.
    depth_x = np.asarray(depth_x, dtype=float)
    depth_y = np.asarray(depth_y, dtype=float)
    depths = np.asarray(depths, dtype=float)

    # Slicing is a no-op for a 3x3 matrix and strips the homogeneous
    # row/column from a 4x4 one.
    intrinsics_3x3 = np.asarray(intrinsics, dtype=float)[:3, :3]

    ones = np.ones(len(depth_x))

    # 1. Homogeneous pixel coordinates [u, v, 1]^T, shape (3, N).
    pixel_coords = np.vstack([depth_x, depth_y, ones])

    # 2. K^-1 * [u, v, 1]^T gives normalized camera rays.
    camera_rays = np.linalg.inv(intrinsics_3x3) @ pixel_coords

    # 3. Scale each ray by its depth to get 3D points in the camera frame.
    camera_points = camera_rays * depths[np.newaxis, :]

    # 4. Append the homogeneous coordinate, shape (4, N).
    camera_points_homogeneous = np.vstack([camera_points, ones])

    # 5. Extrinsics are world-to-camera, so the inverse maps camera -> world.
    world_points_homogeneous = np.linalg.inv(extrinsics) @ camera_points_homogeneous

    # 6. De-homogenize and return one point per row.
    world_points = world_points_homogeneous[:3, :] / world_points_homogeneous[3, :]

    return world_points.T

def process_mask_with_morphology(mask, kernel_size=5,
                                 post_process_erosion=True,
                                 post_process_dilation=True,
                                 post_process_component=True,
                                 post_process_component_num=1):
    """
    Clean up a binary mask with morphological operations.

    Args:
        mask: binary mask; converted to uint8 and scaled to 0/255 internally.
        kernel_size: half-size of the square structuring element; the kernel
            is (2 * kernel_size + 1) pixels on each side.
        post_process_erosion: apply one erosion pass.
        post_process_dilation: apply one dilation pass (after the erosion).
        post_process_component: keep only the largest connected components.
        post_process_component_num: how many of the largest components to keep.

    Returns:
        Boolean array with the same shape as the processed image.
    """
    img = np.uint8(mask) * 255

    # Square all-ones structuring element shared by both operations.
    side = kernel_size * 2 + 1
    kernel = np.ones((side, side), np.uint8)

    if post_process_erosion:
        img = cv2.erode(img, kernel, iterations=1)

    if post_process_dilation:
        img = cv2.dilate(img, kernel, iterations=1)

    if post_process_component:
        # Label 0 is the background; foreground components are 1..num_labels-1.
        num_labels, labels_im = cv2.connectedComponents(img)

        if num_labels > 1:
            # One pass over the label image yields every component's area,
            # instead of one full-image comparison per label.
            areas = np.bincount(labels_im.ravel(), minlength=num_labels)[1:]

            # Stable sort on the negated areas: largest first, ties resolved
            # in favour of the smaller label id. "+ 1" maps positions back
            # to label ids (the background was dropped above).
            ranked_labels = np.argsort(-areas, kind="stable") + 1
            largest_components = ranked_labels[:post_process_component_num]

            img = np.isin(labels_im, largest_components).astype(np.uint8)

    return img.astype(bool)

def main():
    """
    Project one object's 2D mask from a single frame onto the scene mesh.

    Loads the scene mesh and the per-object info dict, takes the first object
    and its first frame, cleans the mask, back-projects the masked depth
    pixels into world space and marks the nearest mesh vertex of every point
    that lies within ``max_distance``. Statistics are printed to stdout.

    Raises:
        FileNotFoundError: if the mesh, info, mask, depth or extrinsics file
            does not exist.
        OSError: if the depth image exists but cannot be decoded.
    """
    scene_id = "scene0011_00"
    base_path = Path(f"/home/jovyan/users/lemeshko/scripts/gsam_result/yolo/{scene_id}")
    source_path = Path(f"/home/jovyan/users/kolodiazhnyi/data/scannet/posed_images/{scene_id}")
    scan_path = Path(f"data/scannet/processed/{scene_id}/{scene_id}_vh_clean_2.ply")
    info_path = base_path / "infos.npy"

    # Fail early with a clear message if a scene-level input is missing.
    if not scan_path.exists():
        raise FileNotFoundError(f"Mesh file not found: {scan_path}")
    if not info_path.exists():
        raise FileNotFoundError(f"Info file not found: {info_path}")

    # Load the scene-level data.
    mesh = tm.load(scan_path)
    vertices = mesh.vertices
    # infos.npy holds a pickled dict (unwrapped with .item()), hence
    # allow_pickle=True. Only load such files from trusted sources.
    info_data = np.load(info_path, allow_pickle=True).item()

    # Only the first object in the info dict is processed.
    key, item = next(iter(info_data.items()))
    print(f"Processing object {key}")
    print(f"Object info: {item}")

    frames = item['frames']
    intrinsics = np.loadtxt(source_path / 'intrinsic.txt')

    # Only the first frame of that object is processed.
    frame = frames[0]
    frame_id = str(frame['frame_id']).zfill(5)
    mask_path = base_path / frame['mask_path']

    # Check the per-frame inputs as well.
    depth_file = source_path / f'{frame_id}.png'
    extrinsics_file = source_path / f'{frame_id}.txt'

    if not mask_path.exists():
        raise FileNotFoundError(f"Mask file not found: {mask_path}")
    if not depth_file.exists():
        raise FileNotFoundError(f"Depth file not found: {depth_file}")
    if not extrinsics_file.exists():
        raise FileNotFoundError(f"Extrinsics file not found: {extrinsics_file}")

    # Load the mask, the depth map and the extrinsics.
    mask = np.load(mask_path, allow_pickle=True)
    mask = mask == key  # keep only the pixels labelled with this object
    raw_depth = cv2.imread(str(depth_file), -1)
    if raw_depth is None:
        # cv2.imread signals a decoding failure by returning None.
        raise OSError(f"Failed to read depth image: {depth_file}")
    depth = raw_depth / 1000.0  # convert to metres
    # NOTE(review): unproject_depth_to_world inverts this matrix, i.e. it
    # treats the file as world-to-camera — confirm that this matches the
    # convention of the pose files.
    extrinsics = np.loadtxt(extrinsics_file)

    print(f"Original mask shape: {mask.shape}")
    print(f"Depth shape: {depth.shape}")
    print(f"Mask pixels count: {mask.sum()}")

    # Clean the mask with morphological operations.
    processed_mask = process_mask_with_morphology(
        mask,
        kernel_size=5,
        post_process_erosion=True,
        post_process_dilation=True,
        post_process_component=True,
        post_process_component_num=1,
    )

    # Resize the mask to the depth-map resolution. cv2.resize expects
    # (width, height), hence the reversed shape.
    final_mask = cv2.resize(
        processed_mask.astype(np.uint8),
        depth.shape[::-1],
        interpolation=cv2.INTER_NEAREST_EXACT,
    ).astype(bool)

    print(f"Final mask shape: {final_mask.shape}")
    print(f"Final mask pixels count: {final_mask.sum()}")

    # Pixel coordinates covered by the mask (np.where returns rows first).
    depth_y, depth_x = np.where(final_mask)
    depths = depth[final_mask]

    # Drop pixels with invalid depth; keep a plausible range only.
    valid_depth = (depths > 0) & (depths < 10.0)
    depth_x = depth_x[valid_depth]
    depth_y = depth_y[valid_depth]
    depths = depths[valid_depth]

    print(f"Valid depth points: {len(depths)}")

    if len(depths) == 0:
        print("No valid depth points found!")
        return

    # Back-project the masked pixels into world coordinates.
    world_points = unproject_depth_to_world(depth_x, depth_y, depths, intrinsics, extrinsics)

    print(f"World points shape: {world_points.shape}")
    print("World points range:")
    print(f"  X: [{world_points[:, 0].min():.3f}, {world_points[:, 0].max():.3f}]")
    print(f"  Y: [{world_points[:, 1].min():.3f}, {world_points[:, 1].max():.3f}]")
    print(f"  Z: [{world_points[:, 2].min():.3f}, {world_points[:, 2].max():.3f}]")

    # Nearest mesh vertex for every back-projected point.
    tree = KDTree(vertices)
    distances, indices = tree.query(world_points, k=1)

    # Keep only matches closer than the maximum distance (in metres).
    max_distance = 0.05
    valid_matches = distances.flatten() < max_distance

    print(f"Points within {max_distance}m: {valid_matches.sum()}/{len(valid_matches)}")

    # Per-vertex boolean mask; indexing with an empty selection is a no-op,
    # so no separate emptiness check is needed.
    point_mask = np.zeros(len(vertices), dtype=bool)
    point_mask[indices.flatten()[valid_matches]] = True

    print(f"Final point mask sum: {point_mask.sum()}")
    print(f"Mesh vertices total: {len(vertices)}")
    print(f"Coverage: {point_mask.sum()/len(vertices)*100:.2f}%")


if __name__ == "__main__":
    main()