File size: 8,032 Bytes
55e58d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import numpy as np
import os
import cv2
from pathlib import Path
import trimesh as tm
from sklearn.neighbors import KDTree
from tqdm import tqdm
from tqdm.contrib.concurrent import thread_map

def process_frame(frame, vertices, intrinsics, source_path, base_path, key,
                  tree=None, max_distance=0.05, debug_mask_path=None):
    """Mark the scan vertices covered by one object's mask in one frame.

    The 2D mask of object ``key`` is cleaned up, intersected with the valid
    depth pixels, back-projected to 3D with the frame pose, and every
    resulting point is matched to its nearest scan vertex.

    Args:
        frame: Mapping with ``'frame_id'`` and ``'mask_path'`` entries.
        vertices: Scan points; the returned mask has one entry per row.
        intrinsics: 3x3 camera matrix expressed at the depth image resolution.
        source_path: ``Path`` of the directory holding ``<frame_id>.png``
            depth images and ``<frame_id>.txt`` pose files.
        base_path: ``Path`` that ``frame['mask_path']`` is relative to.
        key: Value identifying the object inside the stored mask array.
        tree: Optional prebuilt ``KDTree`` over ``vertices``. Passing one
            avoids rebuilding the tree for every frame; when omitted a tree
            is built on demand.
        max_distance: Largest point-to-vertex distance accepted as a match,
            in the units of ``vertices`` (presumably metres, i.e. 5 cm by
            default — confirm against the scan data).
        debug_mask_path: Optional file to which the final 2D mask is written
            as an image. Disabled by default because concurrent workers would
            otherwise overwrite the same file on every frame.

    Returns:
        Boolean array of length ``len(vertices)``; ``True`` for vertices that
        are the nearest neighbour of at least one back-projected mask pixel
        within ``max_distance``.

    Raises:
        FileNotFoundError: If the depth image cannot be read.
    """
    frame_id = str(frame['frame_id']).zfill(5)
    point_mask = np.zeros(len(vertices), dtype=bool)

    extrinsics = np.loadtxt(source_path / f'{frame_id}.txt')
    if not np.isfinite(extrinsics).all():
        # A pose containing inf/NaN cannot produce usable 3D points, so the
        # frame contributes nothing (presumably how lost tracking is stored
        # in this dataset — TODO confirm).
        return point_mask

    # NOTE(review): allow_pickle=True can execute code embedded in the file;
    # only load mask files from a trusted source.
    mask = np.load(base_path / frame['mask_path'], allow_pickle=True) == key

    depth_path = source_path / f'{frame_id}.png'
    # cv2.imread reports failure by returning None instead of raising.
    raw_depth = cv2.imread(str(depth_path), cv2.IMREAD_UNCHANGED)
    if raw_depth is None:
        raise FileNotFoundError(f"Could not read depth image: {depth_path}")
    # Raw depth is divided by 1000 (presumably millimetres to metres).
    depth = raw_depth / 1000.

    img = _refine_mask(mask)

    # Bring the mask to the depth resolution and drop pixels without depth.
    mask = cv2.resize(img, depth.shape[::-1])
    mask = (mask > 0.5) & (depth > 0)

    if debug_mask_path is not None:
        cv2.imwrite(str(debug_mask_path), (mask * 255).astype(np.uint8))

    depth_y, depth_x = np.where(mask)
    if len(depth_x) == 0:
        return point_mask
    depths = depth[mask]

    # Homogeneous pixel coordinates, one column per selected pixel.
    pixel_coords = np.vstack([depth_x, depth_y, np.ones(len(depth_x))])

    # Step 1: back-project pixels to normalised camera coordinates.
    normalized_coords = np.linalg.inv(intrinsics) @ pixel_coords

    # Step 2: scale by depth to obtain 3D points in the camera frame.
    camera_points = normalized_coords * depths[np.newaxis, :]

    # Step 3: append the homogeneous coordinate.
    camera_points_h = np.vstack([camera_points, np.ones(len(depth_x))])

    # Step 4: the pose is applied directly, i.e. it is treated as a
    # camera-to-world transform.
    world_points_h = extrinsics @ camera_points_h

    # Step 5: de-homogenise and keep only finite points; the nearest
    # neighbour query cannot handle NaN or inf coordinates.
    points = (world_points_h[:3, :] / world_points_h[3, :]).T
    points = points[np.isfinite(points).all(axis=1)]
    if len(points) == 0:
        return point_mask

    if tree is None:
        tree = KDTree(vertices)
    dist, ind = tree.query(points, k=1)

    matched = np.unique(ind.ravel()[dist.ravel() < max_distance])
    print(f"unique ind: {len(matched)}")

    point_mask[matched] = True
    return point_mask


def _refine_mask(mask, kernel_size=3, erode=True, dilate=False,
                 keep_components=True, num_components=1):
    """Clean a boolean mask with morphology and connected-component filtering.

    Returns a uint8 image in which non-zero pixels are foreground.
    """
    img = np.uint8(mask) * 255

    # Elliptical structuring element of size (2k+1)x(2k+1) anchored at its
    # centre; it gives a smoother erosion than a rectangular one.
    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE,
        (2 * kernel_size + 1, 2 * kernel_size + 1),
        (kernel_size, kernel_size),
    )

    if erode:
        img = cv2.erode(img, kernel, iterations=1)
    if dilate:
        img = cv2.dilate(img, kernel, iterations=1)

    if keep_components:
        # Label 0 is the background, real components start at 1.
        num_labels, labels_im = cv2.connectedComponents(img)
        if num_labels > 1:
            areas = np.bincount(labels_im.ravel(), minlength=num_labels)[1:]
            # Stable descending sort: equally sized components keep label order.
            order = np.argsort(-areas, kind="stable")
            largest_components = order[:num_components] + 1
            img = np.isin(labels_im, largest_components).astype(np.uint8)

    return img


def process_object(data):
    """Accumulate the per-frame vertex masks of one object.

    Args:
        data: Sequence ``(key, item, vertices, intrinsics, source_path,
            base_path, num_frames)``. ``item['frames']`` lists the frames in
            which the object was seen; at most ``num_frames`` of them are
            processed.

    Returns:
        Boolean array of length ``len(vertices)``: the union of the masks
        returned by ``process_frame`` for the processed frames.
    """
    key, item, vertices, intrinsics, source_path, base_path, num_frames = data
    total_points_mask = np.zeros(len(vertices), dtype=bool)

    frames = item['frames'][:num_frames]
    if len(frames) == 0 or len(vertices) == 0:
        return total_points_mask

    # The vertices are identical for every frame, so build the tree once.
    tree = KDTree(vertices)
    for frame in frames:
        total_points_mask |= process_frame(
            frame, vertices, intrinsics, source_path, base_path, key, tree=tree
        )
    return total_points_mask


def load_scan(pcd_path):
    """Read point coordinates from a raw float32 scan file.

    The file is interpreted as a flat sequence of float32 values grouped into
    rows of six; only the first three columns of every row are returned
    (presumably XYZ followed by three extra channels such as colour —
    confirm against the file producer).

    Args:
        pcd_path: Path of the binary file to read.

    Returns:
        Float32 array of shape (N, 3).
    """
    raw_values = np.fromfile(pcd_path, dtype=np.float32)
    records = raw_values.reshape(-1, 6)
    return records[:, :3]

def process_scene(data):
    """Re-project the 2D object masks of one scene onto its scan and save them.

    For every object listed in the scene's ``infos.npy`` the per-frame masks
    are lifted to 3D (see ``process_object``) and written into the matching
    column of the baseline ``pred_masks`` array. The updated prediction is
    saved as an ``.npz`` file and a coloured point cloud of all masks is
    exported for visual inspection. Existing outputs are always recomputed
    and overwritten.

    Args:
        data: Tuple ``(scene_id, exp_name)``; ``exp_name`` selects the output
            sub-directory.

    Raises:
        IndexError: If the scene has more objects than the baseline
            ``pred_masks`` has columns.
        ValueError: If the baseline ``pred_masks`` does not have one row per
            scan vertex.
    """
    scene_id, exp_name = data
    pred_path = Path(f"data/prediction/scannet/baseline_scannet200/{scene_id}.npz")
    out_path = Path(f"data/prediction/scannet/{exp_name}/{scene_id}.npz")
    base_path = Path(f"/home/jovyan/users/lemeshko/scripts/gsam_result/yolo/{scene_id}")
    source_path = Path(f"/home/jovyan/users/kolodiazhnyi/data/scannet/posed_images/{scene_id}")
    scan_path = Path(f"/home/jovyan/users/bulat/workspace/3drec/Indoor/OKNO/data/scannet200/points/{scene_id}.bin")
    info_path = base_path / "infos.npy"

    vertices = load_scan(scan_path)
    # NOTE(review): allow_pickle=True can execute code embedded in the file;
    # only load these files from a trusted source.
    info_data = np.load(info_path, allow_pickle=True).item()

    # Materialise every array while the archive is open so that the file
    # handle is released as soon as the data has been read.
    with np.load(pred_path, allow_pickle=True) as base_data:
        new_data = dict(base_data.items())
    pred_masks = new_data['pred_masks']

    # Fail before the expensive per-frame processing instead of after it.
    if pred_masks.shape[0] != len(vertices):
        raise ValueError(
            f"pred_masks has {pred_masks.shape[0]} rows but the scan has "
            f"{len(vertices)} vertices"
        )
    if len(info_data) > pred_masks.shape[1]:
        raise IndexError(
            f"{len(info_data)} objects do not fit into "
            f"{pred_masks.shape[1]} pred_masks columns"
        )

    # Diagnostics for the loaded scan geometry.
    print(f"Mesh vertices shape: {vertices.shape}")
    print("Mesh vertices range:")
    print(f"  X: [{vertices[:, 0].min():.3f}, {vertices[:, 0].max():.3f}]")
    print(f"  Y: [{vertices[:, 1].min():.3f}, {vertices[:, 1].max():.3f}]")
    print(f"  Z: [{vertices[:, 2].min():.3f}, {vertices[:, 2].max():.3f}]")

    # Rescale the intrinsics from a 1296x968 image to 640x480 (presumably
    # colour resolution to depth resolution — confirm for other datasets).
    intrinsics = np.loadtxt(source_path / 'intrinsic.txt')[:3, :3]
    intrinsics[0, :] *= 640 / 1296
    intrinsics[1, :] *= 480 / 968

    num_frames = 500
    object_data = [
        [key, item, vertices, intrinsics, source_path, base_path, num_frames]
        for key, item in info_data.items()
    ]
    total_points_masks = thread_map(process_object, object_data, chunksize=100)

    # The i-th object (in info_data order) replaces the i-th baseline column.
    for i, points_mask in enumerate(total_points_masks):
        pred_masks[:, i] = points_mask

    out_path.parent.mkdir(parents=True, exist_ok=True)
    os.makedirs("pred_masks", exist_ok=True)

    # One random colour per mask column for the debug point cloud.
    vs = []
    cs = []
    for column in pred_masks.T:
        # Cast so that a non-boolean mask dtype still selects by mask rather
        # than by integer index.
        v = vertices[column.astype(bool)]
        c = np.tile(np.random.rand(3), (len(v), 1))
        vs.append(v)
        cs.append(c)
    if vs:
        tm.PointCloud(np.concatenate(vs, axis=0), colors=np.concatenate(cs, axis=0)).export(f"pred_masks/{scene_id}_mask.ply")

    print("uniques", np.unique(new_data['pred_masks'].sum(1)), [[k, v.shape] for k, v in new_data.items()])
    np.savez(out_path, **new_data)

    

if __name__ == "__main__":
    # Run the mask re-projection for every scene listed in the split file.
    exp_name = "erode_mask"
    # ndmin=1 keeps the result iterable when the split file lists a single
    # scene; without it loadtxt returns a 0-d array that cannot be looped over.
    scenes = np.loadtxt(
        "/home/jovyan/users/bulat/workspace/3drec/Indoor/MaskClustering/splits/scannet200_subset.txt",
        dtype=str,
        ndmin=1,
    )
    for scene in scenes:
        process_scene((scene, exp_name))