File size: 7,101 Bytes
31f43c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""Depth-discontinuity edge source.

Independent from the gestalt segmentation: extracts 2D line segments
along sharp depth jumps inside the house silhouette, lifts them to 3D
via the affine-fitted depth map, then merges across views.

Pipeline:
1. Affine-fit COLMAP-calibrated depth (same as the rest of the pipeline).
2. Inside the eroded ADE20k house mask, run Canny on normalised depth.
3. Connected components → fit 2D line per component.
4. Sample N depth values along each 2D segment, unproject to 3D.
5. RANSAC-fit a 3D line through the unprojected samples.
6. Merge lines across views (direction + midpoint proximity).

The merged 3D lines have endpoints (p1, p2) suitable for the same
'edges-only lift onto merged_v' integration that v11 does for gestalt
line cloud. Since gestalt and depth-discontinuity sources are independent,
their lifts should be additive.

Entry points:
    extract_depth_3d_lines(entry) -> tuple[list[Line3D], dict]
    extract_and_merge_depth_lines(entry) -> list[Line3D]
"""

from __future__ import annotations

import numpy as np
import cv2

from hoho2025.example_solutions import (
    convert_entry_to_human_readable,
    get_sparse_depth, get_house_mask,
)

try:
    from line_cloud import Line3D, _fit_3d_line_ransac, _unproject_pixel, merge_3d_lines
    from mvs_utils import collect_views
    from sklearn_submission import fit_affine_ransac
except ImportError:
    from submission.line_cloud import Line3D, _fit_3d_line_ransac, _unproject_pixel, merge_3d_lines
    from submission.mvs_utils import collect_views
    from submission.sklearn_submission import fit_affine_ransac


def _detect_depth_segments_2d(
    depth_fitted: np.ndarray,
    house_mask: np.ndarray,
    canny_lo: int = 30,
    canny_hi: int = 80,
    erode_px: int = 9,
    min_area_px: int = 20,
    min_seglen_px: int = 25,
):
    """Detect straight 2D segments along depth edges inside the house mask.

    The mask is eroded with an ``erode_px`` x ``erode_px`` square kernel,
    depth is min/max-normalised over the eroded region to 8 bits, blurred
    with a 5x5 Gaussian and passed through Canny. Every 8-connected edge
    component then gets a least-squares line fit.

    Args:
        depth_fitted: 2D depth map. Indexed with the eroded mask, so it is
            expected to share height/width with ``house_mask``.
        house_mask: Mask of the house region; non-zero (after a uint8 cast)
            means "inside".
        canny_lo: Lower Canny hysteresis threshold (on the 0-255 image).
        canny_hi: Upper Canny hysteresis threshold (on the 0-255 image).
        erode_px: Side length of the square erosion kernel, in pixels.
        min_area_px: Edge components with fewer pixels are dropped.
        min_seglen_px: Fitted segments shorter than this are dropped.

    Returns:
        A list of 5-tuples ``(xs, ys, p1, p2, params)`` where ``xs``/``ys``
        are the pixel coordinates of the edge component, ``p1``/``p2`` are
        the 2D endpoints of the fitted segment, and ``params`` is
        ``(vx, vy, x0, y0, t_min, t_max)``: the fitted direction, a point on
        the line, and the parameter range covered by the component.
        Returns an empty list when the input is empty, fewer than 100 mask
        pixels survive erosion, the depth range inside the eroded mask is
        below 0.5, or Canny finds no edges.
    """
    if depth_fitted.size == 0:
        return []

    mask_u8 = house_mask.astype(np.uint8)
    # Erosion can only remove pixels, so a mask that is already below the
    # threshold can be rejected before doing any image processing.
    if np.count_nonzero(mask_u8) < 100:
        return []
    eroded = cv2.erode(
        mask_u8,
        np.ones((erode_px, erode_px), np.uint8),
    ).astype(bool)
    if np.count_nonzero(eroded) < 100:
        return []

    # Normalise depth to [0, 255] using the range seen inside the eroded mask.
    masked_depth = depth_fitted[eroded]
    d_min, d_max = float(masked_depth.min()), float(masked_depth.max())
    if d_max - d_min < 0.5:
        return []
    d_norm = np.clip((depth_fitted - d_min) / (d_max - d_min), 0.0, 1.0)
    d_u8 = (d_norm * 255).astype(np.uint8)
    d_u8 = cv2.GaussianBlur(d_u8, (5, 5), 0)

    canny = cv2.Canny(d_u8, canny_lo, canny_hi)
    canny[~eroded] = 0  # keep only edges strictly inside the silhouette
    if not canny.any():
        return []

    # ``connectivity`` must be passed by keyword: the second positional
    # parameter of the Python binding is the ``labels`` output array.
    n_lbl, lbl, stats, _ = cv2.connectedComponentsWithStats(
        canny, connectivity=8,
    )
    out = []
    for i in range(1, n_lbl):  # label 0 is the background
        if int(stats[i, cv2.CC_STAT_AREA]) < min_area_px:
            continue
        ys, xs = np.where(lbl == i)
        if len(xs) < 3:
            continue
        pts = np.column_stack([xs, ys]).astype(np.float32)
        line = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01)
        vx, vy, x0, y0 = line.ravel()
        # Signed position of every component pixel along the fitted line.
        proj = (xs - x0) * vx + (ys - y0) * vy
        t_min, t_max = float(proj.min()), float(proj.max())
        if t_max - t_min < min_seglen_px:
            continue
        p1 = np.array([x0 + t_min * vx, y0 + t_min * vy])
        p2 = np.array([x0 + t_max * vx, y0 + t_max * vy])
        out.append((xs, ys, p1, p2, (vx, vy, x0, y0, t_min, t_max)))
    return out


def extract_depth_3d_lines_single_view(
    depth_fitted: np.ndarray,
    house_mask: np.ndarray,
    view_info: dict,
    n_samples: int = 30,
) -> list[Line3D]:
    """Lift the 2D depth-edge segments of one view to 3D line segments.

    For every segment reported by ``_detect_depth_segments_2d`` the function
    samples ``n_samples`` evenly spaced positions along the fitted 2D line,
    reads the depth at the nearest pixel, unprojects each sample with
    ``_unproject_pixel`` and fits a 3D line to the samples with
    ``_fit_3d_line_ransac``.

    Args:
        depth_fitted: 2D depth map of the view.
        house_mask: House mask forwarded to the 2D detector.
        view_info: Per-view dict; the keys ``'K'``, ``'R'``, ``'t'`` and
            ``'image_id'`` are read.
        n_samples: Number of samples taken along each 2D segment.

    Returns:
        One ``Line3D`` (``edge_class='depth_discontinuity'``) per segment
        that yields at least 5 unprojected samples, a successful RANSAC fit
        and a 3D length of at least 0.4.
    """
    n_rows, n_cols = depth_fitted.shape[:2]
    intrinsics, rotation, translation = (
        view_info['K'], view_info['R'], view_info['t'],
    )
    inv_intrinsics = np.linalg.inv(intrinsics)
    inv_rotation = rotation.T
    camera_centre = -inv_rotation @ translation

    detections = _detect_depth_segments_2d(depth_fitted, house_mask)
    source_view = view_info['image_id']
    lines_3d: list[Line3D] = []

    for _xs, _ys, _end_a, _end_b, line_params in detections:
        dir_x, dir_y, anchor_x, anchor_y, t_start, t_stop = line_params

        # Walk along the fitted 2D line and unproject every in-image sample.
        samples = []
        for step in np.linspace(t_start, t_stop, n_samples):
            px = anchor_x + step * dir_x
            py = anchor_y + step * dir_y
            col, row = int(round(px)), int(round(py))
            if not (0 <= col < n_cols and 0 <= row < n_rows):
                continue
            world_pt = _unproject_pixel(
                px, py, depth_fitted[row, col],
                inv_intrinsics, inv_rotation, camera_centre,
            )
            if world_pt is not None:
                samples.append(world_pt)

        if len(samples) < 5:
            continue

        fit = _fit_3d_line_ransac(
            np.array(samples, dtype=np.float64),
            n_iter=50, inlier_th=0.3, min_inliers=5,
        )
        if fit is None:
            continue
        centroid, direction, inliers = fit

        # Endpoints are the extreme inlier projections onto the fitted axis.
        along = (inliers - centroid) @ direction
        start = centroid + float(along.min()) * direction
        end = centroid + float(along.max()) * direction
        span = float(np.linalg.norm(end - start))
        if span < 0.4:
            continue

        lines_3d.append(Line3D(
            point=centroid,
            direction=direction,
            p1=start, p2=end,
            length=span,
            n_inliers=len(inliers),
            edge_class='depth_discontinuity',
            view_id=source_view,
        ))
    return lines_3d


def extract_depth_3d_lines(entry) -> tuple[list[Line3D], dict]:
    """Extract depth-discontinuity 3D lines from all views of an entry.

    For each view that ``collect_views`` knows about, the depth map is
    divided by 1000 (presumably millimetres to metres — confirm against the
    dataset), affine-fitted to the sparse COLMAP depth when sparse depth is
    found, and passed to ``extract_depth_3d_lines_single_view`` together
    with the house mask resized to the depth resolution.

    The affine fit is best-effort: if it fails the un-fitted depth is used
    and the failure is logged. A view whose house mask cannot be computed or
    resized is skipped (also logged).

    Args:
        entry: Raw dataset entry accepted by
            ``convert_entry_to_human_readable``.

    Returns:
        ``(all_lines, good_entry)``: the un-merged lines of every processed
        view, and the converted entry. ``all_lines`` is empty when the entry
        has no COLMAP reconstruction.
    """
    import logging

    log = logging.getLogger(__name__)

    good = convert_entry_to_human_readable(entry)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return [], good

    views = collect_views(colmap_rec, good['image_ids'])
    all_lines: list[Line3D] = []

    for _gest, depth, img_id, ade_seg in zip(
        good['gestalt'], good['depth'], good['image_ids'], good['ade']
    ):
        info = views.get(img_id)
        if info is None:
            continue
        depth_np = np.array(depth).astype(np.float64) / 1000.0
        H, W = depth_np.shape[:2]

        # The mask is needed by both the affine fit and the edge detector;
        # without it the view cannot be processed at all.
        try:
            house = get_house_mask(ade_seg)
        except Exception:
            log.warning(
                "house mask failed for view %s; skipping view",
                img_id, exc_info=True,
            )
            continue

        # Affine fit (same as main pipeline); fall back to raw depth on error.
        try:
            depth_sparse, found, _, _ = get_sparse_depth(colmap_rec, img_id, depth_np)
            if found:
                _, _, depth_np = fit_affine_ransac(depth_np, depth_sparse, house)
        except Exception:
            log.warning(
                "affine depth fit failed for view %s; using un-fitted depth",
                img_id, exc_info=True,
            )

        try:
            house_resized = cv2.resize(
                house.astype(np.uint8), (W, H), interpolation=cv2.INTER_NEAREST,
            ) > 0
        except Exception:
            log.warning(
                "house mask resize failed for view %s; skipping view",
                img_id, exc_info=True,
            )
            continue

        all_lines.extend(
            extract_depth_3d_lines_single_view(depth_np, house_resized, info)
        )

    return all_lines, good


def extract_and_merge_depth_lines(entry) -> list[Line3D]:
    """Run per-view extraction on ``entry`` and merge the lines across views.

    Returns an empty list when no view produced any line; otherwise the
    result of ``merge_3d_lines`` on all extracted lines.
    """
    per_view_lines, _good_entry = extract_depth_3d_lines(entry)
    return merge_3d_lines(per_view_lines) if per_view_lines else []