Commit ·
d40cee6
1
Parent(s): 9506584
Add missing core Python files
Browse files- bundle_adjust.py +221 -0
- colmap_refine.py +240 -0
- depth_edges.py +217 -0
- dgcnn.py +181 -0
- junction.py +193 -0
- line_cloud.py +542 -0
- plane_wireframe.py +472 -0
- triangulation.py +618 -0
- winner_candidates.py +270 -0
- winner_inference.py +267 -0
bundle_adjust.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Post-hoc bundle adjustment of merged 3D wireframe vertices.
|
| 2 |
+
|
| 3 |
+
For each vertex in ``merged_v``, we:
|
| 4 |
+
1. Project its current 3D position into every available view.
|
| 5 |
+
2. Find the nearest gestalt corner (from ``get_vertices_and_edges_improved``)
|
| 6 |
+
in each view within ``match_px`` pixels.
|
| 7 |
+
3. If observations are found in ≥ ``min_views`` views, refine the 3D
|
| 8 |
+
position to minimise the sum of squared reprojection errors via
|
| 9 |
+
``scipy.optimize.least_squares`` with a Huber loss.
|
| 10 |
+
|
| 11 |
+
Cameras are fixed (COLMAP cameras are accurate). Only vertex positions
|
| 12 |
+
are optimised. No thresholds are tuned — just pure geometric
|
| 13 |
+
optimisation that converges to the correct answer given the cameras.
|
| 14 |
+
|
| 15 |
+
Entry point: ``refine_vertices_ba(merged_v, entry)``.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import numpy as np
|
| 21 |
+
import cv2
|
| 22 |
+
from scipy.optimize import least_squares
|
| 23 |
+
|
| 24 |
+
from hoho2025.example_solutions import (
|
| 25 |
+
convert_entry_to_human_readable,
|
| 26 |
+
filter_vertices_by_background,
|
| 27 |
+
)
|
| 28 |
+
from hoho2025.color_mappings import gestalt_color_mapping
|
| 29 |
+
|
| 30 |
+
try:
|
| 31 |
+
from mvs_utils import collect_views, project_world_to_image
|
| 32 |
+
except ImportError:
|
| 33 |
+
from submission.mvs_utils import collect_views, project_world_to_image
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
VERTEX_CLASSES = ['apex', 'eave_end_point', 'flashing_end_point']
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _detect_2d_corners(gest_np):
    """Locate gestalt vertex blobs in a single segmentation image.

    For every class listed in ``VERTEX_CLASSES`` the pixels whose colour
    matches that class (within +/-0.5 per channel) are grouped into
    8-connected components, and the centroid of each component becomes
    one corner.

    Returns an (N, 2) float32 array of centroid pixel coordinates, or an
    empty (0, 2) array when no vertex-class pixel is present.
    """
    found = []
    for class_name in VERTEX_CLASSES:
        rgb = np.array(gestalt_color_mapping[class_name])
        class_mask = cv2.inRange(gest_np, rgb - 0.5, rgb + 0.5)
        if not class_mask.any():
            continue
        centroids = cv2.connectedComponentsWithStats(class_mask, 8, cv2.CV_32S)[3]
        # The first centroid belongs to the background label, so drop it.
        found.extend(centroids[1:])
    if len(found) == 0:
        return np.empty((0, 2), dtype=np.float32)
    return np.array(found, dtype=np.float32)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _collect_observations(
|
| 59 |
+
merged_v: np.ndarray,
|
| 60 |
+
views: dict,
|
| 61 |
+
corners_per_view: dict[str, np.ndarray],
|
| 62 |
+
match_px: float = 8.0,
|
| 63 |
+
) -> list[list[tuple[str, np.ndarray]]]:
|
| 64 |
+
"""For each vertex, find its 2D observation in each view.
|
| 65 |
+
|
| 66 |
+
Returns a list (one per vertex) of lists of ``(view_id, uv_observed)``.
|
| 67 |
+
"""
|
| 68 |
+
n = len(merged_v)
|
| 69 |
+
observations: list[list[tuple[str, np.ndarray]]] = [[] for _ in range(n)]
|
| 70 |
+
|
| 71 |
+
for vid, info in views.items():
|
| 72 |
+
corners_2d = corners_per_view.get(vid)
|
| 73 |
+
if corners_2d is None or len(corners_2d) == 0:
|
| 74 |
+
continue
|
| 75 |
+
P = info['P']
|
| 76 |
+
# Project all merged_v into this view
|
| 77 |
+
uv, z = project_world_to_image(P, merged_v)
|
| 78 |
+
H, W = info['height'], info['width']
|
| 79 |
+
for i in range(n):
|
| 80 |
+
if z[i] <= 0:
|
| 81 |
+
continue
|
| 82 |
+
u, v_px = uv[i]
|
| 83 |
+
if u < -50 or u > W + 50 or v_px < -50 or v_px > H + 50:
|
| 84 |
+
continue
|
| 85 |
+
# Find nearest 2D corner
|
| 86 |
+
d = np.linalg.norm(corners_2d - uv[i], axis=1)
|
| 87 |
+
j = int(np.argmin(d))
|
| 88 |
+
if d[j] <= match_px:
|
| 89 |
+
observations[i].append((vid, corners_2d[j].copy()))
|
| 90 |
+
|
| 91 |
+
return observations
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _ba_residuals(params, Ps, obs_2d):
|
| 95 |
+
"""Reprojection residuals for a single 3D point.
|
| 96 |
+
|
| 97 |
+
params: (3,) — x, y, z of the 3D point.
|
| 98 |
+
Ps: list of (3, 4) projection matrices.
|
| 99 |
+
obs_2d: list of (2,) observed 2D points.
|
| 100 |
+
|
| 101 |
+
Returns: (2*N,) residual vector.
|
| 102 |
+
"""
|
| 103 |
+
X = params
|
| 104 |
+
res = []
|
| 105 |
+
homog = np.array([X[0], X[1], X[2], 1.0])
|
| 106 |
+
for P, uv_obs in zip(Ps, obs_2d):
|
| 107 |
+
proj = P @ homog
|
| 108 |
+
if proj[2] <= 1e-6:
|
| 109 |
+
res.extend([100.0, 100.0]) # large penalty
|
| 110 |
+
continue
|
| 111 |
+
u = proj[0] / proj[2]
|
| 112 |
+
v = proj[1] / proj[2]
|
| 113 |
+
res.extend([u - uv_obs[0], v - uv_obs[1]])
|
| 114 |
+
return np.array(res, dtype=np.float64)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _mean_reproj_error(residuals: np.ndarray) -> float:
    """Mean per-view Euclidean pixel error of a flat (2*N,) residual vector."""
    return float(np.sqrt((residuals.reshape(-1, 2) ** 2).sum(axis=1)).mean())


def refine_vertices_ba(
    merged_v: np.ndarray,
    entry,
    match_px: float = 8.0,
    min_views: int = 2,
    max_reproj_px: float = 5.0,
    min_initial_err_px: float = 3.0,
    max_displacement: float = 2.0,
) -> np.ndarray:
    """Refine 3D vertex positions via per-vertex bundle adjustment.

    Each vertex is optimised independently against the gestalt corners it
    matches in the available views. A vertex is left untouched when

    * it has observations in fewer than ``min_views`` views,
    * its initial mean reprojection error is already at most
      ``min_initial_err_px``,
    * the optimiser raises, or
    * the optimised position fails any acceptance test: the mean
      reprojection error must decrease, must not exceed ``max_reproj_px``,
      and the vertex must not move by more than ``max_displacement``.

    Parameters
    ----------
    merged_v : (N, 3) array of vertex positions.
    entry : the raw dataset sample (passed to ``convert_entry_to_human_readable``).
    match_px : maximum pixel distance to match a projected vertex to a
        gestalt corner in a view.
    min_views : minimum number of views with a matching observation for
        BA to fire.
    max_reproj_px : if the post-BA mean reprojection error exceeds this,
        the original position is kept.
    min_initial_err_px : vertices whose initial mean reprojection error is
        at most this value are considered well-localised and are skipped.
    max_displacement : largest accepted distance (in world units) between
        the original and the optimised position.

    Returns
    -------
    refined_v : (N, 3) float64 array; a copy of ``merged_v`` with accepted
        refinements written in.
    """
    merged_v = np.asarray(merged_v, dtype=np.float64)
    refined = merged_v.copy()

    if len(merged_v) == 0:
        return refined

    good = convert_entry_to_human_readable(entry)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return refined

    views = collect_views(colmap_rec, good['image_ids'])
    if len(views) < 2:
        return refined

    # Detect 2D corners in each view that has a usable camera.
    corners_per_view: dict[str, np.ndarray] = {}
    for gest, depth, img_id in zip(good['gestalt'], good['depth'], good['image_ids']):
        if img_id not in views:
            continue
        H, W = np.array(depth).shape[:2]
        # The gestalt image is resized to the depth map's resolution before
        # corner detection (``gest`` presumably is a PIL image -- confirm).
        gest_np = np.array(gest.resize((W, H))).astype(np.uint8)
        corners_per_view[img_id] = _detect_2d_corners(gest_np)

    # Collect multi-view observations for each vertex.
    observations = _collect_observations(merged_v, views, corners_per_view, match_px)

    # Run BA on each vertex independently. Only vertices whose INITIAL
    # reprojection error is high are refined, so already-good vertices are
    # not disturbed.
    for i, obs in enumerate(observations):
        if len(obs) < min_views:
            continue

        Ps = [views[vid]['P'] for vid, _ in obs]
        pts_2d = [uv for _, uv in obs]
        x0 = merged_v[i].copy()

        initial_err = _mean_reproj_error(_ba_residuals(x0, Ps, pts_2d))
        if initial_err <= min_initial_err_px:
            continue  # already well-localised, leave it alone

        try:
            result = least_squares(
                _ba_residuals, x0,
                args=(Ps, pts_2d),
                method='trf',
                loss='huber',
                f_scale=2.0,
                max_nfev=50,
            )
        except Exception:
            # Deliberate best effort: a failed solve keeps the original vertex.
            continue

        X_opt = result.x
        final_err = _mean_reproj_error(_ba_residuals(X_opt, Ps, pts_2d))
        displacement = float(np.linalg.norm(X_opt - x0))

        # Accept only if the fit improved, is good enough in absolute terms,
        # and did not drag the vertex too far from where it started.
        if (
            final_err < initial_err
            and final_err <= max_reproj_px
            and displacement <= max_displacement
        ):
            refined[i] = X_opt

    return refined
|
colmap_refine.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""COLMAP-based vertex position refinement.
|
| 2 |
+
|
| 3 |
+
Two complementary refinement strategies that use the COLMAP sparse point
|
| 4 |
+
cloud as a high-precision 3D landmark source:
|
| 5 |
+
|
| 6 |
+
1. ``refine_vertices_3d_plane`` — Variant (a+c).
|
| 7 |
+
For each merged_v vertex, find its K nearest COLMAP points in 3D,
|
| 8 |
+
fit a local plane, and project the vertex onto that plane. Cancels
|
| 9 |
+
depth-noise residuals after the initial unprojection.
|
| 10 |
+
|
| 11 |
+
2. ``refine_vertices_multiview_plane`` — Variant (b).
|
| 12 |
+
For each merged_v vertex, project it into every view, find the K
|
| 13 |
+
nearest COLMAP points in 2D within each view's image, fit a local
|
| 14 |
+
plane in 3D from those points, project the vertex onto the plane,
|
| 15 |
+
and average the resulting 3D positions across views weighted by the
|
| 16 |
+
plane fit quality.
|
| 17 |
+
|
| 18 |
+
Both methods only use ``pycolmap`` + ``numpy`` + ``scipy``. Purely
|
| 19 |
+
geometric — no thresholds tuned on local validation.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import numpy as np
|
| 25 |
+
from scipy.spatial import cKDTree
|
| 26 |
+
|
| 27 |
+
from hoho2025.example_solutions import convert_entry_to_human_readable
|
| 28 |
+
|
| 29 |
+
try:
|
| 30 |
+
from mvs_utils import collect_views, project_world_to_image
|
| 31 |
+
except ImportError:
|
| 32 |
+
from submission.mvs_utils import collect_views, project_world_to_image
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
# Helpers
|
| 37 |
+
# ---------------------------------------------------------------------------
|
| 38 |
+
|
| 39 |
+
def _fit_plane_pca(points: np.ndarray) -> tuple[np.ndarray, np.ndarray, float]:
|
| 40 |
+
"""PCA plane fit. Returns (centroid, unit_normal, fit_quality).
|
| 41 |
+
|
| 42 |
+
fit_quality = 1 - (smallest_eigval / largest_eigval). 1.0 = perfectly
|
| 43 |
+
planar, 0.0 = sphere. Used as a weight when combining multi-view
|
| 44 |
+
refinements.
|
| 45 |
+
"""
|
| 46 |
+
centroid = points.mean(axis=0)
|
| 47 |
+
centred = points - centroid
|
| 48 |
+
# SVD instead of eig to be numerically stable on small N
|
| 49 |
+
_, s, Vt = np.linalg.svd(centred, full_matrices=False)
|
| 50 |
+
if len(s) < 3:
|
| 51 |
+
return centroid, np.array([0.0, 1.0, 0.0]), 0.0
|
| 52 |
+
normal = Vt[2] # smallest variance direction
|
| 53 |
+
# quality: ratio of last to first singular value, inverted
|
| 54 |
+
if s[0] < 1e-9:
|
| 55 |
+
return centroid, normal, 0.0
|
| 56 |
+
quality = 1.0 - float(s[2] / s[0])
|
| 57 |
+
return centroid, normal, max(0.0, min(1.0, quality))
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _project_point_to_plane(
|
| 61 |
+
point: np.ndarray, plane_centroid: np.ndarray, plane_normal: np.ndarray,
|
| 62 |
+
) -> np.ndarray:
|
| 63 |
+
"""Orthogonal projection of ``point`` onto a plane defined by
|
| 64 |
+
``(centroid, unit normal)``.
|
| 65 |
+
"""
|
| 66 |
+
rel = point - plane_centroid
|
| 67 |
+
d = float(np.dot(rel, plane_normal))
|
| 68 |
+
return point - d * plane_normal
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ---------------------------------------------------------------------------
|
| 72 |
+
# Variant (a+c): 3D KD-tree neighbours → local plane → snap
|
| 73 |
+
# ---------------------------------------------------------------------------
|
| 74 |
+
|
| 75 |
+
def refine_vertices_3d_plane(
    vertices: np.ndarray,
    colmap_xyz: np.ndarray,
    knn_radius: float = 0.5,
    knn_k: int = 12,
    min_neighbours: int = 6,
    max_displacement: float = 0.5,
    min_quality: float = 0.6,
) -> tuple[np.ndarray, np.ndarray]:
    """Snap vertices onto local planes fitted to nearby COLMAP points.

    For every vertex the COLMAP points within ``knn_radius`` are gathered
    (keeping at most the ``knn_k`` closest), a PCA plane is fitted through
    them, and the vertex is orthogonally projected onto that plane.

    Parameters
    ----------
    vertices : (N, 3) array of merged 3D vertex positions.
    colmap_xyz : (M, 3) all COLMAP points3D world coordinates.
    knn_radius : search radius around each vertex.
    knn_k : cap on the number of neighbours used for the plane fit.
    min_neighbours : vertices with fewer neighbours are left alone.
    max_displacement : a snap moving the vertex further than this many
        metres is discarded (likely a wall plane, not the roof).
    min_quality : a plane whose PCA quality is below this is discarded.

    Returns
    -------
    refined : (N, 3) float64 vertex positions after snapping.
    snapped : (N,) bool -- True where a vertex was moved.
    """
    verts = np.asarray(vertices, dtype=np.float64)
    out_positions = verts.copy()
    moved = np.zeros(len(verts), dtype=bool)

    nothing_to_do = len(verts) == 0 or len(colmap_xyz) < min_neighbours
    if nothing_to_do:
        return out_positions, moved

    kdtree = cKDTree(colmap_xyz)
    for row, vertex in enumerate(verts):
        near = kdtree.query_ball_point(vertex, knn_radius)
        n_near = len(near)
        if n_near < min_neighbours:
            continue
        if n_near > knn_k:
            # Too many candidates: keep only the knn_k nearest ones.
            dist = np.linalg.norm(colmap_xyz[near] - vertex, axis=1)
            near = [near[j] for j in np.argsort(dist)[:knn_k]]

        plane_origin, plane_normal, flatness = _fit_plane_pca(colmap_xyz[near])
        if flatness < min_quality:
            continue

        snapped_pos = _project_point_to_plane(vertex, plane_origin, plane_normal)
        shift = float(np.linalg.norm(snapped_pos - vertex))
        if shift > max_displacement:
            continue

        out_positions[row] = snapped_pos
        moved[row] = True

    return out_positions, moved
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# ---------------------------------------------------------------------------
|
| 137 |
+
# Variant (b): multi-view consensus plane refinement
|
| 138 |
+
# ---------------------------------------------------------------------------
|
| 139 |
+
|
| 140 |
+
def refine_vertices_multiview_plane(
    vertices: np.ndarray,
    entry,
    knn_2d_px: float = 30.0,
    knn_k: int = 12,
    min_neighbours: int = 6,
    max_displacement: float = 0.5,
    min_quality: float = 0.5,
    min_views: int = 2,
) -> tuple[np.ndarray, np.ndarray]:
    """Refine vertices by a multi-view consensus of local plane snaps.

    Per vertex and per view:
      1. The vertex is projected into the view; views where it lies behind
         the camera or outside the image are skipped.
      2. COLMAP points (in front of that camera) whose projection is within
         ``knn_2d_px`` pixels of the vertex projection are selected, keeping
         at most the ``knn_k`` closest in pixel distance.
      3. A PCA plane is fitted to the selected 3D points and the vertex is
         projected onto it, giving one candidate position with the plane's
         fit quality as weight. Candidates from planes below
         ``min_quality`` or moving the vertex more than
         ``max_displacement`` are dropped.

    When at least ``min_views`` candidates survive (and their weights do
    not sum to ~0), the vertex is replaced by their quality-weighted mean.

    Selecting neighbours in image space means the plane is fitted to the
    points the camera sees next to the vertex, rather than everything that
    merely happens to be close in 3D.

    Returns ``(refined, snapped)``: the (N, 3) float64 positions and an
    (N,) bool mask of the vertices that were replaced.
    """
    verts = np.asarray(vertices, dtype=np.float64)
    result = verts.copy()
    moved = np.zeros(len(verts), dtype=bool)

    if len(verts) == 0:
        return result, moved

    sample = convert_entry_to_human_readable(entry)
    reconstruction = sample.get('colmap') or sample.get('colmap_binary')
    if reconstruction is None:
        return result, moved

    views = collect_views(reconstruction, sample['image_ids'])
    if len(views) < 1:
        return result, moved

    cloud = np.array(
        [pt.xyz for pt in reconstruction.points3D.values()], dtype=np.float64
    )
    if len(cloud) < min_neighbours:
        return result, moved

    # Project the whole COLMAP cloud into each view once; keep only the
    # points in front of the camera together with their cloud indices.
    cloud_in_view: dict[str, tuple[np.ndarray, np.ndarray]] = {}
    for view_id, view in views.items():
        uv_all, depth_all = project_world_to_image(view['P'], cloud)
        visible = depth_all > 0
        cloud_in_view[view_id] = (uv_all[visible], np.where(visible)[0])

    for row, vertex in enumerate(verts):
        votes: list[tuple[np.ndarray, float]] = []

        for view_id, view in views.items():
            uv_vertex, depth_vertex = project_world_to_image(
                view['P'], vertex.reshape(1, 3)
            )
            if depth_vertex[0] <= 0:
                continue
            pixel = uv_vertex[0]
            height, width = view['height'], view['width']
            if not (0 <= pixel[0] < width and 0 <= pixel[1] < height):
                continue

            cloud_uv, cloud_ids = cloud_in_view[view_id]
            if len(cloud_uv) == 0:
                continue

            pix_dist = np.linalg.norm(cloud_uv - pixel, axis=1)
            close = pix_dist <= knn_2d_px
            if close.sum() < min_neighbours:
                continue

            picked = cloud_ids[close]
            if len(picked) > knn_k:
                nearest_first = np.argsort(pix_dist[close])[:knn_k]
                picked = picked[nearest_first]

            plane_origin, plane_normal, flatness = _fit_plane_pca(cloud[picked])
            if flatness < min_quality:
                continue

            candidate = _project_point_to_plane(vertex, plane_origin, plane_normal)
            if float(np.linalg.norm(candidate - vertex)) > max_displacement:
                continue
            votes.append((candidate, flatness))

        if len(votes) < min_views:
            continue

        # Quality-weighted mean of the per-view candidates.
        weights = np.array([w for _, w in votes], dtype=np.float64)
        positions = np.array([pos for pos, _ in votes], dtype=np.float64)
        total_weight = weights.sum()
        if total_weight < 1e-6:
            continue
        result[row] = (positions * weights[:, None]).sum(axis=0) / weights.sum()
        moved[row] = True

    return result, moved
|
depth_edges.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Depth-discontinuity edge source.
|
| 2 |
+
|
| 3 |
+
Independent from the gestalt segmentation: extracts 2D line segments
|
| 4 |
+
along sharp depth jumps inside the house silhouette, lifts them to 3D
|
| 5 |
+
via the affine-fitted depth map, then merges across views.
|
| 6 |
+
|
| 7 |
+
Pipeline:
|
| 8 |
+
1. Affine-fit COLMAP-calibrated depth (same as the rest of the pipeline).
|
| 9 |
+
2. Inside the eroded ADE20k house mask, run Canny on normalised depth.
|
| 10 |
+
3. Connected components → fit 2D line per component.
|
| 11 |
+
4. Sample N depth values along each 2D segment, unproject to 3D.
|
| 12 |
+
5. RANSAC-fit a 3D line through the unprojected samples.
|
| 13 |
+
6. Merge lines across views (direction + midpoint proximity).
|
| 14 |
+
|
| 15 |
+
The merged 3D lines have endpoints (p1, p2) suitable for the same
|
| 16 |
+
'edges-only lift onto merged_v' integration that v11 does for gestalt
|
| 17 |
+
line cloud. Since gestalt and depth-discontinuity sources are independent,
|
| 18 |
+
their lifts should be additive.
|
| 19 |
+
|
| 20 |
+
Entry point:
|
| 21 |
+
extract_depth_3d_lines(entry) -> tuple[list[Line3D], dict]
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
import numpy as np
|
| 27 |
+
import cv2
|
| 28 |
+
|
| 29 |
+
from hoho2025.example_solutions import (
|
| 30 |
+
convert_entry_to_human_readable,
|
| 31 |
+
get_sparse_depth, get_house_mask,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
try:
|
| 35 |
+
from line_cloud import Line3D, _fit_3d_line_ransac, _unproject_pixel, merge_3d_lines
|
| 36 |
+
from mvs_utils import collect_views
|
| 37 |
+
from sklearn_submission import fit_affine_ransac
|
| 38 |
+
except ImportError:
|
| 39 |
+
from submission.line_cloud import Line3D, _fit_3d_line_ransac, _unproject_pixel, merge_3d_lines
|
| 40 |
+
from submission.mvs_utils import collect_views
|
| 41 |
+
from submission.sklearn_submission import fit_affine_ransac
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _detect_depth_segments_2d(
|
| 45 |
+
depth_fitted: np.ndarray,
|
| 46 |
+
house_mask: np.ndarray,
|
| 47 |
+
canny_lo: int = 30,
|
| 48 |
+
canny_hi: int = 80,
|
| 49 |
+
erode_px: int = 9,
|
| 50 |
+
min_area_px: int = 20,
|
| 51 |
+
min_seglen_px: int = 25,
|
| 52 |
+
):
|
| 53 |
+
"""Return list of (xs, ys, p1, p2) for each detected 2D line segment."""
|
| 54 |
+
if depth_fitted.size == 0:
|
| 55 |
+
return []
|
| 56 |
+
H, W = depth_fitted.shape[:2]
|
| 57 |
+
eroded = cv2.erode(
|
| 58 |
+
house_mask.astype(np.uint8),
|
| 59 |
+
np.ones((erode_px, erode_px), np.uint8),
|
| 60 |
+
).astype(bool)
|
| 61 |
+
if eroded.sum() < 100:
|
| 62 |
+
return []
|
| 63 |
+
|
| 64 |
+
# Normalise depth inside the eroded house mask to [0, 255]
|
| 65 |
+
d_in = depth_fitted.copy()
|
| 66 |
+
in_d = d_in[eroded]
|
| 67 |
+
if in_d.size == 0:
|
| 68 |
+
return []
|
| 69 |
+
d_min, d_max = float(in_d.min()), float(in_d.max())
|
| 70 |
+
if d_max - d_min < 0.5:
|
| 71 |
+
return []
|
| 72 |
+
d_norm = np.clip((d_in - d_min) / (d_max - d_min), 0.0, 1.0)
|
| 73 |
+
d_u8 = (d_norm * 255).astype(np.uint8)
|
| 74 |
+
d_u8 = cv2.GaussianBlur(d_u8, (5, 5), 0)
|
| 75 |
+
|
| 76 |
+
canny = cv2.Canny(d_u8, canny_lo, canny_hi)
|
| 77 |
+
canny[~eroded] = 0
|
| 78 |
+
if canny.sum() == 0:
|
| 79 |
+
return []
|
| 80 |
+
|
| 81 |
+
n_lbl, lbl, stats, _ = cv2.connectedComponentsWithStats(canny, 8)
|
| 82 |
+
out = []
|
| 83 |
+
for i in range(1, n_lbl):
|
| 84 |
+
area = int(stats[i, cv2.CC_STAT_AREA])
|
| 85 |
+
if area < min_area_px:
|
| 86 |
+
continue
|
| 87 |
+
ys, xs = np.where(lbl == i)
|
| 88 |
+
if len(xs) < 3:
|
| 89 |
+
continue
|
| 90 |
+
pts = np.column_stack([xs, ys]).astype(np.float32)
|
| 91 |
+
line = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01)
|
| 92 |
+
vx, vy, x0, y0 = line.ravel()
|
| 93 |
+
proj = (xs - x0) * vx + (ys - y0) * vy
|
| 94 |
+
t_min, t_max = float(proj.min()), float(proj.max())
|
| 95 |
+
seglen = t_max - t_min
|
| 96 |
+
if seglen < min_seglen_px:
|
| 97 |
+
continue
|
| 98 |
+
p1 = np.array([x0 + t_min * vx, y0 + t_min * vy])
|
| 99 |
+
p2 = np.array([x0 + t_max * vx, y0 + t_max * vy])
|
| 100 |
+
out.append((xs, ys, p1, p2, (vx, vy, x0, y0, t_min, t_max)))
|
| 101 |
+
return out
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def extract_depth_3d_lines_single_view(
    depth_fitted: np.ndarray,
    house_mask: np.ndarray,
    view_info: dict,
    n_samples: int = 30,
) -> list[Line3D]:
    """Lift one view's depth-discontinuity segments to 3D lines.

    Each 2D segment found by ``_detect_depth_segments_2d`` is sampled at
    ``n_samples`` evenly spaced positions; every in-image sample is
    unprojected with the depth at its rounded pixel. A 3D line is
    RANSAC-fitted to the samples and its endpoints are the extreme inlier
    projections onto the fitted direction. Segments yielding fewer than 5
    unprojected samples, no RANSAC fit, or a 3D length below 0.4 are
    discarded.

    ``view_info`` must provide ``'K'``, ``'R'``, ``'t'`` and ``'image_id'``.
    Returns the ``Line3D`` objects (``edge_class='depth_discontinuity'``).
    """
    height, width = depth_fitted.shape[:2]
    intrinsics = view_info['K']
    rotation = view_info['R']
    translation = view_info['t']
    K_inv = np.linalg.inv(intrinsics)
    R_inv = rotation.T
    cam_center = -R_inv @ translation

    segments = _detect_depth_segments_2d(depth_fitted, house_mask)
    lines: list[Line3D] = []
    view_id = view_info['image_id']

    for _xs, _ys, _p1, _p2, line_params in segments:
        vx, vy, x0, y0, t_min, t_max = line_params

        # Walk along the 2D segment and unproject every sample that lands
        # inside the image.
        samples = []
        for step in np.linspace(t_min, t_max, n_samples):
            u = x0 + step * vx
            v_px = y0 + step * vy
            col, row = int(round(u)), int(round(v_px))
            if not (0 <= col < width and 0 <= row < height):
                continue
            world = _unproject_pixel(
                u, v_px, depth_fitted[row, col], K_inv, R_inv, cam_center
            )
            if world is not None:
                samples.append(world)

        if len(samples) < 5:
            continue

        fit = _fit_3d_line_ransac(
            np.array(samples, dtype=np.float64),
            n_iter=50, inlier_th=0.3, min_inliers=5,
        )
        if fit is None:
            continue

        centroid, direction, inlier_pts = fit
        along = (inlier_pts - centroid) @ direction
        start = centroid + float(along.min()) * direction
        end = centroid + float(along.max()) * direction
        span = float(np.linalg.norm(end - start))
        if span < 0.4:
            continue

        lines.append(Line3D(
            point=centroid,
            direction=direction,
            p1=start, p2=end,
            length=span,
            n_inliers=len(inlier_pts),
            edge_class='depth_discontinuity',
            view_id=view_id,
        ))
    return lines
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def extract_depth_3d_lines(entry) -> tuple[list[Line3D], dict]:
    """Extract depth-discontinuity 3D lines from all views.

    For every image that has a camera in the COLMAP reconstruction, the
    depth map is (when sparse COLMAP depth is available) affine-fitted to
    the sparse depth, and the house mask is resized to the depth map's
    resolution; the per-view lines are then extracted and concatenated.
    No cross-view merging is done here.

    A view is skipped when it has no camera or when its house mask cannot
    be built. A failing affine fit is not fatal: the unfitted depth map is
    used for that view.

    Returns ``(all_lines, good)`` where ``good`` is the human-readable
    entry; ``all_lines`` is empty when the entry has no reconstruction.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)

    good = convert_entry_to_human_readable(entry)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return [], good

    views = collect_views(colmap_rec, good['image_ids'])
    all_lines: list[Line3D] = []

    # 'gestalt' stays in the zip so iteration length is unchanged, but the
    # gestalt image itself is not needed for this edge source.
    for _gest, depth, img_id, ade_seg in zip(
        good['gestalt'], good['depth'], good['image_ids'], good['ade']
    ):
        info = views.get(img_id)
        if info is None:
            continue
        # Depth is divided by 1000 (presumably millimetres -> metres; confirm).
        depth_np = np.array(depth).astype(np.float64) / 1000.0
        H, W = depth_np.shape[:2]

        # Build the house mask once; it is needed both for the affine fit
        # and (resized) for edge detection. Without it the view is unusable.
        try:
            house = get_house_mask(ade_seg)
            house_resized = cv2.resize(
                house.astype(np.uint8), (W, H), interpolation=cv2.INTER_NEAREST,
            ) > 0
        except Exception:
            log.debug("depth_edges: no house mask for view %s, skipping", img_id,
                      exc_info=True)
            continue

        # Affine fit (same as main pipeline). Best effort: on failure the
        # raw depth map is used instead.
        try:
            depth_sparse, found, _, _ = get_sparse_depth(colmap_rec, img_id, depth_np)
            if found:
                _, _, depth_np = fit_affine_ransac(depth_np, depth_sparse, house)
        except Exception:
            log.debug("depth_edges: affine depth fit failed for view %s, "
                      "using unfitted depth", img_id, exc_info=True)

        all_lines.extend(
            extract_depth_3d_lines_single_view(depth_np, house_resized, info)
        )

    return all_lines, good
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def extract_and_merge_depth_lines(entry) -> list[Line3D]:
    """Run per-view extraction, then merge the lines across views.

    Returns an empty list when extraction yields nothing.
    """
    per_view_lines, _good = extract_depth_3d_lines(entry)
    return merge_3d_lines(per_view_lines) if per_view_lines else []
|
dgcnn.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""DGCNN backbone — drop-in replacement for PointNet.
|
| 2 |
+
|
| 3 |
+
EdgeConv with dynamic graph KNN captures local geometric structure
|
| 4 |
+
better than PointNet's global aggregation.
|
| 5 |
+
|
| 6 |
+
Ref: Wang et al., "Dynamic Graph CNN for Learning on Point Clouds", TOG 2019
|
| 7 |
+
https://github.com/antao97/dgcnn.pytorch
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
import torch.nn.functional as F
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def knn(x, k):
    """Return the indices of the ``k`` nearest neighbours of every point.

    Args:
        x: (B, C, N) point features.
        k: number of neighbours; must not exceed N (``topk`` raises otherwise).

    Returns:
        (B, N, k) long tensor of neighbour indices. Each point is its own
        nearest neighbour, so it appears in its own row (first, barring
        exact ties).
    """
    gram = -2 * torch.matmul(x.transpose(2, 1), x)        # (B, N, N)
    sq_norm = torch.sum(x ** 2, dim=1, keepdim=True)      # (B, 1, N)
    # -||xi - xj||^2, so the largest values are the closest points.
    neg_sq_dist = -sq_norm - gram - sq_norm.transpose(2, 1)
    return neg_sq_dist.topk(k=k, dim=-1).indices
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def get_graph_feature(x, k=20, idx=None):
    """Build edge features for EdgeConv.

    For each point ``x_i`` and each of its neighbours ``x_j`` the feature
    ``[x_j - x_i, x_i]`` is produced.

    Args:
        x: (B, C, N) point features.
        k: requested neighbour count. When the graph is computed here it is
            clamped to N so clouds with fewer than ``k`` points do not crash
            in ``topk``.
        idx: optional precomputed (B, N, k') neighbour indices. When given,
            its last dimension defines the neighbour count and ``k`` is
            ignored.

    Returns:
        (B, 2*C, N, k_eff) tensor, where ``k_eff`` is ``min(k, N)`` or
        ``idx.shape[-1]``.
    """
    B, C, N = x.shape

    if idx is None:
        idx = knn(x, k=min(k, N))  # (B, N, k_eff)
    # Trust the index tensor for the neighbour count; the ``k`` argument may
    # disagree with a caller-supplied ``idx`` or have been clamped above.
    k = idx.shape[-1]

    # Offset per-batch indices so they address the flattened (B*N, C) table.
    idx_base = torch.arange(0, B, device=x.device).view(-1, 1, 1) * N
    flat_idx = (idx + idx_base).reshape(-1)

    pts = x.transpose(2, 1).contiguous()                       # (B, N, C)
    neighbours = pts.view(B * N, C)[flat_idx, :].view(B, N, k, C)

    # expand() is a broadcast view; torch.cat materialises the result, so no
    # explicit repeat() copy is needed.
    centre = pts.view(B, N, 1, C).expand(-1, -1, k, -1)        # (B, N, k, C)

    feature = torch.cat((neighbours - centre, centre), dim=3)
    return feature.permute(0, 3, 1, 2).contiguous()            # (B, 2*C, N, k)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class EdgeConv(nn.Module):
    """One EdgeConv layer: kNN edge features -> 1x1 conv -> max over neighbours."""

    def __init__(self, in_channels, out_channels, k=20):
        super().__init__()
        self.k = k
        # Edge features concatenate [x_j - x_i, x_i], hence 2 * in_channels.
        stages = [
            nn.Conv2d(2 * in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Map (B, C, N) point features to (B, out_channels, N)."""
        edge_feat = get_graph_feature(x, k=self.k)   # (B, 2*C, N, k)
        activated = self.conv(edge_feat)             # (B, out, N, k)
        return activated.max(dim=-1)[0]              # (B, out, N)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class DGCNNBackbone(nn.Module):
    """Four stacked EdgeConv layers followed by a global max+mean pooling.

    Interface: (B, C, N) -> (B, out_dim), with ``out_dim = 2 * emb_dims``.
    """

    def __init__(self, in_channels, k=20, emb_dims=1024):
        super().__init__()
        self.k = k

        widths = (64, 64, 128, 256)
        self.edge_conv1 = EdgeConv(in_channels, widths[0], k)
        self.edge_conv2 = EdgeConv(widths[0], widths[1], k)
        self.edge_conv3 = EdgeConv(widths[1], widths[2], k)
        self.edge_conv4 = EdgeConv(widths[2], widths[3], k)

        # Fuses the concatenation of all four EdgeConv outputs.
        self.conv5 = nn.Sequential(
            nn.Conv1d(sum(widths), emb_dims, 1, bias=False),
            nn.BatchNorm1d(emb_dims),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Max- and average-pooled embeddings are concatenated.
        self.out_dim = 2 * emb_dims

    def forward(self, x):
        """
        Args:
            x: (B, C, N)
        Returns:
            global_feat: (B, out_dim)
        """
        stage_outputs = []
        hidden = x
        for layer in (self.edge_conv1, self.edge_conv2,
                      self.edge_conv3, self.edge_conv4):
            hidden = layer(hidden)
            stage_outputs.append(hidden)

        fused = self.conv5(torch.cat(stage_outputs, dim=1))   # (B, emb_dims, N)

        pooled_max = fused.max(dim=-1)[0]                     # (B, emb_dims)
        pooled_avg = fused.mean(dim=-1)                       # (B, emb_dims)
        return torch.cat([pooled_max, pooled_avg], dim=1)     # (B, 2*emb_dims)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class DGCNNVertexClassifier(nn.Module):
    """DGCNN backbone with classification, 3D-offset and confidence heads."""

    def __init__(self, in_channels=11, k=10, emb_dims=512):
        super().__init__()
        self.backbone = DGCNNBackbone(in_channels, k, emb_dims)
        feat_dim = self.backbone.out_dim

        def make_head(n_out):
            # Shared layout of the classification and offset heads.
            return nn.Sequential(
                nn.Linear(feat_dim, 512),
                nn.BatchNorm1d(512),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Dropout(0.3),
                nn.Linear(512, 128),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(128, n_out),
            )

        # Creation order (cls, offset, conf) is kept so parameter
        # initialisation consumes the RNG in the same sequence.
        self.cls_head = make_head(1)
        self.offset_head = make_head(3)

        self.conf_head = nn.Sequential(
            nn.Linear(feat_dim, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``(cls_logits, offset, confidence)`` for input ``x`` of shape (B, C, N)."""
        feat = self.backbone(x)
        return self.cls_head(feat), self.offset_head(feat), self.conf_head(feat)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class DGCNNEdgeClassifier(nn.Module):
    """DGCNN backbone followed by a single-logit MLP head."""

    def __init__(self, in_channels=6, k=10, emb_dims=256):
        super().__init__()
        self.backbone = DGCNNBackbone(in_channels, k, emb_dims)
        feat_dim = self.backbone.out_dim

        head_layers = [
            nn.Linear(feat_dim, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map (B, C, N) input to a (B, 1) logit."""
        global_feat = self.backbone(x)
        return self.head(global_feat)
|
junction.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Junction-type constraints for 3D roof wireframes.
|
| 2 |
+
|
| 3 |
+
After merging per-view detections into a 3D graph, we apply simple topology
|
| 4 |
+
priors to drop obviously wrong edges/vertices:
|
| 5 |
+
|
| 6 |
+
1. Collinear merge: if a vertex has degree 2 with two nearly antiparallel edges,
|
| 7 |
+
it is most likely a spurious point on a longer edge — merge the edges and
|
| 8 |
+
drop the vertex.
|
| 9 |
+
2. Duplicate-direction prune: if a vertex has two incident edges that point in
|
| 10 |
+
(nearly) the same direction, keep only the stronger one (stronger = higher
|
| 11 |
+
sklearn score if available, else longer edge).
|
| 12 |
+
3. Isolated leaf prune: vertices with degree 1 whose only edge is very short
|
| 13 |
+
(< 0.4 m) are dropped — they are almost always noise.
|
| 14 |
+
|
| 15 |
+
The module is intentionally pure-numpy and side-effect-free so it can be
|
| 16 |
+
dropped into both the heuristic and the triangulation pipelines.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
from typing import Sequence
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _edge_directions(vertices: np.ndarray, edges: np.ndarray) -> np.ndarray:
|
| 26 |
+
"""Unit vectors for each edge (from a→b). Shape (E, 3)."""
|
| 27 |
+
if len(edges) == 0:
|
| 28 |
+
return np.empty((0, 3), dtype=np.float32)
|
| 29 |
+
diffs = vertices[edges[:, 1]] - vertices[edges[:, 0]]
|
| 30 |
+
norms = np.linalg.norm(diffs, axis=1, keepdims=True)
|
| 31 |
+
norms = np.where(norms < 1e-6, 1.0, norms)
|
| 32 |
+
return diffs / norms
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _build_adj(n_vertices: int, edges: np.ndarray):
|
| 36 |
+
"""Return list[list[(neighbour, edge_index)]]."""
|
| 37 |
+
adj = [[] for _ in range(n_vertices)]
|
| 38 |
+
for ei, (a, b) in enumerate(edges):
|
| 39 |
+
adj[int(a)].append((int(b), ei))
|
| 40 |
+
adj[int(b)].append((int(a), ei))
|
| 41 |
+
return adj
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def apply_junction_constraints(
    vertices: np.ndarray,
    edges: Sequence[tuple],
    edge_scores: np.ndarray | None = None,
    collinear_cos: float = 0.97,
    duplicate_cos: float = 0.985,
    leaf_min_len: float = 0.4,
    max_passes: int = 3,
) -> tuple[np.ndarray, list]:
    """Apply junction-type constraints to a 3D wireframe.

    Parameters
    ----------
    vertices : (N, 3) array of 3D vertex positions.
    edges : list of (i, j) undirected edges.
    edge_scores : optional (E,) array giving edge confidence. When missing,
        or when its length does not match ``edges``, all edges get score 1
        (tie-break by length).
    collinear_cos : two edges at a degree-2 vertex whose outward directions
        have cosine below ``-collinear_cos`` are treated as one straight
        edge and merged.
    duplicate_cos : two edges at a vertex whose outward directions have
        cosine above this are duplicates; only the stronger one is kept.
    leaf_min_len : an edge shorter than this that is the only edge of a
        vertex is removed.
    max_passes : number of outer iterations.

    Returns
    -------
    (vertices_new, edges_new). Vertices are returned as float32 and are not
    reindexed; only the surviving edges are returned, as tuples of ints.
    If there are no edges or no vertices the input edges are returned as
    ``list(edges)`` unchanged.

    Notes
    -----
    Each outer pass applies at most ONE modification (the first applicable
    rule in the order collinear merge, duplicate prune, leaf prune) and then
    restarts with fresh adjacency. The total number of modifications is
    therefore bounded by ``max_passes``.
    NOTE(review): with the default of 3 this limits the cleanup to three
    edits per wireframe; confirm that is intended.
    """
    verts = np.asarray(vertices, dtype=np.float32)
    edges_arr = (
        np.asarray(list(edges), dtype=np.int64)
        if len(edges)
        else np.empty((0, 2), dtype=np.int64)
    )

    if len(edges_arr) == 0 or len(verts) == 0:
        return verts, list(edges)

    if edge_scores is None:
        scores = np.ones(len(edges_arr), dtype=np.float32)
    else:
        scores = np.asarray(edge_scores, dtype=np.float32)
        if len(scores) != len(edges_arr):
            # Mismatched scores cannot be trusted; fall back to uniform.
            scores = np.ones(len(edges_arr), dtype=np.float32)

    alive = np.ones(len(edges_arr), dtype=bool)

    for _ in range(max_passes):
        # Recomputed every pass because a collinear merge rewrites an edge.
        lengths = np.linalg.norm(
            verts[edges_arr[:, 1]] - verts[edges_arr[:, 0]], axis=1
        )

        # Adjacency over alive edges, keyed by absolute edge index so the
        # rules below can mutate ``alive`` / ``edges_arr`` directly.
        adj = [[] for _ in range(len(verts))]
        for ei, (a, b) in enumerate(edges_arr):
            if not alive[ei]:
                continue
            adj[int(a)].append((int(b), ei))
            adj[int(b)].append((int(a), ei))

        changed = (
            _merge_one_collinear(verts, edges_arr, alive, adj, collinear_cos)
            or _drop_one_duplicate(verts, alive, adj, scores, lengths, duplicate_cos)
            or _drop_one_short_leaf(alive, adj, lengths, leaf_min_len)
        )
        if not changed:
            break

    surviving = [tuple(map(int, edges_arr[i])) for i in range(len(edges_arr)) if alive[i]]
    return verts, surviving


def _merge_one_collinear(verts, edges_arr, alive, adj, collinear_cos):
    """Merge the first degree-2 vertex lying on a straight line; return True if one was found."""
    for v, incident in enumerate(adj):
        if len(incident) != 2:
            continue
        (n1, e1), (n2, e2) = incident
        if n1 == n2:
            continue
        # Directions from v outward.
        d1 = verts[n1] - verts[v]
        d2 = verts[n2] - verts[v]
        l1, l2 = np.linalg.norm(d1), np.linalg.norm(d2)
        if l1 < 1e-6 or l2 < 1e-6:
            continue
        # Antiparallel outward directions = straight line through v.
        if float(np.dot(d1 / l1, d2 / l2)) < -collinear_cos:
            alive[e1] = False
            # ``adj`` reflects all alive edges, so this is the same test as
            # scanning the edge list for (n1, n2) / (n2, n1).
            if any(nb == n2 for nb, _ in adj[n1]):
                # Direct edge already exists: drop both incident edges.
                alive[e2] = False
            else:
                # Reroute e2 so it connects n1 and n2 directly.
                edges_arr[e2] = (min(n1, n2), max(n1, n2))
            return True
    return False


def _drop_one_duplicate(verts, alive, adj, scores, lengths, duplicate_cos):
    """Drop the weaker of the first same-direction edge pair; return True if one was found."""
    for v, nbrs in enumerate(adj):
        if len(nbrs) < 2:
            continue
        # Unit outward direction per incident edge (None when degenerate).
        dirs = []
        for nb, _ in nbrs:
            d = verts[nb] - verts[v]
            nrm = np.linalg.norm(d)
            dirs.append(None if nrm < 1e-6 else d / nrm)
        for i in range(len(nbrs)):
            if dirs[i] is None:
                continue
            for j in range(i + 1, len(nbrs)):
                if dirs[j] is None:
                    continue
                if float(np.dot(dirs[i], dirs[j])) > duplicate_cos:
                    ei_i, ei_j = nbrs[i][1], nbrs[j][1]
                    # Higher score wins; ties are broken by length.
                    s_i = (scores[ei_i], lengths[ei_i])
                    s_j = (scores[ei_j], lengths[ei_j])
                    alive[ei_j if s_i >= s_j else ei_i] = False
                    return True
    return False


def _drop_one_short_leaf(alive, adj, lengths, leaf_min_len):
    """Remove the first short edge hanging off a degree-1 vertex; return True if one was found."""
    for incident in adj:
        if len(incident) != 1:
            continue
        _, ei = incident[0]
        if lengths[ei] < leaf_min_len:
            alive[ei] = False
            return True
    return False
|
line_cloud.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LC2WF-inspired 3D line cloud wireframe module.
|
| 2 |
+
|
| 3 |
+
Instead of lifting individual 2D corners to 3D via a single depth sample,
|
| 4 |
+
this module:
|
| 5 |
+
|
| 6 |
+
1. Extracts 2D line segments from gestalt edge masks (eave/ridge/rake/etc).
|
| 7 |
+
2. Samples many depth values along each 2D segment.
|
| 8 |
+
3. Fits a robust 3D line through the unprojected samples (RANSAC).
|
| 9 |
+
4. Merges similar 3D lines across views (direction + proximity).
|
| 10 |
+
5. Computes closest-point intersections of 3D line pairs → vertex candidates.
|
| 11 |
+
|
| 12 |
+
The resulting vertices average over many depth samples, cancelling noise
|
| 13 |
+
that single-pixel corner depth estimates cannot. The 3D line intersections
|
| 14 |
+
give overdetermined vertex positions.
|
| 15 |
+
|
| 16 |
+
Entry points:
|
| 17 |
+
extract_3d_lines(entry) → list[Line3D]
|
| 18 |
+
intersect_lines_to_vertices(lines, ...) → np.ndarray
|
| 19 |
+
predict_wireframe_lines(entry) → (vertices, edges)
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import numpy as np
|
| 25 |
+
import cv2
|
| 26 |
+
from dataclasses import dataclass
|
| 27 |
+
|
| 28 |
+
from hoho2025.example_solutions import (
|
| 29 |
+
convert_entry_to_human_readable,
|
| 30 |
+
empty_solution,
|
| 31 |
+
point_to_segment_dist,
|
| 32 |
+
)
|
| 33 |
+
from hoho2025.color_mappings import gestalt_color_mapping
|
| 34 |
+
|
| 35 |
+
try:
|
| 36 |
+
from mvs_utils import collect_views, project_world_to_image
|
| 37 |
+
except ImportError:
|
| 38 |
+
from submission.mvs_utils import collect_views, project_world_to_image
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# Gestalt class names whose colour masks are searched for 2D line segments;
# each name is used as a key into ``gestalt_color_mapping``.
EDGE_CLASSES = ['eave', 'ridge', 'rake', 'valley', 'hip']
# Gestalt vertex class names. Not referenced by the functions visible in this
# part of the module; presumably used further down (TODO confirm).
VERTEX_CLASSES = ['apex', 'eave_end_point', 'flashing_end_point']
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@dataclass
class Line3D:
    """A 3D line segment fitted from depth samples.

    Attributes:
        point: (3,) a point on the line.
        direction: (3,) unit direction vector.
        p1: (3,) first endpoint.
        p2: (3,) second endpoint.
        length: distance between ``p1`` and ``p2``.
        n_inliers: number of samples supporting the fit.
        edge_class: class label of the originating edge (e.g. a gestalt
            edge class name).
        view_id: identifier of the source view, or ``'merged'`` for a
            cross-view cluster representative.
    """

    point: np.ndarray
    direction: np.ndarray
    p1: np.ndarray
    p2: np.ndarray
    length: float
    n_inliers: int
    edge_class: str
    view_id: str
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ---------------------------------------------------------------------------
|
| 59 |
+
# Step 1-2: Extract 2D segments, sample depth, fit 3D lines
|
| 60 |
+
# ---------------------------------------------------------------------------
|
| 61 |
+
|
| 62 |
+
def _unproject_pixel(u, v, depth, K_inv, R_t_inv, t_world):
|
| 63 |
+
"""Unproject a single pixel (u, v) at the given depth to world coords.
|
| 64 |
+
|
| 65 |
+
K_inv : (3,3) — inverse intrinsics
|
| 66 |
+
R_t_inv : (3,3) — R^T (inverse rotation)
|
| 67 |
+
t_world : (3,) — camera centre in world = -R^T @ t
|
| 68 |
+
"""
|
| 69 |
+
z = float(depth)
|
| 70 |
+
if z <= 0.01 or z > 80.0:
|
| 71 |
+
return None
|
| 72 |
+
cam = K_inv @ np.array([u * z, v * z, z])
|
| 73 |
+
world = R_t_inv @ cam + t_world
|
| 74 |
+
return world
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _fit_3d_line_ransac(
|
| 78 |
+
pts3d: np.ndarray,
|
| 79 |
+
n_iter: int = 100,
|
| 80 |
+
inlier_th: float = 0.3,
|
| 81 |
+
min_inliers: int = 5,
|
| 82 |
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray] | None:
|
| 83 |
+
"""RANSAC-fit a 3D line through a set of 3D points.
|
| 84 |
+
|
| 85 |
+
Returns (point_on_line, unit_direction, inlier_pts) or None.
|
| 86 |
+
"""
|
| 87 |
+
n = len(pts3d)
|
| 88 |
+
if n < 2:
|
| 89 |
+
return None
|
| 90 |
+
|
| 91 |
+
best_inliers = None
|
| 92 |
+
best_dir = None
|
| 93 |
+
best_pt = None
|
| 94 |
+
best_count = 0
|
| 95 |
+
|
| 96 |
+
for _ in range(n_iter):
|
| 97 |
+
idx = np.random.choice(n, 2, replace=False)
|
| 98 |
+
p1, p2 = pts3d[idx[0]], pts3d[idx[1]]
|
| 99 |
+
d = p2 - p1
|
| 100 |
+
length = np.linalg.norm(d)
|
| 101 |
+
if length < 0.05:
|
| 102 |
+
continue
|
| 103 |
+
d = d / length
|
| 104 |
+
# Distance from each point to the line (p1, d)
|
| 105 |
+
rel = pts3d - p1
|
| 106 |
+
proj = rel @ d
|
| 107 |
+
perp = rel - proj[:, None] * d
|
| 108 |
+
dists = np.linalg.norm(perp, axis=1)
|
| 109 |
+
inlier_mask = dists <= inlier_th
|
| 110 |
+
count = int(inlier_mask.sum())
|
| 111 |
+
if count > best_count:
|
| 112 |
+
best_count = count
|
| 113 |
+
best_inliers = inlier_mask
|
| 114 |
+
best_dir = d
|
| 115 |
+
best_pt = p1
|
| 116 |
+
|
| 117 |
+
if best_count < min_inliers or best_inliers is None:
|
| 118 |
+
return None
|
| 119 |
+
|
| 120 |
+
# Refit on inliers using PCA
|
| 121 |
+
inlier_pts = pts3d[best_inliers]
|
| 122 |
+
centroid = inlier_pts.mean(axis=0)
|
| 123 |
+
_, _, Vt = np.linalg.svd(inlier_pts - centroid)
|
| 124 |
+
direction = Vt[0]
|
| 125 |
+
if np.dot(direction, best_dir) < 0:
|
| 126 |
+
direction = -direction
|
| 127 |
+
|
| 128 |
+
return centroid, direction, inlier_pts
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def extract_3d_lines_single_view(
    gest_np: np.ndarray,
    depth_np: np.ndarray,
    view_info: dict,
    n_samples: int = 30,
    min_line_px: int = 20,
) -> list[Line3D]:
    """Lift gestalt edge segments of one view to 3D lines using its depth map.

    For every class in ``EDGE_CLASSES`` the matching colour mask is closed
    morphologically and split into connected components. Each component of
    at least ``min_line_px`` pixels gets a 2D line fit; ``n_samples`` points
    along that line are unprojected with the depth at the nearest pixel and
    a 3D line is fitted through them with RANSAC. Lines shorter than 0.3 are
    discarded.

    ``view_info`` must provide ``'K'``, ``'R'``, ``'t'`` and ``'image_id'``.
    ``gest_np`` is indexed with depth-map pixel coordinates, so it is
    assumed to have the same resolution as ``depth_np``.
    """
    height, width = depth_np.shape[:2]
    K = view_info['K']
    R = view_info['R']
    t = view_info['t']
    K_inv = np.linalg.inv(K)
    R_inv = R.T
    cam_center = -R_inv @ t

    found_lines: list[Line3D] = []
    view_id = view_info['image_id']

    for edge_class in EDGE_CLASSES:
        rgb = np.array(gestalt_color_mapping[edge_class])
        class_mask = cv2.inRange(gest_np, rgb - 0.5, rgb + 0.5)
        class_mask = cv2.morphologyEx(
            class_mask, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8)
        )
        if class_mask.sum() == 0:
            continue

        _, labels, stats, _ = cv2.connectedComponentsWithStats(class_mask, 8, cv2.CV_32S)
        # Label 0 is the background.
        for lbl in range(1, labels.max() + 1):
            if stats[lbl, cv2.CC_STAT_AREA] < min_line_px:
                continue

            ys, xs = np.where(labels == lbl)
            if len(xs) < 3:
                continue

            # 2D line fit gives the direction and the extent along it.
            pixels = np.column_stack([xs, ys]).astype(np.float32)
            vx, vy, x0, y0 = cv2.fitLine(pixels, cv2.DIST_L2, 0, 0.01, 0.01).ravel()
            along = (xs - x0) * vx + (ys - y0) * vy
            t_min, t_max = float(along.min()), float(along.max())

            # Unproject evenly spaced samples along the 2D line.
            samples = []
            for t_val in np.linspace(t_min, t_max, n_samples):
                u = x0 + t_val * vx
                v_px = y0 + t_val * vy
                col, row = int(round(u)), int(round(v_px))
                if not (0 <= col < width and 0 <= row < height):
                    continue
                world = _unproject_pixel(
                    u, v_px, depth_np[row, col], K_inv, R_inv, cam_center
                )
                if world is not None:
                    samples.append(world)

            if len(samples) < 5:
                continue

            fit = _fit_3d_line_ransac(
                np.array(samples, dtype=np.float64),
                n_iter=50, inlier_th=0.3, min_inliers=5,
            )
            if fit is None:
                continue
            centroid, direction, inlier_pts = fit

            # Endpoints are the extreme inlier projections on the fitted line.
            s = (inlier_pts - centroid) @ direction
            p1 = centroid + float(s.min()) * direction
            p2 = centroid + float(s.max()) * direction
            length = float(np.linalg.norm(p2 - p1))
            if length < 0.3:
                continue

            found_lines.append(Line3D(
                point=centroid,
                direction=direction,
                p1=p1, p2=p2,
                length=length,
                n_inliers=len(inlier_pts),
                edge_class=edge_class,
                view_id=view_id,
            ))

    return found_lines
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# ---------------------------------------------------------------------------
|
| 218 |
+
# Step 1-2 entry: all views
|
| 219 |
+
# ---------------------------------------------------------------------------
|
| 220 |
+
|
| 221 |
+
def extract_3d_lines(entry) -> tuple[list[Line3D], dict]:
    """Extract 3D lines from all views.

    For each view known to ``collect_views`` the depth map is scaled by
    1/1000 (presumably mm -> m; confirm against the dataset), the gestalt
    image is resized to the depth resolution, the depth is optionally
    calibrated against COLMAP sparse depth, and
    ``extract_3d_lines_single_view`` is run.

    Returns:
        ``(all_lines, good_entry)``. ``all_lines`` is empty when the entry
        has no COLMAP reconstruction.
    """
    good = convert_entry_to_human_readable(entry)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return [], good

    views = collect_views(colmap_rec, good['image_ids'])
    all_lines: list[Line3D] = []

    # Calibration is best-effort: if its helpers cannot be imported every
    # view simply uses the raw depth. Resolved once rather than per view.
    try:
        from hoho2025.example_solutions import get_sparse_depth, get_house_mask
        from sklearn_submission import fit_affine_ransac
    except Exception:
        get_sparse_depth = get_house_mask = fit_affine_ransac = None

    per_view = zip(good['gestalt'], good['depth'], good['image_ids'])
    for view_idx, (gest, depth, img_id) in enumerate(per_view):
        info = views.get(img_id)
        if info is None:
            continue
        depth_np = np.array(depth).astype(np.float64) / 1000.0
        H, W = depth_np.shape[:2]
        gest_np = np.array(gest.resize((W, H))).astype(np.uint8)

        # Affine depth calibration using COLMAP sparse depth (same as pipeline).
        if fit_affine_ransac is not None:
            try:
                depth_sparse, found, _, _ = get_sparse_depth(colmap_rec, img_id, depth_np)
                if found:
                    # Use the loop position, not image_ids.index(img_id):
                    # that lookup returns the first match for duplicate ids
                    # and fails on non-list containers, which the except
                    # below would hide.
                    house = get_house_mask(good['ade'][view_idx])
                    _, _, depth_np = fit_affine_ransac(depth_np, depth_sparse, house)
            except Exception:
                pass  # use raw depth if calibration fails

        all_lines.extend(extract_3d_lines_single_view(gest_np, depth_np, info))

    return all_lines, good
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
# ---------------------------------------------------------------------------
|
| 260 |
+
# Step 3: Merge similar 3D lines across views
|
| 261 |
+
# ---------------------------------------------------------------------------
|
| 262 |
+
|
| 263 |
+
def _refit_cluster_line(members: list[Line3D], reference_direction: np.ndarray) -> Line3D:
    """Fit one representative line to the endpoints of ``members``."""
    all_pts = np.vstack([np.vstack([m.p1, m.p2]) for m in members])
    centroid = all_pts.mean(axis=0)
    # Only the right singular vectors are needed; the reduced SVD avoids
    # building the (2k x 2k) U matrix that full_matrices=True would create.
    _, _, vt = np.linalg.svd(all_pts - centroid, full_matrices=False)
    direction = vt[0]
    # Keep the orientation stable with respect to the previous representative.
    if np.dot(direction, reference_direction) < 0:
        direction = -direction
    s = (all_pts - centroid) @ direction
    new_p1 = centroid + float(s.min()) * direction
    new_p2 = centroid + float(s.max()) * direction
    return Line3D(
        point=centroid, direction=direction,
        p1=new_p1, p2=new_p2,
        length=float(np.linalg.norm(new_p2 - new_p1)),
        n_inliers=sum(m.n_inliers for m in members),
        edge_class=members[0].edge_class,
        view_id='merged',
    )


def merge_3d_lines(
    lines: list[Line3D],
    direction_cos: float = 0.95,
    midpoint_dist: float = 1.0,
) -> list[Line3D]:
    """Merge 3D lines that have similar direction and nearby midpoints.

    Uses greedy clustering: each line is assigned to the first cluster whose
    representative has ``|cos(angle)| >= direction_cos`` and whose midpoint
    lies within ``midpoint_dist``.  After every assignment the cluster
    representative is re-fitted from all member endpoints (direction via
    PCA, endpoints via extremal projections onto that direction).

    Returns:
        One representative ``Line3D`` per cluster.  Single-member clusters
        are copies of the input line; multi-member clusters carry
        ``view_id='merged'`` and the edge class of their first member.
        Inputs with at most one line are returned as a new list holding
        the same objects.
    """
    if len(lines) <= 1:
        return list(lines)  # new list so callers cannot alias the input

    clusters: list[list[int]] = []
    reps: list[Line3D] = []

    for i, line in enumerate(lines):
        line_mid = (line.p1 + line.p2) / 2
        for ci, rep in enumerate(reps):
            cos = abs(float(np.dot(line.direction, rep.direction)))
            if cos < direction_cos:
                continue
            mid_d = float(np.linalg.norm(line_mid - (rep.p1 + rep.p2) / 2))
            if mid_d > midpoint_dist:
                continue
            clusters[ci].append(i)
            reps[ci] = _refit_cluster_line(
                [lines[j] for j in clusters[ci]], rep.direction,
            )
            break
        else:
            # No compatible cluster: start a new one seeded with a copy.
            clusters.append([i])
            reps.append(Line3D(
                point=line.point.copy(), direction=line.direction.copy(),
                p1=line.p1.copy(), p2=line.p2.copy(),
                length=line.length, n_inliers=line.n_inliers,
                edge_class=line.edge_class, view_id=line.view_id,
            ))

    return reps
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
# ---------------------------------------------------------------------------
|
| 326 |
+
# Step 4: Intersect pairs of 3D lines → vertex candidates
|
| 327 |
+
# ---------------------------------------------------------------------------
|
| 328 |
+
|
| 329 |
+
def closest_point_on_two_lines(
    p1: np.ndarray, d1: np.ndarray,
    p2: np.ndarray, d2: np.ndarray,
) -> tuple[np.ndarray, float] | None:
    """Find the closest point between two infinite 3D lines.

    Each line is given as a point ``p`` and a direction ``d``; the
    directions do not have to be unit length.

    Returns:
        ``(midpoint_of_closest_approach, distance_between_lines)``, or
        ``None`` if the lines are nearly parallel or a direction has zero
        length.
    """
    w0 = p1 - p2
    a = float(np.dot(d1, d1))
    b = float(np.dot(d1, d2))
    c = float(np.dot(d2, d2))
    d = float(np.dot(d1, w0))
    e = float(np.dot(d2, w0))

    # denom == |d1|^2 |d2|^2 sin^2(angle).  Compare it against the product of
    # the squared lengths so the parallel test depends only on the angle and
    # not on how the directions happen to be scaled.
    denom = a * c - b * b
    scale = a * c
    if scale == 0.0 or abs(denom) < 1e-8 * scale:
        return None  # parallel (or degenerate direction)

    sc = (b * e - c * d) / denom
    tc = (a * e - b * d) / denom

    closest_on_1 = p1 + sc * d1
    closest_on_2 = p2 + tc * d2
    midpoint = (closest_on_1 + closest_on_2) / 2.0
    dist = float(np.linalg.norm(closest_on_1 - closest_on_2))

    return midpoint, dist
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def _within_segment_extent(line: Line3D, pt: np.ndarray, margin: float) -> bool:
    """Return True if ``pt`` projects onto ``line`` within its segment plus ``margin``."""
    s = float(np.dot(pt - line.point, line.direction))
    s_a = float(np.dot(line.p1 - line.point, line.direction))
    s_b = float(np.dot(line.p2 - line.point, line.direction))
    # p1 -> p2 may run against ``direction``; order the bounds explicitly
    # (same convention as snap_vertices_to_lines).
    s_min, s_max = (s_a, s_b) if s_a <= s_b else (s_b, s_a)
    return s_min - margin <= s <= s_max + margin


def intersect_lines_to_vertices(
    lines: list[Line3D],
    max_dist: float = 0.5,
    parallel_cos: float = 0.95,
    segment_margin: float = 0.5,
) -> np.ndarray:
    """Generate vertex candidates from 3D line intersections.

    For each pair of lines whose directions satisfy
    ``|cos(angle)| < parallel_cos``:
      - compute the closest approach point of the two infinite lines;
      - accept if the distance between the lines at that point is ≤ max_dist;
      - accept only if the closest point is within ``segment_margin`` of
        both line segments (not too far outside the actual edge extent).

    Returns:
        ``(K, 3)`` float64 array of accepted midpoints; ``(0, 3)`` when
        there are fewer than two lines or no pair qualifies.
    """
    if len(lines) < 2:
        return np.empty((0, 3), dtype=np.float64)

    vertices: list[np.ndarray] = []
    n_lines = len(lines)
    for i in range(n_lines - 1):
        line_i = lines[i]
        for j in range(i + 1, n_lines):
            line_j = lines[j]
            cos = abs(float(np.dot(line_i.direction, line_j.direction)))
            if cos >= parallel_cos:
                continue

            result = closest_point_on_two_lines(
                line_i.point, line_i.direction,
                line_j.point, line_j.direction,
            )
            if result is None:
                continue
            midpoint, dist = result
            if dist > max_dist:
                continue

            # Check that the intersection is near both line segments
            if (_within_segment_extent(line_i, midpoint, segment_margin)
                    and _within_segment_extent(line_j, midpoint, segment_margin)):
                vertices.append(midpoint)

    if not vertices:
        return np.empty((0, 3), dtype=np.float64)
    return np.array(vertices, dtype=np.float64)
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
# ---------------------------------------------------------------------------
|
| 412 |
+
# Step 5: Integration helper
|
| 413 |
+
# ---------------------------------------------------------------------------
|
| 414 |
+
|
| 415 |
+
def snap_vertices_to_lines(
    vertices: np.ndarray,
    lines: list[Line3D],
    snap_radius: float = 0.4,
    min_line_inliers: int = 10,
    segment_margin: float = 0.3,
    require_agree: int = 1,
    max_snap_dist: float | None = None,
) -> tuple[np.ndarray, np.ndarray]:
    """Snap each vertex to the nearest trustworthy 3D line within
    ``snap_radius`` perpendicular distance.

    The snap is a perpendicular projection of the vertex onto the line. If
    the projection falls outside the segment ``[p1, p2]`` by more than
    ``segment_margin``, it is clamped to the point ``segment_margin`` beyond
    the nearer endpoint, so a vertex is never slid further off the ends of
    the real edge than that margin.

    A line is considered "trustworthy" if it has ≥ ``min_line_inliers``
    inliers.

    When ``require_agree`` ≥ 2 a vertex is only snapped if at least that many
    candidate projections lie within ``snap_radius`` of the best (smallest
    perpendicular distance) projection; it then moves to the mean of those
    agreeing projections.  Values below 1 behave like 1.

    ``max_snap_dist`` optionally caps how far a snap may move a vertex.  The
    perpendicular test is made against the *infinite* line, so without a cap
    a vertex lying on the extension of a distant segment is clamped onto
    that segment however far away it is.  ``None`` (the default) keeps that
    historical behaviour.

    Returns
    -------
    refined : (N, 3) float64 — refined vertex positions
    snapped : (N,) bool — which vertices were moved
    """
    verts = np.asarray(vertices, dtype=np.float64)
    refined = verts.copy()
    snapped = np.zeros(len(verts), dtype=bool)

    if len(verts) == 0 or not lines:
        return refined, snapped

    # Pre-filter trustworthy lines
    trusted = [ln for ln in lines if ln.n_inliers >= min_line_inliers]
    if not trusted:
        return refined, snapped

    # At least one candidate is always needed; this also keeps
    # require_agree <= 0 from indexing into an empty candidate list.
    needed = max(require_agree, 1)

    for i, v in enumerate(verts):
        # (perpendicular distance, snap target) for every line in reach
        candidates: list[tuple[float, np.ndarray]] = []
        for ln in trusted:
            s = float(np.dot(v - ln.point, ln.direction))
            projected = ln.point + s * ln.direction
            perp = float(np.linalg.norm(v - projected))
            if perp > snap_radius:
                continue
            # Clamp projection to the segment extent (plus margin)
            s_min = float(np.dot(ln.p1 - ln.point, ln.direction))
            s_max = float(np.dot(ln.p2 - ln.point, ln.direction))
            if s_min > s_max:
                s_min, s_max = s_max, s_min
            if s < s_min - segment_margin:
                projected = ln.point + (s_min - segment_margin) * ln.direction
            elif s > s_max + segment_margin:
                projected = ln.point + (s_max + segment_margin) * ln.direction
            if (max_snap_dist is not None
                    and float(np.linalg.norm(projected - v)) > max_snap_dist):
                continue
            candidates.append((perp, projected))

        if len(candidates) < needed:
            continue

        candidates.sort(key=lambda c: c[0])
        best_proj = candidates[0][1]

        if require_agree >= 2:
            # Consensus: enough candidates must agree with the best one.
            agreeing = [
                proj for _, proj in candidates
                if np.linalg.norm(proj - best_proj) <= snap_radius
            ]
            if len(agreeing) < require_agree:
                continue
            # Snap to the mean of agreeing projections
            refined[i] = np.mean(agreeing, axis=0)
        else:
            # Single-line snap: pick the closest
            refined[i] = best_proj
        snapped[i] = True

    return refined, snapped
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
def line_based_vertices(
    entry,
    max_intersection_dist: float = 0.5,
    merge_radius: float = 0.4,
) -> np.ndarray:
    """Run the full line pipeline on ``entry`` and return vertex candidates.

    Steps: ``extract_3d_lines`` → ``merge_3d_lines`` →
    ``intersect_lines_to_vertices`` (with ``max_dist=max_intersection_dist``)
    → greedy radius-based de-duplication with ``merge_radius``.

    Returns a ``(K, 3)`` float64 array; ``(0, 3)`` whenever any stage yields
    nothing to work with.
    """
    raw_lines, _ = extract_3d_lines(entry)
    if not raw_lines:
        return np.empty((0, 3), dtype=np.float64)

    fused_lines = merge_3d_lines(raw_lines)
    if len(fused_lines) < 2:
        return np.empty((0, 3), dtype=np.float64)

    candidates = intersect_lines_to_vertices(
        fused_lines, max_dist=max_intersection_dist,
    )
    if len(candidates) == 0:
        return np.empty((0, 3), dtype=np.float64)

    # Greedy suppression: each not-yet-consumed candidate absorbs all of its
    # still-free neighbours within merge_radius and is replaced by their mean.
    from scipy.spatial import cKDTree
    neighbourhoods = cKDTree(candidates).query_ball_point(candidates, merge_radius)
    consumed = set()
    centres = []
    for seed, hood in enumerate(neighbourhoods):
        if seed in consumed:
            continue
        free = [k for k in hood if k not in consumed]
        if not free:
            continue
        centres.append(candidates[free].mean(axis=0))
        consumed.update(free)

    if centres:
        return np.array(centres, dtype=np.float64)
    return np.empty((0, 3), dtype=np.float64)
|
plane_wireframe.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Plane-intersection wireframe predictor (Tier 2).
|
| 2 |
+
|
| 3 |
+
Classical-geometry pipeline, orthogonal to the gestalt + depth path:
|
| 4 |
+
|
| 5 |
+
1. Crop the COLMAP sparse cloud to the top portion along the up-axis so that
|
| 6 |
+
only roof points remain (the dataset uses +Y as up).
|
| 7 |
+
2. Iteratively RANSAC-segment the cropped cloud into planes (open3d).
|
| 8 |
+
3. Keep only planes whose normal has a significant +Y component (roof
|
| 9 |
+
slopes) or is near-horizontal (flat roof / eaves).
|
| 10 |
+
4. For each pair of surviving planes, compute the infinite intersection
|
| 11 |
+
line via scikit-spatial and clip it to the overlap of the two inlier
|
| 12 |
+
sets (percentile endpoints with a perpendicular tolerance).
|
| 13 |
+
5. Vertices = segment endpoints ∪ triple-plane intersections, merged at
|
| 14 |
+
a small radius.
|
| 15 |
+
6. Edges = clipped segments remapped onto the merged vertex set.
|
| 16 |
+
|
| 17 |
+
Only numpy / open3d / scikit-spatial / pycolmap are used — no torch.
|
| 18 |
+
|
| 19 |
+
The main entry point is :func:`predict_wireframe_planes`, which returns
|
| 20 |
+
``(vertices, edges)`` in the format expected by ``hss()``.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
import numpy as np
|
| 26 |
+
import open3d as o3d
|
| 27 |
+
from skspatial.objects import Plane as SkPlane
|
| 28 |
+
|
| 29 |
+
from hoho2025.example_solutions import (
|
| 30 |
+
convert_entry_to_human_readable,
|
| 31 |
+
empty_solution,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
UP_AXIS = 1 # +Y is up in this dataset (verified across 15 validation samples)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ---------------------------------------------------------------------------
|
| 39 |
+
# Plane data structure
|
| 40 |
+
# ---------------------------------------------------------------------------
|
| 41 |
+
|
| 42 |
+
class RoofPlane:
    """One planar patch of the roof point cloud.

    ``eq`` holds the plane coefficients ``(a, b, c, d)`` of
    ``a*x + b*y + c*z + d = 0``, rescaled so that the normal ``(a, b, c)``
    has unit length whenever its original length exceeds ``1e-9``.
    ``normal`` and ``d`` expose the two parts of ``eq``; ``inliers`` are the
    points assigned to the plane, stored as float64.
    """

    __slots__ = ("eq", "normal", "d", "inliers")

    def __init__(self, eq: np.ndarray, inliers: np.ndarray):
        coeffs = np.asarray(eq, dtype=np.float64)
        length = np.linalg.norm(coeffs[:3])
        # A (near-)zero normal cannot be normalised; keep it as given.
        self.eq = coeffs / length if length > 1e-9 else coeffs
        self.normal = self.eq[:3]
        self.d = float(self.eq[3])
        self.inliers = np.asarray(inliers, dtype=np.float64)

    def signed_distance(self, pts: np.ndarray) -> np.ndarray:
        """Return ``pts · normal + d`` for every point in ``pts``."""
        return np.matmul(pts, self.normal) + self.d
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ---------------------------------------------------------------------------
|
| 67 |
+
# Roof crop
|
| 68 |
+
# ---------------------------------------------------------------------------
|
| 69 |
+
|
| 70 |
+
def crop_to_roof(
    xyz: np.ndarray,
    up_axis: int = UP_AXIS,
    top_frac: float = 0.70,
    pad: float = 1.0,
) -> np.ndarray:
    """Return the points lying in the upper part of the cloud along ``up_axis``.

    The cut-off is placed ``top_frac`` of the min–max extent below the
    highest point and then lowered by a further ``pad`` (same units as the
    coordinates).  Points at or above the cut-off are kept.

    The input is returned unchanged when it is empty or when its extent
    along ``up_axis`` is below ``1e-6``.
    """
    if len(xyz) == 0:
        return xyz

    heights = xyz[:, up_axis]
    lowest = float(heights.min())
    highest = float(heights.max())
    if highest - lowest < 1e-6:
        # Degenerate (flat) extent: nothing meaningful to cut.
        return xyz

    cutoff = lowest + (highest - lowest) * (1.0 - top_frac) - pad
    return xyz[heights >= cutoff]
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _is_roof_normal(normal: np.ndarray, up_axis: int = UP_AXIS,
                    min_up: float = 0.15) -> bool:
    """Tell whether ``normal`` can belong to a roof surface.

    Pitched and flat roofs both have a noticeable component along the up
    axis, whereas walls have ``|n_up| ≈ 0``.  The test is simply
    ``|normal[up_axis]| >= min_up``.
    """
    up_component = float(normal[up_axis])
    return abs(up_component) >= min_up
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
# ---------------------------------------------------------------------------
|
| 105 |
+
# T2.1 Iterative RANSAC plane segmentation (open3d backend)
|
| 106 |
+
# ---------------------------------------------------------------------------
|
| 107 |
+
|
| 108 |
+
def segment_roof_planes(
    xyz: np.ndarray,
    distance_threshold: float = 0.15,
    ransac_n: int = 3,
    num_iterations: int = 1000,
    min_inliers: int = 60,
    max_planes: int = 8,
    roof_crop_top_frac: float = 0.70,
    crop_pad: float = 1.0,
    keep_walls: bool = True,
) -> list[RoofPlane]:
    """Sequentially RANSAC-fit planes to the roof part of a point cloud.

    The cloud is first cropped with ``crop_to_roof`` (falling back to the
    full cloud when the crop leaves fewer than ``2 * min_inliers`` points).
    Planes are then extracted one at a time with open3d's ``segment_plane``;
    the inliers of each plane are removed before the next fit.  The loop
    stops after ``max_planes`` fits, when fewer than ``min_inliers`` points
    remain, when a fit has fewer than ``min_inliers`` inliers, or when
    open3d raises.

    With ``keep_walls=True`` (the default) every fitted plane is returned.
    With ``keep_walls=False`` planes rejected by ``_is_roof_normal`` (walls)
    are dropped, although their inliers are still removed from the cloud.

    Returns:
        The accepted planes in extraction order; an empty list when the
        cloud has fewer than ``min_inliers`` points.
    """
    # Coerce once up front: crop_to_roof indexes with ``xyz[:, axis]`` (which
    # fails on plain lists) and open3d expects float64 points.
    pts = np.asarray(xyz, dtype=np.float64)

    cropped = crop_to_roof(pts, top_frac=roof_crop_top_frac, pad=crop_pad)
    if len(cropped) < min_inliers * 2:
        # Fall back to the full cloud if the crop is too aggressive.
        cropped = pts
    if len(cropped) < min_inliers:
        return []

    # ``remaining`` is only ever re-bound, never mutated in place.
    remaining = cropped
    planes: list[RoofPlane] = []

    pcd = o3d.geometry.PointCloud()
    for _ in range(max_planes):
        if len(remaining) < min_inliers:
            break
        pcd.points = o3d.utility.Vector3dVector(remaining)
        try:
            eq, inlier_idx = pcd.segment_plane(
                distance_threshold=distance_threshold,
                ransac_n=ransac_n,
                num_iterations=num_iterations,
            )
        except Exception:
            break  # best effort: keep whatever was found so far
        inlier_idx = np.asarray(inlier_idx, dtype=np.int64)
        if len(inlier_idx) < min_inliers:
            break
        eq = np.asarray(eq, dtype=np.float64)
        normal = eq[:3] / (np.linalg.norm(eq[:3]) + 1e-12)
        if keep_walls or _is_roof_normal(normal):
            planes.append(RoofPlane(eq, remaining[inlier_idx]))
        # Always remove inliers from the remaining cloud even for rejected
        # planes, otherwise RANSAC keeps returning the same ones.
        keep_mask = np.ones(len(remaining), dtype=bool)
        keep_mask[inlier_idx] = False
        remaining = remaining[keep_mask]

    return planes
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
# ---------------------------------------------------------------------------
|
| 166 |
+
# T2.2 Plane-pair intersection line (scikit-spatial)
|
| 167 |
+
# ---------------------------------------------------------------------------
|
| 168 |
+
|
| 169 |
+
def intersect_two_planes(
    p1: RoofPlane, p2: RoofPlane, parallel_cos: float = 0.995,
) -> tuple[np.ndarray, np.ndarray] | None:
    """Intersect two planes into a line.

    Returns ``(point_on_line, unit_direction)``.  ``None`` is returned when
    the normals satisfy ``|n1 · n2| >= parallel_cos``, when scikit-spatial
    fails to intersect the planes, or when the resulting direction has a
    length below ``1e-9``.
    """
    alignment = abs(float(np.dot(p1.normal, p2.normal)))
    if alignment >= parallel_cos:
        return None

    # ``-d * n`` lies on the plane ``n · x + d = 0`` when ``|n| = 1``.
    sk_planes = [
        SkPlane(point=-plane.d * plane.normal, normal=plane.normal)
        for plane in (p1, p2)
    ]
    try:
        crossing = sk_planes[0].intersect_plane(sk_planes[1])
    except Exception:
        return None

    origin = np.asarray(crossing.point, dtype=np.float64)
    heading = np.asarray(crossing.direction, dtype=np.float64)
    magnitude = np.linalg.norm(heading)
    if magnitude < 1e-9:
        return None
    return origin, heading / magnitude
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# ---------------------------------------------------------------------------
|
| 191 |
+
# T2.3 Clip the line to a real segment
|
| 192 |
+
# ---------------------------------------------------------------------------
|
| 193 |
+
|
| 194 |
+
def clip_line_to_segment(
    point: np.ndarray,
    direction: np.ndarray,
    p1: RoofPlane,
    p2: RoofPlane,
    perp_tol: float = 0.4,
    trim_pct: float = 5.0,
    min_length: float = 0.3,
) -> tuple[np.ndarray, np.ndarray] | None:
    """Clip an infinite intersection line to the extent supported by inliers.

    For each plane, only inliers whose perpendicular distance to the line is
    at most ``perp_tol`` are considered, and a plane contributes only if it
    has at least five such inliers.  The contributing projections are pooled
    and the segment spans their ``trim_pct`` / ``100 - trim_pct``
    percentiles along ``direction``.

    Returns the two endpoints, or ``None`` when no plane contributes or the
    segment is shorter than ``min_length``.
    """
    supports = []
    for plane in (p1, p2):
        offsets = plane.inliers - point
        along = offsets @ direction
        # Distance from the line = length of the offset with its
        # along-line component removed.
        off_line = np.linalg.norm(offsets - along[:, None] * direction, axis=1)
        close = along[off_line <= perp_tol]
        if len(close) >= 5:
            supports.append(close)

    # Every kept array has >= 5 entries, so a non-empty list is sufficient.
    if not supports:
        return None

    pooled = np.concatenate(supports)
    start, stop = np.percentile(pooled, [trim_pct, 100.0 - trim_pct])
    if stop - start < min_length:
        return None
    return point + start * direction, point + stop * direction
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# ---------------------------------------------------------------------------
|
| 234 |
+
# T2.4 Triple-plane corners + vertex dedup
|
| 235 |
+
# ---------------------------------------------------------------------------
|
| 236 |
+
|
| 237 |
+
def _triple_plane_corners(
|
| 238 |
+
planes: list[RoofPlane], max_dist_to_inlier: float = 1.0,
|
| 239 |
+
) -> list[np.ndarray]:
|
| 240 |
+
"""Solve the 3x3 linear system for every non-collinear triple.
|
| 241 |
+
|
| 242 |
+
A corner is kept only if every one of the three parent planes has at
|
| 243 |
+
least one inlier within ``max_dist_to_inlier`` of the computed point,
|
| 244 |
+
which removes ghost intersections far outside the roof.
|
| 245 |
+
"""
|
| 246 |
+
out: list[np.ndarray] = []
|
| 247 |
+
n = len(planes)
|
| 248 |
+
for i in range(n):
|
| 249 |
+
for j in range(i + 1, n):
|
| 250 |
+
for k in range(j + 1, n):
|
| 251 |
+
A = np.vstack([planes[i].normal, planes[j].normal, planes[k].normal])
|
| 252 |
+
if abs(float(np.linalg.det(A))) < 1e-3:
|
| 253 |
+
continue
|
| 254 |
+
b = -np.array([planes[i].d, planes[j].d, planes[k].d])
|
| 255 |
+
try:
|
| 256 |
+
X = np.linalg.solve(A, b)
|
| 257 |
+
except np.linalg.LinAlgError:
|
| 258 |
+
continue
|
| 259 |
+
ok = True
|
| 260 |
+
for p in (planes[i], planes[j], planes[k]):
|
| 261 |
+
if np.linalg.norm(p.inliers - X, axis=1).min() > max_dist_to_inlier:
|
| 262 |
+
ok = False
|
| 263 |
+
break
|
| 264 |
+
if ok:
|
| 265 |
+
out.append(X)
|
| 266 |
+
return out
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def _merge_points(points: np.ndarray, radius: float) -> tuple[np.ndarray, np.ndarray]:
|
| 270 |
+
"""Greedy dedup by nearest-cluster assignment."""
|
| 271 |
+
pts = np.asarray(points, dtype=np.float64)
|
| 272 |
+
if len(pts) == 0:
|
| 273 |
+
return np.empty((0, 3)), np.empty((0,), dtype=np.int64)
|
| 274 |
+
mapping = np.full(len(pts), -1, dtype=np.int64)
|
| 275 |
+
clusters: list[list[int]] = []
|
| 276 |
+
centroids: list[np.ndarray] = []
|
| 277 |
+
for i, p in enumerate(pts):
|
| 278 |
+
if not centroids:
|
| 279 |
+
clusters.append([i])
|
| 280 |
+
centroids.append(p.copy())
|
| 281 |
+
mapping[i] = 0
|
| 282 |
+
continue
|
| 283 |
+
c_arr = np.array(centroids)
|
| 284 |
+
d = np.linalg.norm(c_arr - p, axis=1)
|
| 285 |
+
j = int(np.argmin(d))
|
| 286 |
+
if d[j] <= radius:
|
| 287 |
+
clusters[j].append(i)
|
| 288 |
+
centroids[j] = pts[clusters[j]].mean(axis=0)
|
| 289 |
+
mapping[i] = j
|
| 290 |
+
else:
|
| 291 |
+
clusters.append([i])
|
| 292 |
+
centroids.append(p.copy())
|
| 293 |
+
mapping[i] = len(centroids) - 1
|
| 294 |
+
merged = np.array(centroids, dtype=np.float64)
|
| 295 |
+
return merged, mapping
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
# ---------------------------------------------------------------------------
|
| 299 |
+
# T2.7 Hybrid integration helpers: snap intersection lines to existing
|
| 300 |
+
# sklearn-derived vertices.
|
| 301 |
+
# ---------------------------------------------------------------------------
|
| 302 |
+
|
| 303 |
+
def edges_from_planes_and_vertices(
    vertices: np.ndarray,
    planes: list[RoofPlane],
    perp_tol: float = 0.6,
    min_length: float = 0.5,
    max_length: float = 10.0,
) -> list[tuple[int, int]]:
    """Propose edges between existing vertices along plane-pair intersections.

    For every pair of planes that ``intersect_two_planes`` can intersect:

      * collect the vertices whose perpendicular distance to the
        intersection line is at most ``perp_tol``;
      * connect the two vertices with the extreme projections along the
        line;
      * connect each pair of vertices that are adjacent along the line.

    An edge is only added when the 3D distance between its vertices lies in
    ``[min_length, max_length]``.  Edges are returned as ``(low, high)``
    index pairs without duplicates.
    """
    if len(vertices) < 2 or len(planes) < 2:
        return []

    V = np.asarray(vertices, dtype=np.float64)
    found: set[tuple[int, int]] = set()

    def _admit(u: int, v: int) -> bool:
        """Add edge (u, v) if its 3D length is acceptable; report success."""
        gap = float(np.linalg.norm(V[u] - V[v]))
        if gap < min_length or gap > max_length:
            return False
        found.add((u, v) if u < v else (v, u))
        return True

    n_planes = len(planes)
    for i in range(n_planes):
        for j in range(i + 1, n_planes):
            inter = intersect_two_planes(planes[i], planes[j])
            if inter is None:
                continue
            anchor, heading = inter

            offsets = V - anchor
            along = offsets @ heading
            off_line = np.linalg.norm(offsets - along[:, None] * heading, axis=1)
            close = np.flatnonzero(off_line <= perp_tol)
            if len(close) < 2:
                continue

            # Extreme pair along the line.
            along_close = along[close]
            first = int(close[np.argmin(along_close)])
            last = int(close[np.argmax(along_close)])
            # NOTE(review): when the extreme pair is rejected, the adjacent
            # pairs below are skipped for this line as well — confirm that
            # this is intended.
            if first == last or not _admit(first, last):
                continue

            # Chain of neighbours ordered by their position on the line.
            chain = close[np.argsort(along_close)]
            for x, y in zip(chain[:-1], chain[1:]):
                _admit(int(x), int(y))

    return list(found)
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def predict_plane_edges(entry, vertices: np.ndarray,
                        distance_threshold: float = 0.20,
                        min_inliers: int = 60,
                        max_planes: int = 10,
                        roof_crop_top_frac: float = 0.95,
                        perp_tol: float = 0.8,
                        ) -> list[tuple[int, int]]:
    """Return extra edges between ``vertices`` backed by plane geometry.

    The COLMAP sparse points of ``entry`` are segmented into planes with
    ``segment_roof_planes`` and the plane-pair intersection lines are matched
    against ``vertices`` by ``edges_from_planes_and_vertices``.

    An empty list is returned when the entry has no COLMAP reconstruction,
    when it has fewer than ``2 * min_inliers`` points, or when fewer than two
    planes are found.
    """
    decoded = convert_entry_to_human_readable(entry)
    reconstruction = decoded.get("colmap") or decoded.get("colmap_binary")
    if reconstruction is None:
        return []

    cloud = np.array(
        [pt.xyz for pt in reconstruction.points3D.values()], dtype=np.float64,
    )
    if len(cloud) < min_inliers * 2:
        return []

    roof_planes = segment_roof_planes(
        cloud,
        distance_threshold=distance_threshold,
        min_inliers=min_inliers,
        max_planes=max_planes,
        roof_crop_top_frac=roof_crop_top_frac,
    )
    if len(roof_planes) >= 2:
        return edges_from_planes_and_vertices(vertices, roof_planes, perp_tol=perp_tol)
    return []
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
# ---------------------------------------------------------------------------
|
| 399 |
+
# T2.6 Standalone predictor
|
| 400 |
+
# ---------------------------------------------------------------------------
|
| 401 |
+
|
| 402 |
+
def predict_wireframe_planes(
    entry,
    distance_threshold: float = 0.15,
    min_inliers: int = 60,
    max_planes: int = 8,
    perp_tol: float = 0.4,
    merge_radius: float = 0.35,
    roof_crop_top_frac: float = 0.55,
) -> tuple[np.ndarray, list[tuple[int, int]]]:
    """Build a wireframe from COLMAP sparse points via plane intersection.

    Steps, as implemented below:

    1. Segment the sparse point cloud into planes (``segment_roof_planes``).
    2. Intersect every plane pair and clip the resulting line to a segment
       (``intersect_two_planes`` + ``clip_line_to_segment``).
    3. Pool the segment endpoints with the output of
       ``_triple_plane_corners`` and merge nearby points (``_merge_points``
       with ``merge_radius``).
    4. Re-index the segments onto the merged points, dropping collapsed and
       duplicate edges.

    Returns
    -------
    ``(vertices, edges)``; ``empty_solution()`` whenever any stage yields
    nothing usable.
    """
    readable = convert_entry_to_human_readable(entry)
    reconstruction = readable.get("colmap") or readable.get("colmap_binary")
    if reconstruction is None:
        return empty_solution()

    cloud = np.array(
        [pt.xyz for pt in reconstruction.points3D.values()],
        dtype=np.float64,
    )
    if len(cloud) < min_inliers * 2:
        return empty_solution()

    roof_planes = segment_roof_planes(
        cloud,
        distance_threshold=distance_threshold,
        min_inliers=min_inliers,
        max_planes=max_planes,
        roof_crop_top_frac=roof_crop_top_frac,
    )
    plane_count = len(roof_planes)
    if plane_count < 2:
        return empty_solution()

    # Each surviving plane pair contributes two consecutive pool entries
    # (its segment endpoints) and one index pair referring to them.
    pool: list[np.ndarray] = []
    index_pairs: list[tuple[int, int]] = []
    for i in range(plane_count):
        for j in range(i + 1, plane_count):
            hit = intersect_two_planes(roof_planes[i], roof_planes[j])
            if hit is None:
                continue
            point, direction = hit
            clipped = clip_line_to_segment(
                point, direction, roof_planes[i], roof_planes[j],
                perp_tol=perp_tol,
            )
            if clipped is None:
                continue
            start, end = clipped
            first = len(pool)
            pool.append(start)
            pool.append(end)
            index_pairs.append((first, first + 1))

    if not index_pairs:
        return empty_solution()

    # Triple-plane corners join the pool after the endpoints, so the indices
    # stored in ``index_pairs`` remain valid.
    pool.extend(_triple_plane_corners(roof_planes))

    merged, mapping = _merge_points(
        np.asarray(pool, dtype=np.float64), radius=merge_radius
    )

    unique_edges: set[tuple[int, int]] = set()
    for first, second in index_pairs:
        a = int(mapping[first])
        b = int(mapping[second])
        if a != b:
            unique_edges.add((a, b) if a < b else (b, a))

    if len(merged) < 2 or not unique_edges:
        return empty_solution()

    return merged, [(int(a), int(b)) for a, b in unique_edges]
|
triangulation.py
ADDED
|
@@ -0,0 +1,618 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-view corner triangulation pipeline (T1.2 – T1.6).
|
| 2 |
+
|
| 3 |
+
Drop-in replacement for the depth-based ``project_vertices_to_3d`` step in
|
| 4 |
+
``sklearn_submission.py``. The depth map is only used as a sanity filter, never
|
| 5 |
+
as the source of 3D positions — the actual geometry comes from COLMAP cameras
|
| 6 |
+
via DLT triangulation.
|
| 7 |
+
|
| 8 |
+
Entry points:
|
| 9 |
+
detect_corners_per_view(entry) → dict[view_id → List[Corner]]
|
| 10 |
+
triangulate_wireframe(entry, corners_per_view) → Tracks + per-track obs
|
| 11 |
+
|
| 12 |
+
Everything is pure numpy + pycolmap + cv2 — no torch, no kornia.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import cv2
|
| 19 |
+
from dataclasses import dataclass, field
|
| 20 |
+
|
| 21 |
+
from hoho2025.example_solutions import (
|
| 22 |
+
convert_entry_to_human_readable,
|
| 23 |
+
filter_vertices_by_background,
|
| 24 |
+
point_to_segment_dist,
|
| 25 |
+
)
|
| 26 |
+
from hoho2025.color_mappings import gestalt_color_mapping
|
| 27 |
+
|
| 28 |
+
try:
|
| 29 |
+
from mvs_utils import (
|
| 30 |
+
collect_views, triangulate_dlt, mean_reprojection_error,
|
| 31 |
+
fundamental_matrix, epipolar_line, point_to_line_distance,
|
| 32 |
+
project_world_to_image,
|
| 33 |
+
)
|
| 34 |
+
except ImportError:
|
| 35 |
+
from submission.mvs_utils import (
|
| 36 |
+
collect_views, triangulate_dlt, mean_reprojection_error,
|
| 37 |
+
fundamental_matrix, epipolar_line, point_to_line_distance,
|
| 38 |
+
project_world_to_image,
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Gestalt vertex labels turned into corner candidates.  'post' is deliberately
# absent here (it is meant to be added later, in T1.7, when safe).
VERTEX_CLASSES = [
    'apex',
    'eave_end_point',
    'flashing_end_point',
]
# Gestalt edge labels scanned when 2D edges are detected.
EDGE_CLASSES = [
    'eave',
    'ridge',
    'rake',
    'valley',
    'hip',
]
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass
class Corner:
    """One 2D corner observation detected in a single view."""

    # Identifier of the view (image id) the corner was found in.
    view_id: str
    # (2,) float32 pixel coordinates at COLMAP-native resolution.
    xy: np.ndarray
    # Gestalt class label of the blob the corner came from.
    cls: str
    # Area of the connected component, kept for tie-breaks.
    blob_area: int
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@dataclass
class Track:
    """A triangulated 3D wireframe vertex plus the 2D corners that support it."""

    # (3,) float64 position of the vertex.
    xyz: np.ndarray
    # Class label of the track.
    cls: str
    # Per-view observations as (view_id, pixel xy) pairs.
    observations: list[tuple[str, np.ndarray]] = field(default_factory=list)
    # Reprojection error of ``xyz``; infinite until one is assigned.
    reproj_err: float = float("inf")
    # view_id → index into corners_per_view[view_id].  Filled in by
    # build_tracks so that per-view edges can be lifted to 3D.
    corner_indices: dict[str, int] = field(default_factory=dict)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _refine_centroids_subpix(gest_seg_np, centroids, max_shift=4.0, win=5):
|
| 69 |
+
"""cv2.cornerSubPix refinement inside an apex blob. Identical to the
|
| 70 |
+
version in sklearn_submission.py — duplicated here to keep triangulation.py
|
| 71 |
+
importable on its own.
|
| 72 |
+
"""
|
| 73 |
+
if len(centroids) == 0:
|
| 74 |
+
return centroids
|
| 75 |
+
gray = cv2.cvtColor(gest_seg_np, cv2.COLOR_RGB2GRAY)
|
| 76 |
+
gray = cv2.GaussianBlur(gray, (3, 3), 0)
|
| 77 |
+
pts = np.asarray(centroids, dtype=np.float32).reshape(-1, 1, 2).copy()
|
| 78 |
+
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.01)
|
| 79 |
+
try:
|
| 80 |
+
refined = cv2.cornerSubPix(gray, pts, (win, win), (-1, -1), criteria)
|
| 81 |
+
except cv2.error:
|
| 82 |
+
return centroids
|
| 83 |
+
refined = refined.reshape(-1, 2)
|
| 84 |
+
orig = np.asarray(centroids, dtype=np.float32)
|
| 85 |
+
shifts = np.linalg.norm(refined - orig, axis=1)
|
| 86 |
+
mask = shifts <= max_shift
|
| 87 |
+
out = orig.copy()
|
| 88 |
+
out[mask] = refined[mask]
|
| 89 |
+
return out
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _detect_edges_2d(
|
| 93 |
+
gest_np: np.ndarray,
|
| 94 |
+
corners: list[Corner],
|
| 95 |
+
edge_th: float = 15.0,
|
| 96 |
+
) -> list[tuple[int, int, str]]:
|
| 97 |
+
"""Detect 2D gestalt edges and connect them to existing corner indices.
|
| 98 |
+
|
| 99 |
+
Mirrors ``get_vertices_and_edges_improved`` from sklearn_submission but
|
| 100 |
+
keeps *all* edge classes and returns triples ``(ci, cj, edge_cls)`` so
|
| 101 |
+
we can aggregate edge-class votes downstream.
|
| 102 |
+
"""
|
| 103 |
+
if len(corners) < 2:
|
| 104 |
+
return []
|
| 105 |
+
apex_pts = np.array([c.xy for c in corners], dtype=np.float32)
|
| 106 |
+
connections: list[tuple[int, int, str]] = []
|
| 107 |
+
for edge_class in EDGE_CLASSES:
|
| 108 |
+
color = np.array(gestalt_color_mapping[edge_class])
|
| 109 |
+
mask_raw = cv2.inRange(gest_np, color - 0.5, color + 0.5)
|
| 110 |
+
mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))
|
| 111 |
+
if mask.sum() == 0:
|
| 112 |
+
continue
|
| 113 |
+
_, labels, _, _ = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
|
| 114 |
+
for lbl in range(1, labels.max() + 1):
|
| 115 |
+
ys, xs = np.where(labels == lbl)
|
| 116 |
+
if len(xs) < 2:
|
| 117 |
+
continue
|
| 118 |
+
pts = np.column_stack([xs, ys]).astype(np.float32)
|
| 119 |
+
line_params = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01)
|
| 120 |
+
vx, vy, x0, y0 = line_params.ravel()
|
| 121 |
+
proj = (xs - x0) * vx + (ys - y0) * vy
|
| 122 |
+
p1 = np.array([x0 + proj.min() * vx, y0 + proj.min() * vy])
|
| 123 |
+
p2 = np.array([x0 + proj.max() * vx, y0 + proj.max() * vy])
|
| 124 |
+
dists = np.array(
|
| 125 |
+
[point_to_segment_dist(apex_pts[i], p1, p2) for i in range(len(apex_pts))]
|
| 126 |
+
)
|
| 127 |
+
near = np.where(dists <= edge_th)[0]
|
| 128 |
+
if len(near) < 2:
|
| 129 |
+
continue
|
| 130 |
+
near_pts = apex_pts[near]
|
| 131 |
+
a = int(near[np.argmin(np.linalg.norm(near_pts - p1, axis=1))])
|
| 132 |
+
b = int(near[np.argmin(np.linalg.norm(near_pts - p2, axis=1))])
|
| 133 |
+
if a != b:
|
| 134 |
+
lo, hi = (a, b) if a < b else (b, a)
|
| 135 |
+
connections.append((lo, hi, edge_class))
|
| 136 |
+
return connections
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def detect_corners_per_view(
    entry,
    vertex_classes: list[str] | None = None,
    filter_background: bool = True,
    return_edges: bool = False,
):
    """Detect gestalt corners in every view and refine them to sub-pixel.

    Parameters
    ----------
    entry : raw dataset entry (decoded via ``convert_entry_to_human_readable``).
    vertex_classes : gestalt labels to extract; ``None`` means
        ``VERTEX_CLASSES``.
    filter_background : when true, corners are passed through
        ``filter_vertices_by_background`` with the resized ADE segmentation.
    return_edges : when true, also run ``_detect_edges_2d`` per view.

    Returns
    -------
    corners_per_view : dict[image_id → list[Corner]]
    good_entry : the ``convert_entry_to_human_readable`` output, returned so
        the caller can reuse it.
    edges_per_view : dict[image_id → list[(ci, cj, edge_cls)]], only when
        ``return_edges`` is true.
    """
    classes = VERTEX_CLASSES if vertex_classes is None else vertex_classes

    good = convert_entry_to_human_readable(entry)
    corners_per_view: dict[str, list[Corner]] = {}
    edges_per_view: dict[str, list[tuple[int, int, str]]] = {}

    per_view = zip(good['gestalt'], good['depth'], good['image_ids'], good['ade'])
    for gest, depth, img_id, ade_seg in per_view:
        # The original note states that the COLMAP camera works at the depth
        # map's resolution (768×576 in practice), so gestalt and ADE are
        # resized to it to keep pixel coordinates compatible with the
        # projection matrices.
        H, W = np.array(depth).shape[:2]
        target_size = (W, H)
        gest_np = np.array(gest.resize(target_size)).astype(np.uint8)
        ade_np = np.array(ade_seg.resize(target_size)).astype(np.uint8)

        corners: list[Corner] = []
        for v_class in classes:
            rgb = np.array(gestalt_color_mapping[v_class])
            mask = cv2.inRange(gest_np, rgb - 0.5, rgb + 0.5)
            if mask.sum() == 0:
                continue
            _, _, stats, centroids = cv2.connectedComponentsWithStats(
                mask, 8, cv2.CV_32S
            )
            # Row 0 is skipped for both centroids and stats.
            blob_centroids = centroids[1:]
            areas = stats[1:, cv2.CC_STAT_AREA]
            if len(blob_centroids) == 0:
                continue
            refined = _refine_centroids_subpix(gest_np, blob_centroids)
            corners.extend(
                Corner(
                    view_id=img_id,
                    xy=np.asarray(xy, dtype=np.float32),
                    cls=v_class,
                    blob_area=int(area),
                )
                for xy, area in zip(refined, areas)
            )

        if filter_background and corners:
            # The filter works on plain dicts; survivors are matched back to
            # Corner objects through their (x, y, class) signature.
            as_dicts = [{"xy": c.xy, "type": c.cls} for c in corners]
            as_dicts, _ = filter_vertices_by_background(as_dicts, [], ade_np)
            survivors = {
                (float(v['xy'][0]), float(v['xy'][1]), v['type'])
                for v in as_dicts
            }
            corners = [
                c for c in corners
                if (float(c.xy[0]), float(c.xy[1]), c.cls) in survivors
            ]

        corners_per_view[img_id] = corners
        if return_edges:
            edges_per_view[img_id] = _detect_edges_2d(gest_np, corners)

    if return_edges:
        return corners_per_view, good, edges_per_view
    return corners_per_view, good
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def build_tracks(
    corners_per_view: dict[str, list[Corner]],
    views: dict[str, dict],
    class_strict: bool = True,
    epipolar_px: float = 6.0,
    reproj_px: float = 4.0,
    min_views: int = 2,
) -> list[Track]:
    """Greedy multi-view matching and triangulation with epipolar gating.

    Strategy (described by the original author as classical, mirroring
    PC2WF / COLMAP incremental triangulation):

    1. Pool the corners of every view that also has an entry in ``views``;
       view ids are processed in sorted order.
    2. For an anchor corner in view A and every other view B, take the
       fundamental matrix of (A, B) and the anchor's epipolar line in B.
    3. Every still-unmatched corner in B (same class when ``class_strict``)
       whose distance to that line is at most ``epipolar_px`` is triangulated
       against the anchor via two-view DLT.
    4. The candidate 3D point is projected into every remaining view; if it
       lies in front of the camera, the closest unmatched corner strictly
       within ``reproj_px`` of the projection joins the observation list.
       The point is then re-triangulated from all observations.
    5. The candidate is kept only if it has at least ``min_views``
       observations, is finite, has positive depth in *every* observing view,
       and has a mean reprojection error of at most ``reproj_px``.
    6. Per anchor, the candidate with the most observations wins (ties go to
       the lower reprojection error); its corners are retired from the pool.

    Changes relative to the previous revision:

    * Step 5 now really enforces positive depth for all observing views.
      Previously only the extension views of step 4 were tested, so a point
      triangulated behind the anchor/partner camera could be accepted.
    * The fundamental matrix is cached per ordered view pair instead of being
      recomputed for every anchor corner.
    * The set of used corner keys is tracked in a local variable rather than
      via a temporary attribute attached to the ``Track`` instance.

    Parameters
    ----------
    corners_per_view : dict[view_id → list[Corner]]
    views : dict[view_id → view dict]; each view dict must provide the
        projection matrix under ``"P"`` and is passed to
        ``fundamental_matrix``.
    class_strict : require matched corners to share the anchor's class.
    epipolar_px : maximum point-to-epipolar-line distance (pixels).
    reproj_px : reprojection gate (pixels) used in steps 4 and 5.
    min_views : minimum number of observations for an accepted track.

    Returns
    -------
    list of Track, each with ``corner_indices`` mapping
    view_id → index into ``corners_per_view[view_id]``.
    """
    # Stable ordering: only views with both corners and a camera, sorted.
    view_ids = sorted(vid for vid in corners_per_view if vid in views)

    # (view_id, corner index) → Corner, for all corners not yet in a track.
    remaining: dict[tuple[str, int], Corner] = {
        (vid, idx): corner
        for vid in view_ids
        for idx, corner in enumerate(corners_per_view[vid])
    }

    # The fundamental matrix depends only on the ordered view pair.
    f_cache: dict[tuple[str, str], object] = {}

    def _fundamental(a_vid: str, b_vid: str):
        pair = (a_vid, b_vid)
        if pair not in f_cache:
            f_cache[pair] = fundamental_matrix(views[a_vid], views[b_vid])
        return f_cache[pair]

    def _in_front_of_all(point, vids) -> bool:
        """True when ``point`` has positive depth in every listed view."""
        row = np.asarray(point).reshape(1, 3)
        for vid in vids:
            _, depth = project_world_to_image(views[vid]["P"], row)
            if depth[0] <= 0:
                return False
        return True

    tracks: list[Track] = []

    for anchor_vid in view_ids:
        # Snapshot: the pool shrinks while tracks are accepted.  Only the
        # anchor itself is ever retired from the anchor's own view, so every
        # snapshot entry is still unmatched when its turn comes.
        anchors = [
            (key[1], corner)
            for key, corner in remaining.items()
            if key[0] == anchor_vid
        ]
        for r_idx, anchor in anchors:
            best_track: Track | None = None
            best_keys: set[tuple[str, int]] = set()

            for other_vid in view_ids:
                if other_vid == anchor_vid:
                    continue
                line = epipolar_line(_fundamental(anchor_vid, other_vid), anchor.xy)

                for (o_vid, o_idx), cand in remaining.items():
                    if o_vid != other_vid:
                        continue
                    if class_strict and cand.cls != anchor.cls:
                        continue
                    if point_to_line_distance(line, cand.xy) > epipolar_px:
                        continue

                    # Two-view DLT seed.
                    X = triangulate_dlt(
                        [views[anchor_vid]["P"], views[other_vid]["P"]],
                        [anchor.xy, cand.xy],
                    )
                    if not np.all(np.isfinite(X)):
                        continue

                    # Extend with every other view that also sees this point.
                    obs = [(anchor_vid, anchor.xy), (other_vid, cand.xy)]
                    used_keys = {(anchor_vid, r_idx), (other_vid, o_idx)}
                    for ext_vid in view_ids:
                        if ext_vid in (anchor_vid, other_vid):
                            continue
                        uv, z = project_world_to_image(
                            views[ext_vid]["P"], X.reshape(1, 3)
                        )
                        if z[0] <= 0:
                            continue
                        u_pred = uv[0]
                        best_match = None
                        best_dist = reproj_px
                        for (e_vid, e_idx), ec in remaining.items():
                            if e_vid != ext_vid:
                                continue
                            if class_strict and ec.cls != anchor.cls:
                                continue
                            d2 = float(np.linalg.norm(ec.xy - u_pred))
                            if d2 < best_dist:
                                best_dist = d2
                                best_match = (e_vid, e_idx, ec)
                        if best_match is not None:
                            obs.append((best_match[0], best_match[2].xy))
                            used_keys.add((best_match[0], best_match[1]))

                    if len(obs) < min_views:
                        continue

                    # Re-triangulate on the full observation set for stability.
                    Ps_full = [views[vid]["P"] for vid, _ in obs]
                    pts_full = [uv for _, uv in obs]
                    X_full = triangulate_dlt(Ps_full, pts_full)
                    if not np.all(np.isfinite(X_full)):
                        continue
                    # Cheirality: a point behind any observing camera cannot
                    # be a real vertex, however well it reprojects.
                    if not _in_front_of_all(X_full, [vid for vid, _ in obs]):
                        continue
                    err = mean_reprojection_error(X_full, Ps_full, pts_full)
                    if err > reproj_px:
                        continue

                    more_views = (
                        best_track is not None
                        and len(obs) > len(best_track.observations)
                    )
                    same_views_lower_err = (
                        best_track is not None
                        and len(obs) == len(best_track.observations)
                        and err < best_track.reproj_err
                    )
                    if best_track is None or more_views or same_views_lower_err:
                        best_track = Track(
                            xyz=X_full,
                            cls=anchor.cls,
                            observations=obs,
                            reproj_err=err,
                        )
                        best_keys = used_keys

            if best_track is None:
                continue

            best_track.corner_indices = {vid: int(idx) for vid, idx in best_keys}
            tracks.append(best_track)
            # Retire matched corners so they are not reused.
            for key in best_keys:
                remaining.pop(key, None)

    return tracks
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def get_high_confidence_tracks(
    entry,
    min_views: int = 3,
    max_reproj_px: float = 2.0,
    epipolar_px: float = 6.0,
    build_reproj_px: float = 4.0,
) -> list[Track]:
    """Triangulate ``entry`` and keep only tracks that pass a stricter gate.

    Triangulation runs through ``triangulate_wireframe`` with ``min_views=2``
    and ``reproj_px=build_reproj_px``; the result is then filtered to tracks
    with at least ``min_views`` observations and a reprojection error of at
    most ``max_reproj_px``.

    The defaults (3 views, 2.0 px) are tighter than those of
    ``predict_wireframe_tracks`` (2 views, 4.0 px).  Per the original author's
    note they are meant for using the tracks as **vertex sources** rather than
    just edge sources, a ≥3-view DLT triangulation with <2 px mean
    reprojection error being quoted at 5–10 cm 3D accuracy.
    """
    all_tracks, _views, _good = triangulate_wireframe(
        entry,
        epipolar_px=epipolar_px,
        reproj_px=build_reproj_px,
        min_views=2,
        want_edges=False,
    )

    kept = []
    for track in all_tracks:
        if len(track.observations) < min_views:
            continue
        if track.reproj_err <= max_reproj_px:
            kept.append(track)
    return kept
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
def predict_wireframe_tracks(
    entry,
    min_views: int = 2,
    min_votes: int = 1,
    epipolar_px: float = 6.0,
    reproj_px: float = 4.0,
    merge_radius: float = 0.3,
) -> tuple[np.ndarray, list[tuple[int, int]]]:
    """Standalone triangulation-based wireframe predictor.

    Tracks and track edges come from ``triangulate_wireframe``.  Tracks closer
    than ``merge_radius`` are fused (transitively) into one vertex placed at
    the mean of its members, and the edges are re-indexed onto the fused
    vertices.

    ``min_votes`` is applied to the vote total of each *fused* edge.  The
    previous revision filtered individual track edges before fusing, so the
    summed totals were computed but never consulted, and an edge supported by
    several near-duplicate track pairs could be dropped.  With the default
    ``min_votes=1`` the output is unchanged.

    Returns
    -------
    ``(vertices, edges)`` in the format the original note describes as the
    one used by ``predict_wireframe_sklearn``.  When there are no tracks or no
    surviving edges, a placeholder of two zero vertices joined by edge
    ``(0, 1)`` is returned.
    """
    def _placeholder():
        return np.zeros((2, 3), dtype=np.float64), [(0, 1)]

    tracks, _views, _good, t_edges = triangulate_wireframe(
        entry,
        epipolar_px=epipolar_px,
        reproj_px=reproj_px,
        min_views=min_views,
        want_edges=True,
    )
    if not tracks:
        return _placeholder()

    xyz = np.array([t.xyz for t in tracks], dtype=np.float64)
    n = len(xyz)

    # Union-find over tracks; O(N^2) pair test, which the original note deems
    # fine because N ≤ 200 in practice.
    parent = list(range(n))

    def find(x):
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path halving
            x = parent[x]
        return x

    diff = xyz[:, None, :] - xyz[None, :, :]
    dists = np.sqrt((diff ** 2).sum(-1))
    # Upper triangle only: each unordered pair once, in (i, j) row-major order.
    close_i, close_j = np.nonzero(np.triu(dists <= merge_radius, k=1))
    for i, j in zip(close_i.tolist(), close_j.tolist()):
        ri, rj = find(i), find(j)
        if ri != rj:
            parent[ri] = rj

    groups: dict[int, list[int]] = {}
    for i in range(n):
        groups.setdefault(find(i), []).append(i)

    old_to_new: dict[int, int] = {}
    centres = []
    for new_idx, members in enumerate(groups.values()):
        for member in members:
            old_to_new[member] = new_idx
        centres.append(xyz[members].mean(axis=0))
    new_xyz = np.array(centres, dtype=np.float64)

    # Re-index edges onto fused vertices and sum the votes per fused edge.
    fused_votes: dict[tuple[int, int], int] = {}
    for ti, tj, votes in t_edges:
        a = old_to_new[ti]
        b = old_to_new[tj]
        if a == b:
            continue
        key = (a, b) if a < b else (b, a)
        fused_votes[key] = fused_votes.get(key, 0) + votes

    edges = [key for key, total in fused_votes.items() if total >= min_votes]
    if not edges or len(new_xyz) < 2:
        return _placeholder()

    return new_xyz, [(int(a), int(b)) for a, b in edges]
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
def build_track_edges(
    tracks: list[Track],
    edges_per_view: dict[str, list[tuple[int, int, str]]],
    min_votes: int = 1,
    max_3d_len: float = 8.0,
) -> list[tuple[int, int, int]]:
    """Aggregate 3D edges from per-view 2D gestalt edges.

    A 2D edge between two corners of one view becomes a 3D edge candidate
    when both corners belong to (different) tracks.  Each view contributes at
    most one vote per track pair.  The previous revision added a vote for
    every listed 2D edge, so a view that reported the same corner pair twice
    (for example through two blobs or two edge classes) was counted as
    several views, contradicting the documented meaning of ``min_votes``.

    Parameters
    ----------
    tracks : list of Track
    edges_per_view : dict[view_id → list[(corner_i_idx, corner_j_idx, edge_cls)]]
    min_votes : minimum number of views that must agree on an edge.
    max_3d_len : drop edges whose 3D length exceeds this value.

    Returns
    -------
    list of ``(track_i, track_j, vote_count)`` with ``track_i < track_j``,
    ordered by first appearance.
    """
    # (view_id, corner_idx) → track_idx
    key_to_track: dict[tuple[str, int], int] = {}
    for t_idx, track in enumerate(tracks):
        for vid, cidx in track.corner_indices.items():
            key_to_track[(vid, cidx)] = t_idx

    votes: dict[tuple[int, int], int] = {}
    for vid, edges in edges_per_view.items():
        seen_in_view: set[tuple[int, int]] = set()
        for ci, cj, _ecls in edges:
            ti = key_to_track.get((vid, ci))
            tj = key_to_track.get((vid, cj))
            if ti is None or tj is None or ti == tj:
                continue
            key = (ti, tj) if ti < tj else (tj, ti)
            if key in seen_in_view:
                # This view has already voted for the pair.
                continue
            seen_in_view.add(key)
            votes[key] = votes.get(key, 0) + 1

    out: list[tuple[int, int, int]] = []
    for (ti, tj), count in votes.items():
        if count < min_votes:
            continue
        length = float(np.linalg.norm(tracks[ti].xyz - tracks[tj].xyz))
        if length > max_3d_len:
            continue
        out.append((ti, tj, count))
    return out
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
def triangulate_wireframe(
    entry,
    epipolar_px: float = 6.0,
    reproj_px: float = 4.0,
    min_views: int = 2,
    want_edges: bool = False,
):
    """High-level wrapper: detect corners, build views, triangulate tracks.

    When the entry carries no COLMAP reconstruction (neither ``'colmap'`` nor
    ``'colmap_binary'``), no tracks can be triangulated; the function then
    returns an empty track list and an empty view dict instead of passing
    ``None`` to ``collect_views``.

    Parameters
    ----------
    entry : raw dataset entry.
    epipolar_px, reproj_px, min_views : forwarded to :func:`build_tracks`.
    want_edges : also detect per-view 2D edges and lift them to track edges.

    Returns
    -------
    (tracks, views, good_entry)
        when ``want_edges=False`` (default, backwards compatible).
    (tracks, views, good_entry, track_edges)
        when ``want_edges=True``.  ``track_edges`` is the output of
        :func:`build_track_edges` — a list of ``(track_i, track_j, vote_count)``.
    """
    if want_edges:
        corners_per_view, good, edges_per_view = detect_corners_per_view(
            entry, return_edges=True
        )
    else:
        corners_per_view, good = detect_corners_per_view(entry)
        edges_per_view = {}

    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        # No cameras available: nothing to triangulate.
        views = {}
        tracks = []
    else:
        views = collect_views(colmap_rec, good['image_ids'])
        tracks = build_tracks(
            corners_per_view, views,
            epipolar_px=epipolar_px,
            reproj_px=reproj_px,
            min_views=min_views,
        )

    if not want_edges:
        return tracks, views, good
    track_edges = build_track_edges(tracks, edges_per_view or {})
    return tracks, views, good, track_edges
|
| 537 |
+
|
| 538 |
+
|
| 539 |
+
# ---------------------------------------------------------------------------
|
| 540 |
+
# T1.6: integration helper — refine an existing depth-based 3D vertex set
|
| 541 |
+
# by snapping each vertex to its closest triangulated track.
|
| 542 |
+
# ---------------------------------------------------------------------------
|
| 543 |
+
|
| 544 |
+
def refine_vertices_with_tracks(
    merged_v: np.ndarray,
    tracks: list[Track],
    snap_radius: float = 1.0,
    min_views_for_snap: int = 2,
    max_reproj_err_px: float = float("inf"),
) -> tuple[np.ndarray, np.ndarray]:
    """Snap vertices onto their nearest triangulated track.

    Each vertex of ``merged_v`` is compared against the eligible tracks by 3D
    distance; if the nearest one lies within ``snap_radius`` the vertex takes
    that track's position.  Only positions change — the number and order of
    vertices is preserved, so an edge list indexing them stays valid.

    A track is eligible when it has at least ``min_views_for_snap``
    observations and a reprojection error of at most ``max_reproj_err_px``
    (the original note motivates the view count with 2-view DLT being noisy
    on short baselines).

    Returns
    -------
    refined_v : (N, 3) float64 — refined vertex positions (a copy; the input
        is not modified)
    snap_mask : (N,) bool — True where a snap happened
    """
    refined = np.asarray(merged_v, dtype=np.float64).copy()
    snapped = np.zeros(len(refined), dtype=bool)

    eligible = []
    for track in tracks:
        if len(track.observations) < min_views_for_snap:
            continue
        if track.reproj_err <= max_reproj_err_px:
            eligible.append(track.xyz)

    if len(refined) == 0 or not eligible:
        return refined, snapped

    targets = np.array(eligible, dtype=np.float64)
    # (N, M) vertex-to-track distances computed in one shot.
    dist = np.linalg.norm(targets[None, :, :] - refined[:, None, :], axis=2)
    nearest = dist.argmin(axis=1)
    within = dist[np.arange(len(refined)), nearest] <= snap_radius

    refined[within] = targets[nearest[within]]
    snapped[within] = True
    return refined, snapped
|
| 582 |
+
|
| 583 |
+
|
| 584 |
+
def augment_with_tracks(
    merged_v: np.ndarray,
    heur_edges: list,
    tracks: list[Track],
    dup_radius: float = 0.4,
    min_views_for_add: int = 3,
    max_reproj_err_px: float = 2.5,
) -> tuple[np.ndarray, list]:
    """Append high-confidence triangulated tracks as new vertices.

    In contrast to ``refine_vertices_with_tracks``, existing vertices are
    never moved.  A track is appended only if it is confident (at least
    ``min_views_for_add`` observations and reprojection error at most
    ``max_reproj_err_px``) and lies more than ``dup_radius`` away from every
    existing vertex.  New vertices are appended after the existing ones, so
    existing indices stay valid.

    ``heur_edges`` is passed through untouched; this function creates no
    edges for the added vertices.

    Returns
    -------
    ``(vertices, heur_edges)``.  When ``merged_v`` is empty the confident
    track positions alone are returned.
    """
    existing = np.asarray(merged_v, dtype=np.float64)

    def _is_confident(track) -> bool:
        return (
            len(track.observations) >= min_views_for_add
            and track.reproj_err <= max_reproj_err_px
        )

    positions = [track.xyz for track in tracks if _is_confident(track)]
    if not positions:
        return existing, heur_edges

    candidates = np.array(positions, dtype=np.float64)
    if len(existing) == 0:
        return candidates, heur_edges

    # Distance from every candidate to its closest existing vertex.
    delta = candidates[:, None, :] - existing[None, :, :]
    closest = np.sqrt(np.square(delta).sum(-1)).min(axis=1)
    fresh = candidates[closest > dup_radius]
    if len(fresh) == 0:
        return existing, heur_edges

    return np.vstack([existing, fresh]), heur_edges
|
winner_candidates.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""3D vertex candidate generation in the style of the S23DR 2025 winner.
|
| 2 |
+
|
| 3 |
+
The original baseline (and our v11) detects 2D corners on gestalt images
|
| 4 |
+
then unprojects them via depth — which introduces 30–100 cm of error from
|
| 5 |
+
the monocular depth ambiguity.
|
| 6 |
+
|
| 7 |
+
The winner generates candidates **directly in 3D** by selecting the COLMAP
|
| 8 |
+
points whose projection lands inside a gestalt corner-class blob:
|
| 9 |
+
|
| 10 |
+
1. Per view, per gestalt corner class (apex, eave_end_point, flashing_end_point):
|
| 11 |
+
a. Find connected components of the class mask.
|
| 12 |
+
b. For each blob, iteratively binary-dilate it until at least
|
| 13 |
+
``min_colmap_points`` projected COLMAP points fall inside.
|
| 14 |
+
c. Record those COLMAP point indices as a "cluster" tagged with class+view.
|
| 15 |
+
|
| 16 |
+
2. Globally:
|
| 17 |
+
a. Take the union of all clustered point indices.
|
| 18 |
+
b. For each cluster compute its 3D centroid, then redefine it as all
|
| 19 |
+
filtered points within ``cluster_radius`` of that centroid.
|
| 20 |
+
c. Merge any pair of clusters whose smaller member shares >50% of its
|
| 21 |
+
points with the other.
|
| 22 |
+
|
| 23 |
+
The output is a list of 3D vertex candidates with sub-decimetre accuracy
|
| 24 |
+
(limited only by COLMAP triangulation precision).
|
| 25 |
+
|
| 26 |
+
Entry point: ``generate_winner_candidates(entry)``.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
from __future__ import annotations
|
| 30 |
+
|
| 31 |
+
import numpy as np
|
| 32 |
+
import cv2
|
| 33 |
+
from dataclasses import dataclass
|
| 34 |
+
|
| 35 |
+
from hoho2025.example_solutions import convert_entry_to_human_readable
|
| 36 |
+
from hoho2025.color_mappings import gestalt_color_mapping
|
| 37 |
+
|
| 38 |
+
try:
|
| 39 |
+
from mvs_utils import collect_views, project_world_to_image
|
| 40 |
+
except ImportError:
|
| 41 |
+
from submission.mvs_utils import collect_views, project_world_to_image
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# Gestalt classes treated as vertex (corner) labels.  Each name is looked up in
# ``gestalt_color_mapping`` to build the per-class mask in ``_per_view_clusters``.
VERTEX_CLASSES = ['apex', 'eave_end_point', 'flashing_end_point']
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass
class WinnerCandidate:
    """One 3D vertex hypothesis emitted by the winner-2025 candidate stage.

    Attributes:
        centroid: world-space position of the candidate, shape ``(3,)``.
        point_indices: indices of the COLMAP 3D points owned by this
            candidate (rows of the point array the generator works on).
        classes: gestalt vertex classes that voted for this candidate.
        view_count: how many views the cluster came from.
    """

    centroid: np.ndarray
    point_indices: set[int]
    classes: set[str]
    view_count: int
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _project_colmap_to_view(colmap_xyz: np.ndarray, P: np.ndarray, W: int, H: int):
    """Project world points into one view and flag the usable ones.

    Returns:
        ``(uv_int, valid)`` where ``uv_int`` holds the projected pixel
        coordinates rounded to int64 (column 0 compared against ``W``,
        column 1 against ``H``) and ``valid`` is a boolean mask that is True
        only when the rounded pixel lies inside the ``W`` x ``H`` image AND
        the depth value returned by ``project_world_to_image`` is positive
        (i.e. the point is treated as being in front of the camera).
    """
    uv, depth = project_world_to_image(P, colmap_xyz)
    pixels = np.round(uv).astype(np.int64)
    cols, rows = pixels[:, 0], pixels[:, 1]
    inside_image = (cols >= 0) & (cols < W) & (rows >= 0) & (rows < H)
    return pixels, inside_image & (depth > 0)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _expand_blob_to_min_colmap(
    blob_mask: np.ndarray,
    uv_int: np.ndarray,
    valid_mask: np.ndarray,
    min_points: int = 5,
    max_iters: int = 20,
) -> tuple[np.ndarray, np.ndarray]:
    """Grow a 2D blob until it covers enough projected COLMAP points.

    The blob is dilated with a 3x3 kernel, one step at a time, until at least
    ``min_points`` of the valid projected points land on non-zero mask pixels
    or ``max_iters`` dilations have been applied.

    Args:
        blob_mask: 2D mask; any value > 0 counts as "inside".
        uv_int: integer pixel coordinates, column 0 = x, column 1 = y.
        valid_mask: boolean selector over the rows of ``uv_int``; only those
            rows are tested (they must index inside ``blob_mask``).
        min_points: number of hits at which growing stops.
        max_iters: upper bound on the number of dilation steps.

    Returns:
        ``(final_mask, point_indices_inside)``.  The indices refer to rows of
        ``uv_int``.  If the budget runs out the last mask and its (too small)
        hit set are returned, so callers must re-check the count.
    """
    _rows, _cols = blob_mask.shape  # the mask must be two-dimensional
    candidate_ids = np.nonzero(valid_mask)[0]
    candidate_uv = uv_int[valid_mask]
    xs, ys = candidate_uv[:, 0], candidate_uv[:, 1]

    def points_inside(mask):
        # Compare against zero rather than using the raw mask values as an
        # index: the masks are integer arrays, not booleans.
        return candidate_ids[mask[ys, xs] > 0]

    hits = points_inside(blob_mask)
    if len(hits) >= min_points:
        return blob_mask, hits

    structuring = np.ones((3, 3), np.uint8)
    grown = blob_mask.copy()
    for _ in range(max_iters):
        grown = cv2.dilate(grown, structuring, iterations=1)
        hits = points_inside(grown)
        if len(hits) >= min_points:
            break
    return grown, hits
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _per_view_clusters(
    gest_np: np.ndarray,
    colmap_xyz: np.ndarray,
    P: np.ndarray,
    W: int, H: int,
    view_id: str,
    min_colmap_points: int = 5,
    min_blob_area: int = 4,
) -> list[tuple[set[int], str, str]]:
    """Collect the vertex-class clusters visible in a single view.

    For every class in ``VERTEX_CLASSES`` the gestalt image is thresholded on
    that class colour, split into 8-connected blobs, and each blob of at least
    ``min_blob_area`` pixels is expanded until it covers ``min_colmap_points``
    projected COLMAP points.  Blobs that never reach that count are dropped.

    Returns:
        A list of ``(point_indices_set, gestalt_class, view_id)`` tuples; the
        list is empty when no COLMAP point projects validly into the view.
    """
    clusters: list[tuple[set[int], str, str]] = []
    uv_int, valid = _project_colmap_to_view(colmap_xyz, P, W, H)
    if not np.any(valid):
        return clusters

    for v_class in VERTEX_CLASSES:
        class_color = np.array(gestalt_color_mapping[v_class])
        class_mask = cv2.inRange(gest_np, class_color - 0.5, class_color + 0.5)
        if class_mask.sum() == 0:
            continue

        n_labels, labels, stats, _centroids = cv2.connectedComponentsWithStats(
            class_mask, 8, cv2.CV_32S
        )
        # Label 0 is skipped: OpenCV reserves it for the background.
        for label in range(1, n_labels):
            if int(stats[label, cv2.CC_STAT_AREA]) < min_blob_area:
                continue
            blob = (labels == label).astype(np.uint8)
            _, hits = _expand_blob_to_min_colmap(
                blob, uv_int, valid, min_points=min_colmap_points
            )
            if len(hits) < min_colmap_points:
                continue
            clusters.append((set(hits.tolist()), v_class, view_id))
    return clusters
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def _merge_clusters(
    raw_clusters: list[tuple[set[int], str, str]],
    colmap_xyz: np.ndarray,
    cluster_radius: float = 0.5,
    overlap_threshold: float = 0.5,
) -> list[WinnerCandidate]:
    """Turn per-view clusters into a deduplicated list of 3D candidates.

    1. Restrict the cloud to points that appear in at least one raw cluster.
    2. For each raw cluster, take the mean of its points as centroid and
       redefine the cluster as every restricted point within
       ``cluster_radius`` of that centroid.
    3. Greedily merge any pair whose intersection covers more than
       ``overlap_threshold`` of the smaller member, until no pair qualifies.
       A merged candidate gets the union of points and classes, and its
       centroid becomes the mean of the united points.

    ``view_count`` is the number of *distinct* view ids that contributed to a
    candidate, so several blobs/classes from the same view count once.

    Args:
        raw_clusters: ``(point_indices, gestalt_class, view_id)`` tuples; the
            indices address rows of ``colmap_xyz``.
        colmap_xyz: ``(N, 3)`` point array.
        cluster_radius: ball-query radius used in step 2.
        overlap_threshold: strict lower bound on the overlap ratio in step 3.

    Returns:
        The merged candidates; empty when there is nothing to cluster.
    """
    if not raw_clusters:
        return []

    used_idx = set()
    for pts, _, _ in raw_clusters:
        used_idx.update(pts)
    used_idx_arr = np.array(sorted(used_idx), dtype=np.int64)
    if len(used_idx_arr) == 0:
        return []

    # KD-tree over the restricted cloud; tree row f maps back to the global
    # point index used_idx_arr[f].
    from scipy.spatial import cKDTree
    tree = cKDTree(colmap_xyz[used_idx_arr])

    # Step 2: redefine each cluster by a ball query around its centroid.
    candidates: list[WinnerCandidate] = []
    # View ids behind each candidate, kept in lock-step with ``candidates``.
    candidate_views: list[set[str]] = []
    for pts, cls, vid in raw_clusters:
        if not pts:
            continue
        # Every member is part of the restricted cloud by construction, so the
        # centroid can be taken from the global array directly.
        pts_arr = np.fromiter(pts, dtype=np.int64, count=len(pts))
        centroid = colmap_xyz[pts_arr].mean(axis=0)
        nbr_f_idx = tree.query_ball_point(centroid, cluster_radius)
        if not nbr_f_idx:
            continue
        candidates.append(WinnerCandidate(
            centroid=centroid,
            point_indices={int(used_idx_arr[f]) for f in nbr_f_idx},
            classes={cls},
            view_count=1,
        ))
        candidate_views.append({vid})

    if not candidates:
        return []

    # Step 3: greedy pairwise merge, repeated until a full pass changes
    # nothing (a merge can create new overlaps with earlier candidates).
    changed = True
    while changed:
        changed = False
        i = 0
        while i < len(candidates):
            j = i + 1
            while j < len(candidates):
                a, b = candidates[i], candidates[j]
                inter = len(a.point_indices & b.point_indices)
                smaller = min(len(a.point_indices), len(b.point_indices))
                if smaller > 0 and inter / smaller > overlap_threshold:
                    # Fold b into a; j is not advanced because the list
                    # shifts left after the deletion.
                    merged_pts = a.point_indices | b.point_indices
                    a.centroid = colmap_xyz[np.array(sorted(merged_pts))].mean(axis=0)
                    a.point_indices = merged_pts
                    a.classes |= b.classes
                    candidate_views[i] |= candidate_views[j]
                    a.view_count = len(candidate_views[i])
                    del candidates[j]
                    del candidate_views[j]
                    changed = True
                else:
                    j += 1
            i += 1

    return candidates
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def generate_winner_candidates(
    entry,
    min_colmap_points: int = 5,
    cluster_radius: float = 0.5,
    overlap_threshold: float = 0.5,
    min_blob_area: int = 4,
) -> tuple[list[WinnerCandidate], dict]:
    """Entry point of the winner-2025 3D vertex candidate generator.

    Converts ``entry`` to its human-readable form, gathers per-view clusters
    of COLMAP points under the gestalt vertex-class blobs, and merges them
    globally.

    Args:
        entry: raw dataset entry accepted by
            ``convert_entry_to_human_readable``.
        min_colmap_points: points a blob must cover to become a cluster.
        cluster_radius: ball-query radius of the global merge.
        overlap_threshold: overlap ratio above which clusters are merged.
        min_blob_area: blobs with fewer pixels are ignored.

    Returns:
        ``(candidates, good_entry)``.  ``candidates`` is empty when the entry
        has no COLMAP reconstruction or the reconstruction has no 3D points.
    """
    good = convert_entry_to_human_readable(entry)

    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return [], good

    # Point indices used downstream are row positions in this array.
    colmap_xyz = np.array(
        [point.xyz for point in colmap_rec.points3D.values()], dtype=np.float64
    )
    if len(colmap_xyz) == 0:
        return [], good

    views = collect_views(colmap_rec, good['image_ids'])

    raw_clusters: list[tuple[set[int], str, str]] = []
    per_view = zip(good['gestalt'], good['depth'], good['image_ids'])
    for gest, depth, img_id in per_view:
        info = views.get(img_id)
        if info is None:
            # View without usable camera information: nothing to project.
            continue
        # The gestalt image is brought to the depth map's resolution.
        H, W = np.array(depth).shape[:2]
        # NOTE(review): resize() uses the image library's default resampling;
        # confirm it does not blend class colours at blob borders.
        gest_np = np.array(gest.resize((W, H))).astype(np.uint8)
        raw_clusters.extend(
            _per_view_clusters(
                gest_np, colmap_xyz, info['P'], W, H, img_id,
                min_colmap_points=min_colmap_points,
                min_blob_area=min_blob_area,
            )
        )

    merged = _merge_clusters(
        raw_clusters, colmap_xyz,
        cluster_radius=cluster_radius,
        overlap_threshold=overlap_threshold,
    )
    return merged, good
|
winner_inference.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Inference adapter for the winner-2025 pipeline.
|
| 2 |
+
|
| 3 |
+
Loads:
|
| 4 |
+
- DGCNN vertex classifier (3 heads: cls/offset/conf)
|
| 5 |
+
- DGCNN edge classifier (1 head)
|
| 6 |
+
|
| 7 |
+
And exposes:
|
| 8 |
+
- refine_winner_candidates(candidates, sample, model, device, threshold)
|
| 9 |
+
For each candidate, build the 4×4×4 m cubic patch with 11D point
|
| 10 |
+
features (winner spec), run the model, return only candidates that
|
| 11 |
+
pass the classification threshold and were shifted to the model's
|
| 12 |
+
offset.
|
| 13 |
+
- score_edges(vertices, sample, model, device, threshold)
|
| 14 |
+
For each pair of vertices within MAX_PAIR_DIST, build the 6D
|
| 15 |
+
cylindrical patch and ask the model whether the edge exists.
|
| 16 |
+
|
| 17 |
+
Both functions degrade gracefully if torch is missing or the checkpoint
|
| 18 |
+
is not found — they return None and the caller falls back to the
|
| 19 |
+
heuristic pipeline.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import os
|
| 25 |
+
import numpy as np
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
|
| 28 |
+
# Torch and the DGCNN classes are bound lazily by ``_ensure_torch`` — they are
# only required at training/inference time, not at submission package import
# time.  All three stay ``None`` until the first successful lookup.
_torch = _DGCNNVertexClassifier = _DGCNNEdgeClassifier = None
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _ensure_torch():
    """Lazily import torch and the DGCNN classifier classes.

    Populates the module globals ``_torch``, ``_DGCNNVertexClassifier`` and
    ``_DGCNNEdgeClassifier`` on first success and reuses them afterwards.

    The answer is consistent across calls: if torch is importable but the
    DGCNN classes are not, every call returns False (and retries the class
    lookup), instead of returning False once and True from then on.  The two
    classes are only published together, so a module exposing just one of
    them never leaves the globals half-initialised.

    Returns:
        True when torch and both classifier classes are available, else False.
    """
    global _torch, _DGCNNVertexClassifier, _DGCNNEdgeClassifier

    if _torch is None:
        try:
            import torch as _t
        except Exception:
            return False
        _torch = _t

    if _DGCNNVertexClassifier is not None and _DGCNNEdgeClassifier is not None:
        return True

    import importlib

    # Candidate locations for the DGCNN classes:
    #   1. full package (local development)
    #   2. submission-directory copy (HF container), flat or as a package
    for module_path in ("s23dr.models.dgcnn", "dgcnn", "submission.dgcnn"):
        try:
            module = importlib.import_module(module_path)
            vertex_cls = module.DGCNNVertexClassifier
            edge_cls = module.DGCNNEdgeClassifier
        except Exception:
            continue
        _DGCNNVertexClassifier, _DGCNNEdgeClassifier = vertex_cls, edge_cls
        return True
    return False
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _resolve_model_path(path: str) -> str | None:
    """Locate a model checkpoint on disk.

    The following locations are probed in order and the first one that exists
    is returned:

    1. ``path`` exactly as given,
    2. the file name of ``path`` next to this module,
    3. ``path`` joined onto this module's directory,
    4. the bare file name of ``path`` (relative to the working directory).

    Returns:
        The first existing location, or ``None`` when none of them exists.
    """
    module_dir = os.path.dirname(__file__)
    file_name = os.path.basename(path)
    search_order = (
        path,
        os.path.join(module_dir, file_name),
        os.path.join(module_dir, path),
        file_name,
    )
    return next((loc for loc in search_order if os.path.exists(loc)), None)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def load_vertex_model(path="checkpoints/vertex_model_dgcnn.pt", device="cuda"):
    """Load the DGCNN vertex classifier in eval mode, or return None.

    Args:
        path: checkpoint location; resolved through ``_resolve_model_path``.
        device: device the checkpoint is mapped to and the model moved to.

    Returns:
        The model (built with ``in_channels=11``) with its weights loaded and
        ``eval()`` applied.  ``None`` when torch/DGCNN are unavailable, the
        checkpoint cannot be found, or anything fails while loading — callers
        are expected to fall back to the heuristic pipeline.
    """
    if not _ensure_torch():
        return None

    resolved = _resolve_model_path(path)
    if resolved is None:
        return None

    try:
        # NOTE(review): weights_only=False allows arbitrary unpickling; only
        # load checkpoints from a trusted source.
        checkpoint = _torch.load(resolved, map_location=device, weights_only=False)
        # Accept either a bare state dict or a dict wrapping it under 'model'.
        if isinstance(checkpoint, dict) and 'model' in checkpoint:
            state_dict = checkpoint['model']
        else:
            state_dict = checkpoint
        net = _DGCNNVertexClassifier(in_channels=11).to(device)
        net.load_state_dict(state_dict)
        net.eval()
    except Exception:
        return None
    return net
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def load_edge_model(path="checkpoints/edge_model_dgcnn.pt", device="cuda"):
    """Load the DGCNN edge classifier in eval mode, or return None.

    Args:
        path: checkpoint location; resolved through ``_resolve_model_path``.
        device: device the checkpoint is mapped to and the model moved to.

    Returns:
        The model (built with ``in_channels=6``) with its weights loaded and
        ``eval()`` applied.  ``None`` when torch/DGCNN are unavailable, the
        checkpoint cannot be found, or anything fails while loading — callers
        are expected to fall back to the heuristic pipeline.
    """
    if not _ensure_torch():
        return None

    resolved = _resolve_model_path(path)
    if resolved is None:
        return None

    try:
        # NOTE(review): weights_only=False allows arbitrary unpickling; only
        # load checkpoints from a trusted source.
        checkpoint = _torch.load(resolved, map_location=device, weights_only=False)
        # Accept either a bare state dict or a dict wrapping it under 'model'.
        if isinstance(checkpoint, dict) and 'model' in checkpoint:
            state_dict = checkpoint['model']
        else:
            state_dict = checkpoint
        net = _DGCNNEdgeClassifier(in_channels=6).to(device)
        net.load_state_dict(state_dict)
        net.eval()
    except Exception:
        return None
    return net
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def refine_winner_candidates(
    candidates,
    sample,
    model,
    device="cuda",
    cls_threshold: float = 0.5,
    apply_offset: bool = True,
    batch_size: int = 64,
    max_points: int = 1024,
    patch_size: float = 4.0,
):
    """Filter and shift Stage 1 candidates with the DGCNN vertex model.

    Args:
        candidates: sequence of mappings, each with an ``'xyz'`` array and
            optionally ``'point_ids'`` (defaults to an empty set).
        sample: raw dataset entry; converted with
            ``convert_entry_to_human_readable``.
        model: loaded vertex classifier returning
            ``(cls_logits, pred_offset, pred_conf)`` for a batch of patches.
        device: torch device the batches are moved to.
        cls_threshold: a candidate is kept when ``sigmoid(cls_logit)`` is at
            least this value.
        apply_offset: add the predicted offset to the kept candidates.
        batch_size: number of patches per forward pass.
        max_points: forwarded to ``extract_vertex_patch``.
        patch_size: forwarded to ``extract_vertex_patch``.

    Returns:
        A list of ``(xyz, score)`` for accepted candidates (``xyz`` as
        float64), an empty list when no patch could be extracted, or ``None``
        when refinement is impossible (no model, no candidates, missing
        dependencies, no COLMAP reconstruction or no points).
    """
    if model is None or not candidates:
        return None
    if not _ensure_torch():
        return None

    try:
        from hoho2025.example_solutions import convert_entry_to_human_readable
        from s23dr.data_prep.patch_extraction import (
            _get_all_points_with_features,
            _project_and_get_gestalt_labels,
            extract_vertex_patch,
        )
    except Exception:
        return None

    good = convert_entry_to_human_readable(sample)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return None

    all_xyz, all_rgb, all_pids = _get_all_points_with_features(colmap_rec)
    if len(all_xyz) == 0:
        return None

    # (height, width) of every depth map, in view order.
    depth_shapes = []
    for depth in good['depth']:
        shape = np.array(depth).shape
        depth_shapes.append((shape[0], shape[1]))
    all_gestalt = _project_and_get_gestalt_labels(
        all_xyz, colmap_rec, good['gestalt'], good['image_ids'], depth_shapes,
    )

    # Keep each patch together with the index of the candidate it came from;
    # candidates without a patch are silently skipped.
    usable = []
    for idx, cand in enumerate(candidates):
        patch = extract_vertex_patch(
            cand['xyz'], all_xyz, all_rgb, all_gestalt,
            cand.get('point_ids', set()), all_pids,
            patch_size=patch_size, max_points=max_points,
        )
        if patch is not None:
            usable.append((idx, patch))
    if not usable:
        return []

    accepted = []
    with _torch.no_grad():
        for start in range(0, len(usable), batch_size):
            chunk = usable[start:start + batch_size]
            # Expected layout per the model input: (B, 11, N).
            batch = np.stack([patch for _, patch in chunk], axis=0)
            x = _torch.from_numpy(batch).to(device)
            cls_logits, pred_offset, pred_conf = model(x)
            cls_logits = cls_logits.squeeze(-1).cpu().numpy()
            pred_offset = pred_offset.cpu().numpy()
            # The confidence head is converted but not used for selection.
            pred_conf = pred_conf.squeeze(-1).cpu().numpy()
            probs = 1.0 / (1.0 + np.exp(-cls_logits))
            for k, (cand_pos, _) in enumerate(chunk):
                if probs[k] < cls_threshold:
                    continue
                xyz = candidates[cand_pos]['xyz'].copy()
                if apply_offset:
                    xyz = xyz + pred_offset[k]
                accepted.append((xyz.astype(np.float64), float(probs[k])))
    return accepted
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def score_edges(
    vertices: np.ndarray,
    sample,
    model,
    device: str = "cuda",
    threshold: float = 0.5,
    max_pair_dist: float = 8.0,
    batch_size: int = 64,
    max_points: int = 1024,
):
    """Score candidate edges between vertices with the DGCNN edge model.

    Every unordered pair ``(i, j)``, ``i < j``, whose endpoints are at most
    ``max_pair_dist`` apart gets an edge patch; pairs for which no patch can
    be extracted are skipped.  The model's logit is passed through a sigmoid
    and pairs scoring at least ``threshold`` are reported.

    Args:
        vertices: indexable collection of 3D vertex positions.
        sample: raw dataset entry; converted with
            ``convert_entry_to_human_readable``.
        model: loaded edge classifier returning one logit per patch.
        device: torch device the batches are moved to.
        threshold: minimum probability for a pair to be reported.
        max_pair_dist: pairs farther apart than this are never scored.
        batch_size: number of patches per forward pass.
        max_points: forwarded to ``extract_edge_patch``.

    Returns:
        A list of ``(i, j, prob)`` for pairs the model accepts, an empty list
        when no pair produced a patch, or ``None`` when scoring is impossible
        (no model, fewer than two vertices, missing dependencies, no COLMAP
        reconstruction or no points).
    """
    if model is None or vertices is None or len(vertices) < 2:
        return None
    if not _ensure_torch():
        return None

    try:
        from hoho2025.example_solutions import convert_entry_to_human_readable
        from s23dr.data_prep.patch_extraction import (
            _get_all_points_with_features,
            extract_edge_patch,
        )
    except Exception:
        return None

    good = convert_entry_to_human_readable(sample)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return None
    all_xyz, all_rgb, _ = _get_all_points_with_features(colmap_rec)
    if len(all_xyz) == 0:
        return None

    # Each entry pairs the vertex indices with the patch built for them.
    scored_inputs = []
    n = len(vertices)
    for i in range(n):
        for j in range(i + 1, n):
            separation = float(np.linalg.norm(vertices[i] - vertices[j]))
            if separation > max_pair_dist:
                continue
            patch = extract_edge_patch(
                vertices[i], vertices[j], all_xyz, all_rgb, max_points=max_points,
            )
            if patch is None:
                continue
            scored_inputs.append(((i, j), patch))
    if not scored_inputs:
        return []

    out = []
    with _torch.no_grad():
        for start in range(0, len(scored_inputs), batch_size):
            chunk = scored_inputs[start:start + batch_size]
            batch = np.stack([patch for _, patch in chunk], axis=0)
            x = _torch.from_numpy(batch).to(device)
            logits = model(x).squeeze(-1).cpu().numpy()
            probs = 1.0 / (1.0 + np.exp(-logits))
            for k, ((i, j), _) in enumerate(chunk):
                if probs[k] >= threshold:
                    out.append((int(i), int(j), float(probs[k])))
    return out
|