Commit ·
9506584
1
Parent(s): f699165
Bypass validation for track edges and fix SVD crashes
Browse files- mvs_utils.py +194 -0
- sklearn_submission.py +4 -10
mvs_utils.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-view geometry helpers around pycolmap.
|
| 2 |
+
|
| 3 |
+
This module extracts the intrinsics/extrinsics we need for triangulation,
|
| 4 |
+
in a shape-normalised form that doesn't depend on pycolmap's exact version.
|
| 5 |
+
|
| 6 |
+
Everything here is pure numpy + pycolmap (plus one pose helper imported from hoho2025) — no torch, no kornia — so it can
|
| 7 |
+
run inside the HuggingFace submission container without installs.
|
| 8 |
+
|
| 9 |
+
Key data structure: ``ViewInfo`` (a plain dict) with keys:
|
| 10 |
+
|
| 11 |
+
image_id str — the short sample-level id (matches entry['image_ids'])
|
| 12 |
+
colmap_img pycolmap.Image
|
| 13 |
+
camera_id int
|
| 14 |
+
K (3,3) float64 — calibration matrix
|
| 15 |
+
R (3,3) float64 — world→camera rotation
|
| 16 |
+
t (3,) float64 — world→camera translation
|
| 17 |
+
P (3,4) float64 — K @ [R | t] (projection matrix)
|
| 18 |
+
center (3,) float64 — camera centre in world coords, -R^T t
|
| 19 |
+
width, height int — image resolution at COLMAP scale
|
| 20 |
+
|
| 21 |
+
Downstream code uses ``P`` for DLT triangulation and ``K, R, t`` for epipolar
|
| 22 |
+
geometry. All functions here are side-effect-free.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import numpy as np
|
| 28 |
+
|
| 29 |
+
from hoho2025.example_solutions import _cam_matrix_from_image
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def get_view_info(colmap_rec, img_id_substring: str) -> dict | None:
    """Look up one COLMAP image by name fragment and package its geometry.

    The first image in ``colmap_rec.images`` whose ``name`` contains
    ``img_id_substring`` is used.

    Returns
    -------
    dict | None
        A ViewInfo dict with keys ``image_id``, ``colmap_img``, ``camera_id``,
        ``K``, ``R``, ``t``, ``P``, ``center``, ``width`` and ``height``;
        ``None`` when no image name matches.
    """
    matched = next(
        (
            candidate
            for _, candidate in colmap_rec.images.items()
            if img_id_substring in candidate.name
        ),
        None,
    )
    if matched is None:
        return None

    # Pose comes from the hoho2025 helper; intrinsics from the camera record.
    R, t = _cam_matrix_from_image(matched)
    camera = colmap_rec.cameras[matched.camera_id]
    K = np.asarray(camera.calibration_matrix(), dtype=np.float64)

    extrinsic = np.hstack([R, t.reshape(3, 1)])  # [R | t]

    view = {}
    view["image_id"] = img_id_substring
    view["colmap_img"] = matched
    view["camera_id"] = int(matched.camera_id)
    view["K"] = K
    view["R"] = R
    view["t"] = t
    view["P"] = K @ extrinsic
    view["center"] = -R.T @ t  # camera centre in world coordinates
    view["width"] = int(camera.width)
    view["height"] = int(camera.height)
    return view
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def collect_views(colmap_rec, image_ids) -> dict[str, dict]:
    """Resolve each requested image id to its ViewInfo.

    Ids for which ``get_view_info`` returns ``None`` are silently left out,
    so the result may hold fewer entries than ``image_ids`` — callers must
    cope with missing keys.
    """
    views: dict[str, dict] = {}
    for image_id in image_ids:
        view = get_view_info(colmap_rec, image_id)
        if view is None:
            continue
        views[image_id] = view
    return views
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def project_world_to_image(P: np.ndarray, points3d: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Project world points through a 3x4 projection matrix.

    Parameters
    ----------
    P : (3, 4) array-like — projection matrix
    points3d : (N, 3) array-like, a single (3,) point, or an empty sequence

    Returns
    -------
    uv : (N, 2) float64 — pixel coordinates
    z  : (N,)  float64 — camera-space depth (>0 means in front of the camera)

    An empty input yields arrays of shape ``(0, 2)`` and ``(0,)``.
    """
    P = np.asarray(P, dtype=np.float64)
    pts = np.asarray(points3d, dtype=np.float64)
    if pts.ndim == 1:
        # A lone point becomes one row; an empty sequence becomes zero rows
        # instead of failing in reshape(1, 3).
        pts = pts.reshape(-1, 3)
    homog = np.hstack([pts, np.ones((len(pts), 1))])
    proj = homog @ P.T  # (N, 3)
    z = proj[:, 2]
    # Clamp near-zero depths so the division below never hits 0.
    safe = np.where(np.abs(z) < 1e-12, 1e-12, z)
    uv = proj[:, :2] / safe[:, None]
    return uv, z
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def relative_pose(view_a: dict, view_b: dict) -> tuple[np.ndarray, np.ndarray]:
    """Compute the rigid transform taking view_a camera coordinates to view_b.

    For a point ``x_a`` expressed in view_a's camera frame::

        x_b = R_ab @ x_a + t_ab

    where ``R_ab = R_b @ R_a^T`` and ``t_ab = t_b - R_ab @ t_a``.

    Returns
    -------
    (R_ab, t_ab) built from the ``"R"`` and ``"t"`` entries of both views.
    """
    rot_a_to_b = view_b["R"] @ view_a["R"].T
    trans_a_to_b = view_b["t"] - rot_a_to_b @ view_a["t"]
    return rot_a_to_b, trans_a_to_b
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _skew(v: np.ndarray) -> np.ndarray:
|
| 115 |
+
x, y, z = v
|
| 116 |
+
return np.array([[0, -z, y],
|
| 117 |
+
[z, 0, -x],
|
| 118 |
+
[-y, x, 0]], dtype=np.float64)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def fundamental_matrix(view_a: dict, view_b: dict) -> np.ndarray:
    """Build the fundamental matrix ``F_ab`` relating view_a to view_b.

    Corresponding points, in homogeneous pixel coordinates, satisfy::

        x_b^T @ F_ab @ x_a = 0

    Construction: ``F = K_b^{-T} · [t_ab]× · R_ab · K_a^{-1}``, where
    ``(R_ab, t_ab)`` is the relative pose from view_a to view_b.
    """
    rot, trans = relative_pose(view_a, view_b)
    essential = _skew(trans) @ rot  # E = [t]× R
    inv_K_a = np.linalg.inv(view_a["K"])
    inv_K_b = np.linalg.inv(view_b["K"])
    return inv_K_b.T @ essential @ inv_K_a
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def epipolar_line(F: np.ndarray, point_in_a: np.ndarray) -> np.ndarray:
    """Map a pixel in view a to its epipolar line in view b.

    The pixel is lifted to homogeneous form ``(u, v, 1)`` and multiplied by
    ``F``. The result ``(a, b, c)`` describes the line ``a*u + b*v + c = 0``
    in view b.
    """
    u, v = point_in_a[0], point_in_a[1]
    homogeneous = np.array([u, v, 1.0], dtype=np.float64)
    line = F @ homogeneous
    return line
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def point_to_line_distance(line: np.ndarray, point_uv: np.ndarray) -> float:
    """Return the perpendicular distance from a 2D point to a line (a, b, c).

    The line is given in homogeneous form ``a*u + b*v + c = 0``. A tiny
    epsilon is added to the normal's length so a degenerate line
    (``a == b == 0``) does not divide by zero.
    """
    a, b, c = line
    u, v = point_uv[0], point_uv[1]
    residual = abs(a * u + b * v + c)
    normal_length = np.sqrt(a * a + b * b)
    return float(residual / (normal_length + 1e-12))
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def triangulate_dlt(Ps, pts2d) -> np.ndarray:
    """Linear triangulation (DLT) from ``>=2`` views.

    Parameters
    ----------
    Ps : sequence of (3,4) projection matrices
    pts2d : sequence of (x, y) pixel coordinates, one per view

    Returns
    -------
    The 3D point as a (3,) float64 ndarray in world coordinates, or a
    (3,) array of NaN when triangulation is not possible: fewer than two
    views, non-finite inputs, a malformed system, SVD failure, or a
    solution at infinity (homogeneous w ~ 0).
    """
    failed = np.full(3, np.nan, dtype=np.float64)

    rows = []
    for P, (x, y) in zip(Ps, pts2d):
        P = np.asarray(P, dtype=np.float64)
        # Each view contributes two linear constraints on the homogeneous X.
        rows.append(x * P[2] - P[0])
        rows.append(y * P[2] - P[1])

    # One view leaves a 2D null space, so the "solution" would be arbitrary.
    if len(rows) < 4:
        return failed

    A = np.asarray(rows, dtype=np.float64)
    # Reject malformed or non-finite systems up front so the SVD below can
    # only fail by not converging.
    if A.ndim != 2 or A.shape[1] != 4 or not np.all(np.isfinite(A)):
        return failed

    try:
        _, _, Vt = np.linalg.svd(A)
    except np.linalg.LinAlgError:
        return failed

    X = Vt[-1]  # right singular vector of the smallest singular value
    if abs(X[3]) < 1e-12:
        return failed
    return X[:3] / X[3]
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def mean_reprojection_error(X: np.ndarray, Ps, pts2d) -> float:
    """Average L2 pixel distance between projections of ``X`` and observations.

    ``X`` is projected through each matrix in ``Ps`` and compared with the
    matching entry of ``pts2d``.

    Returns ``inf`` (so the value can serve directly as a rejection cost) when
    ``X`` has a non-finite component, when ``X`` projects with depth <= 0 in
    any view, or when there are no views to average over.
    """
    if not np.all(np.isfinite(X)):
        return float("inf")

    residuals = []
    for P, observed in zip(Ps, pts2d):
        projected, depth = project_world_to_image(P, X.reshape(1, 3))
        if depth[0] <= 0:
            # Behind (or on) the camera plane: reject the whole track.
            return float("inf")
        target = np.asarray(observed, dtype=np.float64)
        residuals.append(float(np.linalg.norm(projected[0] - target)))

    return float(np.mean(residuals)) if residuals else float("inf")
|
sklearn_submission.py
CHANGED
|
@@ -81,8 +81,8 @@ LINE_EDGE_MATCH_RADIUS = 0.8
|
|
| 81 |
# the dropped edges were mostly ghosts, not legitimate ones. The +0.4
|
| 82 |
# edges/sample that bypass adds are net-negative on the metric.
|
| 83 |
# Code path kept behind the flag for completeness.
|
| 84 |
-
BYPASS_VALIDATE_FOR_TRACKS =
|
| 85 |
-
BYPASS_VALIDATE_FOR_LINES =
|
| 86 |
|
| 87 |
# v17: full winner Stage 1 + Stage 2 (DGCNN vertex refinement).
|
| 88 |
# Stage 1: generate_vertex_candidates — gestalt blob → COLMAP centroid.
|
|
@@ -141,14 +141,8 @@ WINNER_MAX_DIST_TO_CLOUD = 8.0
|
|
| 141 |
USE_DEPTH_EDGES = False
|
| 142 |
DEPTH_EDGE_MATCH_RADIUS = 0.8
|
| 143 |
|
| 144 |
-
# v14 post-hoc reranking
|
| 145 |
-
|
| 146 |
-
# C v2-RF 0.3409, D v2-RF+rerank 0.3407. Both line_support and
|
| 147 |
-
# track_support are highly correlated with the existing gestalt_support
|
| 148 |
-
# feature (all three are derived from the same gestalt edge masks),
|
| 149 |
-
# so they add no complementary information. Code path kept behind
|
| 150 |
-
# the flag for completeness.
|
| 151 |
-
USE_RERANK = False
|
| 152 |
RERANK_BOOST_LINE = 0.20
|
| 153 |
RERANK_BOOST_TRACK = 0.10
|
| 154 |
|
|
|
|
| 81 |
# the dropped edges were mostly ghosts, not legitimate ones. The +0.4
|
| 82 |
# edges/sample that bypass adds are net-negative on the metric.
|
| 83 |
# Code path kept behind the flag for completeness.
|
| 84 |
+
BYPASS_VALIDATE_FOR_TRACKS = True
|
| 85 |
+
BYPASS_VALIDATE_FOR_LINES = True
|
| 86 |
|
| 87 |
# v17: full winner Stage 1 + Stage 2 (DGCNN vertex refinement).
|
| 88 |
# Stage 1: generate_vertex_candidates — gestalt blob → COLMAP centroid.
|
|
|
|
| 141 |
USE_DEPTH_EDGES = False
|
| 142 |
DEPTH_EDGE_MATCH_RADIUS = 0.8
|
| 143 |
|
| 144 |
+
# v14 post-hoc reranking of sklearn probabilities using 3D line/track support.
|
| 145 |
+
USE_RERANK = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
RERANK_BOOST_LINE = 0.20
|
| 147 |
RERANK_BOOST_TRACK = 0.10
|
| 148 |
|