IhorIvanyshyn01 committed on
Commit
9506584
·
1 Parent(s): f699165

Bypass validation for track edges and fix SVD crashes

Browse files
Files changed (2) hide show
  1. mvs_utils.py +194 -0
  2. sklearn_submission.py +4 -10
mvs_utils.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-view geometry helpers around pycolmap.
2
+
3
+ This module extracts the intrinsics/extrinsics we need for triangulation,
4
+ in a shape-normalised form that doesn't depend on pycolmap's exact version.
5
+
6
+ Everything here is pure numpy + pycolmap — no torch, no kornia — so it can
7
+ run inside the HuggingFace submission container without installs.
8
+
9
+ Key data structure: ``ViewInfo`` (a plain dict) with keys:
10
+
11
+ image_id str — the short sample-level id (matches entry['image_ids'])
12
+ colmap_img pycolmap.Image
13
+ camera_id int
14
+ K (3,3) float64 — calibration matrix
15
+ R (3,3) float64 — world→camera rotation
16
+ t (3,) float64 — world→camera translation
17
+ P (3,4) float64 — K @ [R | t] (projection matrix)
18
+ center (3,) float64 — camera centre in world coords, -R^T t
19
+ width, height int — image resolution at COLMAP scale
20
+
21
+ Downstream code uses ``P`` for DLT triangulation and ``K, R, t`` for epipolar
22
+ geometry. All functions here are side-effect-free.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import numpy as np
28
+
29
+ from hoho2025.example_solutions import _cam_matrix_from_image
30
+
31
+
32
def get_view_info(colmap_rec, img_id_substring: str) -> dict | None:
    """Package the camera geometry of one COLMAP image into a ViewInfo dict.

    The first entry of ``colmap_rec.images`` whose ``name`` contains
    ``img_id_substring`` is selected (plain substring test, iteration order
    of the container).

    Returns
    -------
    dict with keys ``image_id``, ``colmap_img``, ``camera_id``, ``K``, ``R``,
    ``t``, ``P``, ``center``, ``width``, ``height``; or ``None`` when no image
    name contains the substring.
    """
    candidates = (
        image
        for _, image in colmap_rec.images.items()
        if img_id_substring in image.name
    )
    image = next(candidates, None)
    if image is None:
        return None

    # NOTE(review): _cam_matrix_from_image is an external helper; it is
    # presumed to return the world->camera (R, t) pair — confirm upstream.
    rotation, translation = _cam_matrix_from_image(image)
    camera = colmap_rec.cameras[image.camera_id]
    calib = np.asarray(camera.calibration_matrix(), dtype=np.float64)

    # P = K @ [R | t]
    extrinsic = np.hstack([rotation, translation.reshape(3, 1)])
    projection = calib @ extrinsic

    # Camera centre expressed as -R^T t.
    centre = -rotation.T @ translation

    view = {}
    view["image_id"] = img_id_substring
    view["colmap_img"] = image
    view["camera_id"] = int(image.camera_id)
    view["K"] = calib
    view["R"] = rotation
    view["t"] = translation
    view["P"] = projection
    view["center"] = centre
    view["width"] = int(camera.width)
    view["height"] = int(camera.height)
    return view
63
+
64
+
65
def collect_views(colmap_rec, image_ids) -> dict[str, dict]:
    """Map each requested image id to its ViewInfo, omitting unresolved ids.

    ``get_view_info`` is called once per id, in order. Ids for which it
    returns ``None`` are left out, so the result may hold fewer entries than
    ``image_ids`` and callers have to cope with absent keys.
    """
    resolved = ((iid, get_view_info(colmap_rec, iid)) for iid in image_ids)
    return {iid: info for iid, info in resolved if info is not None}
77
+
78
+
79
def project_world_to_image(P: np.ndarray, points3d: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Project world points through a 3x4 projection matrix.

    Parameters
    ----------
    P : (3, 4) array-like — projection matrix (nested lists are accepted)
    points3d : (N, 3) array-like, a single (3,) point, or an empty sequence

    Returns
    -------
    uv : (N, 2) float64 — pixel coordinates
    z : (N,) float64 — third homogeneous coordinate of the projection; for
        ``P = K @ [R | t]`` this is the camera-space depth (> 0 means in
        front of the camera)

    An empty input produces empty ``(0, 2)`` / ``(0,)`` outputs instead of
    raising from the reshape.
    """
    P = np.asarray(P, dtype=np.float64)
    pts = np.asarray(points3d, dtype=np.float64)
    if pts.size == 0:
        # e.g. ``[]`` arrives as shape (0,); normalise so hstack below works.
        pts = pts.reshape(0, 3)
    elif pts.ndim == 1:
        pts = pts.reshape(1, 3)
    homog = np.hstack([pts, np.ones((len(pts), 1))])
    proj = homog @ P.T  # (N, 3)
    z = proj[:, 2]
    # Guard the division: near-zero depths are replaced by a tiny epsilon so
    # the result stays finite. The returned ``z`` is left untouched.
    safe = np.where(np.abs(z) < 1e-12, 1e-12, z)
    uv = proj[:, :2] / safe[:, None]
    return uv, z
96
+
97
+
98
def relative_pose(view_a: dict, view_b: dict) -> tuple[np.ndarray, np.ndarray]:
    """Rigid transform taking camera-a coordinates to camera-b coordinates.

    With each view storing ``R`` and ``t`` such that ``x_cam = R @ X + t``,
    eliminating the world point ``X`` gives

        x_b = R_ab @ x_a + t_ab,   R_ab = R_b @ R_a^T,   t_ab = t_b - R_ab @ t_a

    Returns the pair ``(R_ab, t_ab)``.
    """
    rot_a = view_a["R"]
    trans_a = view_a["t"]
    rot_b = view_b["R"]
    trans_b = view_b["t"]

    rot_ab = rot_b @ rot_a.T
    trans_ab = trans_b - rot_ab @ trans_a
    return rot_ab, trans_ab
112
+
113
+
114
def _skew(v: np.ndarray) -> np.ndarray:
    """Return the 3x3 cross-product matrix ``[v]x`` so that ``[v]x @ w == v x w``."""
    x, y, z = v
    rows = (
        (0, -z, y),
        (z, 0, -x),
        (-y, x, 0),
    )
    return np.array(rows, dtype=np.float64)
119
+
120
+
121
def fundamental_matrix(view_a: dict, view_b: dict) -> np.ndarray:
    """Fundamental matrix relating pixels of view a to pixels of view b.

    The returned ``F_ab`` satisfies ``x_b^T @ F_ab @ x_a = 0`` for matching
    points given in homogeneous pixel coordinates. It is assembled as

        F = K_b^{-T} @ E @ K_a^{-1},   E = [t_ab]x @ R_ab

    where ``(R_ab, t_ab)`` is the relative pose from view a to view b.
    """
    rot_ab, trans_ab = relative_pose(view_a, view_b)
    inv_calib_a = np.linalg.inv(view_a["K"])
    inv_calib_b_t = np.linalg.inv(view_b["K"]).T
    essential = _skew(trans_ab) @ rot_ab
    return inv_calib_b_t @ essential @ inv_calib_a
134
+
135
+
136
def epipolar_line(F: np.ndarray, point_in_a: np.ndarray) -> np.ndarray:
    """Map a pixel of view a to its epipolar line in view b.

    The first two entries of ``point_in_a`` are lifted to homogeneous
    coordinates and multiplied by ``F``. The result ``(a, b, c)`` describes
    the line ``a*u + b*v + c = 0`` in view b.
    """
    u, v = point_in_a[0], point_in_a[1]
    homogeneous = np.array([u, v, 1.0], dtype=np.float64)
    return F @ homogeneous
143
+
144
+
145
def point_to_line_distance(line: np.ndarray, point_uv: np.ndarray) -> float:
    """Distance from a 2D point to the homogeneous line ``(a, b, c)``.

    Computes ``|a*u + b*v + c| / (sqrt(a^2 + b^2) + 1e-12)``. The small
    constant keeps the division defined for a degenerate line with
    ``a == b == 0``.
    """
    a, b, c = line
    u, v = point_uv[0], point_uv[1]
    residual = abs(a * u + b * v + c)
    normal_length = np.sqrt(a * a + b * b) + 1e-12
    return float(residual / normal_length)
151
+
152
+
153
def triangulate_dlt(Ps, pts2d) -> np.ndarray:
    """Linear triangulation (DLT) from ``>=2`` views.

    Parameters
    ----------
    Ps : sequence of (3,4) projection matrices
    pts2d : sequence of (x, y) pixel coordinates, one per view

    Returns
    -------
    The 3D point as a (3,) float64 ndarray in world coordinates, or a vector
    of NaNs when no point can be recovered:

    * fewer than two views were supplied (the linear system is then
      under-determined and its null vector is arbitrary),
    * any coefficient of the system is NaN/inf,
    * the SVD fails to converge,
    * the solution lies at infinity (homogeneous coordinate ~ 0).
    """
    failed = np.full(3, np.nan, dtype=np.float64)

    rows = []
    for P, (x, y) in zip(Ps, pts2d):
        P = np.asarray(P, dtype=np.float64)
        # Each view contributes two linear constraints on the homogeneous X.
        rows.append(x * P[2] - P[0])
        rows.append(y * P[2] - P[1])

    # Two rows per view: fewer than four rows means fewer than two views.
    if len(rows) < 4:
        return failed

    A = np.asarray(rows, dtype=np.float64)
    # Reject non-finite systems up front instead of relying on the SVD
    # backend to raise on them.
    if not np.all(np.isfinite(A)):
        return failed

    try:
        _, _, Vt = np.linalg.svd(A)
    except np.linalg.LinAlgError:
        return failed

    # The right-singular vector of the smallest singular value solves A X = 0.
    X = Vt[-1]
    if abs(X[3]) < 1e-12:
        return failed
    return X[:3] / X[3]
176
+
177
+
178
def mean_reprojection_error(X: np.ndarray, Ps, pts2d) -> float:
    """Average pixel distance between observations and the reprojection of ``X``.

    ``X`` is projected through every matrix in ``Ps`` and compared (L2 norm)
    with the matching entry of ``pts2d``. The function returns ``inf`` —
    so the value can be used directly as an acceptance cost — when

    * ``X`` contains a non-finite component,
    * ``X`` has depth ``<= 0`` in any view (evaluation stops at that view),
    * no view/observation pairs were supplied.
    """
    if not np.all(np.isfinite(X)):
        return float("inf")

    residuals = []
    for P, observed in zip(Ps, pts2d):
        projected, depth = project_world_to_image(P, X.reshape(1, 3))
        if depth[0] <= 0:
            return float("inf")
        offset = projected[0] - np.asarray(observed, dtype=np.float64)
        residuals.append(float(np.linalg.norm(offset)))

    return float(np.mean(residuals)) if residuals else float("inf")
sklearn_submission.py CHANGED
@@ -81,8 +81,8 @@ LINE_EDGE_MATCH_RADIUS = 0.8
81
  # the dropped edges were mostly ghosts, not legitimate ones. The +0.4
82
  # edges/sample that bypass adds are net-negative on the metric.
83
  # Code path kept behind the flag for completeness.
84
- BYPASS_VALIDATE_FOR_TRACKS = False
85
- BYPASS_VALIDATE_FOR_LINES = False
86
 
87
  # v17: full winner Stage 1 + Stage 2 (DGCNN vertex refinement).
88
  # Stage 1: generate_vertex_candidates — gestalt blob → COLMAP centroid.
@@ -141,14 +141,8 @@ WINNER_MAX_DIST_TO_CLOUD = 8.0
141
  USE_DEPTH_EDGES = False
142
  DEPTH_EDGE_MATCH_RADIUS = 0.8
143
 
144
- # v14 post-hoc reranking — DISABLED.
145
- # 100-sample ablation: A v1 baseline 0.3426, B v1+rerank 0.3426 (parity),
146
- # C v2-RF 0.3409, D v2-RF+rerank 0.3407. Both line_support and
147
- # track_support are highly correlated with the existing gestalt_support
148
- # feature (all three are derived from the same gestalt edge masks),
149
- # so they add no complementary information. Code path kept behind
150
- # the flag for completeness.
151
- USE_RERANK = False
152
  RERANK_BOOST_LINE = 0.20
153
  RERANK_BOOST_TRACK = 0.10
154
 
 
81
  # the dropped edges were mostly ghosts, not legitimate ones. The +0.4
82
  # edges/sample that bypass adds are net-negative on the metric.
83
  # Code path kept behind the flag for completeness.
84
+ BYPASS_VALIDATE_FOR_TRACKS = True
85
+ BYPASS_VALIDATE_FOR_LINES = True
86
 
87
  # v17: full winner Stage 1 + Stage 2 (DGCNN vertex refinement).
88
  # Stage 1: generate_vertex_candidates — gestalt blob → COLMAP centroid.
 
141
  USE_DEPTH_EDGES = False
142
  DEPTH_EDGE_MATCH_RADIUS = 0.8
143
 
144
+ # v14 post-hoc reranking of sklearn probabilities using 3D line/track support.
145
+ USE_RERANK = True
 
 
 
 
 
 
146
  RERANK_BOOST_LINE = 0.20
147
  RERANK_BOOST_TRACK = 0.10
148