subm / depth_edges.py
Neritz's picture
Add handcrafted_submission_2026 contents (model-repo form for S23DR2026 submission)
31f43c9 verified
"""Depth-discontinuity edge source.
Independent from the gestalt segmentation: extracts 2D line segments
along sharp depth jumps inside the house silhouette, lifts them to 3D
via the affine-fitted depth map, then merges across views.
Pipeline:
1. Affine-fit COLMAP-calibrated depth (same as the rest of the pipeline).
2. Inside the eroded ADE20k house mask, run Canny on normalised depth.
3. Connected components → fit 2D line per component.
4. Sample N depth values along each 2D segment, unproject to 3D.
5. RANSAC-fit a 3D line through the unprojected samples.
6. Merge lines across views (direction + midpoint proximity).
The merged 3D lines have endpoints (p1, p2) suitable for the same
'edges-only lift onto merged_v' integration that v11 performs for the gestalt
line cloud. Since the gestalt and depth-discontinuity sources are independent,
their lifts should be additive.
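Entry points:
extract_depth_3d_lines(entry) -> tuple[list[Line3D], dict]  (per-view lines, unmerged)
extract_and_merge_depth_lines(entry) -> list[Line3D]  (lines merged across views)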
"""
from __future__ import annotations
import numpy as np
import cv2
from hoho2025.example_solutions import (
convert_entry_to_human_readable,
get_sparse_depth, get_house_mask,
)
try:
from line_cloud import Line3D, _fit_3d_line_ransac, _unproject_pixel, merge_3d_lines
from mvs_utils import collect_views
from sklearn_submission import fit_affine_ransac
except ImportError:
from submission.line_cloud import Line3D, _fit_3d_line_ransac, _unproject_pixel, merge_3d_lines
from submission.mvs_utils import collect_views
from submission.sklearn_submission import fit_affine_ransac
def _detect_depth_segments_2d(
depth_fitted: np.ndarray,
house_mask: np.ndarray,
canny_lo: int = 30,
canny_hi: int = 80,
erode_px: int = 9,
min_area_px: int = 20,
min_seglen_px: int = 25,
):
"""Return list of (xs, ys, p1, p2) for each detected 2D line segment."""
if depth_fitted.size == 0:
return []
H, W = depth_fitted.shape[:2]
eroded = cv2.erode(
house_mask.astype(np.uint8),
np.ones((erode_px, erode_px), np.uint8),
).astype(bool)
if eroded.sum() < 100:
return []
# Normalise depth inside the eroded house mask to [0, 255]
d_in = depth_fitted.copy()
in_d = d_in[eroded]
if in_d.size == 0:
return []
d_min, d_max = float(in_d.min()), float(in_d.max())
if d_max - d_min < 0.5:
return []
d_norm = np.clip((d_in - d_min) / (d_max - d_min), 0.0, 1.0)
d_u8 = (d_norm * 255).astype(np.uint8)
d_u8 = cv2.GaussianBlur(d_u8, (5, 5), 0)
canny = cv2.Canny(d_u8, canny_lo, canny_hi)
canny[~eroded] = 0
if canny.sum() == 0:
return []
n_lbl, lbl, stats, _ = cv2.connectedComponentsWithStats(canny, 8)
out = []
for i in range(1, n_lbl):
area = int(stats[i, cv2.CC_STAT_AREA])
if area < min_area_px:
continue
ys, xs = np.where(lbl == i)
if len(xs) < 3:
continue
pts = np.column_stack([xs, ys]).astype(np.float32)
line = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01)
vx, vy, x0, y0 = line.ravel()
proj = (xs - x0) * vx + (ys - y0) * vy
t_min, t_max = float(proj.min()), float(proj.max())
seglen = t_max - t_min
if seglen < min_seglen_px:
continue
p1 = np.array([x0 + t_min * vx, y0 + t_min * vy])
p2 = np.array([x0 + t_max * vx, y0 + t_max * vy])
out.append((xs, ys, p1, p2, (vx, vy, x0, y0, t_min, t_max)))
return out
def extract_depth_3d_lines_single_view(
    depth_fitted: np.ndarray,
    house_mask: np.ndarray,
    view_info: dict,
    n_samples: int = 30,
) -> list[Line3D]:
    """Lift the depth-discontinuity segments of one view to 3D lines.

    Each 2D segment found by ``_detect_depth_segments_2d`` is sampled at
    ``n_samples`` evenly spaced positions. Every in-bounds sample is
    unprojected with the depth stored at its nearest pixel, and a 3D line is
    RANSAC-fitted through the resulting points.

    Args:
        depth_fitted: Depth map of the view.
        house_mask: Mask indexed like ``depth_fitted``.
        view_info: Dict providing ``'K'``, ``'R'``, ``'t'`` and ``'image_id'``.
        n_samples: Number of samples taken along each 2D segment.

    Returns:
        One ``Line3D`` per segment that yields at least 5 unprojected points,
        a successful RANSAC fit and an inlier extent of at least 0.4.
    """
    height, width = depth_fitted.shape[:2]
    intrinsics, rotation, translation = (view_info[key] for key in ('K', 'R', 't'))
    intrinsics_inv = np.linalg.inv(intrinsics)
    rotation_inv = rotation.T
    camera_center = -rotation_inv @ translation

    segments_2d = _detect_depth_segments_2d(depth_fitted, house_mask)
    image_id = view_info['image_id']

    lines_3d: list[Line3D] = []
    for _xs, _ys, _p1, _p2, line_params in segments_2d:
        dir_x, dir_y, origin_x, origin_y, t_lo, t_hi = line_params

        # Walk along the 2D segment and unproject every sample that lands
        # inside the image.
        samples = []
        for step in np.linspace(t_lo, t_hi, n_samples):
            u = origin_x + step * dir_x
            v = origin_y + step * dir_y
            col, row = int(round(u)), int(round(v))
            if not (0 <= col < width and 0 <= row < height):
                continue
            point = _unproject_pixel(
                u, v, depth_fitted[row, col],
                intrinsics_inv, rotation_inv, camera_center,
            )
            if point is not None:
                samples.append(point)

        if len(samples) < 5:
            continue

        fit = _fit_3d_line_ransac(
            np.array(samples, dtype=np.float64),
            n_iter=50, inlier_th=0.3, min_inliers=5,
        )
        if fit is None:
            continue
        centroid, direction, inliers = fit

        # Endpoints are the extreme inlier projections onto the fitted line.
        along = (inliers - centroid) @ direction
        start = centroid + float(along.min()) * direction
        end = centroid + float(along.max()) * direction
        extent = float(np.linalg.norm(end - start))
        if extent < 0.4:
            continue

        lines_3d.append(Line3D(
            point=centroid,
            direction=direction,
            p1=start, p2=end,
            length=extent,
            n_inliers=len(inliers),
            edge_class='depth_discontinuity',
            view_id=image_id,
        ))
    return lines_3d
def extract_depth_3d_lines(entry) -> tuple[list[Line3D], dict]:
    """Extract depth-discontinuity 3D lines from every view of an entry.

    For each view that has camera info, the depth map is affine-fitted to
    the sparse depth when that is available (best effort: on failure the
    unfitted depth is used), the house mask is resized to the depth
    resolution, and ``extract_depth_3d_lines_single_view`` is run. Lines are
    concatenated across views without merging.

    Args:
        entry: Raw dataset entry accepted by
            ``convert_entry_to_human_readable``.

    Returns:
        ``(all_lines, good_entry)`` where ``all_lines`` is the list of
        per-view ``Line3D`` objects and ``good_entry`` is the converted
        entry. ``all_lines`` is empty when the entry has no COLMAP
        reconstruction.
    """
    import logging

    log = logging.getLogger(__name__)

    good = convert_entry_to_human_readable(entry)
    colmap_rec = good.get('colmap') or good.get('colmap_binary')
    if colmap_rec is None:
        return [], good

    views = collect_views(colmap_rec, good['image_ids'])
    all_lines: list[Line3D] = []
    # 'gestalt' is unused here but stays in the zip so the iteration is
    # truncated exactly as before when the per-view lists differ in length.
    for _gest, depth, img_id, ade_seg in zip(
        good['gestalt'], good['depth'], good['image_ids'], good['ade']
    ):
        info = views.get(img_id)
        if info is None:
            continue

        # NOTE(review): the /1000 presumably converts millimetres to metres;
        # confirm against the dataset specification.
        depth_np = np.array(depth).astype(np.float64) / 1000.0
        H, W = depth_np.shape[:2]

        # Build the house mask once; it is needed both for the affine fit
        # and for edge detection. A view without a usable mask is skipped.
        try:
            house = get_house_mask(ade_seg)
            house_resized = cv2.resize(
                house.astype(np.uint8), (W, H), interpolation=cv2.INTER_NEAREST,
            ) > 0
        except Exception as exc:
            log.warning("depth_edges: no house mask for view %s: %s", img_id, exc)
            continue

        # Affine fit (same as main pipeline). Best effort: keep the unfitted
        # depth when the fit is unavailable or fails.
        try:
            depth_sparse, found, _, _ = get_sparse_depth(colmap_rec, img_id, depth_np)
            if found:
                _, _, depth_np = fit_affine_ransac(depth_np, depth_sparse, house)
        except Exception as exc:
            log.warning(
                "depth_edges: affine depth fit failed for view %s, "
                "using unfitted depth: %s", img_id, exc,
            )

        all_lines.extend(
            extract_depth_3d_lines_single_view(depth_np, house_resized, info)
        )
    return all_lines, good
def extract_and_merge_depth_lines(entry) -> list[Line3D]:
    """Extract per-view depth-discontinuity lines and merge them across views.

    Returns an empty list when no lines were extracted; otherwise the result
    of ``merge_3d_lines`` on all per-view lines.
    """
    per_view_lines = extract_depth_3d_lines(entry)[0]
    return merge_3d_lines(per_view_lines) if per_view_lines else []