bugfixes, move setup.py to pyproject.toml
Browse files- hoho2025/example_solutions.py +121 -182
- hoho2025/metric_helper.py +2 -2
- pyproject.toml +30 -0
- setup.py +0 -37
hoho2025/example_solutions.py
CHANGED
|
@@ -8,7 +8,6 @@ import cv2
|
|
| 8 |
import numpy as np
|
| 9 |
import pycolmap
|
| 10 |
from PIL import Image as PImage
|
| 11 |
-
from scipy.spatial.distance import cdist
|
| 12 |
|
| 13 |
from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
|
| 14 |
|
|
@@ -19,10 +18,10 @@ def empty_solution():
|
|
| 19 |
|
| 20 |
|
| 21 |
def read_colmap_rec(colmap_data):
|
|
|
|
| 22 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 23 |
with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
|
| 24 |
-
zf.extractall(tmpdir)
|
| 25 |
-
# Now parse with pycolmap
|
| 26 |
rec = pycolmap.Reconstruction(tmpdir)
|
| 27 |
return rec
|
| 28 |
|
|
@@ -58,14 +57,21 @@ def _colmap_project_point(img, cam, xyz):
|
|
| 58 |
return (u, v), p_cam[2]
|
| 59 |
|
| 60 |
def convert_entry_to_human_readable(entry):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
out = {}
|
| 62 |
for k, v in entry.items():
|
| 63 |
-
if 'colmap' in k and k!= 'pose_only_in_colmap':
|
| 64 |
out[k] = read_colmap_rec(v)
|
| 65 |
-
elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't'
|
| 66 |
out[k] = np.array(v)
|
| 67 |
else:
|
| 68 |
-
out[k]=v
|
| 69 |
out['__key__'] = entry['order_id']
|
| 70 |
return out
|
| 71 |
|
|
@@ -107,116 +113,79 @@ def point_to_segment_dist(pt, seg_p1, seg_p2):
|
|
| 107 |
|
| 108 |
|
| 109 |
def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
"""
|
| 111 |
-
Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
|
| 112 |
-
For each connected component, we do a line fit with cv2.fitLine, then measure
|
| 113 |
-
segment endpoints more robustly. We then associate apex points that are within
|
| 114 |
-
'edge_th' of the line segment. We record those apex–apex connections for edges
|
| 115 |
-
if at least 2 apexes lie near the same component line.
|
| 116 |
-
"""
|
| 117 |
-
#--------------------------------------------------------------------------------
|
| 118 |
-
# Step A: Collect apex and eave_end vertices
|
| 119 |
-
#--------------------------------------------------------------------------------
|
| 120 |
if not isinstance(gest_seg_np, np.ndarray):
|
| 121 |
gest_seg_np = np.array(gest_seg_np)
|
|
|
|
|
|
|
| 122 |
vertices = []
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
|
| 136 |
-
eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-0.5, eave_end_color+0.5)
|
| 137 |
-
if eave_end_mask.sum() > 0:
|
| 138 |
-
output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
|
| 139 |
-
(numLabels, labels, stats, centroids) = output
|
| 140 |
-
stats, centroids = stats[1:], centroids[1:]
|
| 141 |
-
for i in range(numLabels-1):
|
| 142 |
-
vert = {"xy": centroids[i], "type": "eave_end_point"}
|
| 143 |
-
vertices.append(vert)
|
| 144 |
-
|
| 145 |
-
# Consolidate apex points as array:
|
| 146 |
-
apex_pts = []
|
| 147 |
-
apex_idx_map = [] # keep track of index in 'vertices'
|
| 148 |
-
for idx, v in enumerate(vertices):
|
| 149 |
-
apex_pts.append(v['xy'])
|
| 150 |
-
apex_idx_map.append(idx)
|
| 151 |
-
apex_pts = np.array(apex_pts)
|
| 152 |
|
|
|
|
| 153 |
connections = []
|
| 154 |
edge_classes = ['eave', 'ridge', 'rake', 'valley']
|
| 155 |
for edge_class in edge_classes:
|
| 156 |
edge_color = np.array(gestalt_color_mapping[edge_class])
|
| 157 |
-
mask_raw = cv2.inRange(gest_seg_np, edge_color-0.5, edge_color+0.5)
|
| 158 |
-
#
|
| 159 |
-
|
| 160 |
-
mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, kernel)
|
| 161 |
if mask.sum() == 0:
|
| 162 |
continue
|
| 163 |
|
| 164 |
-
|
| 165 |
-
output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
|
| 166 |
-
(numLabels, labels, stats, centroids) = output
|
| 167 |
-
# skip the background
|
| 168 |
-
stats, centroids = stats[1:], centroids[1:]
|
| 169 |
-
label_indices = range(1, numLabels)
|
| 170 |
|
| 171 |
-
|
| 172 |
-
for lbl in label_indices:
|
| 173 |
ys, xs = np.where(labels == lbl)
|
| 174 |
if len(xs) < 2:
|
| 175 |
continue
|
| 176 |
-
|
|
|
|
| 177 |
pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
|
| 178 |
-
|
| 179 |
-
line_params = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
|
| 180 |
param=0, reps=0.01, aeps=0.01)
|
| 181 |
vx, vy, x0, y0 = line_params.ravel()
|
| 182 |
-
|
| 183 |
-
#
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
p2 = np.array([x0 + proj_max*vx, y0 + proj_max*vy])
|
| 190 |
-
|
| 191 |
-
#--------------------------------------------------------------------------------
|
| 192 |
-
# Step C: If apex points are within 'edge_th' of segment, they are connected
|
| 193 |
-
#--------------------------------------------------------------------------------
|
| 194 |
if len(apex_pts) < 2:
|
| 195 |
continue
|
| 196 |
|
| 197 |
-
#
|
| 198 |
-
dists = np.array([
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
])
|
| 202 |
-
|
| 203 |
-
# Indices of apex points that are near
|
| 204 |
-
near_mask = (dists <= edge_th)
|
| 205 |
-
near_indices = np.where(near_mask)[0]
|
| 206 |
if len(near_indices) < 2:
|
| 207 |
continue
|
| 208 |
|
| 209 |
-
# Connect each pair among these near apex points
|
| 210 |
for i in range(len(near_indices)):
|
| 211 |
-
for j in range(i+1, len(near_indices)):
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
vA = apex_idx_map[a_idx]
|
| 216 |
-
vB = apex_idx_map[b_idx]
|
| 217 |
-
# Store the connection using sorted indexing
|
| 218 |
-
conn = tuple(sorted((vA, vB)))
|
| 219 |
-
connections.append(conn)
|
| 220 |
|
| 221 |
return vertices, connections
|
| 222 |
|
|
@@ -254,49 +223,32 @@ def get_uv_depth(vertices: List[dict],
|
|
| 254 |
Depth value chosen for each vertex.
|
| 255 |
"""
|
| 256 |
|
| 257 |
-
# Collect each vertex's (x, y)
|
| 258 |
uv = np.array([vert['xy'] for vert in vertices], dtype=np.float32)
|
| 259 |
-
|
| 260 |
-
# Convert to integer pixel coordinates (round or floor)
|
| 261 |
uv_int = np.round(uv).astype(np.int32)
|
| 262 |
H, W = depth_fitted.shape[:2]
|
| 263 |
-
|
| 264 |
-
# Clip coordinates to stay within image bounds
|
| 265 |
uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
|
| 266 |
uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)
|
| 267 |
-
|
| 268 |
-
# Prepare output array of depths
|
| 269 |
vertex_depth = np.zeros(len(vertices), dtype=np.float32)
|
| 270 |
dense_count = 0
|
| 271 |
-
|
| 272 |
for i, (x_i, y_i) in enumerate(uv_int):
|
| 273 |
-
# Local region in [x_i - search_radius, x_i + search_radius]
|
| 274 |
x0 = max(0, x_i - search_radius)
|
| 275 |
x1 = min(W, x_i + search_radius + 1)
|
| 276 |
y0 = max(0, y_i - search_radius)
|
| 277 |
y1 = min(H, y_i + search_radius + 1)
|
| 278 |
-
|
| 279 |
-
# Crop out the local window in sparse_depth
|
| 280 |
region = sparse_depth[y0:y1, x0:x1]
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
valid_mask = (region > 0)
|
| 284 |
-
valid_y, valid_x = np.where(valid_mask)
|
| 285 |
-
|
| 286 |
if valid_y.size > 0:
|
| 287 |
-
#
|
|
|
|
| 288 |
global_x = x0 + valid_x
|
| 289 |
global_y = y0 + valid_y
|
| 290 |
-
|
| 291 |
-
# Compute squared distance to center (x_i, y_i)
|
| 292 |
dist_sq = (global_x - x_i)**2 + (global_y - y_i)**2
|
| 293 |
-
|
| 294 |
-
# Find the nearest valid pixel
|
| 295 |
min_idx = np.argmin(dist_sq)
|
| 296 |
-
|
| 297 |
-
vertex_depth[i] = nearest_depth
|
| 298 |
else:
|
| 299 |
-
# Fallback to the dense depth
|
| 300 |
vertex_depth[i] = depth_fitted[y_i, x_i]
|
| 301 |
dense_count += 1
|
| 302 |
return uv, vertex_depth
|
|
@@ -408,48 +360,49 @@ def create_3d_wireframe_single_image(vertices: List[dict],
|
|
| 408 |
|
| 409 |
|
| 410 |
def merge_vertices_3d(vert_edge_per_image, th=0.5):
|
| 411 |
-
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
all_3d_vertices = []
|
| 414 |
connections_3d = []
|
| 415 |
-
all_indexes = []
|
| 416 |
cur_start = 0
|
| 417 |
types = []
|
| 418 |
-
|
| 419 |
-
# Combine vertices and update connection indices across all images
|
| 420 |
for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
|
| 421 |
-
types += [int(v['type']=='apex') for v in vertices]
|
| 422 |
all_3d_vertices.append(vertices_3d)
|
| 423 |
-
connections_3d+=[(x+cur_start,y+cur_start) for (x,y) in connections]
|
| 424 |
-
cur_start+=len(vertices_3d)
|
| 425 |
all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
distmat =
|
| 429 |
-
types = np.array(types)
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
mask_to_merge = (distmat <= th) & (same_types==0)
|
| 434 |
new_vertices = []
|
| 435 |
new_connections = []
|
| 436 |
-
|
| 437 |
-
#
|
|
|
|
|
|
|
| 438 |
to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
|
| 439 |
-
|
| 440 |
-
# Build groups of vertices to merge (transitive grouping)
|
| 441 |
to_merge_final = defaultdict(list)
|
| 442 |
for i in range(len(all_3d_vertices)):
|
| 443 |
for j in to_merge:
|
| 444 |
if i in j:
|
| 445 |
-
to_merge_final[i]+=j
|
| 446 |
-
|
| 447 |
-
# Remove duplicates in each group
|
| 448 |
for k, v in to_merge_final.items():
|
| 449 |
to_merge_final[k] = list(set(v))
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
already_there = set()
|
| 453 |
merged = []
|
| 454 |
for k, v in to_merge_final.items():
|
| 455 |
if k in already_there:
|
|
@@ -457,18 +410,14 @@ def merge_vertices_3d(vert_edge_per_image, th=0.5):
|
|
| 457 |
merged.append(v)
|
| 458 |
for vv in v:
|
| 459 |
already_there.add(vv)
|
| 460 |
-
|
| 461 |
-
# Calculate new vertex positions (average of merged groups)
|
| 462 |
old_idx_to_new = {}
|
| 463 |
-
count
|
| 464 |
-
for idxs in merged:
|
| 465 |
new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
|
| 466 |
for idx in idxs:
|
| 467 |
old_idx_to_new[idx] = count
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
# Update connections to use new vertex indices
|
| 472 |
for conn in connections_3d:
|
| 473 |
new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
|
| 474 |
if new_con[0] == new_con[1]:
|
|
@@ -608,17 +557,21 @@ def get_sparse_depth(colmap_rec, img_id_substring, depth):
|
|
| 608 |
|
| 609 |
|
| 610 |
def fit_scale_robust_median(depth, sparse_depth, validity_mask=None):
|
| 611 |
-
"""
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
"""
|
| 614 |
if validity_mask is None:
|
| 615 |
mask = (sparse_depth != 0)
|
| 616 |
else:
|
| 617 |
mask = (sparse_depth != 0) & validity_mask
|
| 618 |
-
mask = mask & (depth <50) & (sparse_depth <50)
|
| 619 |
X = depth[mask]
|
| 620 |
Y = sparse_depth[mask]
|
| 621 |
-
alpha =np.median(Y/X)
|
| 622 |
depth_fitted = alpha * depth
|
| 623 |
return alpha, depth_fitted
|
| 624 |
|
|
@@ -630,20 +583,14 @@ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg):
|
|
| 630 |
|
| 631 |
Parameters
|
| 632 |
----------
|
| 633 |
-
depth :
|
| 634 |
-
|
| 635 |
colmap_rec : pycolmap.Reconstruction
|
| 636 |
-
COLMAP reconstruction
|
| 637 |
img_id : str
|
| 638 |
-
|
| 639 |
-
K : np.ndarray
|
| 640 |
-
Camera intrinsic matrix (3x3).
|
| 641 |
-
R : np.ndarray
|
| 642 |
-
Camera rotation matrix (3x3).
|
| 643 |
-
t : np.ndarray
|
| 644 |
-
Camera translation vector (3,).
|
| 645 |
ade20k_seg : PIL.Image
|
| 646 |
-
ADE20k segmentation
|
| 647 |
|
| 648 |
Returns
|
| 649 |
-------
|
|
@@ -673,17 +620,18 @@ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg):
|
|
| 673 |
return depth_fitted, depth_sparse, True, col_img
|
| 674 |
|
| 675 |
|
| 676 |
-
def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th
|
| 677 |
-
"""
|
| 678 |
-
|
| 679 |
-
|
|
|
|
| 680 |
"""
|
| 681 |
-
xyz_sfm=[]
|
| 682 |
for k, v in colmap_rec.points3D.items():
|
| 683 |
xyz_sfm.append(v.xyz)
|
| 684 |
xyz_sfm = np.array(xyz_sfm)
|
| 685 |
-
|
| 686 |
-
mindist =
|
| 687 |
mask = mindist <= th
|
| 688 |
all_3d_vertices_new = all_3d_vertices[mask]
|
| 689 |
old_idx_survived = np.arange(len(all_3d_vertices))[mask]
|
|
@@ -699,24 +647,15 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
|
|
| 699 |
"""
|
| 700 |
good_entry = convert_entry_to_human_readable(entry)
|
| 701 |
vert_edge_per_image = {}
|
| 702 |
-
for i, (gest, depth,
|
| 703 |
-
good_entry['depth'],
|
| 704 |
-
good_entry['K'],
|
| 705 |
-
good_entry['R'],
|
| 706 |
-
good_entry['t'],
|
| 707 |
good_entry['image_ids'],
|
| 708 |
-
good_entry['ade']
|
| 709 |
)):
|
| 710 |
if 'colmap' in good_entry:
|
| 711 |
colmap_rec = good_entry['colmap']
|
| 712 |
else:
|
| 713 |
colmap_rec = good_entry['colmap_binary']
|
| 714 |
-
process_image = True
|
| 715 |
-
if 'pose_only_in_colmap' in good_entry:
|
| 716 |
-
process_image = good_entry['pose_only_in_colmap'][i]
|
| 717 |
-
K = np.array(K)
|
| 718 |
-
R = np.array(R)
|
| 719 |
-
t = np.array(t)
|
| 720 |
# Resize gestalt segmentation to match depth map size
|
| 721 |
depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
|
| 722 |
gest_seg = gest.resize(depth_size)
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
import pycolmap
|
| 10 |
from PIL import Image as PImage
|
|
|
|
| 11 |
|
| 12 |
from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
|
| 13 |
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def read_colmap_rec(colmap_data):
    """Build a ``pycolmap.Reconstruction`` from a zip archive given as bytes.

    The archive is unpacked into a temporary directory, which is handed to
    ``pycolmap.Reconstruction`` and removed again once the reconstruction
    object has been created.
    """
    with tempfile.TemporaryDirectory() as workdir:
        with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as archive:
            archive.extractall(workdir)
        return pycolmap.Reconstruction(workdir)
|
| 27 |
|
|
|
|
| 57 |
return (u, v), p_cam[2]
|
| 58 |
|
| 59 |
def convert_entry_to_human_readable(entry):
    """Return a new dict with the fields of a raw dataset entry decoded.

    * Keys containing ``'colmap'`` (except ``'pose_only_in_colmap'``) are
      passed through ``read_colmap_rec``.
    * ``wf_vertices``, ``wf_edges``, ``K``, ``R`` and ``t`` are wrapped in
      ``np.array``.
    * Every other value is copied through unchanged.

    The entry's ``order_id`` is additionally stored under ``'__key__'``.
    """
    array_fields = ('wf_vertices', 'wf_edges', 'K', 'R', 't')
    decoded = {}
    for name, value in entry.items():
        if name != 'pose_only_in_colmap' and 'colmap' in name:
            decoded[name] = read_colmap_rec(value)
            continue
        decoded[name] = np.array(value) if name in array_fields else value
    decoded['__key__'] = entry['order_id']
    return decoded
|
| 77 |
|
|
|
|
| 113 |
|
| 114 |
|
| 115 |
def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
    """Extract 2D wireframe vertices and edges from a gestalt segmentation map.

    Vertices: for each of the ``'apex'`` and ``'eave_end_point'`` classes,
    pixels matching the class colour (within 0.5 per channel) are grouped into
    8-connected components and the centroid of each component becomes one
    vertex.

    Edges: for each of the ``'eave'``, ``'ridge'``, ``'rake'`` and ``'valley'``
    classes, the colour mask is morphologically closed with a 5x5 kernel and
    split into 8-connected components. A line is fitted to every component and
    clipped to the extent of the component's pixels; every pair of vertices
    that both lie within ``edge_th`` of that segment is connected. The fitted
    segment's own endpoints are never used as vertices.

    Parameters
    ----------
    gest_seg_np : array-like
        Gestalt segmentation image; converted with ``np.array`` when it is not
        already an ndarray.
    edge_th : float
        Maximum vertex-to-segment distance, in pixel coordinates, for a vertex
        to count as lying on a stroke.

    Returns
    -------
    vertices : list of dict
        One ``{"xy": centroid, "type": class_name}`` entry per detected blob.
    connections : list of tuple
        ``(i, j)`` index pairs into ``vertices`` with ``i < j``. The same pair
        can appear more than once when it is close to several strokes.
    """
    if not isinstance(gest_seg_np, np.ndarray):
        gest_seg_np = np.array(gest_seg_np)

    # --- Collect vertices from blob centroids ---
    vertices = []
    for v_class in ('apex', 'eave_end_point'):
        color = np.array(gestalt_color_mapping[v_class])
        mask = cv2.inRange(gest_seg_np, color - 0.5, color + 0.5)
        if mask.sum() == 0:
            continue
        _, _, _, centroids = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
        # Row 0 is the background component.
        vertices.extend({"xy": centroid, "type": v_class} for centroid in centroids[1:])

    connections = []
    # An edge needs two vertices; checking once here avoids running the
    # closing / labelling / line fitting for every stroke only to discard it.
    if len(vertices) < 2:
        return vertices, connections

    vertex_xy = np.array([v['xy'] for v in vertices])
    close_kernel = np.ones((5, 5), np.uint8)

    # --- Collect edges by fitting lines to stroke components ---
    for edge_class in ('eave', 'ridge', 'rake', 'valley'):
        edge_color = np.array(gestalt_color_mapping[edge_class])
        mask_raw = cv2.inRange(gest_seg_np, edge_color - 0.5, edge_color + 0.5)
        # Closing bridges small gaps inside a stroke before labelling.
        mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, close_kernel)
        if mask.sum() == 0:
            continue

        _, labels, _, _ = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)

        for lbl in range(1, labels.max() + 1):
            ys, xs = np.where(labels == lbl)
            if len(xs) < 2:
                continue

            # fitLine returns a direction (vx, vy) and a point (x0, y0) on the line.
            pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
            vx, vy, x0, y0 = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
                                         param=0, reps=0.01, aeps=0.01).ravel()

            # Clip the infinite line to the stroke: project every stroke pixel
            # onto the direction and keep the two extremes as segment ends.
            proj = (xs - x0) * vx + (ys - y0) * vy
            p1 = np.array([x0 + proj.min() * vx, y0 + proj.min() * vy])
            p2 = np.array([x0 + proj.max() * vx, y0 + proj.max() * vy])

            dists = np.array([point_to_segment_dist(pt, p1, p2) for pt in vertex_xy])
            # np.where yields ascending indices, so each (a, b) already has a < b.
            near = np.where(dists <= edge_th)[0].tolist()
            connections.extend((a, b)
                               for pos, a in enumerate(near)
                               for b in near[pos + 1:])

    return vertices, connections
|
| 191 |
|
|
|
|
| 223 |
Depth value chosen for each vertex.
|
| 224 |
"""
|
| 225 |
|
|
|
|
| 226 |
uv = np.array([vert['xy'] for vert in vertices], dtype=np.float32)
|
|
|
|
|
|
|
| 227 |
uv_int = np.round(uv).astype(np.int32)
|
| 228 |
H, W = depth_fitted.shape[:2]
|
|
|
|
|
|
|
| 229 |
uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
|
| 230 |
uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)
|
| 231 |
+
|
|
|
|
| 232 |
vertex_depth = np.zeros(len(vertices), dtype=np.float32)
|
| 233 |
dense_count = 0
|
| 234 |
+
|
| 235 |
for i, (x_i, y_i) in enumerate(uv_int):
|
|
|
|
| 236 |
x0 = max(0, x_i - search_radius)
|
| 237 |
x1 = min(W, x_i + search_radius + 1)
|
| 238 |
y0 = max(0, y_i - search_radius)
|
| 239 |
y1 = min(H, y_i + search_radius + 1)
|
|
|
|
|
|
|
| 240 |
region = sparse_depth[y0:y1, x0:x1]
|
| 241 |
+
valid_y, valid_x = np.where(region > 0)
|
| 242 |
+
|
|
|
|
|
|
|
|
|
|
| 243 |
if valid_y.size > 0:
|
| 244 |
+
# Prefer the COLMAP sparse point nearest to the vertex over dense depth,
|
| 245 |
+
# since COLMAP depth is closer to GT metric while the dense map scale might be wrong.
|
| 246 |
global_x = x0 + valid_x
|
| 247 |
global_y = y0 + valid_y
|
|
|
|
|
|
|
| 248 |
dist_sq = (global_x - x_i)**2 + (global_y - y_i)**2
|
|
|
|
|
|
|
| 249 |
min_idx = np.argmin(dist_sq)
|
| 250 |
+
vertex_depth[i] = region[valid_y[min_idx], valid_x[min_idx]]
|
|
|
|
| 251 |
else:
|
|
|
|
| 252 |
vertex_depth[i] = depth_fitted[y_i, x_i]
|
| 253 |
dense_count += 1
|
| 254 |
return uv, vertex_depth
|
|
|
|
| 360 |
|
| 361 |
|
| 362 |
def merge_vertices_3d(vert_edge_per_image, th=0.5):
|
| 363 |
+
"""Merge vertices across all views into a single consistent 3D vertex set.
|
| 364 |
+
|
| 365 |
+
The same physical corner is independently lifted to 3D from each image that
|
| 366 |
+
sees it, producing near-duplicate vertices. We merge any two vertices of the
|
| 367 |
+
same type whose 3D distance is within `th` metres.
|
| 368 |
+
|
| 369 |
+
Grouping is transitive: if A~B and B~C then all three collapse to one vertex
|
| 370 |
+
at their mean position.
|
| 371 |
+
"""
|
| 372 |
all_3d_vertices = []
|
| 373 |
connections_3d = []
|
|
|
|
| 374 |
cur_start = 0
|
| 375 |
types = []
|
| 376 |
+
|
|
|
|
| 377 |
for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
|
| 378 |
+
types += [int(v['type'] == 'apex') for v in vertices]
|
| 379 |
all_3d_vertices.append(vertices_3d)
|
| 380 |
+
connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
|
| 381 |
+
cur_start += len(vertices_3d)
|
| 382 |
all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
|
| 383 |
+
|
| 384 |
+
diff = all_3d_vertices[:, None, :] - all_3d_vertices[None, :, :]
|
| 385 |
+
distmat = np.sqrt((diff ** 2).sum(axis=-1))
|
| 386 |
+
types = np.array(types)
|
| 387 |
+
same_types_mask = (types[:, None] == types[None, :])
|
| 388 |
+
mask_to_merge = (distmat <= th) & same_types_mask
|
| 389 |
+
|
|
|
|
| 390 |
new_vertices = []
|
| 391 |
new_connections = []
|
| 392 |
+
|
| 393 |
+
# Each row of mask_to_merge gives the set of vertices close to vertex i.
|
| 394 |
+
# Collect unique such sets, then union-find style: every vertex collects all
|
| 395 |
+
# other vertices it co-occurs with in any group.
|
| 396 |
to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
|
|
|
|
|
|
|
| 397 |
to_merge_final = defaultdict(list)
|
| 398 |
for i in range(len(all_3d_vertices)):
|
| 399 |
for j in to_merge:
|
| 400 |
if i in j:
|
| 401 |
+
to_merge_final[i] += j
|
|
|
|
|
|
|
| 402 |
for k, v in to_merge_final.items():
|
| 403 |
to_merge_final[k] = list(set(v))
|
| 404 |
+
|
| 405 |
+
already_there = set()
|
|
|
|
| 406 |
merged = []
|
| 407 |
for k, v in to_merge_final.items():
|
| 408 |
if k in already_there:
|
|
|
|
| 410 |
merged.append(v)
|
| 411 |
for vv in v:
|
| 412 |
already_there.add(vv)
|
| 413 |
+
|
|
|
|
| 414 |
old_idx_to_new = {}
|
| 415 |
+
for count, idxs in enumerate(merged):
|
|
|
|
| 416 |
new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
|
| 417 |
for idx in idxs:
|
| 418 |
old_idx_to_new[idx] = count
|
| 419 |
+
new_vertices = np.array(new_vertices)
|
| 420 |
+
|
|
|
|
|
|
|
| 421 |
for conn in connections_3d:
|
| 422 |
new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
|
| 423 |
if new_con[0] == new_con[1]:
|
|
|
|
| 557 |
|
| 558 |
|
| 559 |
def fit_scale_robust_median(depth, sparse_depth, validity_mask=None):
    """Estimate one scale factor that aligns a dense depth map to sparse depth.

    The scale is ``alpha = median(sparse_depth / depth)`` taken over pixels
    where

    * ``sparse_depth`` is non-zero (zero marks "no sparse sample"),
    * ``validity_mask`` is true, when a mask is given,
    * both depths are below 50 (in the depth maps' own units; presumably this
      drops far background points, which is not verifiable from this function),
    * ``depth`` is non-zero, so the ratio never divides by zero.

    Parameters
    ----------
    depth : np.ndarray
        Dense depth map to be rescaled.
    sparse_depth : np.ndarray
        Sparse reference depth with the same shape as ``depth``.
    validity_mask : np.ndarray of bool, optional
        Extra per-pixel restriction on which samples may be used.

    Returns
    -------
    alpha : np.float64
        Fitted scale, or NaN when no pixel satisfies all conditions.
    depth_fitted : np.ndarray
        ``alpha * depth`` (all NaN when ``alpha`` is NaN).
    """
    mask = sparse_depth != 0
    if validity_mask is not None:
        mask = mask & validity_mask
    mask = mask & (depth < 50) & (sparse_depth < 50)
    # A zero dense depth would contribute an infinite ratio and can drag the
    # median to inf; such pixels carry no scale information, so drop them.
    mask = mask & (depth != 0)

    if mask.any():
        alpha = np.median(sparse_depth[mask] / depth[mask])
    else:
        # No usable sample: report "no scale" explicitly rather than via the
        # RuntimeWarning np.median raises on an empty array.
        alpha = np.float64(np.nan)

    depth_fitted = alpha * depth
    return alpha, depth_fitted
|
| 577 |
|
|
|
|
| 583 |
|
| 584 |
Parameters
|
| 585 |
----------
|
| 586 |
+
depth : PIL.Image
|
| 587 |
+
Dense monocular depth map (pixel values in mm).
|
| 588 |
colmap_rec : pycolmap.Reconstruction
|
| 589 |
+
COLMAP reconstruction used to obtain metric sparse depth.
|
| 590 |
img_id : str
|
| 591 |
+
Substring matched against COLMAP image names to locate the right camera.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
ade20k_seg : PIL.Image
|
| 593 |
+
ADE20k segmentation used to restrict scale fitting to the building region.
|
| 594 |
|
| 595 |
Returns
|
| 596 |
-------
|
|
|
|
| 620 |
return depth_fitted, depth_sparse, True, col_img
|
| 621 |
|
| 622 |
|
| 623 |
+
def prune_too_far(all_3d_vertices, connections_3d, colmap_rec, th=3.0):
|
| 624 |
+
"""Remove vertices that have no SfM support within `th` metres.
|
| 625 |
+
|
| 626 |
+
Vertices lifted from noisy monocular depth with no nearby COLMAP point are
|
| 627 |
+
likely hallucinations; discarding them improves geometric accuracy.
|
| 628 |
"""
|
| 629 |
+
xyz_sfm = []
|
| 630 |
for k, v in colmap_rec.points3D.items():
|
| 631 |
xyz_sfm.append(v.xyz)
|
| 632 |
xyz_sfm = np.array(xyz_sfm)
|
| 633 |
+
diff = all_3d_vertices[:, None, :] - xyz_sfm[None, :, :]
|
| 634 |
+
mindist = np.sqrt((diff ** 2).sum(axis=-1)).min(axis=1)
|
| 635 |
mask = mindist <= th
|
| 636 |
all_3d_vertices_new = all_3d_vertices[mask]
|
| 637 |
old_idx_survived = np.arange(len(all_3d_vertices))[mask]
|
|
|
|
| 647 |
"""
|
| 648 |
good_entry = convert_entry_to_human_readable(entry)
|
| 649 |
vert_edge_per_image = {}
|
| 650 |
+
for i, (gest, depth, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
|
| 651 |
+
good_entry['depth'],
|
|
|
|
|
|
|
|
|
|
| 652 |
good_entry['image_ids'],
|
| 653 |
+
good_entry['ade']
|
| 654 |
)):
|
| 655 |
if 'colmap' in good_entry:
|
| 656 |
colmap_rec = good_entry['colmap']
|
| 657 |
else:
|
| 658 |
colmap_rec = good_entry['colmap_binary']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
# Resize gestalt segmentation to match depth map size
|
| 660 |
depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
|
| 661 |
gest_seg = gest.resize(depth_size)
|
hoho2025/metric_helper.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import numpy as np
|
| 2 |
-
from scipy.spatial.distance import cdist
|
| 3 |
from scipy.optimize import linear_sum_assignment
|
| 4 |
import torch
|
| 5 |
import trimesh
|
|
@@ -135,7 +134,8 @@ def compute_ap_metrics(pd_vertices, gt_vertices, thresh=25):
|
|
| 135 |
if len(pd_vertices) == 0 or len(gt_vertices) == 0:
|
| 136 |
return 0.0
|
| 137 |
|
| 138 |
-
|
|
|
|
| 139 |
row_ind, col_ind = linear_sum_assignment(dists)
|
| 140 |
|
| 141 |
tp = (dists[row_ind, col_ind] <= thresh).sum()
|
|
|
|
| 1 |
import numpy as np
|
|
|
|
| 2 |
from scipy.optimize import linear_sum_assignment
|
| 3 |
import torch
|
| 4 |
import trimesh
|
|
|
|
| 134 |
if len(pd_vertices) == 0 or len(gt_vertices) == 0:
|
| 135 |
return 0.0
|
| 136 |
|
| 137 |
+
diff = np.asarray(pd_vertices)[:, None, :] - np.asarray(gt_vertices)[None, :, :]
|
| 138 |
+
dists = np.sqrt((diff ** 2).sum(axis=-1))
|
| 139 |
row_ind, col_ind = linear_sum_assignment(dists)
|
| 140 |
|
| 141 |
tp = (dists[row_ind, col_ind] <= thresh).sum()
|
pyproject.toml
CHANGED
|
@@ -1,3 +1,33 @@
|
|
| 1 |
[build-system]
|
| 2 |
requires = ["setuptools>=42", "wheel"]
|
| 3 |
build-backend = "setuptools.build_meta"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
[build-system]
|
| 2 |
requires = ["setuptools>=42", "wheel"]
|
| 3 |
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "hoho2025"
|
| 7 |
+
version = "0.2.2"
|
| 8 |
+
description = "Tools and utilities for the HoHo Dataset and S23DR Competition"
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
requires-python = ">=3.10"
|
| 11 |
+
authors = [
|
| 12 |
+
{ name = "Jack Langerman, Dmytro Mishkin, S23DR Organizing Team", email = "hoho@jackml.com" },
|
| 13 |
+
]
|
| 14 |
+
urls = { homepage = "https://github.com/s23dr/hoho2025" }
|
| 15 |
+
dependencies = [
|
| 16 |
+
"datasets",
|
| 17 |
+
"huggingface-hub",
|
| 18 |
+
"ipywidgets",
|
| 19 |
+
"matplotlib",
|
| 20 |
+
"numpy",
|
| 21 |
+
"opencv-python",
|
| 22 |
+
"Pillow",
|
| 23 |
+
"plotly",
|
| 24 |
+
"pycolmap>=0.6",
|
| 25 |
+
"scipy",
|
| 26 |
+
"torch",
|
| 27 |
+
"trimesh",
|
| 28 |
+
"webdataset",
|
| 29 |
+
"manifold3d",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
[tool.setuptools.packages.find]
|
| 33 |
+
where = ["."]
|
setup.py
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
from setuptools import setup, find_packages
|
| 2 |
-
import glob
|
| 3 |
-
import os
|
| 4 |
-
|
| 5 |
-
# Try to read from requirements.txt, but have fallback
|
| 6 |
-
try:
|
| 7 |
-
here = os.path.abspath(os.path.dirname(__file__))
|
| 8 |
-
with open(os.path.join(here, 'requirements.txt')) as f:
|
| 9 |
-
required = f.read().splitlines()
|
| 10 |
-
except FileNotFoundError:
|
| 11 |
-
# Fallback to hardcoded dependencies
|
| 12 |
-
required = [
|
| 13 |
-
'datasets',
|
| 14 |
-
'huggingface-hub',
|
| 15 |
-
'ipywidgets',
|
| 16 |
-
'matplotlib',
|
| 17 |
-
'numpy',
|
| 18 |
-
'opencv-python',
|
| 19 |
-
'Pillow',
|
| 20 |
-
'plotly',
|
| 21 |
-
'pycolmap',
|
| 22 |
-
'scipy',
|
| 23 |
-
'torch',
|
| 24 |
-
'trimesh',
|
| 25 |
-
'webdataset==0.2.111',
|
| 26 |
-
]
|
| 27 |
-
|
| 28 |
-
setup(name='hoho2025',
|
| 29 |
-
version='0.2.1',
|
| 30 |
-
description='Tools and utilites for the HoHo Dataset and S23DR Competition',
|
| 31 |
-
url='https://github.com/s23dr/hoho2025',
|
| 32 |
-
author='Jack Langerman, Dmytro Mishkin, S23DR Orgainizing Team',
|
| 33 |
-
author_email='hoho@jackml.com',
|
| 34 |
-
install_requires=required,
|
| 35 |
-
packages=find_packages(),
|
| 36 |
-
python_requires='>=3.10',
|
| 37 |
-
include_package_data=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|