Spaces:
Running on T4
Running on T4
GitHub Actions committed on
Commit ·
b20c82e
1
Parent(s): 6da2618
Deploy from GitHub commit ca72656c17476e5aa37a4735af6e47ff9f94fa1a
Browse files- app.py +315 -7
- golden_render.py +225 -0
- verify_goldens.py +93 -0
- verify_n1_parity.sh +74 -0
- verify_r1_metric.py +124 -0
- verify_r1_plane_sim.py +126 -0
- verify_r1_scale.py +152 -0
- verify_r1_scale_sim.py +105 -0
- visualizer.gpu.toml +1 -1
- visualizer.hf.toml +1 -1
- visualizer.local.toml +1 -1
- visualizer.segformer.toml +1 -1
app.py
CHANGED
|
@@ -112,8 +112,17 @@ DEPTH_MODEL_NAME = str(config_value(
|
|
| 112 |
"DEPTH_MODEL_NAME",
|
| 113 |
"models",
|
| 114 |
"depth_model_name",
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
ENABLE_DEPTH_ESTIMATION = str(config_value(
|
| 118 |
"ENABLE_DEPTH_ESTIMATION",
|
| 119 |
"runtime",
|
|
@@ -301,6 +310,18 @@ def estimate_depth(img: Image.Image, width: int, height: int):
|
|
| 301 |
depth_min, depth_max = float(np.min(depth)), float(np.max(depth))
|
| 302 |
if depth_max - depth_min < 1e-6:
|
| 303 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
return (depth - depth_min) / (depth_max - depth_min)
|
| 305 |
except Exception as exc:
|
| 306 |
print(f"Depth estimation skipped ({exc}).", flush=True)
|
|
@@ -811,7 +832,144 @@ def detect_dual_vanishing_points(
|
|
| 811 |
return primary, secondary
|
| 812 |
|
| 813 |
|
| 814 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
ys, xs = np.where(mask > 0)
|
| 816 |
if len(xs) < 1000:
|
| 817 |
return None, None
|
|
@@ -891,7 +1049,9 @@ def estimate_floor_plane(mask: np.ndarray, img_np: np.ndarray):
|
|
| 891 |
vp_y = float(vanishing_point["y"])
|
| 892 |
else:
|
| 893 |
# No usable VP: assume an eye-level shot with the horizon a little
|
| 894 |
-
# above the floor's top edge, centred over the bottom edge.
|
|
|
|
|
|
|
| 895 |
vp_x = (bl_x + br_x) * 0.5
|
| 896 |
vp_y = top_y_f - 0.35 * span_y
|
| 897 |
# Keep the horizon clear of the top edge so the transform stays
|
|
@@ -923,6 +1083,27 @@ def estimate_floor_plane(mask: np.ndarray, img_np: np.ndarray):
|
|
| 923 |
H = cv2.getPerspectiveTransform(src, dst)
|
| 924 |
homography = H.flatten().tolist()
|
| 925 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 926 |
default_rotation = estimate_default_rotation(mask, H, vanishing_point2)
|
| 927 |
|
| 928 |
return homography, {
|
|
@@ -938,6 +1119,110 @@ def estimate_floor_plane(mask: np.ndarray, img_np: np.ndarray):
|
|
| 938 |
}
|
| 939 |
|
| 940 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 941 |
# ---------------------------------------------------------------------------
|
| 942 |
# P1-4 — Default Tile Rotation
|
| 943 |
# Tiles were laid along the plane's x-axis (= the image bbox axis), which is
|
|
@@ -1022,8 +1307,12 @@ def estimate_default_rotation(
|
|
| 1022 |
[cx + (vanishing_point2["x"] - cx) * 0.25, cy + (vanishing_point2["y"] - cy) * 0.25],
|
| 1023 |
])
|
| 1024 |
angle = _plane_angle(toward)
|
| 1025 |
-
|
| 1026 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
|
| 1028 |
return 0.0
|
| 1029 |
|
|
@@ -1112,7 +1401,9 @@ def build_floor_surface_mask(
|
|
| 1112 |
if depth is not None and floor_mask.any():
|
| 1113 |
floor_depth = depth[floor_mask > 0]
|
| 1114 |
lo, hi = float(np.percentile(floor_depth, 2)), float(np.percentile(floor_depth, 98))
|
| 1115 |
-
|
|
|
|
|
|
|
| 1116 |
depth_keep = (depth >= lo - margin) & (depth <= hi + margin)
|
| 1117 |
surface = (surface & depth_keep.astype(np.uint8)).astype(np.uint8)
|
| 1118 |
surface[floor_mask > 0] = np.maximum(surface[floor_mask > 0], 1)
|
|
@@ -1336,7 +1627,11 @@ def build_segmentation_bundle(contents: bytes):
|
|
| 1336 |
print(f"[TIMING] Depth estimation took {time.perf_counter() - t0:.3f} seconds", flush=True)
|
| 1337 |
|
| 1338 |
t0 = time.perf_counter()
|
| 1339 |
-
homography, plane = estimate_floor_plane(floor_mask, img_np)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1340 |
print(f"[TIMING] Plane fitting / homography calculation took {time.perf_counter() - t0:.3f} seconds", flush=True)
|
| 1341 |
|
| 1342 |
t0 = time.perf_counter()
|
|
@@ -1408,6 +1703,19 @@ def build_segmentation_bundle(contents: bytes):
|
|
| 1408 |
"depthEnabled": depth is not None,
|
| 1409 |
"shadingEnabled": shade_map is not None,
|
| 1410 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1411 |
})
|
| 1412 |
|
| 1413 |
if not segments:
|
|
|
|
| 112 |
"DEPTH_MODEL_NAME",
|
| 113 |
"models",
|
| 114 |
"depth_model_name",
|
| 115 |
+
# R1-1 — metric indoor checkpoint: predicts depth in metres (and is ~10x
|
| 116 |
+
# smaller than dpt-large). Foundation for R1-2 plane fit and R1-3 scale.
|
| 117 |
+
"depth-anything/Depth-Anything-V2-Metric-Indoor-Small-hf",
|
| 118 |
))
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def depth_model_is_metric(name: str | None = None) -> bool:
|
| 122 |
+
"""Metric checkpoints predict metres; relative ones predict unitless
|
| 123 |
+
inverse depth that we min-max normalise. Convention: metric model ids
|
| 124 |
+
carry 'metric' in the name (Depth Anything V2 Metric, Metric3D...)."""
|
| 125 |
+
return "metric" in (name or DEPTH_MODEL_NAME).lower()
|
| 126 |
ENABLE_DEPTH_ESTIMATION = str(config_value(
|
| 127 |
"ENABLE_DEPTH_ESTIMATION",
|
| 128 |
"runtime",
|
|
|
|
| 310 |
depth_min, depth_max = float(np.min(depth)), float(np.max(depth))
|
| 311 |
if depth_max - depth_min < 1e-6:
|
| 312 |
return None
|
| 313 |
+
if depth_model_is_metric(model_name):
|
| 314 |
+
# R1-1 — keep the metres. Downstream filters are scale-agnostic
|
| 315 |
+
# (percentile bands), and R1-2/R1-3 need real geometry. Gate on
|
| 316 |
+
# plausibility: an indoor scene lives within ~0.1-30 m; anything
|
| 317 |
+
# else means the checkpoint or processor mis-loaded.
|
| 318 |
+
if depth_min < -0.5 or depth_max > 50.0 or depth_max < 0.2:
|
| 319 |
+
print(
|
| 320 |
+
f"Metric depth implausible [{depth_min:.2f}, {depth_max:.2f}] m — skipped.",
|
| 321 |
+
flush=True,
|
| 322 |
+
)
|
| 323 |
+
return None
|
| 324 |
+
return np.clip(depth, 0.05, 40.0)
|
| 325 |
return (depth - depth_min) / (depth_max - depth_min)
|
| 326 |
except Exception as exc:
|
| 327 |
print(f"Depth estimation skipped ({exc}).", flush=True)
|
|
|
|
| 832 |
return primary, secondary
|
| 833 |
|
| 834 |
|
| 835 |
+
def plane_homography_from_depth(
    depth: np.ndarray | None, mask: np.ndarray, w: int, h: int
):
    """R1-2 — analytic floor homography from a metric 3D plane fit.

    When vanishing-point detection fails, the old fallback invented a
    synthetic horizon, which warped steep/wide rooms (sheared checkers, the
    far-centre pinch). With metric depth the ground plane can be measured
    instead: back-project floor pixels (pinhole, f ~ image width), fit the
    plane robustly, and build the image->plane homography exactly as
    inv(K @ [u v c]) where u,v are orthonormal in-plane axes and c is a point
    on the plane. Plane units are METRES, so metersPerUnit is 1 by
    construction and true-to-size tiling (R1-3) activates in precisely the
    rooms the trapezoid fallback used to distort.

    Args:
        depth: Depth map indexed as depth[row, col]; None disables the fit.
            Values are treated as metres (the function bails out unless
            depth_model_is_metric() is true).
        mask: Array whose non-zero entries mark the floor pixels to fit.
        w: Image width in pixels; also used as the pinhole focal length.
        h: Image height in pixels.

    Returns (homography_list, plane_dict) or None (caller falls back to the
    synthetic-VP construction). homography_list is the row-major flattened
    3x3 image->plane matrix; plane_dict carries "x", "y", "width", "height",
    "quad" and "geometrySource".
    """
    if depth is None or not depth_model_is_metric():
        return None
    ys, xs = np.nonzero(mask)
    if len(xs) < 2000:
        return None
    # Fixed seed: the same mask always yields the same pixel subsample.
    rng = np.random.default_rng(7)
    sel = rng.choice(len(xs), size=min(6000, len(xs)), replace=False)
    x_f = xs[sel].astype(np.float64)
    y_f = ys[sel].astype(np.float64)
    z = depth[ys[sel], xs[sel]].astype(np.float64)

    # Pinhole back-projection: focal length = image width, principal point
    # at the image centre. Each row of P is one (X, Y, Z) camera-space point.
    f = float(w)
    cx, cy = w / 2.0, h / 2.0
    P = np.stack([(x_f - cx) / f * z, (y_f - cy) / f * z, z], axis=1)

    # iteratively trimmed total-least-squares plane fit
    keep = np.ones(len(P), bool)
    n = np.array([0.0, -1.0, 0.0])
    c = P.mean(axis=0)
    for _ in range(4):
        c = P[keep].mean(axis=0)
        q = P[keep] - c
        _, _, vt = np.linalg.svd(q, full_matrices=False)
        # Last right-singular vector = direction of least variance = normal.
        n = vt[2]
        resid = np.abs((P - c) @ n)
        # Keep the points within the 80th-percentile residual of the current
        # kept set (floored at 1e-4 so the threshold never collapses to 0).
        thr = max(float(np.percentile(resid[keep], 80)), 1e-4)
        keep = resid <= thr

    # Inlier test against the trimmed fit: distance to the plane within
    # max(0.03, 2.5% of the median sampled depth); at least 60% of the
    # sampled points must pass or the floor is not treated as planar.
    resid = np.abs((P - c) @ n)
    med_z = float(np.median(z))
    inliers = resid <= max(0.03, 0.025 * med_z)
    if inliers.mean() < 0.6:
        return None
    # Final refit on the inliers only.
    c = P[inliers].mean(axis=0)
    q = P[inliers] - c
    _, _, vt = np.linalg.svd(q, full_matrices=False)
    n = vt[2]
    if n @ c > 0:
        n = -n  # face the camera
    # floor sanity: the normal must point up-ish (camera y is down), not at a
    # wall-like angle — protects against fitting a dominant wall/cabinet face.
    if n[1] > -0.5:
        return None

    # in-plane axes: u image-horizontal-ish, v toward the camera (near field),
    # matching the existing convention that plane-y grows toward the viewer
    u = np.cross(n, [0.0, 0.0, 1.0])
    if np.linalg.norm(u) < 0.2:  # camera looking straight down
        u = np.cross(n, [0.0, 1.0, 0.0])
    u /= np.linalg.norm(u)
    v = np.cross(n, u)
    v /= np.linalg.norm(v)

    # M maps plane coordinates (a, b, 1) to homogeneous image pixels via
    # K @ (a*u + b*v + c); its inverse is the image->plane homography.
    K = np.array([[f, 0, cx], [0, f, cy], [0, 0, 1.0]])
    M = K @ np.stack([u, v, c], axis=1)
    if abs(np.linalg.det(M)) < 1e-9:
        return None
    H = np.linalg.inv(M)
    H = H / H[2, 2]

    # orient axes by their image-space direction at the floor centroid
    def to_plane(px, py):
        # Apply the current H to an image point (projective divide included).
        den = H[2, 0] * px + H[2, 1] * py + H[2, 2]
        return (
            (H[0, 0] * px + H[0, 1] * py + H[0, 2]) / den,
            (H[1, 0] * px + H[1, 1] * py + H[1, 2]) / den,
        )

    # Probe 10 px right and 10 px down from the median sampled pixel and
    # negate whichever plane axis decreases along that image direction.
    mx, my = float(np.median(x_f)), float(np.median(y_f))
    a0, b0 = to_plane(mx, my)
    a_dx, _ = to_plane(mx + 10, my)
    _, b_dy = to_plane(mx, my + 10)
    flip = np.diag([
        -1.0 if a_dx - a0 < 0 else 1.0,  # plane-x grows image-right
        -1.0 if b_dy - b0 < 0 else 1.0,  # plane-y grows image-down (nearer)
        1.0,
    ])
    H = flip @ H
    H = H / H[2, 2]

    # plane-space extent of the floor (percentile box, metres)
    den = H[2, 0] * x_f + H[2, 1] * y_f + H[2, 2]
    pa = (H[0, 0] * x_f + H[0, 1] * y_f + H[0, 2]) / den
    pb = (H[1, 0] * x_f + H[1, 1] * y_f + H[1, 2]) / den
    a1, a2 = float(np.percentile(pa, 1)), float(np.percentile(pa, 99))
    b1, b2 = float(np.percentile(pb, 1)), float(np.percentile(pb, 99))
    if not (0.8 <= a2 - a1 <= 30.0 and 0.8 <= b2 - b1 <= 60.0):
        return None  # implausible physical footprint

    # image quad of the plane box (bl, br, tr, tl — existing convention)
    Hinv = np.linalg.inv(H)

    def to_img(ap, bp):
        # Plane point -> image pixel, each coordinate clamped to the image.
        den_i = Hinv[2, 0] * ap + Hinv[2, 1] * bp + Hinv[2, 2]
        return (
            float(np.clip((Hinv[0, 0] * ap + Hinv[0, 1] * bp + Hinv[0, 2]) / den_i, 0, w - 1)),
            float(np.clip((Hinv[1, 0] * ap + Hinv[1, 1] * bp + Hinv[1, 2]) / den_i, 0, h - 1)),
        )

    quad = [to_img(a1, b2), to_img(a2, b2), to_img(a2, b1), to_img(a1, b1)]

    homography = H.flatten().tolist()
    # self-check with the certified R1-3 gate chain (isotropy, shear, spread):
    # on a correct fit the measured scale must be ~1 m per plane unit.
    mpu = estimate_meters_per_unit(depth, mask, homography, w, h)
    if mpu is None or not (0.85 <= mpu <= 1.15):
        return None

    plane = {
        "x": a1,
        "y": b1,
        "width": a2 - a1,
        "height": b2 - b1,
        # Flattened [x, y, x, y, ...] in the bl, br, tr, tl order built above.
        "quad": [coord for pt in quad for coord in pt],
        "geometrySource": "depth-plane",  # R1-2 — vs the VP trapezoid path
    }
    return homography, plane
|
| 970 |
+
|
| 971 |
+
|
| 972 |
+
def estimate_floor_plane(mask: np.ndarray, img_np: np.ndarray, depth: np.ndarray | None = None):
|
| 973 |
ys, xs = np.where(mask > 0)
|
| 974 |
if len(xs) < 1000:
|
| 975 |
return None, None
|
|
|
|
| 1049 |
vp_y = float(vanishing_point["y"])
|
| 1050 |
else:
|
| 1051 |
# No usable VP: assume an eye-level shot with the horizon a little
|
| 1052 |
+
# above the floor's top edge, centred over the bottom edge. (If this
|
| 1053 |
+
# guess disagrees with measured depth, the R1-2 arbitration below
|
| 1054 |
+
# replaces it with the analytic depth-plane homography.)
|
| 1055 |
vp_x = (bl_x + br_x) * 0.5
|
| 1056 |
vp_y = top_y_f - 0.35 * span_y
|
| 1057 |
# Keep the horizon clear of the top edge so the transform stays
|
|
|
|
| 1083 |
H = cv2.getPerspectiveTransform(src, dst)
|
| 1084 |
homography = H.flatten().tolist()
|
| 1085 |
|
| 1086 |
+
# R1-2 — geometry arbitration: when the trapezoid homography (real or
|
| 1087 |
+
# synthetic VP) is inconsistent with the measured 3D floor — the certified
|
| 1088 |
+
# metric-scale gates fail on it — prefer the analytic depth-plane
|
| 1089 |
+
# homography. This covers both failure classes seen in the reference
|
| 1090 |
+
# rooms: no VP found (guessed horizon) and a detected-but-wrong VP
|
| 1091 |
+
# (sheared trapezoid). With no metric depth both probes return None and
|
| 1092 |
+
# the trapezoid ships unchanged.
|
| 1093 |
+
if depth is not None and estimate_meters_per_unit(depth, mask, homography, w, h) is None:
|
| 1094 |
+
fitted = plane_homography_from_depth(depth, mask, w, h)
|
| 1095 |
+
if fitted is not None:
|
| 1096 |
+
homography_m, plane_meta = fitted
|
| 1097 |
+
plane_meta["hullQuad"] = hull_quad_list
|
| 1098 |
+
plane_meta["vanishingPoint"] = vanishing_point
|
| 1099 |
+
plane_meta["vanishingPoint2"] = vanishing_point2
|
| 1100 |
+
plane_meta["defaultRotation"] = estimate_default_rotation(
|
| 1101 |
+
mask,
|
| 1102 |
+
np.asarray(homography_m, np.float64).reshape(3, 3),
|
| 1103 |
+
vanishing_point2,
|
| 1104 |
+
)
|
| 1105 |
+
return homography_m, plane_meta
|
| 1106 |
+
|
| 1107 |
default_rotation = estimate_default_rotation(mask, H, vanishing_point2)
|
| 1108 |
|
| 1109 |
return homography, {
|
|
|
|
| 1119 |
}
|
| 1120 |
|
| 1121 |
|
| 1122 |
+
def estimate_meters_per_unit(
    depth: np.ndarray | None,
    mask: np.ndarray,
    homography: list[float] | None,
    w: int,
    h: int,
) -> float | None:
    """R1-3 — physical scale of the floor plane: metres per plane unit.

    Back-project floor pixels to 3D through the metric depth (pinhole, f ~
    image width — the P0 convention), map the same pixels into plane space
    through the homography, and take the median ratio of 3D distance to
    plane-space distance over random point pairs. Replaces the heuristic
    repeat coefficient on the frontend: a 60 cm tile becomes 0.6/metersPerUnit
    plane units in every room. None when depth is relative, the floor is too
    small, or the ratios are inconsistent (non-planar depth — let the
    heuristic handle it).

    Args:
        depth: Depth map indexed as depth[row, col]; None yields None.
        mask: Array whose non-zero entries mark the floor pixels.
        homography: Nine values, reshaped row-major to the 3x3 image->plane
            matrix; None yields None.
        w: Image width in pixels; also used as the pinhole focal length.
        h: Image height in pixels.

    Returns:
        The median 3D-distance / plane-distance ratio rounded to 6 decimals,
        or None when any gate below rejects the measurement.
    """
    if depth is None or homography is None or not depth_model_is_metric():
        return None
    ys, xs = np.nonzero(mask)
    if len(xs) < 500:
        return None
    # Near field only: rows below the floor's 40th percentile. The far floor
    # is where a synthetic-VP homography distorts most (R1-2's territory) and
    # where depth is noisiest; the near field anchors perceived tile size.
    # (Larger row index = lower in the image, so this keeps ys >= p40.)
    near = ys >= np.percentile(ys, 40)
    ys, xs = ys[near], xs[near]
    if len(xs) < 500:
        return None
    # Fixed seed: identical inputs always produce the identical estimate.
    rng = np.random.default_rng(12345)
    sel = rng.choice(len(xs), size=min(2000, len(xs)), replace=False)
    xs_f = xs[sel].astype(np.float64)
    ys_f = ys[sel].astype(np.float64)
    z = depth[ys[sel], xs[sel]].astype(np.float64)

    f = float(w)  # P0 convention: focal ~ image width
    cx, cy = w / 2.0, h / 2.0
    # Camera-space (X, Y, Z) of every sampled pixel.
    pts3 = np.stack([(xs_f - cx) / f * z, (ys_f - cy) / f * z, z], axis=1)

    H = np.asarray(homography, np.float64).reshape(3, 3)
    den = H[2, 0] * xs_f + H[2, 1] * ys_f + H[2, 2]
    # Avoid dividing by (near-)zero for pixels on the homography's horizon.
    den = np.where(np.abs(den) < 1e-9, 1e-9, den)
    # Plane-space (x, y) of the same pixels.
    pts_p = np.stack(
        [
            (H[0, 0] * xs_f + H[0, 1] * ys_f + H[0, 2]) / den,
            (H[1, 0] * xs_f + H[1, 1] * ys_f + H[1, 2]) / den,
        ],
        axis=1,
    )

    # 4000 random index pairs; pairs that picked the same point are dropped.
    i = rng.integers(0, len(sel), 4000)
    j = rng.integers(0, len(sel), 4000)
    keep = i != j
    d3 = np.linalg.norm(pts3[i[keep]] - pts3[j[keep]], axis=1)
    dp = np.linalg.norm(pts_p[i[keep]] - pts_p[j[keep]], axis=1)
    # separation threshold relative to the floor's own plane-space span —
    # plane units may be pixel-ish (VP path) or metres (R1-2 path)
    span = float(np.hypot(
        np.percentile(pts_p[:, 0], 95) - np.percentile(pts_p[:, 0], 5),
        np.percentile(pts_p[:, 1], 95) - np.percentile(pts_p[:, 1], 5),
    ))
    # Only pairs separated by more than 5% of the span contribute a ratio.
    far = dp > 0.05 * max(span, 1e-9)
    if far.sum() < 200:
        return None
    dpx = np.abs(pts_p[i[keep], 0] - pts_p[j[keep], 0])
    dpy = np.abs(pts_p[i[keep], 1] - pts_p[j[keep], 1])
    ratios = d3[far] / dp[far]
    med = float(np.median(ratios))
    q1, q3 = np.percentile(ratios, 25), np.percentile(ratios, 75)
    # Tight consistency gate: a synthetic-VP homography distorts the plane,
    # making the ratio position-dependent. Returning None there is correct —
    # the heuristic repeat takes over until R1-2 fixes the geometry.
    if med <= 1e-6 or (q3 - q1) / med > 0.35:
        return None  # inconsistent — depth and homography disagree
    # Isotropy gate: a constant shear/anisotropy passes the spread check
    # (direction-averaged ratios stay narrow) but renders distorted tiles.
    # The scale measured along plane-x must match plane-y.
    horiz = far & (dpx > 2 * dpy)
    vert = far & (dpy > 2 * dpx)
    # The gate only applies when both directions have at least 50 pairs.
    if horiz.sum() >= 50 and vert.sum() >= 50:
        med_h = float(np.median(d3[horiz] / dp[horiz]))
        med_v = float(np.median(d3[vert] / dp[vert]))
        if abs(med_h - med_v) / med > 0.2:
            return None  # anisotropic plane mapping — not metric-trustworthy
    # Shear leaves axis-aligned lengths almost unchanged and shows up in the
    # diagonals instead: +45 deg pairs stretch while -45 deg pairs shrink.
    sx = pts_p[i[keep], 0] - pts_p[j[keep], 0]
    sy = pts_p[i[keep], 1] - pts_p[j[keep], 1]
    diag = far & (dpx > 0.5 * dpy) & (dpy > 0.5 * dpx)
    # Split diagonal pairs by the sign of their slope in plane space.
    d1 = diag & (sx * sy > 0)
    d2 = diag & (sx * sy < 0)
    if d1.sum() >= 50 and d2.sum() >= 50:
        med_1 = float(np.median(d3[d1] / dp[d1]))
        med_2 = float(np.median(d3[d2] / dp[d2]))
        if abs(med_1 - med_2) / med > 0.2:
            return None  # sheared plane mapping — not metric-trustworthy
    # plausible range: ~1e-3 m/unit for pixel-scale planes (VP trapezoid path)
    # up to ~1.0 m/unit for metre-scale planes (R1-2 depth-plane path)
    if not (1e-5 <= med <= 2.0):
        return None
    return round(med, 6)
|
| 1224 |
+
|
| 1225 |
+
|
| 1226 |
# ---------------------------------------------------------------------------
|
| 1227 |
# P1-4 — Default Tile Rotation
|
| 1228 |
# Tiles were laid along the plane's x-axis (= the image bbox axis), which is
|
|
|
|
| 1307 |
[cx + (vanishing_point2["x"] - cx) * 0.25, cy + (vanishing_point2["y"] - cy) * 0.25],
|
| 1308 |
])
|
| 1309 |
angle = _plane_angle(toward)
|
| 1310 |
+
# R1-2 — reject rather than clamp: an angle beyond the band means the
|
| 1311 |
+
# cue is unreliable in this plane chart (both depth-plane kitchens
|
| 1312 |
+
# saturated the old +/-30 clip), and a saturated guess lays every
|
| 1313 |
+
# tile visibly wrong. Same philosophy as the primary path's gates.
|
| 1314 |
+
if angle is not None and abs(angle) <= 30.0:
|
| 1315 |
+
return float(angle)
|
| 1316 |
|
| 1317 |
return 0.0
|
| 1318 |
|
|
|
|
| 1401 |
if depth is not None and floor_mask.any():
|
| 1402 |
floor_depth = depth[floor_mask > 0]
|
| 1403 |
lo, hi = float(np.percentile(floor_depth, 2)), float(np.percentile(floor_depth, 98))
|
| 1404 |
+
# R1-1 — scale-free margin: works for normalised [0,1] relative depth
|
| 1405 |
+
# and for metric metres alike (the old 0.08 floor assumed [0,1]).
|
| 1406 |
+
margin = max((hi - lo) * 0.35, 0.04 * max(abs(hi), 1e-6))
|
| 1407 |
depth_keep = (depth >= lo - margin) & (depth <= hi + margin)
|
| 1408 |
surface = (surface & depth_keep.astype(np.uint8)).astype(np.uint8)
|
| 1409 |
surface[floor_mask > 0] = np.maximum(surface[floor_mask > 0], 1)
|
|
|
|
| 1627 |
print(f"[TIMING] Depth estimation took {time.perf_counter() - t0:.3f} seconds", flush=True)
|
| 1628 |
|
| 1629 |
t0 = time.perf_counter()
|
| 1630 |
+
homography, plane = estimate_floor_plane(floor_mask, img_np, depth)
|
| 1631 |
+
if plane is not None:
|
| 1632 |
+
# R1-3 — physical plane scale; null on relative depth so the frontend
|
| 1633 |
+
# falls back to the heuristic repeat.
|
| 1634 |
+
plane["metersPerUnit"] = estimate_meters_per_unit(depth, floor_mask, homography, w, h)
|
| 1635 |
print(f"[TIMING] Plane fitting / homography calculation took {time.perf_counter() - t0:.3f} seconds", flush=True)
|
| 1636 |
|
| 1637 |
t0 = time.perf_counter()
|
|
|
|
| 1703 |
"depthEnabled": depth is not None,
|
| 1704 |
"shadingEnabled": shade_map is not None,
|
| 1705 |
},
|
| 1706 |
+
# R1-1 — metric floor depth stats (metres). The seam for R1-2
|
| 1707 |
+
# (plane fit) and R1-3 (true tile scale); null on relative
|
| 1708 |
+
# checkpoints so the frontend can feature-gate.
|
| 1709 |
+
"metricDepth": (
|
| 1710 |
+
{
|
| 1711 |
+
"unit": "m",
|
| 1712 |
+
"floorP5": round(float(np.percentile(depth[region_mask > 0], 5)), 3),
|
| 1713 |
+
"floorP50": round(float(np.percentile(depth[region_mask > 0], 50)), 3),
|
| 1714 |
+
"floorP95": round(float(np.percentile(depth[region_mask > 0], 95)), 3),
|
| 1715 |
+
}
|
| 1716 |
+
if depth is not None and depth_model_is_metric() and (region_mask > 0).any()
|
| 1717 |
+
else None
|
| 1718 |
+
),
|
| 1719 |
})
|
| 1720 |
|
| 1721 |
if not segments:
|
golden_render.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""R0-1 — deterministic golden render: bundle x tile -> PNG.
|
| 2 |
+
|
| 3 |
+
A faithful Python port of the CURRENT frontend composite (canvas-engine.ts):
|
| 4 |
+
texture prep (wrap detection -> period-snap -> masked-shift fallback), mip
|
| 5 |
+
pyramid + trilinear with per-pixel footprint LOD, shade-map decode, homography-
|
| 6 |
+
mapped light vector + gloss-gated specular, colour cast, soft highlight clip,
|
| 7 |
+
confidence-map alpha. Texture-prep and sampling primitives are imported from
|
| 8 |
+
verify_n1_sim so this stays in lockstep with the certified implementations.
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python golden_render.py <bundle.json[.gz]> <tile-image> <out.png>
|
| 12 |
+
|
| 13 |
+
The output is resized to max-dim 720 so goldens stay small and stable.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import base64
|
| 17 |
+
import gzip
|
| 18 |
+
import json
|
| 19 |
+
import sys
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
from PIL import Image
|
| 23 |
+
|
| 24 |
+
from verify_n1_sim import (
|
| 25 |
+
build_mips,
|
| 26 |
+
detect_wrap_mode,
|
| 27 |
+
make_seamless,
|
| 28 |
+
period_snap,
|
| 29 |
+
sample_bilinear_wrap,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# Longest side, in pixels, of the written golden PNG (the "max-dim 720" of the
# module docstring). NOTE(review): the resize that consumes it is outside this
# view — confirm against the caller.
OUT_MAX_DIM = 720
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_bundle(path):
    """Load a render bundle from a JSON file, optionally gzip-compressed.

    Args:
        path: File path (str or path-like). A name ending in ".gz" is read
            through gzip; anything else is read as plain text.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # str() lets path-like objects through the suffix test as well.
    opener = gzip.open if str(path).endswith(".gz") else open
    # JSON is UTF-8 by specification, so do not depend on the platform's
    # default encoding. The context manager closes the handle on both
    # branches (the plain-JSON branch used to leave it open).
    with opener(path, "rt", encoding="utf-8") as f:
        return json.load(f)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def estimate_gloss(tex):
    """Port of estimateGloss (canvas-engine.ts): mean 4px luminance gradient.

    Luminance is sampled on every 4th row and column, and horizontally
    adjacent samples (4 px apart) are differenced. The mean absolute
    difference is mapped to gloss as ``1 - mean / 24``, clamped to [0, 1].

    Args:
        tex: Texture array indexed as tex[row, col, channel], channels R, G, B.

    Returns:
        Gloss as a float in [0, 1]; 1.0 when the texture is too narrow to
        yield any sample pair.
    """
    red, green, blue = tex[:, :, 0], tex[:, :, 1], tex[:, :, 2]
    luminance = red * 0.299 + green * 0.587 + blue * 0.114
    ahead = luminance[::4, 4::4]
    if not ahead.size:
        mean_grad = 0.0
    else:
        # Same rows, columns shifted 4 px left, trimmed to the same width.
        behind = luminance[::4, :-4:4][:, : ahead.shape[1]]
        mean_grad = float(np.mean(np.abs(ahead - behind)))
    return float(np.clip(1 - mean_grad / 24, 0, 1))
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def soft_clip(v):
    """Port of softClipByte: linear below 220, rational shoulder above.

    Values at or below the knee (220) pass through unchanged. Above it the
    excess ``t = v - 220`` is compressed to ``t * 35 / (t + 35)``, so the
    output approaches 255 without reaching it.

    Args:
        v: Scalar or NumPy array of channel values.

    Returns:
        NumPy array (0-d for scalar input) of soft-clipped values.
    """
    knee, rng = 220.0, 35.0
    # np.where evaluates both branches for every element, so the shoulder is
    # computed below the knee too. Clamping the excess at zero keeps its
    # denominator >= rng; unclamped, v == 185 gives t + rng == 0, which raised
    # ZeroDivisionError for Python floats and a divide-by-zero RuntimeWarning
    # for arrays. Results above the knee are unchanged.
    t = np.maximum(v - knee, 0.0)
    return np.where(v <= knee, v, knee + (t * rng) / (t + rng))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def prepare_texture(tile_path):
    """Load a tile image as RGB and make it repeatable.

    The tile is used as-is when detect_wrap_mode reports "wrap". Otherwise
    period_snap is tried: if it reports "snap", its output replaces the tile
    and the repeat scale becomes the width ratio snapped/original. Failing
    that, make_seamless is applied and the scale stays 1.0.

    Args:
        tile_path: Path (or file object) accepted by PIL's Image.open.

    Returns:
        (texture, repeat_scale): the prepared texture array and the factor by
        which the tile's repeat width should be multiplied.
    """
    tex = np.asarray(Image.open(tile_path).convert("RGB"))
    _, source_width, _ = tex.shape
    wrap_mode, _, _ = detect_wrap_mode(tex)
    if wrap_mode == "wrap":
        return tex, 1.0
    snapped, snap_info = period_snap(tex)
    if snap_info[0] != "snap":
        return make_seamless(tex), 1.0
    return snapped, snapped.shape[1] / source_width
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def render(bundle_path, tile_path):
|
| 74 |
+
d = load_bundle(bundle_path)
|
| 75 |
+
w, h = d["width"], d["height"]
|
| 76 |
+
base = np.asarray(
|
| 77 |
+
Image.open(__import__("io").BytesIO(base64.b64decode(d["pixels"]))).convert("RGB")
|
| 78 |
+
).astype(np.float64)
|
| 79 |
+
seg = max(d["segments"], key=lambda s: len(s["mask"]))
|
| 80 |
+
|
| 81 |
+
mask_idx = np.frombuffer(base64.b64decode(seg["mask"]), dtype=np.uint32)
|
| 82 |
+
mask = np.zeros(w * h, bool)
|
| 83 |
+
mask[mask_idx] = True
|
| 84 |
+
mask = mask.reshape(h, w)
|
| 85 |
+
|
| 86 |
+
H = np.asarray(seg["homography"], np.float64).reshape(3, 3)
|
| 87 |
+
plane = seg.get("plane") or {}
|
| 88 |
+
plane_w = max(plane.get("width", w), 1)
|
| 89 |
+
plane_h = max(plane.get("height", h), 1)
|
| 90 |
+
plane_cx = plane.get("x", 0) + plane_w / 2
|
| 91 |
+
plane_cy = plane.get("y", 0) + plane_h / 2
|
| 92 |
+
rot_deg = plane.get("defaultRotation") or 0.0
|
| 93 |
+
rad = np.deg2rad(rot_deg)
|
| 94 |
+
cos, sin = np.cos(-rad), np.sin(-rad)
|
| 95 |
+
|
| 96 |
+
shade_map = (
|
| 97 |
+
np.frombuffer(base64.b64decode(seg["shadeMap"]), np.uint8).reshape(h, w).astype(np.float64)
|
| 98 |
+
if seg.get("shadeMap")
|
| 99 |
+
else None
|
| 100 |
+
)
|
| 101 |
+
shade_lo, shade_hi = seg.get("shadeRange") or (0.55, 1.35)
|
| 102 |
+
conf = (
|
| 103 |
+
np.frombuffer(base64.b64decode(seg["confidenceMap"]), np.uint8).reshape(h, w).astype(np.float64) / 255.0
|
| 104 |
+
if seg.get("confidenceMap")
|
| 105 |
+
else None
|
| 106 |
+
)
|
| 107 |
+
ct = seg.get("colorTemperature") or {}
|
| 108 |
+
if "cast" in ct:
|
| 109 |
+
ct = ct["cast"]
|
| 110 |
+
col = np.array([ct.get("r", 1.0), ct.get("g", 1.0), ct.get("b", 1.0)])
|
| 111 |
+
lv = seg.get("lightVector")
|
| 112 |
+
|
| 113 |
+
tex, repeat_scale = prepare_texture(tile_path)
|
| 114 |
+
gloss = estimate_gloss(np.asarray(Image.open(tile_path).convert("RGB")).astype(np.float64))
|
| 115 |
+
th, tw, _ = tex.shape
|
| 116 |
+
mips = build_mips(tex)
|
| 117 |
+
max_l = len(mips) - 1
|
| 118 |
+
|
| 119 |
+
# R1-3 — mirror of canvas-engine.ts: metric plane scale when present
|
| 120 |
+
# (pixel-ish or metre plane units alike — backend gates mpu hard),
|
| 121 |
+
# heuristic fallback otherwise (info.scale = 1 in goldens).
|
| 122 |
+
DEFAULT_TILE_M = 0.6
|
| 123 |
+
mpu = plane.get("metersPerUnit")
|
| 124 |
+
repeat_w = 0.0
|
| 125 |
+
if mpu and mpu > 0:
|
| 126 |
+
repeat_w = (DEFAULT_TILE_M / mpu) * repeat_scale
|
| 127 |
+
if not (np.isfinite(repeat_w) and repeat_w > 0):
|
| 128 |
+
repeat_w = 0.0
|
| 129 |
+
if not repeat_w:
|
| 130 |
+
repeat_w = max(48.0, min(plane_w, plane_h) * 0.22) * repeat_scale
|
| 131 |
+
repeat_h = repeat_w * (th / tw)
|
| 132 |
+
|
| 133 |
+
ys, xs = np.nonzero(mask)
|
| 134 |
+
xs_f, ys_f = xs.astype(np.float64), ys.astype(np.float64)
|
| 135 |
+
|
| 136 |
+
def to_plane(px, py):
|
| 137 |
+
z = H[2, 0] * px + H[2, 1] * py + H[2, 2]
|
| 138 |
+
z = np.where(np.abs(z) < 1e-6, 1e-6, z)
|
| 139 |
+
return (
|
| 140 |
+
(H[0, 0] * px + H[0, 1] * py + H[0, 2]) / z,
|
| 141 |
+
(H[1, 0] * px + H[1, 1] * py + H[1, 2]) / z,
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
fx, fy = to_plane(xs_f, ys_f)
|
| 145 |
+
fx1, fy1 = to_plane(xs_f + 1, ys_f)
|
| 146 |
+
fx2, fy2 = to_plane(xs_f, ys_f + 1)
|
| 147 |
+
|
| 148 |
+
def rot(ax, ay):
|
| 149 |
+
dx = ax - plane_cx
|
| 150 |
+
dy = ay - plane_cy
|
| 151 |
+
return dx * cos - dy * sin, dx * sin + dy * cos
|
| 152 |
+
|
| 153 |
+
rx, ry = rot(fx, fy)
|
| 154 |
+
rx1, ry1 = rot(fx1, fy1)
|
| 155 |
+
rx2, ry2 = rot(fx2, fy2)
|
| 156 |
+
|
| 157 |
+
u = np.mod(rx / repeat_w, 1.0)
|
| 158 |
+
v = np.mod(ry / repeat_h, 1.0)
|
| 159 |
+
tcx, tcy = (rx / repeat_w) * tw, (ry / repeat_h) * th
|
| 160 |
+
du = np.hypot((rx1 / repeat_w) * tw - tcx, (ry1 / repeat_h) * th - tcy)
|
| 161 |
+
dv = np.hypot((rx2 / repeat_w) * tw - tcx, (ry2 / repeat_h) * th - tcy)
|
| 162 |
+
lod = np.log2(np.maximum(np.maximum(du, dv), 1e-3)) + 0.5
|
| 163 |
+
l0 = np.clip(np.floor(lod), 0, max_l).astype(np.int64)
|
| 164 |
+
frac = np.clip(lod - l0, 0, 1)
|
| 165 |
+
|
| 166 |
+
sample = np.zeros((len(xs), 3), np.float64)
|
| 167 |
+
for lev in range(max_l + 1):
|
| 168 |
+
sel = l0 == lev
|
| 169 |
+
if not sel.any():
|
| 170 |
+
continue
|
| 171 |
+
a = mips[lev]
|
| 172 |
+
sa = sample_bilinear_wrap(a, u[sel] * a.shape[1], v[sel] * a.shape[0])
|
| 173 |
+
if lev < max_l:
|
| 174 |
+
b = mips[lev + 1]
|
| 175 |
+
sb = sample_bilinear_wrap(b, u[sel] * b.shape[1], v[sel] * b.shape[0])
|
| 176 |
+
sample[sel] = sa + (sb - sa) * frac[sel][:, None]
|
| 177 |
+
else:
|
| 178 |
+
sample[sel] = sa
|
| 179 |
+
|
| 180 |
+
shade = (
|
| 181 |
+
shade_lo + (shade_map[ys, xs] / 255.0) * (shade_hi - shade_lo)
|
| 182 |
+
if shade_map is not None
|
| 183 |
+
else np.full(len(xs), 1.0)
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
specular = np.zeros(len(xs))
|
| 187 |
+
if lv:
|
| 188 |
+
lvx, lvy = lv.get("x", 0.0), lv.get("y", 0.0)
|
| 189 |
+
a = to_plane(np.array([w * 0.5]), np.array([h * 0.75]))
|
| 190 |
+
step = min(w, h) * 0.05
|
| 191 |
+
b = to_plane(np.array([w * 0.5 + lvx * step]), np.array([h * 0.75 + lvy * step]))
|
| 192 |
+
dxv, dyv = b[0][0] - a[0][0], b[1][0] - a[1][0]
|
| 193 |
+
ln = np.hypot(dxv, dyv)
|
| 194 |
+
if ln > 1e-6:
|
| 195 |
+
lvx, lvy = dxv / ln, dyv / ln
|
| 196 |
+
dfx = (fx - plane_cx) / (plane_w * 0.5)
|
| 197 |
+
dfy = (fy - plane_cy) / (plane_h * 0.5)
|
| 198 |
+
dlen = np.hypot(dfx, dfy)
|
| 199 |
+
ok = dlen > 0.01
|
| 200 |
+
dot = np.where(ok, (dfx * lvx + dfy * lvy) / np.maximum(dlen, 1e-9), 0.0)
|
| 201 |
+
specular = 0.12 * gloss * np.maximum(0, dot) ** 4
|
| 202 |
+
|
| 203 |
+
texel = soft_clip(sample * shade[:, None] * col[None, :] + specular[:, None] * 255.0)
|
| 204 |
+
alpha = conf[ys, xs][:, None] if conf is not None else np.ones((len(xs), 1))
|
| 205 |
+
|
| 206 |
+
out = base.copy()
|
| 207 |
+
out[ys, xs] = np.clip(texel * alpha + base[ys, xs] * (1 - alpha), 0, 255)
|
| 208 |
+
img = Image.fromarray(out.astype(np.uint8))
|
| 209 |
+
scale = min(OUT_MAX_DIM / max(img.size), 1.0)
|
| 210 |
+
if scale < 1.0:
|
| 211 |
+
img = img.resize((round(img.width * scale), round(img.height * scale)), Image.BILINEAR)
|
| 212 |
+
return img
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def main():
    """Command-line entry point: render one bundle/tile pair and save it.

    Expects exactly three arguments after the program name. The first two are
    forwarded to ``render`` and the third is the path the resulting image is
    saved to.

    Returns:
        2 (after printing the module docstring) when the argument count is
        wrong, 0 once the image has been saved.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print(__doc__)
        return 2

    bundle_arg, tile_arg, out_path = cli_args
    image = render(bundle_arg, tile_arg)
    image.save(out_path)
    print(f"saved {out_path}")
    return 0
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
if __name__ == "__main__":
|
| 225 |
+
raise SystemExit(main())
|
verify_goldens.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""R0-1 — golden-image gate.
|
| 2 |
+
|
| 3 |
+
Renders the reference matrix (bundle x tile) through golden_render and compares
|
| 4 |
+
against the committed goldens. Any drift beyond tolerance fails with a
|
| 5 |
+
side-by-side (golden | current | amplified diff) written to verify_out/.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
python verify_goldens.py # check against goldens (CI mode)
|
| 9 |
+
python verify_goldens.py --bless # regenerate goldens (intentional change)
|
| 10 |
+
|
| 11 |
+
Tolerances: renders are deterministic numpy, so genuine engine changes show up
|
| 12 |
+
as large diffs; the small allowance absorbs PIL/numpy version drift only.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
from PIL import Image
|
| 20 |
+
|
| 21 |
+
import golden_render
|
| 22 |
+
|
| 23 |
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
| 24 |
+
TILES = os.path.join(HERE, "..", "..", "frontend", "viz2d-demo", "src", "assets", "tiles")
|
| 25 |
+
GOLDEN_DIR = os.path.join(HERE, "goldens")
|
| 26 |
+
OUT = os.path.join(HERE, "verify_out")
|
| 27 |
+
|
| 28 |
+
MEAN_TOL = 0.5 # mean abs diff per channel
|
| 29 |
+
P999_TOL = 8.0 # 99.9th percentile abs diff
|
| 30 |
+
|
| 31 |
+
MATRIX = [
|
| 32 |
+
# (golden name, bundle, tile) — tiles cover the three texture-prep paths:
|
| 33 |
+
# checkered = period-snap, rustic-wood = masked-shift, basalt = native wrap
|
| 34 |
+
("desk_checkered", "data/current_bundle.vizbundle.json", "checkered.jpeg"),
|
| 35 |
+
("desk_rustic", "data/current_bundle.vizbundle.json", "rustic-wood.jpg"),
|
| 36 |
+
("desk_basalt", "data/current_bundle.vizbundle.json", "basalt-outside-wal.jpg"),
|
| 37 |
+
("kitchen_checkered", "data/ref_kitchen.vizbundle.json", "checkered.jpeg"),
|
| 38 |
+
("kitchen_rustic", "data/ref_kitchen.vizbundle.json", "rustic-wood.jpg"),
|
| 39 |
+
("kitchen_basalt", "data/ref_kitchen.vizbundle.json", "basalt-outside-wal.jpg"),
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def main():
    """Render every MATRIX case and compare it with its committed golden.

    With ``--bless`` anywhere on the command line each render is written to
    GOLDEN_DIR as the new golden and no comparison takes place. Otherwise a
    case fails when its golden is missing, when the image shape changed, or
    when the mean / 99.9th-percentile absolute difference exceeds MEAN_TOL /
    P999_TOL. A failing comparison also writes a side-by-side panel
    (golden | current | diff x8) into OUT.

    Returns:
        0 after blessing or when every case passed, 1 otherwise.
    """
    bless = "--bless" in sys.argv
    os.makedirs(GOLDEN_DIR, exist_ok=True)
    os.makedirs(OUT, exist_ok=True)
    ok = True

    for name, bundle, tile in MATRIX:
        img = golden_render.render(os.path.join(HERE, bundle), os.path.join(TILES, tile))
        golden_path = os.path.join(GOLDEN_DIR, f"{name}.png")

        if bless:
            img.save(golden_path)
            print(f" blessed {name}.png ({img.width}x{img.height})")
            continue

        if not os.path.exists(golden_path):
            print(f" [FAIL] {name}: golden missing — run `make bless`")
            ok = False
            continue

        cur = np.asarray(img).astype(np.float64)
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle as soon as the pixels have been converted.
        with Image.open(golden_path) as golden_img:
            gold = np.asarray(golden_img.convert("RGB")).astype(np.float64)
        if cur.shape != gold.shape:
            print(f" [FAIL] {name}: size changed {gold.shape} -> {cur.shape}")
            ok = False
            continue

        diff = np.abs(cur - gold)
        mean_d = float(diff.mean())
        p999 = float(np.percentile(diff, 99.9))
        passed = mean_d <= MEAN_TOL and p999 <= P999_TOL
        print(f" [{'PASS' if passed else 'FAIL'}] {name}: mean={mean_d:.3f} p99.9={p999:.1f}")
        if not passed:
            ok = False
            # Amplify the difference so sub-tolerance drift is visible by eye.
            amplified = np.clip(diff * 8, 0, 255).astype(np.uint8)
            panel = np.concatenate(
                [gold.astype(np.uint8), cur.astype(np.uint8), amplified], axis=1
            )
            fail_path = os.path.join(OUT, f"golden_fail_{name}.png")
            Image.fromarray(panel).save(fail_path)
            print(f" side-by-side: {fail_path} (golden | current | diff x8)")

    if bless:
        print("goldens regenerated — commit backend/floor-visualizer/goldens/")
        return 0
    print("\n" + ("ALL GOLDEN CHECKS PASSED" if ok else "GOLDEN CHECKS FAILED"))
    return 0 if ok else 1
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
if __name__ == "__main__":
|
| 93 |
+
raise SystemExit(main())
|
verify_n1_parity.sh
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# R0-1 — N1 parity gate: the TypeScript periodSnap/detectWrapMode in
# canvas-engine.ts must make the same decisions as the certified Python
# implementations in verify_n1_sim.py, on the real catalog tiles.
#
# Requires: python3 (PIL, numpy), node, esbuild (present via frontend
# node_modules). Can be started from any directory: the script changes into
# its own directory before resolving relative paths.
set -euo pipefail

HERE="$(cd "$(dirname "$0")" && pwd)"
FRONTEND="$HERE/../../frontend/viz2d-demo"
TMP="$(mktemp -d)"
trap 'rm -rf "$TMP"' EXIT

# The embedded Python imports verify_n1_sim from "." and reads tiles through a
# relative path, so pin the working directory to the script's location.
cd "$HERE"

# 1. Python: decode tiles to raw RGBA + record expected decisions
python3 - "$TMP" << 'EOF'
import json
import sys

import numpy as np
from PIL import Image

sys.path.insert(0, ".")
from verify_n1_sim import detect_wrap_mode, period_snap

tmp = sys.argv[1]
TILES = "../../frontend/viz2d-demo/src/assets/tiles"
cases = ["checkered.jpeg", "rustic-wood.jpg", "floor-natural-stone.jpg",
         "basalt-outside-wal.jpg", "mosaic-tile.jpg"]
expected = {}
for name in cases:
    key = name.split(".")[0]
    im = Image.open(f"{TILES}/{name}").convert("RGBA")
    # Close the raw dump explicitly so node reads a fully flushed file.
    with open(f"{tmp}/{key}.bin", "wb") as raw_file:
        raw_file.write(im.tobytes())
    rgb = np.asarray(im.convert("RGB"))
    mode, _, _ = detect_wrap_mode(rgb)
    snap = None
    if mode != "wrap":
        out, info = period_snap(rgb)
        if info[0] == "snap":
            snap = [out.shape[1], out.shape[0]]
    expected[key] = {"w": im.width, "h": im.height,
                     "mode": "wrap" if mode == "wrap" else "mirror", "snap": snap}
with open(f"{tmp}/expected.json", "w") as expected_file:
    json.dump(expected, expected_file)
print("python decisions:", json.dumps(expected, default=str))
EOF

# 2. Compile the actual frontend engine and replay the same decisions
(cd "$FRONTEND" && npx esbuild src/visualizer-demo/canvas-engine.ts \
  --format=cjs --outfile="$TMP/ce.cjs" --log-level=error)

node - "$TMP" << 'EOF'
const fs = require("fs");
const path = require("path");
const tmp = process.argv[2];
const { periodSnap, detectWrapMode } = require(path.join(tmp, "ce.cjs"));
const expected = JSON.parse(fs.readFileSync(path.join(tmp, "expected.json")));
let ok = true;
for (const [key, exp] of Object.entries(expected)) {
  const raw = new Uint8ClampedArray(fs.readFileSync(path.join(tmp, key + ".bin")));
  const mode = detectWrapMode(raw, exp.w, exp.h);
  let snap = null;
  if (mode !== "wrap") {
    const r = periodSnap(raw, exp.w, exp.h);
    if (r) snap = [r.w, r.h];
  }
  const pass = mode === exp.mode && JSON.stringify(snap) === JSON.stringify(exp.snap);
  console.log(` [${pass ? "PASS" : "FAIL"}] ${key}: ts mode=${mode} snap=${JSON.stringify(snap)}`
    + (pass ? "" : ` expected mode=${exp.mode} snap=${JSON.stringify(exp.snap)}`));
  if (!pass) ok = false;
}
console.log(ok ? "N1 PARITY PASSED" : "N1 PARITY FAILED");
process.exit(ok ? 0 : 1);
EOF
|
verify_r1_metric.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""R1-1 — metric depth certification (local harness; needs torch+transformers
|
| 2 |
+
and reference room photos — not part of `make verify`, which uses precomputed
|
| 3 |
+
bundles).
|
| 4 |
+
|
| 5 |
+
Runs the configured depth checkpoint on reference room photos and validates
|
| 6 |
+
that the output is genuinely METRIC:
|
| 7 |
+
|
| 8 |
+
1. floor depth range plausible for an interior (p5/p95 within 0.3-20 m)
|
| 9 |
+
2. ground-plane consistency: on a floor plane, inverse depth is linear in
|
| 10 |
+
image row (1/Z = (y - y_horizon) / (h_cam * f)); the fit must hold
|
| 11 |
+
(R^2 >= 0.9 over floor rows)
|
| 12 |
+
3. absolute scale: the camera height recovered from that fit's slope
|
| 13 |
+
(h = 1 / (slope * f), f ~ image width) must land in 0.7-2.5 m — the
|
| 14 |
+
handheld-phone band. This is the automated equivalent of the backlog's
|
| 15 |
+
"door height ~2.0 m +/-15%" check: both test absolute metric scale, but
|
| 16 |
+
this one needs no manual annotation.
|
| 17 |
+
|
| 18 |
+
Usage:
|
| 19 |
+
python verify_r1_metric.py <room-photo.jpg> [more photos...]
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import sys
|
| 23 |
+
|
| 24 |
+
import cv2
|
| 25 |
+
import numpy as np
|
| 26 |
+
import torch
|
| 27 |
+
from PIL import Image
|
| 28 |
+
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
|
| 29 |
+
|
| 30 |
+
# single source of truth: read the configured model + metric predicate from app.py
import re

# Read app.py as UTF-8 and close the handle right away; the default locale
# encoding may not be able to decode non-ASCII characters in the source.
with open("app.py", encoding="utf-8") as app_file:
    src = app_file.read()

# Execute only the slice of app.py that defines depth_model_is_metric (from its
# `def` up to the ENABLE_DEPTH... assignment) so the rest of the app is not run.
ns = {}
start = src.index("def depth_model_is_metric")
end = src.index("\nENABLE_DEPTH", start)
exec(compile(src[start:end], "app.py", "exec"), ns)

# The default checkpoint is the first string literal after the
# "depth_model_name" key, allowing comment lines in between.
MODEL = re.search(r'depth_model_name",\s*\n(?:\s*#.*\n)*\s*"([^"]+)"', src).group(1)
depth_model_is_metric = ns["depth_model_is_metric"]

FLOOR_FRAC = 0.45  # treat the bottom 45% of the frame as floor-dominated
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def run_depth(img):
    """Predict a smoothed depth map for ``img`` at the image's own resolution.

    The processor and model are read from the ``run_depth.processor`` and
    ``run_depth.model`` function attributes, which the caller must set first.
    The raw prediction is resized bicubically to (img.height, img.width) and
    then Gaussian-blurred with sigmaX=3.

    Returns:
        The blurred float32 depth array produced by ``cv2.GaussianBlur``.
    """
    batch = run_depth.processor(images=img, return_tensors="pt")
    with torch.no_grad():
        prediction = run_depth.model(**batch).predicted_depth

    target_size = (img.height, img.width)
    resized = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=target_size,
        mode="bicubic",
        align_corners=False,
    )
    depth_map = resized.squeeze().numpy().astype(np.float32)
    return cv2.GaussianBlur(depth_map, (0, 0), sigmaX=3)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def main():
    """Run the R1-1 metric-depth checks on every photo given on the command line.

    For each photo (downscaled so its longest side is at most 1280 px) the
    depth map is checked three ways over the bottom FLOOR_FRAC of the frame:
    the 5th-95th percentile depth must lie within 0.3-20, the row-median
    inverse depth must fit a line in image row with R^2 >= 0.90 and positive
    slope, and the camera height derived from that slope must lie in 0.7-2.5.

    Returns:
        2 when no photos were given (usage is printed), 1 when the configured
        model is not metric or any photo fails a check, 0 otherwise.
    """
    import os  # local import: this script has no module-level `os`

    photos = sys.argv[1:]
    if not photos:
        print(__doc__)
        return 2
    print(f"model: {MODEL}")
    if not depth_model_is_metric(MODEL):
        print("!! configured model is not metric — R1-1 not in effect")
        return 1

    print("loading checkpoint...")
    run_depth.processor = AutoImageProcessor.from_pretrained(MODEL)
    run_depth.model = AutoModelForDepthEstimation.from_pretrained(MODEL).eval()

    ok = True
    for path in photos:
        # convert() yields an independent in-memory image, so the file handle
        # can be released immediately instead of lingering until GC.
        with Image.open(path) as raw_img:
            img = raw_img.convert("RGB")
        if max(img.size) > 1280:
            s = 1280 / max(img.size)
            img = img.resize((round(img.width * s), round(img.height * s)), Image.LANCZOS)
        w, h = img.size
        depth = run_depth(img)

        floor_top = int(h * (1 - FLOOR_FRAC))
        floor = depth[floor_top:, :]
        p5, p95 = np.percentile(floor, 5), np.percentile(floor, 95)
        range_ok = 0.3 <= p5 and p95 <= 20.0

        # row-median inverse depth over the floor band; fit 1/Z = a*y + b
        ys = np.arange(floor_top, h)
        inv = np.array([np.median(1.0 / np.maximum(depth[y], 0.05)) for y in ys])
        a, b = np.polyfit(ys, inv, 1)
        pred = a * ys + b
        ss_res = float(np.sum((inv - pred) ** 2))
        ss_tot = float(np.sum((inv - inv.mean()) ** 2)) + 1e-12
        r2 = 1 - ss_res / ss_tot

        focal = float(w)  # P0 convention: f ~ image width
        horizon_y = -b / a if abs(a) > 1e-12 else float("nan")
        # exact ground-plane relation for a pitched camera:
        #   1/Z = (sin(t)*f - cos(t)*y') / (h*f)  ->  h = cos(t) / (a*f)
        # with pitch t recovered from the fitted horizon row.
        pitch = np.arctan2(h / 2 - horizon_y, focal)
        cam_h = float(np.cos(pitch) / (a * focal)) if a > 1e-9 else float("inf")

        plane_ok = r2 >= 0.90 and a > 0
        height_ok = 0.7 <= cam_h <= 2.5
        passed = bool(range_ok and plane_ok and height_ok)
        ok = ok and passed
        print(
            f" [{'PASS' if passed else 'FAIL'}] {os.path.basename(path)}: "
            f"floor p5-p95 = {p5:.2f}-{p95:.2f} m | invZ-fit R2={r2:.3f} | "
            f"camera height = {cam_h:.2f} m | horizon y = {horizon_y:.0f}/{h}"
        )
        if not range_ok:
            print(" !! floor depth outside 0.3-20 m")
        if not plane_ok:
            print(" !! inverse depth not linear in row — not plane-consistent")
        if not height_ok:
            print(" !! camera height outside handheld band 0.7-2.5 m")

    print("\n" + ("ALL R1-1 METRIC CHECKS PASSED" if ok else "R1-1 METRIC CHECKS FAILED"))
    return 0 if ok else 1
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
if __name__ == "__main__":
|
| 124 |
+
raise SystemExit(main())
|
verify_r1_plane_sim.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""R1-2 — depth-based plane-fit fallback certification (CI-safe: analytic
|
| 2 |
+
depth, no torch).
|
| 3 |
+
|
| 4 |
+
Reuses the exact pinhole ground-plane scene from verify_r1_scale_sim (camera
|
| 5 |
+
1.5 m up, pitch 25 deg, f = image width) and runs the REAL
|
| 6 |
+
plane_homography_from_depth from app.py.
|
| 7 |
+
|
| 8 |
+
Checks:
|
| 9 |
+
1. engages: returns a homography + plane on a clean metric ground plane
|
| 10 |
+
2. metric by construction: the certified R1-3 estimator measures
|
| 11 |
+
metersPerUnit ~ 1 on the produced homography (within 2%)
|
| 12 |
+
3. shear-free and true-to-size: a known 1 m ground square maps to a
|
| 13 |
+
1 x 1 plane square with right angles (sides within 2%, angle within 2 deg)
|
| 14 |
+
4. orientation: plane-y grows toward the camera (near field), plane-x
|
| 15 |
+
image-right — the bundle convention the frontend assumes
|
| 16 |
+
5. rejection: non-planar depth (a dome) returns None
|
| 17 |
+
6. rejection: relative (normalised) depth returns None
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import cv2
|
| 21 |
+
import numpy as np
|
| 22 |
+
|
| 23 |
+
from verify_r1_scale_sim import CAM_H, F, H as IMG_H, PITCH, W as IMG_W, scene
|
| 24 |
+
|
| 25 |
+
# --- real implementations from app.py ----------------------------------------
# Read app.py as UTF-8 and close the handle right away; the default locale
# encoding may not be able to decode non-ASCII characters in the source.
with open("app.py", encoding="utf-8") as app_file:
    src = app_file.read()

# Namespace the extracted functions run in; depth_model_is_metric is stubbed to
# "always metric" and is swapped by main() for the relative-depth rejection check.
ns = {"np": np, "cv2": cv2, "depth_model_is_metric": lambda name=None: True}
for fn in ["estimate_meters_per_unit", "plane_homography_from_depth"]:
    # Each function's source runs from its `def` to the next top-level `def`.
    start = src.index(f"def {fn}")
    end = src.index("\ndef ", start + 10)
    exec(compile(src[start:end], "app.py", "exec"), ns)
plane_homography_from_depth = ns["plane_homography_from_depth"]
estimate_meters_per_unit = ns["estimate_meters_per_unit"]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def to_plane(H, px, py):
    """Apply the 3x3 homography ``H`` to image point(s) ``(px, py)``.

    Returns:
        A ``(x, y)`` tuple: the first two homogeneous coordinates divided by
        the third. Scalars and arrays are both accepted for ``px``/``py``.
    """
    (h00, h01, h02), (h10, h11, h12), (h20, h21, h22) = H
    denom = h20 * px + h21 * py + h22
    plane_x = (h00 * px + h01 * py + h02) / denom
    plane_y = (h10 * px + h11 * py + h12) / denom
    return plane_x, plane_y
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def project_ground(x_w, fwd_w):
    """Image pixel of a world ground point — inverse of the scene mapping."""
    sin_p, cos_p = np.sin(PITCH), np.cos(PITCH)
    ground_y = -CAM_H  # the ground sits CAM_H below the camera

    # world -> camera: rotate (forward, height) by the camera pitch
    cam_z = cos_p * fwd_w - sin_p * ground_y
    cam_y = -sin_p * fwd_w - cos_p * ground_y

    # camera -> pixel: pinhole projection about the image centre
    col = x_w / cam_z * F + IMG_W / 2.0
    row = cam_y / cam_z * F + IMG_H / 2.0
    return col, row
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def main():
    """Run the R1-2 simulation checks and return a process exit code.

    Fits a homography to the analytic scene depth, then checks metric scale,
    the mapping of a known 1 m ground square, axis orientation, and that both
    dome-shaped depth and relative depth are rejected.

    Returns:
        1 when the fit does not engage or any check fails, 0 otherwise.
    """

    def label(flag):
        return "PASS" if flag else "FAIL"

    results = []
    mask, z, _x_w, _fwd_w = scene()

    fitted = plane_homography_from_depth(z, mask, IMG_W, IMG_H)
    if fitted is None:
        print(" [FAIL] fallback did not engage on a clean metric ground plane")
        print("\nR1-2 SIM CHECKS FAILED")
        return 1
    hom, plane = fitted
    H = np.asarray(hom, np.float64).reshape(3, 3)
    print(f" [PASS] engages: plane {plane['width']:.2f} x {plane['height']:.2f} m, "
          f"source={plane.get('geometrySource')}")

    # metric by construction: the estimator must report ~1 m per plane unit
    mpu = estimate_meters_per_unit(z, mask, hom, IMG_W, IMG_H)
    metric_ok = mpu is not None and abs(mpu - 1.0) <= 0.02
    print(f" [{label(metric_ok)}] metric: metersPerUnit = {mpu}")
    results.append(metric_ok)

    # known 1m ground square in the near field, centred
    centre_x = 0.0
    f0 = CAM_H / np.tan(PITCH) * 0.9  # comfortably inside the visible floor
    world_corners = [
        (centre_x - 0.5, f0),
        (centre_x + 0.5, f0),
        (centre_x + 0.5, f0 + 1.0),
        (centre_x - 0.5, f0 + 1.0),
    ]
    plane_corners = np.asarray(
        [to_plane(H, *project_ground(xw, fw)) for xw, fw in world_corners]
    )
    edge_a = plane_corners[1] - plane_corners[0]
    edge_b = plane_corners[2] - plane_corners[1]
    s1 = np.linalg.norm(edge_a)
    s2 = np.linalg.norm(edge_b)
    angle = np.degrees(np.arccos(abs(edge_a @ edge_b) / (s1 * s2 + 1e-12)))
    square_ok = abs(s1 - 1) <= 0.02 and abs(s2 - 1) <= 0.02 and angle >= 88.0
    print(f" [{label(square_ok)}] 1m square -> sides {s1:.3f} x {s2:.3f} m, "
          f"corner angle {angle:.1f} deg")
    results.append(square_ok)

    # orientation: nearer ground (smaller fwd) must have LARGER plane-y;
    # world +x (image right) must have larger plane-x
    near_px = project_ground(0.0, f0)
    far_px = project_ground(0.0, f0 + 2.0)
    right_px = project_ground(1.0, f0)
    a_l, b_near = to_plane(H, *near_px)
    _, b_far = to_plane(H, *far_px)
    a_r, _ = to_plane(H, *right_px)
    orient_ok = b_near > b_far and a_r > a_l
    print(f" [{label(orient_ok)}] orientation: near-y {b_near:.2f} > far-y {b_far:.2f}, "
          f"right-x {a_r:.2f} > left-x {a_l:.2f}")
    results.append(orient_ok)

    # rejection: dome instead of plane
    yy, xx = np.mgrid[0:IMG_H, 0:IMG_W].astype(np.float64)
    dome = (2.5 - 1.2 * np.exp(-(((xx - IMG_W / 2) / 300) ** 2 + ((yy - IMG_H / 2) / 220) ** 2))).astype(np.float32)
    r_dome = plane_homography_from_depth(dome, mask, IMG_W, IMG_H)
    dome_rejected = r_dome is None
    print(f" [{label(dome_rejected)}] rejection: dome depth -> {None if dome_rejected else 'accepted'}")
    results.append(dome_rejected)

    # rejection: relative depth (flip the metric predicate just for this call)
    ns["depth_model_is_metric"] = lambda name=None: False
    r_rel = plane_homography_from_depth(z, mask, IMG_W, IMG_H)
    ns["depth_model_is_metric"] = lambda name=None: True
    rel_rejected = r_rel is None
    print(f" [{label(rel_rejected)}] rejection: relative depth -> {None if rel_rejected else 'accepted'}")
    results.append(rel_rejected)

    ok = all(results)
    print("\n" + ("ALL R1-2 SIM CHECKS PASSED" if ok else "R1-2 SIM CHECKS FAILED"))
    return 0 if ok else 1
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
if __name__ == "__main__":
|
| 126 |
+
raise SystemExit(main())
|
verify_r1_scale.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""R1-3 — metric scene scale certification (local harness; needs
|
| 2 |
+
torch+transformers, a reference bundle, and the photo it was converted from).
|
| 3 |
+
|
| 4 |
+
Validates estimate_meters_per_unit end-to-end:
|
| 5 |
+
|
| 6 |
+
1. mpu resolves (not None) on real rooms with metric depth
|
| 7 |
+
2. plausibility: the visible floor's physical bottom width (plane.width *
|
| 8 |
+
mpu) lands in 1.5-10 m
|
| 9 |
+
3. independence check: a 60 cm tile's on-screen size predicted two ways
|
| 10 |
+
must agree within 15%:
|
| 11 |
+
a) through the scale chain: 0.6/mpu plane units -> homography -> pixels
|
| 12 |
+
b) straight from depth: 0.6 * f / Z at the same image row
|
| 13 |
+
4. cross-room consistency: the same physical tile, the same prediction
|
| 14 |
+
logic, in every supplied room.
|
| 15 |
+
|
| 16 |
+
Usage:
|
| 17 |
+
python verify_r1_scale.py <bundle.json[.gz]>:<photo.jpg> [more pairs...]
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import base64
|
| 21 |
+
import gzip
|
| 22 |
+
import json
|
| 23 |
+
import sys
|
| 24 |
+
|
| 25 |
+
import cv2
|
| 26 |
+
import numpy as np
|
| 27 |
+
import torch
|
| 28 |
+
from PIL import Image
|
| 29 |
+
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
|
| 30 |
+
|
| 31 |
+
# --- real implementations from app.py ---------------------------------------
import re

# Read app.py as UTF-8 and close the handle right away; the default locale
# encoding may not be able to decode non-ASCII characters in the source.
with open("app.py", encoding="utf-8") as app_file:
    src = app_file.read()

ns = {"np": np, "cv2": cv2}
# Slice 1: depth_model_is_metric, from its `def` up to the ENABLE_DEPTH... line.
start = src.index("def depth_model_is_metric")
end = src.index("\nENABLE_DEPTH", start)
exec(compile(src[start:end], "app.py", "exec"), ns)
ns["depth_model_is_metric"] = lambda name=None: True  # harness always metric
# Slice 2: estimate_meters_per_unit, from its `def` up to the next "# ---" banner.
start = src.index("def estimate_meters_per_unit")
end = src.index("\n# ---", start)
exec(compile(src[start:end], "app.py", "exec"), ns)
estimate_meters_per_unit = ns["estimate_meters_per_unit"]

# The default checkpoint is the first string literal after the
# "depth_model_name" key, allowing comment lines in between.
MODEL = re.search(r'depth_model_name",\s*\n(?:\s*#.*\n)*\s*"([^"]+)"', src).group(1)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_bundle(path):
    """Load a bundle JSON file, transparently handling gzip compression.

    Args:
        path: File path as a string; a name ending in ``.gz`` is read through
            ``gzip``, anything else as plain text.

    Returns:
        The decoded JSON document.

    The file is always closed before returning and is decoded as UTF-8 rather
    than with the platform's locale encoding.
    """
    opener = gzip.open if path.endswith(".gz") else open
    with opener(path, "rt", encoding="utf-8") as f:
        return json.load(f)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def run_depth(img):
    """Return a blurred depth map for ``img`` sized (img.height, img.width).

    Uses the processor and model stored on the ``run_depth.processor`` and
    ``run_depth.model`` function attributes (set by the caller before the
    first call). The prediction is upsampled bicubically and smoothed with a
    Gaussian blur (sigmaX=3).
    """
    model_inputs = run_depth.processor(images=img, return_tensors="pt")
    with torch.no_grad():
        model_output = run_depth.model(**model_inputs)

    raw = model_output.predicted_depth.unsqueeze(1)
    upsampled = torch.nn.functional.interpolate(
        raw, size=(img.height, img.width), mode="bicubic", align_corners=False
    )
    as_array = upsampled.squeeze().numpy()
    smoothed = cv2.GaussianBlur(as_array.astype(np.float32), (0, 0), sigmaX=3)
    return smoothed
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def main():
    """Run the R1-3 scale checks on every ``<bundle>:<photo>`` pair given.

    For each pair the largest segment's mask and homography are read from the
    bundle, depth is predicted for the photo resized to the bundle's
    dimensions, and ``estimate_meters_per_unit`` is evaluated. A ``None``
    result counts as a pass (heuristic fallback). Otherwise the floor's
    physical width (plane width * mpu) must lie in 1.5-10 m, and a 60 cm tile's
    on-screen size predicted through the homography must agree within 15% with
    the size predicted directly from depth at the same pixel.

    Returns:
        2 when no pairs were given or an argument is not ``<bundle>:<photo>``
        (usage is printed), 1 when any pair fails, 0 otherwise.
    """
    import os  # local import: this script has no module-level `os`

    # Validate every argument before the (slow) checkpoint load so a malformed
    # pair is reported as a usage error rather than an unpacking traceback.
    pairs = []
    for arg in sys.argv[1:]:
        parts = arg.split(":")
        if len(parts) != 2 or not all(parts):
            print(f"!! expected <bundle>:<photo>, got {arg!r}")
            print(__doc__)
            return 2
        pairs.append(parts)
    if not pairs:
        print(__doc__)
        return 2
    print(f"model: {MODEL}")
    run_depth.processor = AutoImageProcessor.from_pretrained(MODEL)
    run_depth.model = AutoModelForDepthEstimation.from_pretrained(MODEL).eval()

    ok = True
    for bundle_path, photo in pairs:
        photo_name = os.path.basename(photo)
        d = load_bundle(bundle_path)
        w, h = d["width"], d["height"]
        # The segment with the longest encoded mask is taken as the floor.
        seg = max(d["segments"], key=lambda s: len(s["mask"]))
        # The mask is stored as base64-encoded uint32 flat pixel indices.
        mask_idx = np.frombuffer(base64.b64decode(seg["mask"]), dtype=np.uint32)
        mask = np.zeros(w * h, np.uint8)
        mask[mask_idx] = 1
        mask = mask.reshape(h, w)
        H = np.asarray(seg["homography"], np.float64).reshape(3, 3)

        # convert() yields an independent in-memory image, so the file handle
        # can be released immediately instead of lingering until GC.
        with Image.open(photo) as raw_img:
            img = raw_img.convert("RGB").resize((w, h), Image.LANCZOS)
        depth = run_depth(img)

        mpu = estimate_meters_per_unit(depth, mask, seg["homography"], w, h)
        if mpu is None:
            # A clean fallback is acceptable: rooms on the synthetic-VP
            # homography can't carry a trustworthy metric scale until R1-2;
            # the engine then uses the heuristic repeat. FAIL is reserved for
            # a returned-but-wrong scale (checked below).
            print(f" [PASS] {photo_name}: metersPerUnit = None "
                  f"(clean heuristic fallback — geometry not metric-trustworthy)")
            continue

        plane = seg["plane"]
        floor_w_m = plane["width"] * mpu
        width_ok = 1.5 <= floor_w_m <= 10.0

        # independence check at a bottom-area floor row
        ys, xs = np.nonzero(mask)
        y_ref = int(np.percentile(ys, 92))
        row_xs = xs[ys == y_ref]
        if row_xs.size == 0:
            # The interpolated percentile can land on a row with no mask
            # pixels; fall back to the nearest row that has some.
            present_rows = np.unique(ys)
            y_ref = int(present_rows[np.argmin(np.abs(present_rows - y_ref))])
            row_xs = xs[ys == y_ref]
        x_ref = int(np.median(row_xs))
        z_ref = float(depth[y_ref, x_ref])
        f = float(w)
        px_from_depth = f * 0.6 / z_ref

        # map (0.6/mpu) plane units back through H^-1 at the same location
        Hinv = np.linalg.inv(H)
        den = H[2, 0] * x_ref + H[2, 1] * y_ref + H[2, 2]
        px_p = (H[0, 0] * x_ref + H[0, 1] * y_ref + H[0, 2]) / den
        py_p = (H[1, 0] * x_ref + H[1, 1] * y_ref + H[1, 2]) / den

        def back(up, vp):
            dz = Hinv[2, 0] * up + Hinv[2, 1] * vp + Hinv[2, 2]
            return (
                (Hinv[0, 0] * up + Hinv[0, 1] * vp + Hinv[0, 2]) / dz,
                (Hinv[1, 0] * up + Hinv[1, 1] * vp + Hinv[1, 2]) / dz,
            )

        units = 0.6 / mpu
        ax, ay = back(px_p - units / 2, py_p)
        bx, by = back(px_p + units / 2, py_p)
        px_from_chain = float(np.hypot(bx - ax, by - ay))
        rel_err = abs(px_from_chain - px_from_depth) / px_from_depth
        chain_ok = rel_err <= 0.15

        passed = bool(width_ok and chain_ok)
        ok = ok and passed
        print(
            f" [{'PASS' if passed else 'FAIL'}] {photo_name}: "
            f"mpu={mpu:.5f} m/unit | floor width = {floor_w_m:.2f} m | "
            f"60cm tile @row{y_ref}: chain={px_from_chain:.0f}px vs depth={px_from_depth:.0f}px "
            f"(err {rel_err * 100:.1f}%)"
        )
        if not width_ok:
            print(" !! floor physical width implausible")
        if not chain_ok:
            print(" !! scale chain disagrees with direct depth prediction")

    print("\n" + ("ALL R1-3 SCALE CHECKS PASSED" if ok else "R1-3 SCALE CHECKS FAILED"))
    return 0 if ok else 1
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
verify_r1_scale_sim.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""R1-3 — metric scale certification on an exact synthetic scene (CI-safe:
|
| 2 |
+
no torch, analytic depth).
|
| 3 |
+
|
| 4 |
+
Scene: pinhole camera (f = image width), height 1.5 m, pitch 25 deg, looking
|
| 5 |
+
at an infinite ground plane. Depth is computed analytically, the homography is
|
| 6 |
+
built exactly from four ground points with a known plane-unit scale, so the
|
| 7 |
+
true metersPerUnit is known in closed form.
|
| 8 |
+
|
| 9 |
+
Checks (real implementation extracted from app.py):
|
| 10 |
+
1. recovery: estimate_meters_per_unit returns the true scale within 2%
|
| 11 |
+
2. rejection: a sheared homography (the synthetic-VP failure mode) returns
|
| 12 |
+
None instead of a confidently-wrong scale
|
| 13 |
+
3. relative depth (normalised [0,1]) returns None (metric-only feature)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import cv2
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
# --- real implementation from app.py ----------------------------------------
|
| 20 |
+
# Slice the production estimate_meters_per_unit out of app.py's source text
# and execute only that slice, so the check exercises the real implementation
# without importing the whole application module.
# The file is read inside a context manager with an explicit encoding so the
# handle is always closed and decoding does not depend on the host locale.
with open("app.py", encoding="utf-8") as app_file:
    src = app_file.read()

# Globals visible to the extracted code. depth_model_is_metric is stubbed to
# report a metric model by default; main() swaps it for the relative-depth
# check.
ns = {"np": np, "cv2": cv2, "depth_model_is_metric": lambda name=None: True}

# The slice runs from the function's def line up to the next "# ---" section
# banner; str.index raises ValueError if either marker is missing.
start = src.index("def estimate_meters_per_unit")
end = src.index("\n# ---", start)
exec(compile(src[start:end], "app.py", "exec"), ns)
estimate_meters_per_unit = ns["estimate_meters_per_unit"]

# Synthetic scene parameters (see the module docstring).
W, H = 800, 600  # image width and height in pixels
F = float(W)  # pinhole focal length in pixels (f = image width)
CAM_H = 1.5  # camera height above the ground plane, metres
PITCH = np.deg2rad(25.0)  # downward camera pitch, radians
UNITS_PER_M = 200.0  # plane-unit scale baked into the homography
TRUE_MPU = 1.0 / UNITS_PER_M  # ground-truth metersPerUnit the estimator must recover
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def scene():
    """Build the analytic ground-plane scene shared by all checks.

    Returns:
        tuple: ``(mask, depth, x_w, fwd_w)`` where ``mask`` is a uint8 (H, W)
        array flagging floor pixels nearer than 30 m, ``depth`` is the float32
        camera-axis depth (0 outside the mask), and ``x_w`` / ``fwd_w`` are
        float64 (H, W) arrays of lateral and forward ground coordinates.

    Raises:
        AssertionError: if the reconstructed ground points do not sit exactly
            CAM_H below the camera, i.e. the simulation is self-inconsistent.
    """
    centre_u = W / 2.0
    centre_v = H / 2.0
    cols = np.arange(W, dtype=np.float64)
    rows = np.arange(H, dtype=np.float64)
    u, v = np.meshgrid(cols, rows)
    sin_t = np.sin(PITCH)
    cos_t = np.cos(PITCH)

    # ground plane: 1/Z = (sin(t) + cos(t) * (v - cy)/f) / h   (v grows downward)
    inverse_depth = (sin_t + cos_t * (v - centre_v) / F) / CAM_H
    # floor visible, within 30 m
    visible = inverse_depth > 1.0 / 30.0
    # clamp before inverting so pixels at/above the horizon cannot divide by zero
    depth = np.where(visible, 1.0 / np.maximum(inverse_depth, 1e-9), 0.0)

    # Back-project into the camera frame, then rotate into world ground
    # coordinates. The camera is pitched DOWN by PITCH, world y points up and
    # image v points down, so world_y = -cos*y_c - sin*z and
    # forward = cos*z - sin*y_c.
    cam_x = depth * (u - centre_u) / F
    cam_y = depth * (v - centre_v) / F
    height = -cos_t * cam_y - sin_t * depth
    # every visible ground pixel must land exactly CAM_H below the camera
    assert np.allclose(height[visible], -CAM_H, atol=1e-9), "sim geometry inconsistent"
    lateral = cam_x
    forward = cos_t * depth - sin_t * cam_y
    return visible.astype(np.uint8), depth.astype(np.float32), lateral, forward
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def exact_homography(mask, x_w, fwd_w):
    """Compute the exact image-to-plane homography for the synthetic floor.

    Four spread-out floor pixels are chosen by percentile inside the mask and
    paired with their known ground coordinates scaled by UNITS_PER_M.

    Args:
        mask: 2-D array, non-zero where the floor is visible.
        x_w: per-pixel lateral ground coordinate, indexed ``[row, col]``.
        fwd_w: per-pixel forward ground coordinate, indexed ``[row, col]``.

    Returns:
        The 3x3 matrix from ``cv2.getPerspectiveTransform`` mapping image
        pixels ``(x, y)`` to plane units.
    """
    mask_rows, mask_cols = np.nonzero(mask)
    # (row fraction, column fraction) inside the mask: two points near the
    # bottom edge and two around mid-height.
    fractions = ((0.95, 0.2), (0.95, 0.8), (0.55, 0.3), (0.55, 0.7))

    corners = []
    for row_frac, col_frac in fractions:
        row = int(np.percentile(mask_rows, row_frac * 100))
        cols_in_row = mask_cols[mask_rows == row]
        col = int(np.percentile(cols_in_row, col_frac * 100))
        corners.append((col, row))

    image_pts = np.float32(corners)
    plane_pts = np.float32(
        [
            [x_w[row, col] * UNITS_PER_M, fwd_w[row, col] * UNITS_PER_M]
            for col, row in corners
        ]
    )
    return cv2.getPerspectiveTransform(image_pts, plane_pts)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def main():
    """Run the three R1-3 simulation checks and print one line per check.

    Checks, in order:
      1. recovery  - the exact homography yields the true scale within 2%.
      2. rejection - a sheared homography yields None.
      3. relative  - normalised [0, 1] depth with a non-metric model stub
         yields None.

    Returns:
        int: 0 when every check passes, 1 otherwise (used as the exit status).
    """
    ok = True
    mask, z, x_w, fwd_w = scene()
    Hm = exact_homography(mask, x_w, fwd_w)

    mpu = estimate_meters_per_unit(z, mask, Hm.flatten().tolist(), W, H)
    if mpu is None:
        print(" [FAIL] recovery: returned None on exact scene")
        ok = False
    else:
        err = abs(mpu - TRUE_MPU) / TRUE_MPU
        good = err <= 0.02
        print(f" [{'PASS' if good else 'FAIL'}] recovery: mpu={mpu:.6f} "
              f"(true {TRUE_MPU:.6f}, err {err * 100:.2f}%)")
        ok &= good

    # synthetic-VP failure mode: progressive horizontal shear of plane coords
    S = np.array([[1.0, 0.35, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
    H_bad = S @ Hm
    mpu_bad = estimate_meters_per_unit(z, mask, H_bad.flatten().tolist(), W, H)
    print(f" [{'PASS' if mpu_bad is None else 'FAIL'}] rejection: sheared homography -> {mpu_bad}")
    ok &= mpu_bad is None

    # Rescale depth to [0, 1] over the floor pixels to mimic a relative model.
    floor_depth = z[mask > 0]
    rel = (z - floor_depth.min()) / (floor_depth.max() - floor_depth.min())
    # Swap in a non-metric stub for this check only and always restore the
    # previous hook, so calling main() again in the same process starts from
    # the same state as the first call.
    previous_hook = ns["depth_model_is_metric"]
    ns["depth_model_is_metric"] = lambda name=None: False
    try:
        mpu_rel = estimate_meters_per_unit(
            rel.astype(np.float32), mask, Hm.flatten().tolist(), W, H
        )
    finally:
        ns["depth_model_is_metric"] = previous_hook
    print(f" [{'PASS' if mpu_rel is None else 'FAIL'}] relative depth -> {mpu_rel}")
    ok &= mpu_rel is None

    print("\n" + ("ALL R1-3 SIM CHECKS PASSED" if ok else "R1-3 SIM CHECKS FAILED"))
    return 0 if ok else 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
visualizer.gpu.toml
CHANGED
|
@@ -7,7 +7,7 @@ segmentation_model = "oneformer"
|
|
| 7 |
oneformer_model_name = "shi-labs/oneformer_ade20k_swin_large"
|
| 8 |
mask2former_model_name = "facebook/mask2former-swin-small-ade-semantic"
|
| 9 |
segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
|
| 10 |
-
depth_model_name = "
|
| 11 |
intrinsic_model_version = "v2"
|
| 12 |
|
| 13 |
[runtime]
|
|
|
|
| 7 |
oneformer_model_name = "shi-labs/oneformer_ade20k_swin_large"
|
| 8 |
mask2former_model_name = "facebook/mask2former-swin-small-ade-semantic"
|
| 9 |
segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
|
| 10 |
+
depth_model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Small-hf"
|
| 11 |
intrinsic_model_version = "v2"
|
| 12 |
|
| 13 |
[runtime]
|
visualizer.hf.toml
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
[models]
|
| 6 |
segmentation_model = "oneformer"
|
| 7 |
oneformer_model_name = "shi-labs/oneformer_ade20k_swin_large"
|
| 8 |
-
depth_model_name = "
|
| 9 |
intrinsic_model_version = "v2"
|
| 10 |
|
| 11 |
[runtime]
|
|
|
|
| 5 |
[models]
|
| 6 |
segmentation_model = "oneformer"
|
| 7 |
oneformer_model_name = "shi-labs/oneformer_ade20k_swin_large"
|
| 8 |
+
depth_model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Small-hf"
|
| 9 |
intrinsic_model_version = "v2"
|
| 10 |
|
| 11 |
[runtime]
|
visualizer.local.toml
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
[models]
|
| 6 |
segmentation_model = "segformer"
|
| 7 |
segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
|
| 8 |
-
depth_model_name = "
|
| 9 |
|
| 10 |
[runtime]
|
| 11 |
enable_depth_estimation = false
|
|
|
|
| 5 |
[models]
|
| 6 |
segmentation_model = "segformer"
|
| 7 |
segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
|
| 8 |
+
depth_model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Small-hf"
|
| 9 |
|
| 10 |
[runtime]
|
| 11 |
enable_depth_estimation = false
|
visualizer.segformer.toml
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
[models]
|
| 7 |
segmentation_model = "segformer"
|
| 8 |
segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
|
| 9 |
-
depth_model_name = "
|
| 10 |
|
| 11 |
[runtime]
|
| 12 |
enable_depth_estimation = false
|
|
|
|
| 6 |
[models]
|
| 7 |
segmentation_model = "segformer"
|
| 8 |
segformer_model_name = "nvidia/segformer-b2-finetuned-ade-512-512"
|
| 9 |
+
depth_model_name = "depth-anything/Depth-Anything-V2-Metric-Indoor-Small-hf"
|
| 10 |
|
| 11 |
[runtime]
|
| 12 |
enable_depth_estimation = false
|