Spaces:
Sleeping
Sleeping
Arthur Niu committed on
Commit ·
be93391
1
Parent(s): cc1c482
front facet with architectural chart
Browse files
- app.py +89 -116
- requirements.txt +3 -3
app.py
CHANGED
|
@@ -26,17 +26,17 @@ sam_model = SamModel.from_pretrained(SAM_ID).to(DEVICE)
|
|
| 26 |
|
| 27 |
|
| 28 |
# -------------------------
|
| 29 |
-
#
|
| 30 |
# -------------------------
|
| 31 |
def _ensure_2d_mask(mask) -> np.ndarray:
|
| 32 |
-
"""Normalize mask to 2D uint8 {0,1}."""
|
| 33 |
if torch.is_tensor(mask):
|
| 34 |
mask = mask.detach().cpu().numpy()
|
| 35 |
mask = np.array(mask)
|
| 36 |
mask = np.squeeze(mask)
|
| 37 |
|
| 38 |
if mask.ndim == 3:
|
| 39 |
-
# (N,H,W) ->
|
| 40 |
if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
|
| 41 |
mask = mask[0]
|
| 42 |
else:
|
|
@@ -50,7 +50,7 @@ def _ensure_2d_mask(mask) -> np.ndarray:
|
|
| 50 |
|
| 51 |
|
| 52 |
def _clean_mask(mask01: np.ndarray) -> np.ndarray:
|
| 53 |
-
"""Light morphology cleanup."""
|
| 54 |
mask01 = _ensure_2d_mask(mask01)
|
| 55 |
m = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
|
| 56 |
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
|
|
@@ -59,25 +59,32 @@ def _clean_mask(mask01: np.ndarray) -> np.ndarray:
|
|
| 59 |
return (m > 0).astype(np.uint8)
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
|
| 63 |
"""
|
| 64 |
-
Warp with automatic
|
| 65 |
Returns (warped_img, H_out) where H_out includes translation.
|
| 66 |
"""
|
| 67 |
h, w = img.shape[:2]
|
| 68 |
-
corners = np.array(
|
| 69 |
-
[[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]],
|
| 70 |
-
dtype=np.float32,
|
| 71 |
-
)
|
| 72 |
corners_h = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), H).reshape(-1, 2)
|
| 73 |
|
| 74 |
min_xy = corners_h.min(axis=0)
|
| 75 |
max_xy = corners_h.max(axis=0)
|
| 76 |
-
|
| 77 |
min_x, min_y = float(min_xy[0]), float(min_xy[1])
|
| 78 |
max_x, max_y = float(max_xy[0]), float(max_xy[1])
|
| 79 |
|
| 80 |
-
# Translate so all coords are positive
|
| 81 |
tx = -min_x if min_x < 0 else 0.0
|
| 82 |
ty = -min_y if min_y < 0 else 0.0
|
| 83 |
|
|
@@ -102,7 +109,7 @@ def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 25
|
|
| 102 |
# Detection + segmentation
|
| 103 |
# -------------------------
|
| 104 |
def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
|
| 105 |
-
"""Grounding DINO
|
| 106 |
text_labels = [["a building", "a facade", "a house"]]
|
| 107 |
inputs = dino_processor(images=pil_img, text=text_labels, return_tensors="pt").to(DEVICE)
|
| 108 |
|
|
@@ -126,7 +133,7 @@ def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshol
|
|
| 126 |
|
| 127 |
|
| 128 |
def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
|
| 129 |
-
"""SAM
|
| 130 |
input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
|
| 131 |
inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
|
| 132 |
|
|
@@ -143,7 +150,6 @@ def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
|
|
| 143 |
if torch.is_tensor(m):
|
| 144 |
m = m.detach().cpu().numpy()
|
| 145 |
m = np.array(m)
|
| 146 |
-
|
| 147 |
if m.ndim >= 3:
|
| 148 |
m = m[0]
|
| 149 |
|
|
@@ -169,7 +175,7 @@ def _draw_outline_on_image(rgb_img: np.ndarray, mask01: np.ndarray, thickness: i
|
|
| 169 |
|
| 170 |
|
| 171 |
# -------------------------
|
| 172 |
-
# Option A
|
| 173 |
# -------------------------
|
| 174 |
def architectural_chart(
|
| 175 |
rgb_img: np.ndarray,
|
|
@@ -231,8 +237,20 @@ def architectural_chart(
|
|
| 231 |
|
| 232 |
|
| 233 |
# -------------------------
|
| 234 |
-
#
|
| 235 |
# -------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
|
| 237 |
"""
|
| 238 |
Extract line segments with LSD, keep those whose midpoints are inside mask.
|
|
@@ -242,7 +260,7 @@ def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float =
|
|
| 242 |
gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
|
| 243 |
gray = cv2.GaussianBlur(gray, (3, 3), 0)
|
| 244 |
|
| 245 |
-
lsd =
|
| 246 |
detected = lsd.detect(gray)[0]
|
| 247 |
if detected is None:
|
| 248 |
return []
|
|
@@ -265,15 +283,13 @@ def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float =
|
|
| 265 |
if mask01[my, mx] == 0:
|
| 266 |
continue
|
| 267 |
|
| 268 |
-
# Homogeneous line from two points: p1 x p2
|
| 269 |
p1 = np.array([x1, y1, 1.0], dtype=np.float32)
|
| 270 |
p2 = np.array([x2, y2, 1.0], dtype=np.float32)
|
| 271 |
l = np.cross(p1, p2) # (a,b,c)
|
| 272 |
norm = float(np.hypot(l[0], l[1]))
|
| 273 |
if norm < 1e-6:
|
| 274 |
continue
|
| 275 |
-
|
| 276 |
-
lines_h.append(l)
|
| 277 |
|
| 278 |
return lines_h
|
| 279 |
|
|
@@ -282,24 +298,15 @@ def _intersection_of_lines(l1, l2):
|
|
| 282 |
p = np.cross(l1, l2)
|
| 283 |
if abs(float(p[2])) < 1e-6:
|
| 284 |
return None
|
| 285 |
-
return (p / p[2]).astype(np.float32) #
|
| 286 |
|
| 287 |
|
| 288 |
def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=12):
|
| 289 |
-
"""
|
| 290 |
-
RANSAC for vanishing point:
|
| 291 |
-
- sample 2 lines -> intersection point
|
| 292 |
-
- score by distance to lines
|
| 293 |
-
- refine with SVD on inliers: find vp minimizing ||A vp|| with vp[2]=1 after normalization
|
| 294 |
-
"""
|
| 295 |
if len(lines) < 2:
|
| 296 |
return None, None
|
| 297 |
|
| 298 |
lines = [np.asarray(l, dtype=np.float32) for l in lines]
|
| 299 |
-
best_vp = None
|
| 300 |
-
best_inliers = None
|
| 301 |
-
best_count = 0
|
| 302 |
-
|
| 303 |
rng = np.random.default_rng(0)
|
| 304 |
|
| 305 |
for _ in range(iters):
|
|
@@ -310,8 +317,7 @@ def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=1
|
|
| 310 |
if vp is None:
|
| 311 |
continue
|
| 312 |
|
| 313 |
-
|
| 314 |
-
errs = [abs(float(l @ vp)) for l in lines]
|
| 315 |
inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
|
| 316 |
if len(inliers) > best_count:
|
| 317 |
best_count = len(inliers)
|
|
@@ -321,29 +327,25 @@ def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=1
|
|
| 321 |
if best_vp is None or best_inliers is None or best_count < min_inliers:
|
| 322 |
return None, None
|
| 323 |
|
| 324 |
-
# refine with SVD: stack inlier lines as A, solve A vp = 0
|
| 325 |
A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
|
| 326 |
-
# vp is right singular vector corresponding to smallest singular value
|
| 327 |
_, _, Vt = np.linalg.svd(A)
|
| 328 |
vp = Vt[-1, :]
|
| 329 |
if abs(float(vp[2])) < 1e-6:
|
| 330 |
return None, None
|
| 331 |
vp = (vp / vp[2]).astype(np.float32)
|
| 332 |
-
|
| 333 |
return vp, best_inliers
|
| 334 |
|
| 335 |
|
| 336 |
-
def _split_lines_by_orientation(lines
|
| 337 |
"""
|
| 338 |
-
Split
|
| 339 |
-
For
|
| 340 |
"""
|
| 341 |
horiz, vert = [], []
|
| 342 |
for l in lines:
|
| 343 |
a, b, _ = map(float, l)
|
| 344 |
dx, dy = b, -a
|
| 345 |
-
ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0
|
| 346 |
-
# near 0/180 => horizontal direction; near 90 => vertical direction
|
| 347 |
if ang < 25 or ang > 155:
|
| 348 |
horiz.append(l)
|
| 349 |
elif 65 < ang < 115:
|
|
@@ -353,9 +355,8 @@ def _split_lines_by_orientation(lines, prefer="hv"):
|
|
| 353 |
|
| 354 |
def _affine_H_from_vanishing_line(l):
|
| 355 |
"""
|
| 356 |
-
|
| 357 |
H = [[1,0,0],[0,1,0],[l1/l3, l2/l3, 1]]
|
| 358 |
-
This sends vanishing line to infinity.
|
| 359 |
"""
|
| 360 |
l = np.asarray(l, dtype=np.float32)
|
| 361 |
if abs(float(l[2])) < 1e-6:
|
|
@@ -366,10 +367,9 @@ def _affine_H_from_vanishing_line(l):
|
|
| 366 |
|
| 367 |
def _dominant_directions_from_lines(lines):
|
| 368 |
"""
|
| 369 |
-
|
| 370 |
-
direction vectors u (horizontal-ish) and v (vertical-ish) as unit vectors.
|
| 371 |
"""
|
| 372 |
-
if len(lines) <
|
| 373 |
return None, None
|
| 374 |
|
| 375 |
horiz, vert = _split_lines_by_orientation(lines)
|
|
@@ -378,12 +378,12 @@ def _dominant_directions_from_lines(lines):
|
|
| 378 |
vecs = []
|
| 379 |
for l in line_list:
|
| 380 |
a, b, _ = map(float, l)
|
| 381 |
-
dx, dy = b, -a
|
| 382 |
-
n = (dx
|
| 383 |
if n < 1e-6:
|
| 384 |
continue
|
| 385 |
dx, dy = dx / n, dy / n
|
| 386 |
-
#
|
| 387 |
if mode == "h":
|
| 388 |
if dx < 0:
|
| 389 |
dx, dy = -dx, -dy
|
|
@@ -391,8 +391,10 @@ def _dominant_directions_from_lines(lines):
|
|
| 391 |
if dy < 0:
|
| 392 |
dx, dy = -dx, -dy
|
| 393 |
vecs.append([dx, dy])
|
|
|
|
| 394 |
if len(vecs) < 2:
|
| 395 |
return None
|
|
|
|
| 396 |
v = np.mean(np.array(vecs, dtype=np.float32), axis=0)
|
| 397 |
n = float(np.hypot(v[0], v[1]))
|
| 398 |
if n < 1e-6:
|
|
@@ -406,23 +408,20 @@ def _dominant_directions_from_lines(lines):
|
|
| 406 |
|
| 407 |
def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
|
| 408 |
"""
|
| 409 |
-
|
| 410 |
-
1)
|
| 411 |
-
2) estimate
|
| 412 |
-
3)
|
| 413 |
-
4)
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
Returns (rectified_rgb, rectified_mask01, debug_overlay_rgb)
|
| 417 |
-
If fails, returns (None, None, debug_overlay_rgb).
|
| 418 |
"""
|
| 419 |
mask01 = _clean_mask(mask01)
|
| 420 |
-
lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
|
| 421 |
|
| 422 |
debug = rgb_img.copy()
|
| 423 |
-
# Draw mask outline on debug
|
| 424 |
debug = _draw_outline_on_image(debug, mask01, thickness=2)
|
| 425 |
|
|
|
|
| 426 |
if len(lines) < 10:
|
| 427 |
return None, None, debug
|
| 428 |
|
|
@@ -430,34 +429,31 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
|
|
| 430 |
if len(horiz) < 4 or len(vert) < 4:
|
| 431 |
return None, None, debug
|
| 432 |
|
| 433 |
-
vp_h,
|
| 434 |
-
vp_v,
|
| 435 |
if vp_h is None or vp_v is None:
|
| 436 |
return None, None, debug
|
| 437 |
|
| 438 |
-
# Vanishing line of the facade plane
|
| 439 |
van_line = np.cross(vp_h, vp_v).astype(np.float32)
|
| 440 |
H_aff = _affine_H_from_vanishing_line(van_line)
|
| 441 |
if H_aff is None:
|
| 442 |
return None, None, debug
|
| 443 |
|
| 444 |
-
#
|
| 445 |
bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
|
| 446 |
-
aff_bgr,
|
| 447 |
aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
|
| 448 |
|
| 449 |
mask255 = (mask01 * 255).astype(np.uint8)
|
| 450 |
aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
|
| 451 |
aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
|
| 452 |
|
| 453 |
-
# In affine space,
|
| 454 |
aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
|
| 455 |
u, v = _dominant_directions_from_lines(aff_lines)
|
| 456 |
if u is None or v is None:
|
| 457 |
return None, None, debug
|
| 458 |
|
| 459 |
-
# Linear transform that maps u->x axis and v->y axis:
|
| 460 |
-
# M = [u v] (2x2); A = inv(M)
|
| 461 |
M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
|
| 462 |
if abs(float(np.linalg.det(M2))) < 1e-6:
|
| 463 |
return None, None, debug
|
|
@@ -468,7 +464,6 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
|
|
| 468 |
dtype=np.float32,
|
| 469 |
)
|
| 470 |
|
| 471 |
-
# Apply linear transform with bounds to avoid cropping
|
| 472 |
aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
|
| 473 |
rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
|
| 474 |
rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
|
|
@@ -476,12 +471,12 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
|
|
| 476 |
rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
|
| 477 |
rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
|
| 478 |
|
| 479 |
-
# Debug:
|
| 480 |
def _draw_vp(img, vp, label):
|
| 481 |
x, y = float(vp[0]), float(vp[1])
|
| 482 |
-
|
| 483 |
-
x_cl = int(np.clip(x, -2 *
|
| 484 |
-
y_cl = int(np.clip(y, -2 *
|
| 485 |
cv2.circle(img, (x_cl, y_cl), 10, (255, 255, 255), -1)
|
| 486 |
cv2.putText(img, label, (x_cl + 12, y_cl + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
|
| 487 |
|
|
@@ -496,7 +491,6 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
|
|
| 496 |
# Fallback: full-building quad from mask contour
|
| 497 |
# -------------------------
|
| 498 |
def _fitline_to_abc(points_xy: np.ndarray):
|
| 499 |
-
"""Fit line via cv2.fitLine, return (a,b,c) for ax + by = c."""
|
| 500 |
pts = points_xy.astype(np.float32).reshape(-1, 1, 2)
|
| 501 |
vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
|
| 502 |
a = -vy
|
|
@@ -516,36 +510,25 @@ def _intersect_lines_abc(l1, l2):
|
|
| 516 |
return np.array([x, y], dtype=np.float32)
|
| 517 |
|
| 518 |
|
| 519 |
-
def _expand_corners(corners: np.ndarray, scale: float = 0.
|
| 520 |
corners = corners.astype(np.float32)
|
| 521 |
center = corners.mean(axis=0, keepdims=True)
|
| 522 |
return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
|
| 523 |
|
| 524 |
|
| 525 |
-
def _order_points(pts4: np.ndarray) -> np.ndarray:
|
| 526 |
-
pts4 = np.asarray(pts4, dtype=np.float32)
|
| 527 |
-
s = pts4.sum(axis=1)
|
| 528 |
-
d = pts4[:, 0] - pts4[:, 1]
|
| 529 |
-
tl = pts4[np.argmin(s)]
|
| 530 |
-
br = pts4[np.argmax(s)]
|
| 531 |
-
tr = pts4[np.argmax(d)]
|
| 532 |
-
bl = pts4[np.argmin(d)]
|
| 533 |
-
return np.array([tl, tr, br, bl], dtype=np.float32)
|
| 534 |
-
|
| 535 |
-
|
| 536 |
def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
|
| 537 |
-
"""
|
| 538 |
-
Full-building quadrilateral corners from outer contour bands (works when facade is approximately planar).
|
| 539 |
-
"""
|
| 540 |
mask01 = _clean_mask(mask01)
|
| 541 |
h, w = mask01.shape
|
|
|
|
| 542 |
mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
|
| 543 |
cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 544 |
if not cnts:
|
| 545 |
raise ValueError("Mask is empty (no contours).")
|
|
|
|
| 546 |
cnt = max(cnts, key=cv2.contourArea)
|
| 547 |
if cv2.contourArea(cnt) < 500:
|
| 548 |
raise ValueError("Mask too small to infer corners.")
|
|
|
|
| 549 |
pts = cnt.reshape(-1, 2).astype(np.float32)
|
| 550 |
|
| 551 |
x_min, y_min = pts.min(axis=0)
|
|
@@ -587,10 +570,6 @@ def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12,
|
|
| 587 |
|
| 588 |
|
| 589 |
def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
|
| 590 |
-
"""
|
| 591 |
-
Fallback rectification: compute 4 corners from mask contour -> warp to rectangle.
|
| 592 |
-
Uses warp_with_bounds (no crop).
|
| 593 |
-
"""
|
| 594 |
corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
|
| 595 |
|
| 596 |
(tl, tr, br, bl) = corners
|
|
@@ -598,16 +577,14 @@ def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, ex
|
|
| 598 |
wB = np.linalg.norm(tr - tl)
|
| 599 |
hA = np.linalg.norm(tr - br)
|
| 600 |
hB = np.linalg.norm(tl - bl)
|
| 601 |
-
out_w = int(max(wA, wB))
|
| 602 |
-
out_h = int(max(hA, hB))
|
| 603 |
-
out_w = max(out_w, 200)
|
| 604 |
-
out_h = max(out_h, 200)
|
| 605 |
|
| 606 |
dst = np.array([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]], dtype=np.float32)
|
| 607 |
H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
|
| 608 |
|
| 609 |
bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
|
| 610 |
-
warped_bgr,
|
| 611 |
warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
|
| 612 |
|
| 613 |
mask255 = (mask01 * 255).astype(np.uint8)
|
|
@@ -640,10 +617,11 @@ def straighten_and_chart(
|
|
| 640 |
add_grid=False,
|
| 641 |
):
|
| 642 |
"""
|
| 643 |
-
Goal: produce a better "front façade" even when the photo is taken off-angle.
|
| 644 |
Strategy:
|
| 645 |
-
-
|
|
|
|
| 646 |
- Fallback to full-building quad rectification
|
|
|
|
| 647 |
"""
|
| 648 |
if image_np is None:
|
| 649 |
raise ValueError("Please upload an image.")
|
|
@@ -669,22 +647,21 @@ def straighten_and_chart(
|
|
| 669 |
mask01 = _segment_box_mask(pil, box)
|
| 670 |
mask01 = _clean_mask(mask01)
|
| 671 |
|
| 672 |
-
#
|
| 673 |
original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
|
| 674 |
|
| 675 |
-
# Try
|
| 676 |
rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
|
| 677 |
|
| 678 |
-
# Fallback
|
| 679 |
if rect_rgb is None or rect_mask01 is None:
|
| 680 |
rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
|
| 681 |
-
# prefer VP debug if it exists; otherwise use quad debug
|
| 682 |
dbg = dbg if dbg is not None else dbg2
|
| 683 |
|
| 684 |
-
#
|
| 685 |
straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
|
| 686 |
|
| 687 |
-
#
|
| 688 |
chart = architectural_chart(
|
| 689 |
rect_rgb,
|
| 690 |
mode=str(chart_mode),
|
|
@@ -700,16 +677,13 @@ def straighten_and_chart(
|
|
| 700 |
# Mask preview
|
| 701 |
mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
|
| 702 |
|
| 703 |
-
# Debug:
|
| 704 |
debug = image_np.copy()
|
| 705 |
x1i, y1i, x2i, y2i = map(int, box)
|
| 706 |
cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
|
| 707 |
-
|
| 708 |
-
if dbg is not None:
|
| 709 |
-
|
| 710 |
-
# If dbg size differs, just skip blending to avoid distortion.
|
| 711 |
-
if dbg.shape[:2] == debug.shape[:2]:
|
| 712 |
-
debug = cv2.addWeighted(debug, 0.70, dbg, 0.30, 0)
|
| 713 |
|
| 714 |
return chart, straightened_outlined, original_outlined, debug, mask_rgb
|
| 715 |
|
|
@@ -740,9 +714,8 @@ demo = gr.Interface(
|
|
| 740 |
],
|
| 741 |
title="Auto Building Front-Façade Rectifier + Architectural Chart",
|
| 742 |
description=(
|
| 743 |
-
"
|
| 744 |
-
"
|
| 745 |
-
"with a full-building contour-based fallback. Warps use auto-bounds to reduce cropping."
|
| 746 |
),
|
| 747 |
)
|
| 748 |
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
# -------------------------
|
| 29 |
+
# Mask + geometry helpers
|
| 30 |
# -------------------------
|
| 31 |
def _ensure_2d_mask(mask) -> np.ndarray:
|
| 32 |
+
"""Normalize any mask variant to 2D uint8 {0,1}."""
|
| 33 |
if torch.is_tensor(mask):
|
| 34 |
mask = mask.detach().cpu().numpy()
|
| 35 |
mask = np.array(mask)
|
| 36 |
mask = np.squeeze(mask)
|
| 37 |
|
| 38 |
if mask.ndim == 3:
|
| 39 |
+
# (N,H,W) -> first; (H,W,C) -> first channel
|
| 40 |
if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
|
| 41 |
mask = mask[0]
|
| 42 |
else:
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
def _clean_mask(mask01: np.ndarray) -> np.ndarray:
|
| 53 |
+
"""Light morphology cleanup for stability."""
|
| 54 |
mask01 = _ensure_2d_mask(mask01)
|
| 55 |
m = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
|
| 56 |
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
|
|
|
|
| 59 |
return (m > 0).astype(np.uint8)
|
| 60 |
|
| 61 |
|
| 62 |
+
def _order_points(pts4: np.ndarray) -> np.ndarray:
|
| 63 |
+
"""Order 4 points as TL, TR, BR, BL."""
|
| 64 |
+
pts4 = np.asarray(pts4, dtype=np.float32)
|
| 65 |
+
s = pts4.sum(axis=1)
|
| 66 |
+
d = pts4[:, 0] - pts4[:, 1]
|
| 67 |
+
tl = pts4[np.argmin(s)]
|
| 68 |
+
br = pts4[np.argmax(s)]
|
| 69 |
+
tr = pts4[np.argmax(d)]
|
| 70 |
+
bl = pts4[np.argmin(d)]
|
| 71 |
+
return np.array([tl, tr, br, bl], dtype=np.float32)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
|
| 75 |
"""
|
| 76 |
+
Warp with automatic bounds so results are less likely to be cropped.
|
| 77 |
Returns (warped_img, H_out) where H_out includes translation.
|
| 78 |
"""
|
| 79 |
h, w = img.shape[:2]
|
| 80 |
+
corners = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float32)
|
|
|
|
|
|
|
|
|
|
| 81 |
corners_h = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), H).reshape(-1, 2)
|
| 82 |
|
| 83 |
min_xy = corners_h.min(axis=0)
|
| 84 |
max_xy = corners_h.max(axis=0)
|
|
|
|
| 85 |
min_x, min_y = float(min_xy[0]), float(min_xy[1])
|
| 86 |
max_x, max_y = float(max_xy[0]), float(max_xy[1])
|
| 87 |
|
|
|
|
| 88 |
tx = -min_x if min_x < 0 else 0.0
|
| 89 |
ty = -min_y if min_y < 0 else 0.0
|
| 90 |
|
|
|
|
| 109 |
# Detection + segmentation
|
| 110 |
# -------------------------
|
| 111 |
def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
|
| 112 |
+
"""Grounding DINO detect bbox. Returns xyxy float32."""
|
| 113 |
text_labels = [["a building", "a facade", "a house"]]
|
| 114 |
inputs = dino_processor(images=pil_img, text=text_labels, return_tensors="pt").to(DEVICE)
|
| 115 |
|
|
|
|
| 133 |
|
| 134 |
|
| 135 |
def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
|
| 136 |
+
"""SAM segment in bbox. Returns 2D uint8 mask {0,1}."""
|
| 137 |
input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
|
| 138 |
inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
|
| 139 |
|
|
|
|
| 150 |
if torch.is_tensor(m):
|
| 151 |
m = m.detach().cpu().numpy()
|
| 152 |
m = np.array(m)
|
|
|
|
| 153 |
if m.ndim >= 3:
|
| 154 |
m = m[0]
|
| 155 |
|
|
|
|
| 175 |
|
| 176 |
|
| 177 |
# -------------------------
|
| 178 |
+
# Architectural chart (Option A)
|
| 179 |
# -------------------------
|
| 180 |
def architectural_chart(
|
| 181 |
rgb_img: np.ndarray,
|
|
|
|
| 237 |
|
| 238 |
|
| 239 |
# -------------------------
|
| 240 |
+
# Vanishing-point-based facade rectification
|
| 241 |
# -------------------------
|
| 242 |
+
def _create_lsd():
|
| 243 |
+
"""
|
| 244 |
+
OpenCV python bindings differ; avoid keyword args.
|
| 245 |
+
Some builds accept (refine) positional, some only accept ().
|
| 246 |
+
"""
|
| 247 |
+
try:
|
| 248 |
+
refine = cv2.LSD_REFINE_STD if hasattr(cv2, "LSD_REFINE_STD") else 1
|
| 249 |
+
return cv2.createLineSegmentDetector(refine)
|
| 250 |
+
except Exception:
|
| 251 |
+
return cv2.createLineSegmentDetector()
|
| 252 |
+
|
| 253 |
+
|
| 254 |
def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
|
| 255 |
"""
|
| 256 |
Extract line segments with LSD, keep those whose midpoints are inside mask.
|
|
|
|
| 260 |
gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
|
| 261 |
gray = cv2.GaussianBlur(gray, (3, 3), 0)
|
| 262 |
|
| 263 |
+
lsd = _create_lsd()
|
| 264 |
detected = lsd.detect(gray)[0]
|
| 265 |
if detected is None:
|
| 266 |
return []
|
|
|
|
| 283 |
if mask01[my, mx] == 0:
|
| 284 |
continue
|
| 285 |
|
|
|
|
| 286 |
p1 = np.array([x1, y1, 1.0], dtype=np.float32)
|
| 287 |
p2 = np.array([x2, y2, 1.0], dtype=np.float32)
|
| 288 |
l = np.cross(p1, p2) # (a,b,c)
|
| 289 |
norm = float(np.hypot(l[0], l[1]))
|
| 290 |
if norm < 1e-6:
|
| 291 |
continue
|
| 292 |
+
lines_h.append((l / norm).astype(np.float32))
|
|
|
|
| 293 |
|
| 294 |
return lines_h
|
| 295 |
|
|
|
|
| 298 |
p = np.cross(l1, l2)
|
| 299 |
if abs(float(p[2])) < 1e-6:
|
| 300 |
return None
|
| 301 |
+
return (p / p[2]).astype(np.float32) # z=1
|
| 302 |
|
| 303 |
|
| 304 |
def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=12):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
if len(lines) < 2:
|
| 306 |
return None, None
|
| 307 |
|
| 308 |
lines = [np.asarray(l, dtype=np.float32) for l in lines]
|
| 309 |
+
best_vp, best_inliers, best_count = None, None, 0
|
|
|
|
|
|
|
|
|
|
| 310 |
rng = np.random.default_rng(0)
|
| 311 |
|
| 312 |
for _ in range(iters):
|
|
|
|
| 317 |
if vp is None:
|
| 318 |
continue
|
| 319 |
|
| 320 |
+
errs = [abs(float(l @ vp)) for l in lines] # point->line distance (lines normalized)
|
|
|
|
| 321 |
inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
|
| 322 |
if len(inliers) > best_count:
|
| 323 |
best_count = len(inliers)
|
|
|
|
| 327 |
if best_vp is None or best_inliers is None or best_count < min_inliers:
|
| 328 |
return None, None
|
| 329 |
|
|
|
|
| 330 |
A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
|
|
|
|
| 331 |
_, _, Vt = np.linalg.svd(A)
|
| 332 |
vp = Vt[-1, :]
|
| 333 |
if abs(float(vp[2])) < 1e-6:
|
| 334 |
return None, None
|
| 335 |
vp = (vp / vp[2]).astype(np.float32)
|
|
|
|
| 336 |
return vp, best_inliers
|
| 337 |
|
| 338 |
|
| 339 |
+
def _split_lines_by_orientation(lines):
|
| 340 |
"""
|
| 341 |
+
Split into near-horizontal vs near-vertical groups using direction from normal.
|
| 342 |
+
For line l=(a,b,c), direction vector is (b, -a).
|
| 343 |
"""
|
| 344 |
horiz, vert = [], []
|
| 345 |
for l in lines:
|
| 346 |
a, b, _ = map(float, l)
|
| 347 |
dx, dy = b, -a
|
| 348 |
+
ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0
|
|
|
|
| 349 |
if ang < 25 or ang > 155:
|
| 350 |
horiz.append(l)
|
| 351 |
elif 65 < ang < 115:
|
|
|
|
| 355 |
|
| 356 |
def _affine_H_from_vanishing_line(l):
|
| 357 |
"""
|
| 358 |
+
For vanishing line l=(l1,l2,l3), affine rectification:
|
| 359 |
H = [[1,0,0],[0,1,0],[l1/l3, l2/l3, 1]]
|
|
|
|
| 360 |
"""
|
| 361 |
l = np.asarray(l, dtype=np.float32)
|
| 362 |
if abs(float(l[2])) < 1e-6:
|
|
|
|
| 367 |
|
| 368 |
def _dominant_directions_from_lines(lines):
|
| 369 |
"""
|
| 370 |
+
In affinely-rectified space, estimate dominant unit directions u (horizontal-ish) and v (vertical-ish).
|
|
|
|
| 371 |
"""
|
| 372 |
+
if len(lines) < 6:
|
| 373 |
return None, None
|
| 374 |
|
| 375 |
horiz, vert = _split_lines_by_orientation(lines)
|
|
|
|
| 378 |
vecs = []
|
| 379 |
for l in line_list:
|
| 380 |
a, b, _ = map(float, l)
|
| 381 |
+
dx, dy = b, -a
|
| 382 |
+
n = float(np.hypot(dx, dy))
|
| 383 |
if n < 1e-6:
|
| 384 |
continue
|
| 385 |
dx, dy = dx / n, dy / n
|
| 386 |
+
# sign consistency
|
| 387 |
if mode == "h":
|
| 388 |
if dx < 0:
|
| 389 |
dx, dy = -dx, -dy
|
|
|
|
| 391 |
if dy < 0:
|
| 392 |
dx, dy = -dx, -dy
|
| 393 |
vecs.append([dx, dy])
|
| 394 |
+
|
| 395 |
if len(vecs) < 2:
|
| 396 |
return None
|
| 397 |
+
|
| 398 |
v = np.mean(np.array(vecs, dtype=np.float32), axis=0)
|
| 399 |
n = float(np.hypot(v[0], v[1]))
|
| 400 |
if n < 1e-6:
|
|
|
|
| 408 |
|
| 409 |
def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
|
| 410 |
"""
|
| 411 |
+
Stronger facade rectification for off-angle photos:
|
| 412 |
+
1) LSD lines within mask
|
| 413 |
+
2) estimate two vanishing points (horizontal/vertical) via RANSAC
|
| 414 |
+
3) affine rectification using vanishing line
|
| 415 |
+
4) remove shear by mapping dominant directions to orthogonal axes
|
| 416 |
+
5) warps use auto-bounds to reduce cropping
|
| 417 |
+
Returns (rect_rgb, rect_mask01, debug_overlay_rgb) or (None, None, debug) on failure.
|
|
|
|
|
|
|
| 418 |
"""
|
| 419 |
mask01 = _clean_mask(mask01)
|
|
|
|
| 420 |
|
| 421 |
debug = rgb_img.copy()
|
|
|
|
| 422 |
debug = _draw_outline_on_image(debug, mask01, thickness=2)
|
| 423 |
|
| 424 |
+
lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
|
| 425 |
if len(lines) < 10:
|
| 426 |
return None, None, debug
|
| 427 |
|
|
|
|
| 429 |
if len(horiz) < 4 or len(vert) < 4:
|
| 430 |
return None, None, debug
|
| 431 |
|
| 432 |
+
vp_h, _ = _fit_vanishing_point_ransac(horiz, iters=900, dist_thresh=3.0, min_inliers=10)
|
| 433 |
+
vp_v, _ = _fit_vanishing_point_ransac(vert, iters=900, dist_thresh=3.0, min_inliers=10)
|
| 434 |
if vp_h is None or vp_v is None:
|
| 435 |
return None, None, debug
|
| 436 |
|
|
|
|
| 437 |
van_line = np.cross(vp_h, vp_v).astype(np.float32)
|
| 438 |
H_aff = _affine_H_from_vanishing_line(van_line)
|
| 439 |
if H_aff is None:
|
| 440 |
return None, None, debug
|
| 441 |
|
| 442 |
+
# Affine warp (no crop)
|
| 443 |
bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
|
| 444 |
+
aff_bgr, _ = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
|
| 445 |
aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
|
| 446 |
|
| 447 |
mask255 = (mask01 * 255).astype(np.uint8)
|
| 448 |
aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
|
| 449 |
aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
|
| 450 |
|
| 451 |
+
# In affine space, estimate dominant axes and de-shear
|
| 452 |
aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
|
| 453 |
u, v = _dominant_directions_from_lines(aff_lines)
|
| 454 |
if u is None or v is None:
|
| 455 |
return None, None, debug
|
| 456 |
|
|
|
|
|
|
|
| 457 |
M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
|
| 458 |
if abs(float(np.linalg.det(M2))) < 1e-6:
|
| 459 |
return None, None, debug
|
|
|
|
| 464 |
dtype=np.float32,
|
| 465 |
)
|
| 466 |
|
|
|
|
| 467 |
aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
|
| 468 |
rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
|
| 469 |
rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
|
|
|
|
| 471 |
rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
|
| 472 |
rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
|
| 473 |
|
| 474 |
+
# Debug: mark vanishing points (clamped)
|
| 475 |
def _draw_vp(img, vp, label):
|
| 476 |
x, y = float(vp[0]), float(vp[1])
|
| 477 |
+
Hh, Ww = img.shape[:2]
|
| 478 |
+
x_cl = int(np.clip(x, -2 * Ww, 3 * Ww))
|
| 479 |
+
y_cl = int(np.clip(y, -2 * Hh, 3 * Hh))
|
| 480 |
cv2.circle(img, (x_cl, y_cl), 10, (255, 255, 255), -1)
|
| 481 |
cv2.putText(img, label, (x_cl + 12, y_cl + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
|
| 482 |
|
|
|
|
| 491 |
# Fallback: full-building quad from mask contour
|
| 492 |
# -------------------------
|
| 493 |
def _fitline_to_abc(points_xy: np.ndarray):
|
|
|
|
| 494 |
pts = points_xy.astype(np.float32).reshape(-1, 1, 2)
|
| 495 |
vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
|
| 496 |
a = -vy
|
|
|
|
| 510 |
return np.array([x, y], dtype=np.float32)
|
| 511 |
|
| 512 |
|
| 513 |
+
def _expand_corners(corners: np.ndarray, scale: float = 0.06) -> np.ndarray:
|
| 514 |
corners = corners.astype(np.float32)
|
| 515 |
center = corners.mean(axis=0, keepdims=True)
|
| 516 |
return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
|
| 517 |
|
| 518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
|
|
|
|
|
|
|
|
|
|
| 520 |
mask01 = _clean_mask(mask01)
|
| 521 |
h, w = mask01.shape
|
| 522 |
+
|
| 523 |
mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
|
| 524 |
cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 525 |
if not cnts:
|
| 526 |
raise ValueError("Mask is empty (no contours).")
|
| 527 |
+
|
| 528 |
cnt = max(cnts, key=cv2.contourArea)
|
| 529 |
if cv2.contourArea(cnt) < 500:
|
| 530 |
raise ValueError("Mask too small to infer corners.")
|
| 531 |
+
|
| 532 |
pts = cnt.reshape(-1, 2).astype(np.float32)
|
| 533 |
|
| 534 |
x_min, y_min = pts.min(axis=0)
|
|
|
|
| 570 |
|
| 571 |
|
| 572 |
def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
|
| 574 |
|
| 575 |
(tl, tr, br, bl) = corners
|
|
|
|
| 577 |
wB = np.linalg.norm(tr - tl)
|
| 578 |
hA = np.linalg.norm(tr - br)
|
| 579 |
hB = np.linalg.norm(tl - bl)
|
| 580 |
+
out_w = max(int(max(wA, wB)), 200)
|
| 581 |
+
out_h = max(int(max(hA, hB)), 200)
|
|
|
|
|
|
|
| 582 |
|
| 583 |
dst = np.array([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]], dtype=np.float32)
|
| 584 |
H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
|
| 585 |
|
| 586 |
bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
|
| 587 |
+
warped_bgr, _ = _warp_with_bounds(bgr, H, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
|
| 588 |
warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
|
| 589 |
|
| 590 |
mask255 = (mask01 * 255).astype(np.uint8)
|
|
|
|
| 617 |
add_grid=False,
|
| 618 |
):
|
| 619 |
"""
|
|
|
|
| 620 |
Strategy:
|
| 621 |
+
- Detect + segment building
|
| 622 |
+
- Try vanishing-point facade rectification (better for off-angle shots)
|
| 623 |
- Fallback to full-building quad rectification
|
| 624 |
+
- Generate architectural chart from rectified image
|
| 625 |
"""
|
| 626 |
if image_np is None:
|
| 627 |
raise ValueError("Please upload an image.")
|
|
|
|
| 647 |
mask01 = _segment_box_mask(pil, box)
|
| 648 |
mask01 = _clean_mask(mask01)
|
| 649 |
|
| 650 |
+
# Original + outline
|
| 651 |
original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
|
| 652 |
|
| 653 |
+
# Try stronger “front facade”
|
| 654 |
rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
|
| 655 |
|
| 656 |
+
# Fallback
|
| 657 |
if rect_rgb is None or rect_mask01 is None:
|
| 658 |
rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
|
|
|
|
| 659 |
dbg = dbg if dbg is not None else dbg2
|
| 660 |
|
| 661 |
+
# Rectified + outline
|
| 662 |
straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
|
| 663 |
|
| 664 |
+
# Chart
|
| 665 |
chart = architectural_chart(
|
| 666 |
rect_rgb,
|
| 667 |
mode=str(chart_mode),
|
|
|
|
| 677 |
# Mask preview
|
| 678 |
mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
|
| 679 |
|
| 680 |
+
# Debug: bbox + (optional) VP markers
|
| 681 |
debug = image_np.copy()
|
| 682 |
x1i, y1i, x2i, y2i = map(int, box)
|
| 683 |
cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
|
| 684 |
+
|
| 685 |
+
if dbg is not None and dbg.shape[:2] == debug.shape[:2]:
|
| 686 |
+
debug = cv2.addWeighted(debug, 0.70, dbg, 0.30, 0)
|
|
|
|
|
|
|
|
|
|
| 687 |
|
| 688 |
return chart, straightened_outlined, original_outlined, debug, mask_rgb
|
| 689 |
|
|
|
|
| 714 |
],
|
| 715 |
title="Auto Building Front-Façade Rectifier + Architectural Chart",
|
| 716 |
description=(
|
| 717 |
+
"Corrects off-angle building photos toward a front façade using vanishing-point rectification "
|
| 718 |
+
"(with a full-building contour fallback), then generates a deterministic architectural chart."
|
|
|
|
| 719 |
),
|
| 720 |
)
|
| 721 |
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
gradio
|
| 2 |
numpy
|
| 3 |
-
opencv-python-headless
|
| 4 |
torch
|
| 5 |
-
transformers
|
| 6 |
Pillow
|
|
|
|
| 1 |
+
gradio==4.44.0
|
| 2 |
numpy
|
| 3 |
+
opencv-python-headless==4.12.0.88
|
| 4 |
torch
|
| 5 |
+
transformers==4.45.0
|
| 6 |
Pillow
|