Arthur Niu committed on
Commit
be93391
·
1 Parent(s): cc1c482

front facade with architectural chart

Browse files
Files changed (2) hide show
  1. app.py +89 -116
  2. requirements.txt +3 -3
app.py CHANGED
@@ -26,17 +26,17 @@ sam_model = SamModel.from_pretrained(SAM_ID).to(DEVICE)
26
 
27
 
28
  # -------------------------
29
- # Basic helpers
30
  # -------------------------
31
  def _ensure_2d_mask(mask) -> np.ndarray:
32
- """Normalize mask to 2D uint8 {0,1}."""
33
  if torch.is_tensor(mask):
34
  mask = mask.detach().cpu().numpy()
35
  mask = np.array(mask)
36
  mask = np.squeeze(mask)
37
 
38
  if mask.ndim == 3:
39
- # (N,H,W) -> take first; (H,W,C) -> take first channel
40
  if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
41
  mask = mask[0]
42
  else:
@@ -50,7 +50,7 @@ def _ensure_2d_mask(mask) -> np.ndarray:
50
 
51
 
52
  def _clean_mask(mask01: np.ndarray) -> np.ndarray:
53
- """Light morphology cleanup."""
54
  mask01 = _ensure_2d_mask(mask01)
55
  m = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
56
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
@@ -59,25 +59,32 @@ def _clean_mask(mask01: np.ndarray) -> np.ndarray:
59
  return (m > 0).astype(np.uint8)
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
63
  """
64
- Warp with automatic output bounds so the result is not cropped.
65
  Returns (warped_img, H_out) where H_out includes translation.
66
  """
67
  h, w = img.shape[:2]
68
- corners = np.array(
69
- [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]],
70
- dtype=np.float32,
71
- )
72
  corners_h = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), H).reshape(-1, 2)
73
 
74
  min_xy = corners_h.min(axis=0)
75
  max_xy = corners_h.max(axis=0)
76
-
77
  min_x, min_y = float(min_xy[0]), float(min_xy[1])
78
  max_x, max_y = float(max_xy[0]), float(max_xy[1])
79
 
80
- # Translate so all coords are positive
81
  tx = -min_x if min_x < 0 else 0.0
82
  ty = -min_y if min_y < 0 else 0.0
83
 
@@ -102,7 +109,7 @@ def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 25
102
  # Detection + segmentation
103
  # -------------------------
104
  def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
105
- """Grounding DINO: detect building bbox. Returns xyxy float32."""
106
  text_labels = [["a building", "a facade", "a house"]]
107
  inputs = dino_processor(images=pil_img, text=text_labels, return_tensors="pt").to(DEVICE)
108
 
@@ -126,7 +133,7 @@ def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshol
126
 
127
 
128
  def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
129
- """SAM: segment within bbox. Returns 2D uint8 mask {0,1}."""
130
  input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
131
  inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
132
 
@@ -143,7 +150,6 @@ def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
143
  if torch.is_tensor(m):
144
  m = m.detach().cpu().numpy()
145
  m = np.array(m)
146
-
147
  if m.ndim >= 3:
148
  m = m[0]
149
 
@@ -169,7 +175,7 @@ def _draw_outline_on_image(rgb_img: np.ndarray, mask01: np.ndarray, thickness: i
169
 
170
 
171
  # -------------------------
172
- # Option A chart
173
  # -------------------------
174
  def architectural_chart(
175
  rgb_img: np.ndarray,
@@ -231,8 +237,20 @@ def architectural_chart(
231
 
232
 
233
  # -------------------------
234
- # Perspective -> front facade rectification
235
  # -------------------------
 
 
 
 
 
 
 
 
 
 
 
 
236
  def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
237
  """
238
  Extract line segments with LSD, keep those whose midpoints are inside mask.
@@ -242,7 +260,7 @@ def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float =
242
  gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
243
  gray = cv2.GaussianBlur(gray, (3, 3), 0)
244
 
245
- lsd = cv2.createLineSegmentDetector(_refine=cv2.LSD_REFINE_STD)
246
  detected = lsd.detect(gray)[0]
247
  if detected is None:
248
  return []
@@ -265,15 +283,13 @@ def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float =
265
  if mask01[my, mx] == 0:
266
  continue
267
 
268
- # Homogeneous line from two points: p1 x p2
269
  p1 = np.array([x1, y1, 1.0], dtype=np.float32)
270
  p2 = np.array([x2, y2, 1.0], dtype=np.float32)
271
  l = np.cross(p1, p2) # (a,b,c)
272
  norm = float(np.hypot(l[0], l[1]))
273
  if norm < 1e-6:
274
  continue
275
- l = (l / norm).astype(np.float32)
276
- lines_h.append(l)
277
 
278
  return lines_h
279
 
@@ -282,24 +298,15 @@ def _intersection_of_lines(l1, l2):
282
  p = np.cross(l1, l2)
283
  if abs(float(p[2])) < 1e-6:
284
  return None
285
- return (p / p[2]).astype(np.float32) # normalize to z=1
286
 
287
 
288
  def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=12):
289
- """
290
- RANSAC for vanishing point:
291
- - sample 2 lines -> intersection point
292
- - score by distance to lines
293
- - refine with SVD on inliers: find vp minimizing ||A vp|| with vp[2]=1 after normalization
294
- """
295
  if len(lines) < 2:
296
  return None, None
297
 
298
  lines = [np.asarray(l, dtype=np.float32) for l in lines]
299
- best_vp = None
300
- best_inliers = None
301
- best_count = 0
302
-
303
  rng = np.random.default_rng(0)
304
 
305
  for _ in range(iters):
@@ -310,8 +317,7 @@ def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=1
310
  if vp is None:
311
  continue
312
 
313
- # distance point->line: |l·vp| (since l normalized by sqrt(a^2+b^2))
314
- errs = [abs(float(l @ vp)) for l in lines]
315
  inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
316
  if len(inliers) > best_count:
317
  best_count = len(inliers)
@@ -321,29 +327,25 @@ def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=1
321
  if best_vp is None or best_inliers is None or best_count < min_inliers:
322
  return None, None
323
 
324
- # refine with SVD: stack inlier lines as A, solve A vp = 0
325
  A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
326
- # vp is right singular vector corresponding to smallest singular value
327
  _, _, Vt = np.linalg.svd(A)
328
  vp = Vt[-1, :]
329
  if abs(float(vp[2])) < 1e-6:
330
  return None, None
331
  vp = (vp / vp[2]).astype(np.float32)
332
-
333
  return vp, best_inliers
334
 
335
 
336
- def _split_lines_by_orientation(lines, prefer="hv"):
337
  """
338
- Split lines into near-horizontal and near-vertical groups using segment direction angle inferred from line normal.
339
- For a line l=(a,b,c), direction vector is (b, -a).
340
  """
341
  horiz, vert = [], []
342
  for l in lines:
343
  a, b, _ = map(float, l)
344
  dx, dy = b, -a
345
- ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0 # [0,180)
346
- # near 0/180 => horizontal direction; near 90 => vertical direction
347
  if ang < 25 or ang > 155:
348
  horiz.append(l)
349
  elif 65 < ang < 115:
@@ -353,9 +355,8 @@ def _split_lines_by_orientation(lines, prefer="hv"):
353
 
354
  def _affine_H_from_vanishing_line(l):
355
  """
356
- If vanishing line is l=(l1,l2,l3), affine rectification homography:
357
  H = [[1,0,0],[0,1,0],[l1/l3, l2/l3, 1]]
358
- This sends vanishing line to infinity.
359
  """
360
  l = np.asarray(l, dtype=np.float32)
361
  if abs(float(l[2])) < 1e-6:
@@ -366,10 +367,9 @@ def _affine_H_from_vanishing_line(l):
366
 
367
  def _dominant_directions_from_lines(lines):
368
  """
369
- From homogeneous lines (already in an affinely-rectified space), compute dominant
370
- direction vectors u (horizontal-ish) and v (vertical-ish) as unit vectors.
371
  """
372
- if len(lines) < 4:
373
  return None, None
374
 
375
  horiz, vert = _split_lines_by_orientation(lines)
@@ -378,12 +378,12 @@ def _dominant_directions_from_lines(lines):
378
  vecs = []
379
  for l in line_list:
380
  a, b, _ = map(float, l)
381
- dx, dy = b, -a # direction vector
382
- n = (dx * dx + dy * dy) ** 0.5
383
  if n < 1e-6:
384
  continue
385
  dx, dy = dx / n, dy / n
386
- # make sign consistent to avoid cancellation
387
  if mode == "h":
388
  if dx < 0:
389
  dx, dy = -dx, -dy
@@ -391,8 +391,10 @@ def _dominant_directions_from_lines(lines):
391
  if dy < 0:
392
  dx, dy = -dx, -dy
393
  vecs.append([dx, dy])
 
394
  if len(vecs) < 2:
395
  return None
 
396
  v = np.mean(np.array(vecs, dtype=np.float32), axis=0)
397
  n = float(np.hypot(v[0], v[1]))
398
  if n < 1e-6:
@@ -406,23 +408,20 @@ def _dominant_directions_from_lines(lines):
406
 
407
  def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
408
  """
409
- Attempt a stronger "front façade" rectification when the photo is not taken head-on:
410
- 1) extract lines within mask
411
- 2) estimate horizontal/vertical vanishing points via RANSAC
412
- 3) compute vanishing line -> affine rectification
413
- 4) in affine space, estimate dominant directions u,v -> apply linear transform A = inv([u v])
414
- (this removes shear and makes axes orthogonal, producing a more "front" view)
415
- 5) warp with bounds (no cropping)
416
- Returns (rectified_rgb, rectified_mask01, debug_overlay_rgb)
417
- If fails, returns (None, None, debug_overlay_rgb).
418
  """
419
  mask01 = _clean_mask(mask01)
420
- lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
421
 
422
  debug = rgb_img.copy()
423
- # Draw mask outline on debug
424
  debug = _draw_outline_on_image(debug, mask01, thickness=2)
425
 
 
426
  if len(lines) < 10:
427
  return None, None, debug
428
 
@@ -430,34 +429,31 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
430
  if len(horiz) < 4 or len(vert) < 4:
431
  return None, None, debug
432
 
433
- vp_h, in_h = _fit_vanishing_point_ransac(horiz, iters=800, dist_thresh=3.0, min_inliers=10)
434
- vp_v, in_v = _fit_vanishing_point_ransac(vert, iters=800, dist_thresh=3.0, min_inliers=10)
435
  if vp_h is None or vp_v is None:
436
  return None, None, debug
437
 
438
- # Vanishing line of the facade plane
439
  van_line = np.cross(vp_h, vp_v).astype(np.float32)
440
  H_aff = _affine_H_from_vanishing_line(van_line)
441
  if H_aff is None:
442
  return None, None, debug
443
 
444
- # Warp image + mask to affine space (no crop)
445
  bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
446
- aff_bgr, H_aff_out = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
447
  aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
448
 
449
  mask255 = (mask01 * 255).astype(np.uint8)
450
  aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
451
  aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
452
 
453
- # In affine space, re-extract lines and estimate dominant orthogonal axes
454
  aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
455
  u, v = _dominant_directions_from_lines(aff_lines)
456
  if u is None or v is None:
457
  return None, None, debug
458
 
459
- # Linear transform that maps u->x axis and v->y axis:
460
- # M = [u v] (2x2); A = inv(M)
461
  M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
462
  if abs(float(np.linalg.det(M2))) < 1e-6:
463
  return None, None, debug
@@ -468,7 +464,6 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
468
  dtype=np.float32,
469
  )
470
 
471
- # Apply linear transform with bounds to avoid cropping
472
  aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
473
  rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
474
  rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
@@ -476,12 +471,12 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
476
  rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
477
  rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
478
 
479
- # Debug: plot vanishing points (clamped if far away)
480
  def _draw_vp(img, vp, label):
481
  x, y = float(vp[0]), float(vp[1])
482
- H, W = img.shape[:2]
483
- x_cl = int(np.clip(x, -2 * W, 3 * W))
484
- y_cl = int(np.clip(y, -2 * H, 3 * H))
485
  cv2.circle(img, (x_cl, y_cl), 10, (255, 255, 255), -1)
486
  cv2.putText(img, label, (x_cl + 12, y_cl + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
487
 
@@ -496,7 +491,6 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
496
  # Fallback: full-building quad from mask contour
497
  # -------------------------
498
  def _fitline_to_abc(points_xy: np.ndarray):
499
- """Fit line via cv2.fitLine, return (a,b,c) for ax + by = c."""
500
  pts = points_xy.astype(np.float32).reshape(-1, 1, 2)
501
  vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
502
  a = -vy
@@ -516,36 +510,25 @@ def _intersect_lines_abc(l1, l2):
516
  return np.array([x, y], dtype=np.float32)
517
 
518
 
519
- def _expand_corners(corners: np.ndarray, scale: float = 0.05) -> np.ndarray:
520
  corners = corners.astype(np.float32)
521
  center = corners.mean(axis=0, keepdims=True)
522
  return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
523
 
524
 
525
- def _order_points(pts4: np.ndarray) -> np.ndarray:
526
- pts4 = np.asarray(pts4, dtype=np.float32)
527
- s = pts4.sum(axis=1)
528
- d = pts4[:, 0] - pts4[:, 1]
529
- tl = pts4[np.argmin(s)]
530
- br = pts4[np.argmax(s)]
531
- tr = pts4[np.argmax(d)]
532
- bl = pts4[np.argmin(d)]
533
- return np.array([tl, tr, br, bl], dtype=np.float32)
534
-
535
-
536
  def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
537
- """
538
- Full-building quadrilateral corners from outer contour bands (works when facade is approximately planar).
539
- """
540
  mask01 = _clean_mask(mask01)
541
  h, w = mask01.shape
 
542
  mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
543
  cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
544
  if not cnts:
545
  raise ValueError("Mask is empty (no contours).")
 
546
  cnt = max(cnts, key=cv2.contourArea)
547
  if cv2.contourArea(cnt) < 500:
548
  raise ValueError("Mask too small to infer corners.")
 
549
  pts = cnt.reshape(-1, 2).astype(np.float32)
550
 
551
  x_min, y_min = pts.min(axis=0)
@@ -587,10 +570,6 @@ def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12,
587
 
588
 
589
  def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
590
- """
591
- Fallback rectification: compute 4 corners from mask contour -> warp to rectangle.
592
- Uses warp_with_bounds (no crop).
593
- """
594
  corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
595
 
596
  (tl, tr, br, bl) = corners
@@ -598,16 +577,14 @@ def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, ex
598
  wB = np.linalg.norm(tr - tl)
599
  hA = np.linalg.norm(tr - br)
600
  hB = np.linalg.norm(tl - bl)
601
- out_w = int(max(wA, wB))
602
- out_h = int(max(hA, hB))
603
- out_w = max(out_w, 200)
604
- out_h = max(out_h, 200)
605
 
606
  dst = np.array([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]], dtype=np.float32)
607
  H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
608
 
609
  bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
610
- warped_bgr, H_out = _warp_with_bounds(bgr, H, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
611
  warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
612
 
613
  mask255 = (mask01 * 255).astype(np.uint8)
@@ -640,10 +617,11 @@ def straighten_and_chart(
640
  add_grid=False,
641
  ):
642
  """
643
- Goal: produce a better "front façade" even when the photo is taken off-angle.
644
  Strategy:
645
- - Try vanishing-point based rectification (projective -> affine -> orthogonal axes)
 
646
  - Fallback to full-building quad rectification
 
647
  """
648
  if image_np is None:
649
  raise ValueError("Please upload an image.")
@@ -669,22 +647,21 @@ def straighten_and_chart(
669
  mask01 = _segment_box_mask(pil, box)
670
  mask01 = _clean_mask(mask01)
671
 
672
- # Outline on original
673
  original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
674
 
675
- # Try: strong front façade rectification
676
  rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
677
 
678
- # Fallback if needed
679
  if rect_rgb is None or rect_mask01 is None:
680
  rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
681
- # prefer VP debug if it exists; otherwise use quad debug
682
  dbg = dbg if dbg is not None else dbg2
683
 
684
- # Outline on rectified
685
  straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
686
 
687
- # Architectural chart from rectified image
688
  chart = architectural_chart(
689
  rect_rgb,
690
  mode=str(chart_mode),
@@ -700,16 +677,13 @@ def straighten_and_chart(
700
  # Mask preview
701
  mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
702
 
703
- # Debug: show bbox on original + rectification debug overlay
704
  debug = image_np.copy()
705
  x1i, y1i, x2i, y2i = map(int, box)
706
  cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
707
- # Blend in rectification debug (VPs) if available, otherwise leave bbox-only
708
- if dbg is not None:
709
- # Put VP debug into the same canvas size (original) by simple overlay where possible
710
- # If dbg size differs, just skip blending to avoid distortion.
711
- if dbg.shape[:2] == debug.shape[:2]:
712
- debug = cv2.addWeighted(debug, 0.70, dbg, 0.30, 0)
713
 
714
  return chart, straightened_outlined, original_outlined, debug, mask_rgb
715
 
@@ -740,9 +714,8 @@ demo = gr.Interface(
740
  ],
741
  title="Auto Building Front-Façade Rectifier + Architectural Chart",
742
  description=(
743
- "Better front façade correction when the photo is taken off-angle: "
744
- "vanishing-point rectification (projective->affine->orthogonal axes), "
745
- "with a full-building contour-based fallback. Warps use auto-bounds to reduce cropping."
746
  ),
747
  )
748
 
 
26
 
27
 
28
  # -------------------------
29
+ # Mask + geometry helpers
30
  # -------------------------
31
  def _ensure_2d_mask(mask) -> np.ndarray:
32
+ """Normalize any mask variant to 2D uint8 {0,1}."""
33
  if torch.is_tensor(mask):
34
  mask = mask.detach().cpu().numpy()
35
  mask = np.array(mask)
36
  mask = np.squeeze(mask)
37
 
38
  if mask.ndim == 3:
39
+ # (N,H,W) -> first; (H,W,C) -> first channel
40
  if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
41
  mask = mask[0]
42
  else:
 
50
 
51
 
52
  def _clean_mask(mask01: np.ndarray) -> np.ndarray:
53
+ """Light morphology cleanup for stability."""
54
  mask01 = _ensure_2d_mask(mask01)
55
  m = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
56
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
 
59
  return (m > 0).astype(np.uint8)
60
 
61
 
62
+ def _order_points(pts4: np.ndarray) -> np.ndarray:
63
+ """Order 4 points as TL, TR, BR, BL."""
64
+ pts4 = np.asarray(pts4, dtype=np.float32)
65
+ s = pts4.sum(axis=1)
66
+ d = pts4[:, 0] - pts4[:, 1]
67
+ tl = pts4[np.argmin(s)]
68
+ br = pts4[np.argmax(s)]
69
+ tr = pts4[np.argmax(d)]
70
+ bl = pts4[np.argmin(d)]
71
+ return np.array([tl, tr, br, bl], dtype=np.float32)
72
+
73
+
74
  def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
75
  """
76
+ Warp with automatic bounds so results are less likely to be cropped.
77
  Returns (warped_img, H_out) where H_out includes translation.
78
  """
79
  h, w = img.shape[:2]
80
+ corners = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float32)
 
 
 
81
  corners_h = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), H).reshape(-1, 2)
82
 
83
  min_xy = corners_h.min(axis=0)
84
  max_xy = corners_h.max(axis=0)
 
85
  min_x, min_y = float(min_xy[0]), float(min_xy[1])
86
  max_x, max_y = float(max_xy[0]), float(max_xy[1])
87
 
 
88
  tx = -min_x if min_x < 0 else 0.0
89
  ty = -min_y if min_y < 0 else 0.0
90
 
 
109
  # Detection + segmentation
110
  # -------------------------
111
  def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
112
+ """Grounding DINO detect bbox. Returns xyxy float32."""
113
  text_labels = [["a building", "a facade", "a house"]]
114
  inputs = dino_processor(images=pil_img, text=text_labels, return_tensors="pt").to(DEVICE)
115
 
 
133
 
134
 
135
  def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
136
+ """SAM segment in bbox. Returns 2D uint8 mask {0,1}."""
137
  input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
138
  inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
139
 
 
150
  if torch.is_tensor(m):
151
  m = m.detach().cpu().numpy()
152
  m = np.array(m)
 
153
  if m.ndim >= 3:
154
  m = m[0]
155
 
 
175
 
176
 
177
  # -------------------------
178
+ # Architectural chart (Option A)
179
  # -------------------------
180
  def architectural_chart(
181
  rgb_img: np.ndarray,
 
237
 
238
 
239
  # -------------------------
240
+ # Vanishing-point-based facade rectification
241
  # -------------------------
242
+ def _create_lsd():
243
+ """
244
+ OpenCV python bindings differ; avoid keyword args.
245
+ Some builds accept (refine) positional, some only accept ().
246
+ """
247
+ try:
248
+ refine = cv2.LSD_REFINE_STD if hasattr(cv2, "LSD_REFINE_STD") else 1
249
+ return cv2.createLineSegmentDetector(refine)
250
+ except Exception:
251
+ return cv2.createLineSegmentDetector()
252
+
253
+
254
  def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
255
  """
256
  Extract line segments with LSD, keep those whose midpoints are inside mask.
 
260
  gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
261
  gray = cv2.GaussianBlur(gray, (3, 3), 0)
262
 
263
+ lsd = _create_lsd()
264
  detected = lsd.detect(gray)[0]
265
  if detected is None:
266
  return []
 
283
  if mask01[my, mx] == 0:
284
  continue
285
 
 
286
  p1 = np.array([x1, y1, 1.0], dtype=np.float32)
287
  p2 = np.array([x2, y2, 1.0], dtype=np.float32)
288
  l = np.cross(p1, p2) # (a,b,c)
289
  norm = float(np.hypot(l[0], l[1]))
290
  if norm < 1e-6:
291
  continue
292
+ lines_h.append((l / norm).astype(np.float32))
 
293
 
294
  return lines_h
295
 
 
298
  p = np.cross(l1, l2)
299
  if abs(float(p[2])) < 1e-6:
300
  return None
301
+ return (p / p[2]).astype(np.float32) # z=1
302
 
303
 
304
  def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=12):
 
 
 
 
 
 
305
  if len(lines) < 2:
306
  return None, None
307
 
308
  lines = [np.asarray(l, dtype=np.float32) for l in lines]
309
+ best_vp, best_inliers, best_count = None, None, 0
 
 
 
310
  rng = np.random.default_rng(0)
311
 
312
  for _ in range(iters):
 
317
  if vp is None:
318
  continue
319
 
320
+ errs = [abs(float(l @ vp)) for l in lines] # point->line distance (lines normalized)
 
321
  inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
322
  if len(inliers) > best_count:
323
  best_count = len(inliers)
 
327
  if best_vp is None or best_inliers is None or best_count < min_inliers:
328
  return None, None
329
 
 
330
  A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
 
331
  _, _, Vt = np.linalg.svd(A)
332
  vp = Vt[-1, :]
333
  if abs(float(vp[2])) < 1e-6:
334
  return None, None
335
  vp = (vp / vp[2]).astype(np.float32)
 
336
  return vp, best_inliers
337
 
338
 
339
+ def _split_lines_by_orientation(lines):
340
  """
341
+ Split into near-horizontal vs near-vertical groups using direction from normal.
342
+ For line l=(a,b,c), direction vector is (b, -a).
343
  """
344
  horiz, vert = [], []
345
  for l in lines:
346
  a, b, _ = map(float, l)
347
  dx, dy = b, -a
348
+ ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0
 
349
  if ang < 25 or ang > 155:
350
  horiz.append(l)
351
  elif 65 < ang < 115:
 
355
 
356
  def _affine_H_from_vanishing_line(l):
357
  """
358
+ For vanishing line l=(l1,l2,l3), affine rectification:
359
  H = [[1,0,0],[0,1,0],[l1/l3, l2/l3, 1]]
 
360
  """
361
  l = np.asarray(l, dtype=np.float32)
362
  if abs(float(l[2])) < 1e-6:
 
367
 
368
  def _dominant_directions_from_lines(lines):
369
  """
370
+ In affinely-rectified space, estimate dominant unit directions u (horizontal-ish) and v (vertical-ish).
 
371
  """
372
+ if len(lines) < 6:
373
  return None, None
374
 
375
  horiz, vert = _split_lines_by_orientation(lines)
 
378
  vecs = []
379
  for l in line_list:
380
  a, b, _ = map(float, l)
381
+ dx, dy = b, -a
382
+ n = float(np.hypot(dx, dy))
383
  if n < 1e-6:
384
  continue
385
  dx, dy = dx / n, dy / n
386
+ # sign consistency
387
  if mode == "h":
388
  if dx < 0:
389
  dx, dy = -dx, -dy
 
391
  if dy < 0:
392
  dx, dy = -dx, -dy
393
  vecs.append([dx, dy])
394
+
395
  if len(vecs) < 2:
396
  return None
397
+
398
  v = np.mean(np.array(vecs, dtype=np.float32), axis=0)
399
  n = float(np.hypot(v[0], v[1]))
400
  if n < 1e-6:
 
408
 
409
  def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
410
  """
411
+ Stronger facade rectification for off-angle photos:
412
+ 1) LSD lines within mask
413
+ 2) estimate two vanishing points (horizontal/vertical) via RANSAC
414
+ 3) affine rectification using vanishing line
415
+ 4) remove shear by mapping dominant directions to orthogonal axes
416
+ 5) warps use auto-bounds to reduce cropping
417
+ Returns (rect_rgb, rect_mask01, debug_overlay_rgb) or (None, None, debug) on failure.
 
 
418
  """
419
  mask01 = _clean_mask(mask01)
 
420
 
421
  debug = rgb_img.copy()
 
422
  debug = _draw_outline_on_image(debug, mask01, thickness=2)
423
 
424
+ lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
425
  if len(lines) < 10:
426
  return None, None, debug
427
 
 
429
  if len(horiz) < 4 or len(vert) < 4:
430
  return None, None, debug
431
 
432
+ vp_h, _ = _fit_vanishing_point_ransac(horiz, iters=900, dist_thresh=3.0, min_inliers=10)
433
+ vp_v, _ = _fit_vanishing_point_ransac(vert, iters=900, dist_thresh=3.0, min_inliers=10)
434
  if vp_h is None or vp_v is None:
435
  return None, None, debug
436
 
 
437
  van_line = np.cross(vp_h, vp_v).astype(np.float32)
438
  H_aff = _affine_H_from_vanishing_line(van_line)
439
  if H_aff is None:
440
  return None, None, debug
441
 
442
+ # Affine warp (no crop)
443
  bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
444
+ aff_bgr, _ = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
445
  aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
446
 
447
  mask255 = (mask01 * 255).astype(np.uint8)
448
  aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
449
  aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
450
 
451
+ # In affine space, estimate dominant axes and de-shear
452
  aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
453
  u, v = _dominant_directions_from_lines(aff_lines)
454
  if u is None or v is None:
455
  return None, None, debug
456
 
 
 
457
  M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
458
  if abs(float(np.linalg.det(M2))) < 1e-6:
459
  return None, None, debug
 
464
  dtype=np.float32,
465
  )
466
 
 
467
  aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
468
  rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
469
  rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
 
471
  rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
472
  rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
473
 
474
+ # Debug: mark vanishing points (clamped)
475
  def _draw_vp(img, vp, label):
476
  x, y = float(vp[0]), float(vp[1])
477
+ Hh, Ww = img.shape[:2]
478
+ x_cl = int(np.clip(x, -2 * Ww, 3 * Ww))
479
+ y_cl = int(np.clip(y, -2 * Hh, 3 * Hh))
480
  cv2.circle(img, (x_cl, y_cl), 10, (255, 255, 255), -1)
481
  cv2.putText(img, label, (x_cl + 12, y_cl + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
482
 
 
491
  # Fallback: full-building quad from mask contour
492
  # -------------------------
493
  def _fitline_to_abc(points_xy: np.ndarray):
 
494
  pts = points_xy.astype(np.float32).reshape(-1, 1, 2)
495
  vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
496
  a = -vy
 
510
  return np.array([x, y], dtype=np.float32)
511
 
512
 
513
+ def _expand_corners(corners: np.ndarray, scale: float = 0.06) -> np.ndarray:
514
  corners = corners.astype(np.float32)
515
  center = corners.mean(axis=0, keepdims=True)
516
  return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
517
 
518
 
 
 
 
 
 
 
 
 
 
 
 
519
  def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
 
 
 
520
  mask01 = _clean_mask(mask01)
521
  h, w = mask01.shape
522
+
523
  mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
524
  cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
525
  if not cnts:
526
  raise ValueError("Mask is empty (no contours).")
527
+
528
  cnt = max(cnts, key=cv2.contourArea)
529
  if cv2.contourArea(cnt) < 500:
530
  raise ValueError("Mask too small to infer corners.")
531
+
532
  pts = cnt.reshape(-1, 2).astype(np.float32)
533
 
534
  x_min, y_min = pts.min(axis=0)
 
570
 
571
 
572
  def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
 
 
 
 
573
  corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
574
 
575
  (tl, tr, br, bl) = corners
 
577
  wB = np.linalg.norm(tr - tl)
578
  hA = np.linalg.norm(tr - br)
579
  hB = np.linalg.norm(tl - bl)
580
+ out_w = max(int(max(wA, wB)), 200)
581
+ out_h = max(int(max(hA, hB)), 200)
 
 
582
 
583
  dst = np.array([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]], dtype=np.float32)
584
  H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
585
 
586
  bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
587
+ warped_bgr, _ = _warp_with_bounds(bgr, H, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
588
  warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
589
 
590
  mask255 = (mask01 * 255).astype(np.uint8)
 
617
  add_grid=False,
618
  ):
619
  """
 
620
  Strategy:
621
+ - Detect + segment building
622
+ - Try vanishing-point facade rectification (better for off-angle shots)
623
  - Fallback to full-building quad rectification
624
+ - Generate architectural chart from rectified image
625
  """
626
  if image_np is None:
627
  raise ValueError("Please upload an image.")
 
647
  mask01 = _segment_box_mask(pil, box)
648
  mask01 = _clean_mask(mask01)
649
 
650
+ # Original + outline
651
  original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
652
 
653
+ # Try stronger front facade rectification
654
  rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
655
 
656
+ # Fallback
657
  if rect_rgb is None or rect_mask01 is None:
658
  rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
 
659
  dbg = dbg if dbg is not None else dbg2
660
 
661
+ # Rectified + outline
662
  straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
663
 
664
+ # Chart
665
  chart = architectural_chart(
666
  rect_rgb,
667
  mode=str(chart_mode),
 
677
  # Mask preview
678
  mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
679
 
680
+ # Debug: bbox + (optional) VP markers
681
  debug = image_np.copy()
682
  x1i, y1i, x2i, y2i = map(int, box)
683
  cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
684
+
685
+ if dbg is not None and dbg.shape[:2] == debug.shape[:2]:
686
+ debug = cv2.addWeighted(debug, 0.70, dbg, 0.30, 0)
 
 
 
687
 
688
  return chart, straightened_outlined, original_outlined, debug, mask_rgb
689
 
 
714
  ],
715
  title="Auto Building Front-Façade Rectifier + Architectural Chart",
716
  description=(
717
+ "Corrects off-angle building photos toward a front façade using vanishing-point rectification "
718
+ "(with a full-building contour fallback), then generates a deterministic architectural chart."
 
719
  ),
720
  )
721
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- gradio
2
  numpy
3
- opencv-python-headless
4
  torch
5
- transformers
6
  Pillow
 
1
+ gradio==4.44.0
2
  numpy
3
+ opencv-python-headless==4.12.0.88
4
  torch
5
+ transformers==4.45.0
6
  Pillow