Spaces:

atxniu
/

warp_perspective

Sleeping

App Files Files Community

Arthur Niu committed on Jan 12

Commit

be93391

1 Parent(s): cc1c482

front facet with architectural chart

Browse files

Files changed (2) hide show

app.py +89 -116
requirements.txt +3 -3

app.py CHANGED Viewed

@@ -26,17 +26,17 @@ sam_model = SamModel.from_pretrained(SAM_ID).to(DEVICE)
 # -------------------------
-# Basic helpers
 # -------------------------
 def _ensure_2d_mask(mask) -> np.ndarray:
-    """Normalize mask to 2D uint8 {0,1}."""
     if torch.is_tensor(mask):
         mask = mask.detach().cpu().numpy()
     mask = np.array(mask)
     mask = np.squeeze(mask)
     if mask.ndim == 3:
-        # (N,H,W) -> take first; (H,W,C) -> take first channel
         if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
             mask = mask[0]
         else:
@@ -50,7 +50,7 @@ def _ensure_2d_mask(mask) -> np.ndarray:
 def _clean_mask(mask01: np.ndarray) -> np.ndarray:
-    """Light morphology cleanup."""
     mask01 = _ensure_2d_mask(mask01)
     m = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
@@ -59,25 +59,32 @@ def _clean_mask(mask01: np.ndarray) -> np.ndarray:
     return (m > 0).astype(np.uint8)
 def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
     """
-    Warp with automatic output bounds so the result is not cropped.
     Returns (warped_img, H_out) where H_out includes translation.
     """
     h, w = img.shape[:2]
-    corners = np.array(
-        [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]],
-        dtype=np.float32,
-    )
     corners_h = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), H).reshape(-1, 2)
     min_xy = corners_h.min(axis=0)
     max_xy = corners_h.max(axis=0)
     min_x, min_y = float(min_xy[0]), float(min_xy[1])
     max_x, max_y = float(max_xy[0]), float(max_xy[1])
-    # Translate so all coords are positive
     tx = -min_x if min_x < 0 else 0.0
     ty = -min_y if min_y < 0 else 0.0
@@ -102,7 +109,7 @@ def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 25
 # Detection + segmentation
 # -------------------------
 def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
-    """Grounding DINO: detect building bbox. Returns xyxy float32."""
     text_labels = [["a building", "a facade", "a house"]]
     inputs = dino_processor(images=pil_img, text=text_labels, return_tensors="pt").to(DEVICE)
@@ -126,7 +133,7 @@ def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshol
 def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
-    """SAM: segment within bbox. Returns 2D uint8 mask {0,1}."""
     input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
     inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
@@ -143,7 +150,6 @@ def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
     if torch.is_tensor(m):
         m = m.detach().cpu().numpy()
     m = np.array(m)
     if m.ndim >= 3:
         m = m[0]
@@ -169,7 +175,7 @@ def _draw_outline_on_image(rgb_img: np.ndarray, mask01: np.ndarray, thickness: i
 # -------------------------
-# Option A chart
 # -------------------------
 def architectural_chart(
     rgb_img: np.ndarray,
@@ -231,8 +237,20 @@ def architectural_chart(
 # -------------------------
-# Perspective -> front facade rectification
 # -------------------------
 def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
     """
     Extract line segments with LSD, keep those whose midpoints are inside mask.
@@ -242,7 +260,7 @@ def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float =
     gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
     gray = cv2.GaussianBlur(gray, (3, 3), 0)
-    lsd = cv2.createLineSegmentDetector(_refine=cv2.LSD_REFINE_STD)
     detected = lsd.detect(gray)[0]
     if detected is None:
         return []
@@ -265,15 +283,13 @@ def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float =
         if mask01[my, mx] == 0:
             continue
-        # Homogeneous line from two points: p1 x p2
         p1 = np.array([x1, y1, 1.0], dtype=np.float32)
         p2 = np.array([x2, y2, 1.0], dtype=np.float32)
         l = np.cross(p1, p2)  # (a,b,c)
         norm = float(np.hypot(l[0], l[1]))
         if norm < 1e-6:
             continue
-        l = (l / norm).astype(np.float32)
-        lines_h.append(l)
     return lines_h
@@ -282,24 +298,15 @@ def _intersection_of_lines(l1, l2):
     p = np.cross(l1, l2)
     if abs(float(p[2])) < 1e-6:
         return None
-    return (p / p[2]).astype(np.float32)  # normalize to z=1
 def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=12):
-    """
-    RANSAC for vanishing point:
-      - sample 2 lines -> intersection point
-      - score by distance to lines
-      - refine with SVD on inliers: find vp minimizing ||A vp|| with vp[2]=1 after normalization
-    """
     if len(lines) < 2:
         return None, None
     lines = [np.asarray(l, dtype=np.float32) for l in lines]
-    best_vp = None
-    best_inliers = None
-    best_count = 0
     rng = np.random.default_rng(0)
     for _ in range(iters):
@@ -310,8 +317,7 @@ def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=1
         if vp is None:
             continue
-        # distance point->line: |l·vp| (since l normalized by sqrt(a^2+b^2))
-        errs = [abs(float(l @ vp)) for l in lines]
         inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
         if len(inliers) > best_count:
             best_count = len(inliers)
@@ -321,29 +327,25 @@ def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=1
     if best_vp is None or best_inliers is None or best_count < min_inliers:
         return None, None
-    # refine with SVD: stack inlier lines as A, solve A vp = 0
     A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
-    # vp is right singular vector corresponding to smallest singular value
     _, _, Vt = np.linalg.svd(A)
     vp = Vt[-1, :]
     if abs(float(vp[2])) < 1e-6:
         return None, None
     vp = (vp / vp[2]).astype(np.float32)
     return vp, best_inliers
-def _split_lines_by_orientation(lines, prefer="hv"):
     """
-    Split lines into near-horizontal and near-vertical groups using segment direction angle inferred from line normal.
-    For a line l=(a,b,c), direction vector is (b, -a).
     """
     horiz, vert = [], []
     for l in lines:
         a, b, _ = map(float, l)
         dx, dy = b, -a
-        ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0  # [0,180)
-        # near 0/180 => horizontal direction; near 90 => vertical direction
         if ang < 25 or ang > 155:
             horiz.append(l)
         elif 65 < ang < 115:
@@ -353,9 +355,8 @@ def _split_lines_by_orientation(lines, prefer="hv"):
 def _affine_H_from_vanishing_line(l):
     """
-    If vanishing line is l=(l1,l2,l3), affine rectification homography:
       H = [[1,0,0],[0,1,0],[l1/l3, l2/l3, 1]]
-    This sends vanishing line to infinity.
     """
     l = np.asarray(l, dtype=np.float32)
     if abs(float(l[2])) < 1e-6:
@@ -366,10 +367,9 @@ def _affine_H_from_vanishing_line(l):
 def _dominant_directions_from_lines(lines):
     """
-    From homogeneous lines (already in an affinely-rectified space), compute dominant
-    direction vectors u (horizontal-ish) and v (vertical-ish) as unit vectors.
     """
-    if len(lines) < 4:
         return None, None
     horiz, vert = _split_lines_by_orientation(lines)
@@ -378,12 +378,12 @@ def _dominant_directions_from_lines(lines):
         vecs = []
         for l in line_list:
             a, b, _ = map(float, l)
-            dx, dy = b, -a  # direction vector
-            n = (dx * dx + dy * dy) ** 0.5
             if n < 1e-6:
                 continue
             dx, dy = dx / n, dy / n
-            # make sign consistent to avoid cancellation
             if mode == "h":
                 if dx < 0:
                     dx, dy = -dx, -dy
@@ -391,8 +391,10 @@ def _dominant_directions_from_lines(lines):
                 if dy < 0:
                     dx, dy = -dx, -dy
             vecs.append([dx, dy])
         if len(vecs) < 2:
             return None
         v = np.mean(np.array(vecs, dtype=np.float32), axis=0)
         n = float(np.hypot(v[0], v[1]))
         if n < 1e-6:
@@ -406,23 +408,20 @@ def _dominant_directions_from_lines(lines):
 def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
     """
-    Attempt a stronger "front façade" rectification when the photo is not taken head-on:
-      1) extract lines within mask
-      2) estimate horizontal/vertical vanishing points via RANSAC
-      3) compute vanishing line -> affine rectification
-      4) in affine space, estimate dominant directions u,v -> apply linear transform A = inv([u v])
-         (this removes shear and makes axes orthogonal, producing a more "front" view)
-      5) warp with bounds (no cropping)
-    Returns (rectified_rgb, rectified_mask01, debug_overlay_rgb)
-    If fails, returns (None, None, debug_overlay_rgb).
     """
     mask01 = _clean_mask(mask01)
-    lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
     debug = rgb_img.copy()
-    # Draw mask outline on debug
     debug = _draw_outline_on_image(debug, mask01, thickness=2)
     if len(lines) < 10:
         return None, None, debug
@@ -430,34 +429,31 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
     if len(horiz) < 4 or len(vert) < 4:
         return None, None, debug
-    vp_h, in_h = _fit_vanishing_point_ransac(horiz, iters=800, dist_thresh=3.0, min_inliers=10)
-    vp_v, in_v = _fit_vanishing_point_ransac(vert, iters=800, dist_thresh=3.0, min_inliers=10)
     if vp_h is None or vp_v is None:
         return None, None, debug
-    # Vanishing line of the facade plane
     van_line = np.cross(vp_h, vp_v).astype(np.float32)
     H_aff = _affine_H_from_vanishing_line(van_line)
     if H_aff is None:
         return None, None, debug
-    # Warp image + mask to affine space (no crop)
     bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
-    aff_bgr, H_aff_out = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
     aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
     mask255 = (mask01 * 255).astype(np.uint8)
     aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
     aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
-    # In affine space, re-extract lines and estimate dominant orthogonal axes
     aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
     u, v = _dominant_directions_from_lines(aff_lines)
     if u is None or v is None:
         return None, None, debug
-    # Linear transform that maps u->x axis and v->y axis:
-    # M = [u v] (2x2); A = inv(M)
     M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
     if abs(float(np.linalg.det(M2))) < 1e-6:
         return None, None, debug
@@ -468,7 +464,6 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
         dtype=np.float32,
     )
-    # Apply linear transform with bounds to avoid cropping
     aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
     rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
     rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
@@ -476,12 +471,12 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
     rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
     rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
-    # Debug: plot vanishing points (clamped if far away)
     def _draw_vp(img, vp, label):
         x, y = float(vp[0]), float(vp[1])
-        H, W = img.shape[:2]
-        x_cl = int(np.clip(x, -2 * W, 3 * W))
-        y_cl = int(np.clip(y, -2 * H, 3 * H))
         cv2.circle(img, (x_cl, y_cl), 10, (255, 255, 255), -1)
         cv2.putText(img, label, (x_cl + 12, y_cl + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
@@ -496,7 +491,6 @@ def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
 # Fallback: full-building quad from mask contour
 # -------------------------
 def _fitline_to_abc(points_xy: np.ndarray):
-    """Fit line via cv2.fitLine, return (a,b,c) for ax + by = c."""
     pts = points_xy.astype(np.float32).reshape(-1, 1, 2)
     vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
     a = -vy
@@ -516,36 +510,25 @@ def _intersect_lines_abc(l1, l2):
     return np.array([x, y], dtype=np.float32)
-def _expand_corners(corners: np.ndarray, scale: float = 0.05) -> np.ndarray:
     corners = corners.astype(np.float32)
     center = corners.mean(axis=0, keepdims=True)
     return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
-def _order_points(pts4: np.ndarray) -> np.ndarray:
-    pts4 = np.asarray(pts4, dtype=np.float32)
-    s = pts4.sum(axis=1)
-    d = pts4[:, 0] - pts4[:, 1]
-    tl = pts4[np.argmin(s)]
-    br = pts4[np.argmax(s)]
-    tr = pts4[np.argmax(d)]
-    bl = pts4[np.argmin(d)]
-    return np.array([tl, tr, br, bl], dtype=np.float32)
 def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
-    """
-    Full-building quadrilateral corners from outer contour bands (works when facade is approximately planar).
-    """
     mask01 = _clean_mask(mask01)
     h, w = mask01.shape
     mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
     cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if not cnts:
         raise ValueError("Mask is empty (no contours).")
     cnt = max(cnts, key=cv2.contourArea)
     if cv2.contourArea(cnt) < 500:
         raise ValueError("Mask too small to infer corners.")
     pts = cnt.reshape(-1, 2).astype(np.float32)
     x_min, y_min = pts.min(axis=0)
@@ -587,10 +570,6 @@ def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12,
 def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
-    """
-    Fallback rectification: compute 4 corners from mask contour -> warp to rectangle.
-    Uses warp_with_bounds (no crop).
-    """
     corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
     (tl, tr, br, bl) = corners
@@ -598,16 +577,14 @@ def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, ex
     wB = np.linalg.norm(tr - tl)
     hA = np.linalg.norm(tr - br)
     hB = np.linalg.norm(tl - bl)
-    out_w = int(max(wA, wB))
-    out_h = int(max(hA, hB))
-    out_w = max(out_w, 200)
-    out_h = max(out_h, 200)
     dst = np.array([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]], dtype=np.float32)
     H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
     bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
-    warped_bgr, H_out = _warp_with_bounds(bgr, H, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
     warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
     mask255 = (mask01 * 255).astype(np.uint8)
@@ -640,10 +617,11 @@ def straighten_and_chart(
     add_grid=False,
 ):
     """
-    Goal: produce a better "front façade" even when the photo is taken off-angle.
     Strategy:
-      - Try vanishing-point based rectification (projective -> affine -> orthogonal axes)
       - Fallback to full-building quad rectification
     """
     if image_np is None:
         raise ValueError("Please upload an image.")
@@ -669,22 +647,21 @@ def straighten_and_chart(
     mask01 = _segment_box_mask(pil, box)
     mask01 = _clean_mask(mask01)
-    # Outline on original
     original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
-    # Try: strong front façade rectification
     rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
-    # Fallback if needed
     if rect_rgb is None or rect_mask01 is None:
         rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
-        # prefer VP debug if it exists; otherwise use quad debug
         dbg = dbg if dbg is not None else dbg2
-    # Outline on rectified
     straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
-    # Architectural chart from rectified image
     chart = architectural_chart(
         rect_rgb,
         mode=str(chart_mode),
@@ -700,16 +677,13 @@ def straighten_and_chart(
     # Mask preview
     mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
-    # Debug: show bbox on original + rectification debug overlay
     debug = image_np.copy()
     x1i, y1i, x2i, y2i = map(int, box)
     cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
-    # Blend in rectification debug (VPs) if available, otherwise leave bbox-only
-    if dbg is not None:
-        # Put VP debug into the same canvas size (original) by simple overlay where possible
-        # If dbg size differs, just skip blending to avoid distortion.
-        if dbg.shape[:2] == debug.shape[:2]:
-            debug = cv2.addWeighted(debug, 0.70, dbg, 0.30, 0)
     return chart, straightened_outlined, original_outlined, debug, mask_rgb
@@ -740,9 +714,8 @@ demo = gr.Interface(
     ],
     title="Auto Building Front-Façade Rectifier + Architectural Chart",
     description=(
-        "Better front façade correction when the photo is taken off-angle: "
-        "vanishing-point rectification (projective->affine->orthogonal axes), "
-        "with a full-building contour-based fallback. Warps use auto-bounds to reduce cropping."
     ),
 )

 # -------------------------
+# Mask + geometry helpers
 # -------------------------
 def _ensure_2d_mask(mask) -> np.ndarray:
+    """Normalize any mask variant to 2D uint8 {0,1}."""
     if torch.is_tensor(mask):
         mask = mask.detach().cpu().numpy()
     mask = np.array(mask)
     mask = np.squeeze(mask)
     if mask.ndim == 3:
+        # (N,H,W) -> first; (H,W,C) -> first channel
         if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
             mask = mask[0]
         else:
 def _clean_mask(mask01: np.ndarray) -> np.ndarray:
+    """Light morphology cleanup for stability."""
     mask01 = _ensure_2d_mask(mask01)
     m = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
     return (m > 0).astype(np.uint8)
+def _order_points(pts4: np.ndarray) -> np.ndarray:
+    """Order 4 points as TL, TR, BR, BL."""
+    pts4 = np.asarray(pts4, dtype=np.float32)
+    s = pts4.sum(axis=1)
+    d = pts4[:, 0] - pts4[:, 1]
+    tl = pts4[np.argmin(s)]
+    br = pts4[np.argmax(s)]
+    tr = pts4[np.argmax(d)]
+    bl = pts4[np.argmin(d)]
+    return np.array([tl, tr, br, bl], dtype=np.float32)
 def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
     """
+    Warp with automatic bounds so results are less likely to be cropped.
     Returns (warped_img, H_out) where H_out includes translation.
     """
     h, w = img.shape[:2]
+    corners = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float32)
     corners_h = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), H).reshape(-1, 2)
     min_xy = corners_h.min(axis=0)
     max_xy = corners_h.max(axis=0)
     min_x, min_y = float(min_xy[0]), float(min_xy[1])
     max_x, max_y = float(max_xy[0]), float(max_xy[1])
     tx = -min_x if min_x < 0 else 0.0
     ty = -min_y if min_y < 0 else 0.0
 # Detection + segmentation
 # -------------------------
 def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
+    """Grounding DINO detect bbox. Returns xyxy float32."""
     text_labels = [["a building", "a facade", "a house"]]
     inputs = dino_processor(images=pil_img, text=text_labels, return_tensors="pt").to(DEVICE)
 def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
+    """SAM segment in bbox. Returns 2D uint8 mask {0,1}."""
     input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
     inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
     if torch.is_tensor(m):
         m = m.detach().cpu().numpy()
     m = np.array(m)
     if m.ndim >= 3:
         m = m[0]
 # -------------------------
+# Architectural chart (Option A)
 # -------------------------
 def architectural_chart(
     rgb_img: np.ndarray,
 # -------------------------
+# Vanishing-point-based facade rectification
 # -------------------------
+def _create_lsd():
+    """
+    OpenCV python bindings differ; avoid keyword args.
+    Some builds accept (refine) positional, some only accept ().
+    """
+    try:
+        refine = cv2.LSD_REFINE_STD if hasattr(cv2, "LSD_REFINE_STD") else 1
+        return cv2.createLineSegmentDetector(refine)
+    except Exception:
+        return cv2.createLineSegmentDetector()
 def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
     """
     Extract line segments with LSD, keep those whose midpoints are inside mask.
     gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
     gray = cv2.GaussianBlur(gray, (3, 3), 0)
+    lsd = _create_lsd()
     detected = lsd.detect(gray)[0]
     if detected is None:
         return []
         if mask01[my, mx] == 0:
             continue
         p1 = np.array([x1, y1, 1.0], dtype=np.float32)
         p2 = np.array([x2, y2, 1.0], dtype=np.float32)
         l = np.cross(p1, p2)  # (a,b,c)
         norm = float(np.hypot(l[0], l[1]))
         if norm < 1e-6:
             continue
+        lines_h.append((l / norm).astype(np.float32))
     return lines_h
     p = np.cross(l1, l2)
     if abs(float(p[2])) < 1e-6:
         return None
+    return (p / p[2]).astype(np.float32)  # z=1
 def _fit_vanishing_point_ransac(lines, iters=800, dist_thresh=3.0, min_inliers=12):
     if len(lines) < 2:
         return None, None
     lines = [np.asarray(l, dtype=np.float32) for l in lines]
+    best_vp, best_inliers, best_count = None, None, 0
     rng = np.random.default_rng(0)
     for _ in range(iters):
         if vp is None:
             continue
+        errs = [abs(float(l @ vp)) for l in lines]  # point->line distance (lines normalized)
         inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
         if len(inliers) > best_count:
             best_count = len(inliers)
     if best_vp is None or best_inliers is None or best_count < min_inliers:
         return None, None
     A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
     _, _, Vt = np.linalg.svd(A)
     vp = Vt[-1, :]
     if abs(float(vp[2])) < 1e-6:
         return None, None
     vp = (vp / vp[2]).astype(np.float32)
     return vp, best_inliers
+def _split_lines_by_orientation(lines):
     """
+    Split into near-horizontal vs near-vertical groups using direction from normal.
+    For line l=(a,b,c), direction vector is (b, -a).
     """
     horiz, vert = [], []
     for l in lines:
         a, b, _ = map(float, l)
         dx, dy = b, -a
+        ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0
         if ang < 25 or ang > 155:
             horiz.append(l)
         elif 65 < ang < 115:
 def _affine_H_from_vanishing_line(l):
     """
+    For vanishing line l=(l1,l2,l3), affine rectification:
       H = [[1,0,0],[0,1,0],[l1/l3, l2/l3, 1]]
     """
     l = np.asarray(l, dtype=np.float32)
     if abs(float(l[2])) < 1e-6:
 def _dominant_directions_from_lines(lines):
     """
+    In affinely-rectified space, estimate dominant unit directions u (horizontal-ish) and v (vertical-ish).
     """
+    if len(lines) < 6:
         return None, None
     horiz, vert = _split_lines_by_orientation(lines)
         vecs = []
         for l in line_list:
             a, b, _ = map(float, l)
+            dx, dy = b, -a
+            n = float(np.hypot(dx, dy))
             if n < 1e-6:
                 continue
             dx, dy = dx / n, dy / n
+            # sign consistency
             if mode == "h":
                 if dx < 0:
                     dx, dy = -dx, -dy
                 if dy < 0:
                     dx, dy = -dx, -dy
             vecs.append([dx, dy])
         if len(vecs) < 2:
             return None
         v = np.mean(np.array(vecs, dtype=np.float32), axis=0)
         n = float(np.hypot(v[0], v[1]))
         if n < 1e-6:
 def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
     """
+    Stronger facade rectification for off-angle photos:
+      1) LSD lines within mask
+      2) estimate two vanishing points (horizontal/vertical) via RANSAC
+      3) affine rectification using vanishing line
+      4) remove shear by mapping dominant directions to orthogonal axes
+      5) warps use auto-bounds to reduce cropping
+    Returns (rect_rgb, rect_mask01, debug_overlay_rgb) or (None, None, debug) on failure.
     """
     mask01 = _clean_mask(mask01)
     debug = rgb_img.copy()
     debug = _draw_outline_on_image(debug, mask01, thickness=2)
+    lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
     if len(lines) < 10:
         return None, None, debug
     if len(horiz) < 4 or len(vert) < 4:
         return None, None, debug
+    vp_h, _ = _fit_vanishing_point_ransac(horiz, iters=900, dist_thresh=3.0, min_inliers=10)
+    vp_v, _ = _fit_vanishing_point_ransac(vert, iters=900, dist_thresh=3.0, min_inliers=10)
     if vp_h is None or vp_v is None:
         return None, None, debug
     van_line = np.cross(vp_h, vp_v).astype(np.float32)
     H_aff = _affine_H_from_vanishing_line(van_line)
     if H_aff is None:
         return None, None, debug
+    # Affine warp (no crop)
     bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
+    aff_bgr, _ = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
     aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
     mask255 = (mask01 * 255).astype(np.uint8)
     aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
     aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
+    # In affine space, estimate dominant axes and de-shear
     aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
     u, v = _dominant_directions_from_lines(aff_lines)
     if u is None or v is None:
         return None, None, debug
     M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
     if abs(float(np.linalg.det(M2))) < 1e-6:
         return None, None, debug
         dtype=np.float32,
     )
     aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
     rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
     rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
     rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
     rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
+    # Debug: mark vanishing points (clamped)
     def _draw_vp(img, vp, label):
         x, y = float(vp[0]), float(vp[1])
+        Hh, Ww = img.shape[:2]
+        x_cl = int(np.clip(x, -2 * Ww, 3 * Ww))
+        y_cl = int(np.clip(y, -2 * Hh, 3 * Hh))
         cv2.circle(img, (x_cl, y_cl), 10, (255, 255, 255), -1)
         cv2.putText(img, label, (x_cl + 12, y_cl + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
 # Fallback: full-building quad from mask contour
 # -------------------------
 def _fitline_to_abc(points_xy: np.ndarray):
     pts = points_xy.astype(np.float32).reshape(-1, 1, 2)
     vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
     a = -vy
     return np.array([x, y], dtype=np.float32)
+def _expand_corners(corners: np.ndarray, scale: float = 0.06) -> np.ndarray:
     corners = corners.astype(np.float32)
     center = corners.mean(axis=0, keepdims=True)
     return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
 def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
     mask01 = _clean_mask(mask01)
     h, w = mask01.shape
     mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
     cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if not cnts:
         raise ValueError("Mask is empty (no contours).")
     cnt = max(cnts, key=cv2.contourArea)
     if cv2.contourArea(cnt) < 500:
         raise ValueError("Mask too small to infer corners.")
     pts = cnt.reshape(-1, 2).astype(np.float32)
     x_min, y_min = pts.min(axis=0)
 def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
     corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
     (tl, tr, br, bl) = corners
     wB = np.linalg.norm(tr - tl)
     hA = np.linalg.norm(tr - br)
     hB = np.linalg.norm(tl - bl)
+    out_w = max(int(max(wA, wB)), 200)
+    out_h = max(int(max(hA, hB)), 200)
     dst = np.array([[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]], dtype=np.float32)
     H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
     bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
+    warped_bgr, _ = _warp_with_bounds(bgr, H, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
     warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
     mask255 = (mask01 * 255).astype(np.uint8)
     add_grid=False,
 ):
     """
     Strategy:
+      - Detect + segment building
+      - Try vanishing-point facade rectification (better for off-angle shots)
       - Fallback to full-building quad rectification
+      - Generate architectural chart from rectified image
     """
     if image_np is None:
         raise ValueError("Please upload an image.")
     mask01 = _segment_box_mask(pil, box)
     mask01 = _clean_mask(mask01)
+    # Original + outline
     original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
+    # Try stronger “front facade”
     rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
+    # Fallback
     if rect_rgb is None or rect_mask01 is None:
         rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
         dbg = dbg if dbg is not None else dbg2
+    # Rectified + outline
     straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
+    # Chart
     chart = architectural_chart(
         rect_rgb,
         mode=str(chart_mode),
     # Mask preview
     mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
+    # Debug: bbox + (optional) VP markers
     debug = image_np.copy()
     x1i, y1i, x2i, y2i = map(int, box)
     cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
+    if dbg is not None and dbg.shape[:2] == debug.shape[:2]:
+        debug = cv2.addWeighted(debug, 0.70, dbg, 0.30, 0)
     return chart, straightened_outlined, original_outlined, debug, mask_rgb
     ],
     title="Auto Building Front-Façade Rectifier + Architectural Chart",
     description=(
+        "Corrects off-angle building photos toward a front façade using vanishing-point rectification "
+        "(with a full-building contour fallback), then generates a deterministic architectural chart."
     ),
 )

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-gradio
 numpy
-opencv-python-headless
 torch
-transformers
 Pillow

+gradio==4.44.0
 numpy
+opencv-python-headless==4.12.0.88
 torch
+transformers==4.45.0
 Pillow