Spaces:
Sleeping
Sleeping
import gradio as gr
import numpy as np
import cv2
import torch
from PIL import Image
from transformers import (
    AutoProcessor,
    AutoModelForZeroShotObjectDetection,
    SamModel,
    SamProcessor,
)

# Run on GPU when available; both models below are moved to this device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# -------------------------
# Models
# -------------------------
# Loaded once at import time; this is a long-running app process, so the
# download/initialization cost is paid only on startup.
# GroundingDINO: open-vocabulary (text-prompted) object detection.
DINO_ID = "IDEA-Research/grounding-dino-tiny"
dino_processor = AutoProcessor.from_pretrained(DINO_ID)
dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(DINO_ID).to(DEVICE)

# SAM: box-prompted segmentation used to mask the detected building.
SAM_ID = "facebook/sam-vit-base"
sam_processor = SamProcessor.from_pretrained(SAM_ID)
sam_model = SamModel.from_pretrained(SAM_ID).to(DEVICE)
| # ------------------------- | |
| # Mask + geometry helpers | |
| # ------------------------- | |
| def _ensure_2d_mask(mask) -> np.ndarray: | |
| if torch.is_tensor(mask): | |
| mask = mask.detach().cpu().numpy() | |
| mask = np.array(mask) | |
| mask = np.squeeze(mask) | |
| if mask.ndim == 3: | |
| if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32: | |
| mask = mask[0] | |
| else: | |
| mask = mask[:, :, 0] | |
| mask = np.squeeze(mask) | |
| if mask.ndim != 2: | |
| raise ValueError(f"Mask is not 2D after normalization. Got shape: {mask.shape}") | |
| return (mask > 0).astype(np.uint8) | |
def _clean_mask(mask01: np.ndarray) -> np.ndarray:
    """Despeckle a binary mask: morphological close (fill pinholes) then open (drop specks)."""
    binary = _ensure_2d_mask(mask01)
    work = np.ascontiguousarray((binary * 255).astype(np.uint8))
    se = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    for op in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        work = cv2.morphologyEx(work, op, se, iterations=1)
    return (work > 0).astype(np.uint8)
| def _order_points(pts4: np.ndarray) -> np.ndarray: | |
| pts4 = np.asarray(pts4, dtype=np.float32) | |
| s = pts4.sum(axis=1) | |
| d = pts4[:, 0] - pts4[:, 1] | |
| tl = pts4[np.argmin(s)] | |
| br = pts4[np.argmax(s)] | |
| tr = pts4[np.argmax(d)] | |
| bl = pts4[np.argmin(d)] | |
| return np.array([tl, tr, br, bl], dtype=np.float32) | |
def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
    """Warp `img` by homography `H`, shifting/sizing the canvas so nothing is cropped.

    Returns (warped_image, adjusted_homography) where the adjusted homography
    includes the translation applied to keep all warped corners in view.
    """
    h, w = img.shape[:2]
    src_corners = np.array(
        [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float32
    ).reshape(-1, 1, 2)
    dst_corners = cv2.perspectiveTransform(src_corners, H).reshape(-1, 2)
    min_x, min_y = (float(v) for v in dst_corners.min(axis=0))
    max_x, max_y = (float(v) for v in dst_corners.max(axis=0))
    # Translate so warped content starts at non-negative coordinates.
    tx = max(0.0, -min_x)
    ty = max(0.0, -min_y)
    out_size = (int(np.ceil(max_x + tx)) + 1, int(np.ceil(max_y + ty)) + 1)
    # NOTE(review): if the warped corners are very far apart (vanishing point
    # near the image), out_size can get large — presumably callers' upstream
    # checks keep this bounded; confirm.
    T = np.array([[1.0, 0.0, tx], [0.0, 1.0, ty], [0.0, 0.0, 1.0]], dtype=np.float32)
    H_out = (T @ H).astype(np.float32)
    warped = cv2.warpPerspective(
        img,
        H_out,
        out_size,
        flags=interp,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border_value,
    )
    return warped, H_out
| # ------------------------- | |
| # GroundingDINO post-process compatibility | |
| # ------------------------- | |
def _dino_post_process(outputs, inputs, pil_img: Image.Image):
    """
    Run GroundingDINO post-processing across multiple transformers versions.

    Handle multiple transformers versions:
    - Some accept (threshold, text_threshold)
    - Some accept different kw names
    - Some accept no thresholds at all
    We always return a dict with 'boxes' and 'scores'.
    """
    target_sizes = [pil_img.size[::-1]]  # PIL size is (w, h); model wants (h, w)
    # Try most common signature (newer)
    try:
        return dino_processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            threshold=0.0,  # let us filter ourselves later
            text_threshold=0.0,
            target_sizes=target_sizes,
        )[0]
    except TypeError:
        pass
    # Try without thresholds (older)
    try:
        return dino_processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            target_sizes=target_sizes,
        )[0]
    except TypeError:
        pass
    # Try with positional args only; if even this fails, the installed
    # transformers API is incompatible and we surface that to the caller.
    try:
        return dino_processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            target_sizes,
        )[0]
    except Exception as e:
        raise RuntimeError(f"GroundingDINO post_process API mismatch: {e}")
| # ------------------------- | |
| # Detection + segmentation | |
| # ------------------------- | |
def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
    """
    Grounding DINO detect bbox. Returns xyxy float32.
    We do our own filtering by box_threshold to avoid version-specific kwargs.

    Note: `text_threshold` is accepted for interface compatibility but is not
    applied here (post-processing is invoked with thresholds of 0.0).

    Raises ValueError when no candidate box is returned at all.
    """
    # Use a single prompt string (most compatible)
    prompt = "building. building facade. house. house facade. facade."
    # Processor call compatibility: some versions want a plain string,
    # others a list of strings.
    try:
        inputs = dino_processor(images=pil_img, text=prompt, return_tensors="pt")
    except TypeError:
        inputs = dino_processor(images=pil_img, text=[prompt], return_tensors="pt")
    inputs = inputs.to(DEVICE)
    with torch.no_grad():
        outputs = dino_model(**inputs)
    results = _dino_post_process(outputs, inputs, pil_img)
    if "boxes" not in results or len(results["boxes"]) == 0:
        raise ValueError("No building detected. Try a closer crop or adjust thresholds.")
    boxes = results["boxes"].detach().cpu().numpy().astype(np.float32)
    scores = results["scores"].detach().cpu().numpy().astype(np.float32)
    # Manual thresholding (since processor signature differs)
    keep = scores >= float(box_threshold)
    if not np.any(keep):
        # If nothing passes, keep the best one anyway
        best = int(np.argmax(scores))
        return boxes[best]
    # Among the survivors, return the single highest-scoring box.
    boxes_k = boxes[keep]
    scores_k = scores[keep]
    best = int(np.argmax(scores_k))
    return boxes_k[best]
def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
    """Segment the region inside `box_xyxy` with SAM; return a binary 2-D uint8 mask."""
    # SAM expects boxes nested as [batch][boxes_per_image][4] in xyxy order.
    input_boxes = [[[float(box_xyxy[0]), float(box_xyxy[1]), float(box_xyxy[2]), float(box_xyxy[3])]]]
    inputs = sam_processor(images=pil_img, input_boxes=input_boxes, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = sam_model(**inputs, multimask_output=False)
    # Upscale the low-res predicted masks back to the original image size.
    masks = sam_processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )
    m = masks[0]  # first (only) image in the batch
    if torch.is_tensor(m):
        m = m.detach().cpu().numpy()
    m = np.array(m)
    if m.ndim >= 3:
        m = m[0]  # first box prompt; any remaining dims collapse in _ensure_2d_mask
    return _ensure_2d_mask(m)
| # ------------------------- | |
| # Outline helpers | |
| # ------------------------- | |
def _get_mask_contours(mask01: np.ndarray):
    """Return the external contours of a binary mask."""
    normalized = _ensure_2d_mask(mask01)
    mask255 = np.ascontiguousarray((normalized * 255).astype(np.uint8))
    contours, _hierarchy = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours
def _draw_outline_on_image(rgb_img: np.ndarray, mask01: np.ndarray, thickness: int = 3) -> np.ndarray:
    """Draw the mask boundary in white on a copy of `rgb_img` and return the copy."""
    annotated = rgb_img.copy()
    contours = _get_mask_contours(mask01)
    if not contours:
        return annotated
    cv2.drawContours(annotated, contours, contourIdx=-1, color=(255, 255, 255), thickness=int(thickness))
    return annotated
| # ------------------------- | |
| # Architectural chart (Option A) | |
| # ------------------------- | |
def architectural_chart(
    rgb_img: np.ndarray,
    mode: str = "blueprint",
    edge1: int = 60,
    edge2: int = 160,
    hough_threshold: int = 80,
    min_line_length: int = 40,
    max_line_gap: int = 8,
    thickness: int = 2,
    add_grid: bool = False,
) -> np.ndarray:
    """
    Render a line-drawing "architectural chart" of an RGB image.

    Edges come from Canny (`edge1`/`edge2` thresholds); straight strokes come
    from a probabilistic Hough transform over those edges. `mode` selects a
    blueprint look (light lines on blue) or black-on-white. Returns an RGB
    uint8 canvas the same size as `rgb_img`.
    """
    gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
    # Edge-preserving smoothing so Canny keeps structure but drops texture noise.
    gray = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
    edges = cv2.Canny(gray, int(edge1), int(edge2))
    lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=int(hough_threshold),
        minLineLength=int(min_line_length),
        maxLineGap=int(max_line_gap),
    )
    h, w = edges.shape[:2]
    if mode == "blueprint":
        canvas = np.zeros((h, w, 3), dtype=np.uint8)
        canvas[:, :] = (20, 40, 90)  # dark blueprint-blue background (RGB order)
        line_color = (255, 255, 255)
        edge_color = (220, 220, 220)
        grid_color = (255, 255, 255)
    else:
        canvas = np.ones((h, w, 3), dtype=np.uint8) * 255
        line_color = (0, 0, 0)
        edge_color = (30, 30, 30)
        grid_color = (0, 0, 0)
    # Faintly blend the raw edge map under the Hough strokes.
    edge_layer = np.zeros_like(canvas)
    ys, xs = np.where(edges > 0)
    edge_layer[ys, xs] = edge_color
    canvas = cv2.addWeighted(canvas, 1.0, edge_layer, 0.35, 0)
    if lines is not None:
        for x1, y1, x2, y2 in lines[:, 0]:
            cv2.line(canvas, (x1, y1), (x2, y2), line_color, int(thickness), cv2.LINE_AA)
    if add_grid:
        # Reference grid, roughly 25 cells across the short side, 40 px minimum.
        step = max(40, min(h, w) // 25)
        grid = canvas.copy()
        for x in range(0, w, step):
            cv2.line(grid, (x, 0), (x, h), grid_color, 1)
        for y in range(0, h, step):
            cv2.line(grid, (0, y), (w, y), grid_color, 1)
        # NOTE(review): `grid` is a full copy of the canvas, so this blend
        # re-adds 8% of everything (slightly brightening the whole image),
        # not just the grid lines — confirm that is intended.
        canvas = cv2.addWeighted(canvas, 1.0, grid, 0.08, 0)
    return canvas
| # ------------------------- | |
| # Vanishing-point-based facade rectification | |
| # ------------------------- | |
def _create_lsd():
    """Build an OpenCV Line Segment Detector, tolerating API differences across versions."""
    try:
        refine_mode = getattr(cv2, "LSD_REFINE_STD", 1)
        return cv2.createLineSegmentDetector(refine_mode)
    except Exception:
        # Some builds reject the refine argument; retry with defaults.
        return cv2.createLineSegmentDetector()
def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
    """Detect LSD line segments whose midpoints fall inside the mask.

    Each kept segment is returned as a normalized homogeneous line (a, b, c)
    with a^2 + b^2 == 1, so |line @ point| is a point-line distance.
    """
    mask01 = _ensure_2d_mask(mask01)
    gray = cv2.GaussianBlur(cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY), (3, 3), 0)
    segments = _create_lsd().detect(gray)[0]
    if segments is None:
        return []
    h, w = mask01.shape
    kept = []
    for seg in segments.reshape(-1, 4):
        x1, y1, x2, y2 = (float(v) for v in seg)
        # Drop short segments; they carry little orientation information.
        if ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5 < min_len:
            continue
        mid_x = int(round((x1 + x2) * 0.5))
        mid_y = int(round((y1 + y2) * 0.5))
        if not (0 <= mid_x < w and 0 <= mid_y < h):
            continue
        if mask01[mid_y, mid_x] == 0:
            continue
        # Homogeneous line through the two endpoints.
        line = np.cross(
            np.array([x1, y1, 1.0], dtype=np.float32),
            np.array([x2, y2, 1.0], dtype=np.float32),
        )
        scale = float(np.hypot(line[0], line[1]))
        if scale < 1e-6:
            continue
        kept.append((line / scale).astype(np.float32))
    return kept
| def _intersection_of_lines(l1, l2): | |
| p = np.cross(l1, l2) | |
| if abs(float(p[2])) < 1e-6: | |
| return None | |
| return (p / p[2]).astype(np.float32) | |
def _fit_vanishing_point_ransac(lines, iters=900, dist_thresh=3.0, min_inliers=10):
    """
    Estimate a vanishing point from normalized homogeneous lines via RANSAC.

    Samples random line pairs, intersects them, and scores each candidate by
    how many lines pass within `dist_thresh` of it (|l @ vp| is a point-line
    distance because the lines are normalized). The best candidate is refined
    by an SVD least-squares fit over its inliers.

    Returns (vp, inlier_indices) or (None, None) on failure.
    """
    if len(lines) < 2:
        return None, None
    lines = [np.asarray(l, dtype=np.float32) for l in lines]
    best_vp, best_inliers, best_count = None, None, 0
    rng = np.random.default_rng(0)  # fixed seed: deterministic across calls
    for _ in range(iters):
        i, j = rng.integers(0, len(lines), size=2)
        if i == j:
            continue  # degenerate pair; wastes the iteration, which is fine
        vp = _intersection_of_lines(lines[i], lines[j])
        if vp is None:
            continue
        errs = [abs(float(l @ vp)) for l in lines]
        inliers = [k for k, e in enumerate(errs) if e < dist_thresh]
        if len(inliers) > best_count:
            best_count = len(inliers)
            best_vp = vp
            best_inliers = inliers
    if best_vp is None or best_inliers is None or best_count < min_inliers:
        return None, None
    # Refine: the point minimizing the sum of squared distances to the inlier
    # lines is the last right-singular vector of the stacked line matrix.
    A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
    _, _, Vt = np.linalg.svd(A)
    vp = Vt[-1, :]
    if abs(float(vp[2])) < 1e-6:
        return None, None  # refined point is (numerically) at infinity
    vp = (vp / vp[2]).astype(np.float32)
    return vp, best_inliers
| def _split_lines_by_orientation(lines): | |
| horiz, vert = [], [] | |
| for l in lines: | |
| a, b, _ = map(float, l) | |
| dx, dy = b, -a | |
| ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0 | |
| if ang < 25 or ang > 155: | |
| horiz.append(l) | |
| elif 65 < ang < 115: | |
| vert.append(l) | |
| return horiz, vert | |
| def _affine_H_from_vanishing_line(l): | |
| l = np.asarray(l, dtype=np.float32) | |
| if abs(float(l[2])) < 1e-6: | |
| return None | |
| l1, l2, l3 = map(float, l) | |
| return np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [l1 / l3, l2 / l3, 1.0]], dtype=np.float32) | |
def _dominant_directions_from_lines(lines):
    """Average unit directions of the horizontal and vertical line families.

    Returns (u, v) unit vectors, or (None, None) when the input is too sparse
    for either family to produce a stable mean.
    """
    if len(lines) < 6:
        return None, None
    horiz, vert = _split_lines_by_orientation(lines)

    def mean_dir(line_list, mode):
        # Collect unit direction vectors, flipped into one half-plane so that
        # opposite orientations do not cancel in the average.
        vecs = []
        for line in line_list:
            a, b, _ = map(float, line)
            dx, dy = b, -a
            norm = float(np.hypot(dx, dy))
            if norm < 1e-6:
                continue
            dx /= norm
            dy /= norm
            flip = dx < 0 if mode == "h" else dy < 0
            if flip:
                dx, dy = -dx, -dy
            vecs.append([dx, dy])
        if len(vecs) < 2:
            return None
        mean = np.mean(np.array(vecs, dtype=np.float32), axis=0)
        length = float(np.hypot(mean[0], mean[1]))
        if length < 1e-6:
            return None
        return (mean / length).astype(np.float32)

    return mean_dir(horiz, "h"), mean_dir(vert, "v")
def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
    """
    Two-stage rectification of the masked facade.

    Stage 1 (affine): find horizontal/vertical vanishing points from LSD line
    segments inside the mask, then warp so their vanishing line maps to the
    line at infinity.
    Stage 2 (linear): re-detect lines on the affinely-corrected image, average
    the dominant horizontal/vertical directions, and map them onto the axes.

    Returns (rectified_rgb, rectified_mask01, debug_image); the first two are
    None when there is not enough line support at any step, in which case the
    caller falls back to the contour-quad path.
    """
    mask01 = _clean_mask(mask01)
    debug = rgb_img.copy()
    debug = _draw_outline_on_image(debug, mask01, thickness=2)
    lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
    if len(lines) < 10:
        return None, None, debug
    horiz, vert = _split_lines_by_orientation(lines)
    if len(horiz) < 4 or len(vert) < 4:
        return None, None, debug
    vp_h, _ = _fit_vanishing_point_ransac(horiz, iters=900, dist_thresh=3.0, min_inliers=10)
    vp_v, _ = _fit_vanishing_point_ransac(vert, iters=900, dist_thresh=3.0, min_inliers=10)
    if vp_h is None or vp_v is None:
        return None, None, debug
    # The line through both vanishing points is the facade's vanishing line.
    van_line = np.cross(vp_h, vp_v).astype(np.float32)
    H_aff = _affine_H_from_vanishing_line(van_line)
    if H_aff is None:
        return None, None, debug
    # OpenCV warps operate on BGR here; convert around each warp.
    bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
    aff_bgr, _ = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
    aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
    # Warp the mask with the same homography (nearest-neighbor keeps it binary).
    mask255 = (mask01 * 255).astype(np.uint8)
    aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
    aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
    # Stage 2: align the now-affine facade's dominant directions with the axes.
    aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
    u, v = _dominant_directions_from_lines(aff_lines)
    if u is None or v is None:
        return None, None, debug
    M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
    if abs(float(np.linalg.det(M2))) < 1e-6:
        return None, None, debug  # directions (nearly) parallel; unusable
    A2 = np.linalg.inv(M2).astype(np.float32)
    # Embed the 2x2 direction-correcting transform in a homography.
    H_lin = np.array(
        [[A2[0, 0], A2[0, 1], 0.0], [A2[1, 0], A2[1, 1], 0.0], [0.0, 0.0, 1.0]],
        dtype=np.float32,
    )
    aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
    rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
    rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
    rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
    rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
    return rect_rgb, rect_mask01, debug
| # ------------------------- | |
| # Fallback: full-building quad from mask contour | |
| # ------------------------- | |
def _fitline_to_abc(points_xy: np.ndarray):
    """Least-squares fit a line through 2-D points; return (a, b, c) with ax + by = c."""
    samples = points_xy.astype(np.float32).reshape(-1, 1, 2)
    vx, vy, x0, y0 = cv2.fitLine(samples, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
    # Normal to direction (vx, vy) is (-vy, vx); c anchors the line at (x0, y0).
    a, b = -vy, vx
    return float(a), float(b), float(a * x0 + b * y0)
| def _intersect_lines_abc(l1, l2): | |
| a1, b1, c1 = l1 | |
| a2, b2, c2 = l2 | |
| det = a1 * b2 - a2 * b1 | |
| if abs(det) < 1e-9: | |
| return None | |
| x = (c1 * b2 - c2 * b1) / det | |
| y = (a1 * c2 - a2 * c1) / det | |
| return np.array([x, y], dtype=np.float32) | |
| def _expand_corners(corners: np.ndarray, scale: float = 0.06) -> np.ndarray: | |
| corners = corners.astype(np.float32) | |
| center = corners.mean(axis=0, keepdims=True) | |
| return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32) | |
def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
    """
    Estimate four building corners from the mask's outer contour.

    Fits a least-squares line to the contour points in each extreme band
    (the leftmost/rightmost `band_frac` of the width, topmost/bottommost of
    the height), intersects the four lines, and pushes the corners outward by
    `expand`. Returns corners ordered [tl, tr, br, bl] as float32.

    Raises ValueError when the mask is empty, too small, or too sparse.
    """
    mask01 = _clean_mask(mask01)
    h, w = mask01.shape
    mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
    cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not cnts:
        raise ValueError("Mask is empty (no contours).")
    cnt = max(cnts, key=cv2.contourArea)  # largest blob is taken as the building
    if cv2.contourArea(cnt) < 500:
        raise ValueError("Mask too small to infer corners.")
    pts = cnt.reshape(-1, 2).astype(np.float32)
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)
    dx = max(float(x_max - x_min), 1.0)  # floor at 1 to avoid zero-width bands
    dy = max(float(y_max - y_min), 1.0)
    bf = float(band_frac)
    # Contour points within a band of each extreme edge.
    left_pts = pts[pts[:, 0] <= x_min + bf * dx]
    right_pts = pts[pts[:, 0] >= x_max - bf * dx]
    top_pts = pts[pts[:, 1] <= y_min + bf * dy]
    bottom_pts = pts[pts[:, 1] >= y_max - bf * dy]
    if min(len(left_pts), len(right_pts), len(top_pts), len(bottom_pts)) < 30:
        raise ValueError("Not enough contour points for stable corner fitting.")
    L = _fitline_to_abc(left_pts)
    R = _fitline_to_abc(right_pts)
    T = _fitline_to_abc(top_pts)
    B = _fitline_to_abc(bottom_pts)
    tl = _intersect_lines_abc(L, T)
    tr = _intersect_lines_abc(R, T)
    br = _intersect_lines_abc(R, B)
    bl = _intersect_lines_abc(L, B)
    if tl is None or tr is None or br is None or bl is None:
        raise ValueError("Failed to intersect boundary lines for corners.")
    corners = np.array([tl, tr, br, bl], dtype=np.float32)
    corners = _expand_corners(corners, scale=expand)  # small safety margin
    return _order_points(corners)
def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
    """Rectify by warping the fitted building quad onto an axis-aligned rectangle.

    Returns (warped_rgb, warped_mask01, original_rgb).
    """
    corners = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
    tl, tr, br, bl = corners
    # Output size: the longer of each opposing edge pair, at least 200 px.
    out_w = max(int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl))), 200)
    out_h = max(int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl))), 200)
    dst = np.array(
        [[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
        dtype=np.float32,
    )
    H = cv2.getPerspectiveTransform(corners, dst).astype(np.float32)
    warped_bgr, _ = _warp_with_bounds(
        cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR),
        H,
        border_value=(255, 255, 255),
        interp=cv2.INTER_LINEAR,
    )
    warped_mask255, _ = _warp_with_bounds(
        (mask01 * 255).astype(np.uint8), H, border_value=0, interp=cv2.INTER_NEAREST
    )
    return (
        cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB),
        (warped_mask255 > 0).astype(np.uint8),
        rgb_img,
    )
| # ------------------------- | |
| # Main pipeline | |
| # ------------------------- | |
def straighten_and_chart(
    image_np,
    box_threshold=0.35,
    text_threshold=0.25,  # kept for UI compatibility, not strictly used now
    padding=0.03,
    outline_thickness=3,
    chart_mode="blueprint",
    canny_low=60,
    canny_high=160,
    hough_threshold=80,
    min_line_length=40,
    max_line_gap=8,
    line_thickness=2,
    add_grid=False,
):
    """Full pipeline: detect building -> segment -> rectify -> architectural chart.

    Returns (chart, straightened_outlined, original_outlined, debug_bbox, mask_rgb),
    all RGB uint8 arrays suitable for the gradio Image outputs.

    Raises ValueError when no image is provided or no building is detected.
    """
    if image_np is None:
        raise ValueError("Please upload an image.")
    pil = Image.fromarray(image_np).convert("RGB")
    W, H = pil.size
    # Fix: draw on the RGB-normalized copy everywhere (the original drew on the
    # raw `image_np`), so grayscale/RGBA uploads cannot desync the annotated
    # outputs from the RGB image the mask and detection were computed on.
    rgb_full = np.array(pil)
    box = _detect_building_box(pil, box_threshold=box_threshold, text_threshold=text_threshold)
    # Grow the detected box by `padding` (fraction of box size), clamped to the image.
    x1, y1, x2, y2 = box
    pad_x = float(padding) * (x2 - x1)
    pad_y = float(padding) * (y2 - y1)
    x1 = max(0, x1 - pad_x)
    y1 = max(0, y1 - pad_y)
    x2 = min(W - 1, x2 + pad_x)
    y2 = min(H - 1, y2 + pad_y)
    box = np.array([x1, y1, x2, y2], dtype=np.float32)
    mask01 = _clean_mask(_segment_box_mask(pil, box))
    original_outlined = _draw_outline_on_image(rgb_full, mask01, thickness=int(outline_thickness))
    # Preferred path: vanishing-point rectification; fall back to contour quad.
    rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
    if rect_rgb is None or rect_mask01 is None:
        rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
        dbg = dbg if dbg is not None else dbg2
    straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
    chart = architectural_chart(
        rect_rgb,
        mode=str(chart_mode),
        edge1=int(canny_low),
        edge2=int(canny_high),
        hough_threshold=int(hough_threshold),
        min_line_length=int(min_line_length),
        max_line_gap=int(max_line_gap),
        thickness=int(line_thickness),
        add_grid=bool(add_grid),
    )
    mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
    debug = rgb_full.copy()
    x1i, y1i, x2i, y2i = map(int, box)
    cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
    return chart, straightened_outlined, original_outlined, debug, mask_rgb
# Gradio UI: one image input plus tuning controls that mirror the keyword
# arguments of straighten_and_chart, in the same positional order.
demo = gr.Interface(
    fn=straighten_and_chart,
    inputs=[
        gr.Image(type="numpy", label="Upload photo"),
        gr.Slider(0.1, 0.8, value=0.35, step=0.05, label="Box threshold (DINO)"),
        gr.Slider(0.05, 0.6, value=0.25, step=0.05, label="Text threshold (unused, kept for UI)"),
        gr.Slider(0.0, 0.15, value=0.03, step=0.01, label="BBox padding"),
        gr.Slider(1, 12, value=3, step=1, label="Outline thickness"),
        gr.Radio(["blueprint", "black_on_white"], value="blueprint", label="Architectural chart style"),
        gr.Slider(1, 200, value=60, step=1, label="Canny low threshold"),
        gr.Slider(1, 300, value=160, step=1, label="Canny high threshold"),
        gr.Slider(10, 200, value=80, step=1, label="Hough threshold"),
        gr.Slider(10, 400, value=40, step=5, label="Min line length"),
        gr.Slider(0, 50, value=8, step=1, label="Max line gap"),
        gr.Slider(1, 8, value=2, step=1, label="Chart line thickness"),
        gr.Checkbox(value=False, label="Add grid"),
    ],
    # Output order matches the tuple returned by straighten_and_chart.
    outputs=[
        gr.Image(type="numpy", label="Architectural chart (front façade corrected)"),
        gr.Image(type="numpy", label="Front façade (rectified) + outline"),
        gr.Image(type="numpy", label="Original + outline"),
        gr.Image(type="numpy", label="Debug (bbox)"),
        gr.Image(type="numpy", label="Building mask (SAM)"),
    ],
    title="Auto Building Front-Façade Rectifier + Architectural Chart",
    description=(
        "GroundingDINO + SAM: detect and segment a building, correct off-angle views toward a front façade "
        "using vanishing-point rectification (fallback to contour quad), then generate an architectural chart."
    ),
)

if __name__ == "__main__":
    demo.launch()