# Author: Arthur Niu
# Fix: post_process_grounded_object_detection version compatibility (commit cdcb644)
import gradio as gr
import numpy as np
import cv2
import torch
from PIL import Image
from transformers import (
AutoProcessor,
AutoModelForZeroShotObjectDetection,
SamModel,
SamProcessor,
)
# Run on GPU when available; both models are moved to this device below.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# -------------------------
# Models
# -------------------------
# Grounding DINO: open-vocabulary (text-prompted) object detection,
# used to locate the building bounding box. Weights download on first run.
DINO_ID = "IDEA-Research/grounding-dino-tiny"
dino_processor = AutoProcessor.from_pretrained(DINO_ID)
dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(DINO_ID).to(DEVICE)
# SAM: box-prompted segmentation, used to turn the detected bbox into a mask.
SAM_ID = "facebook/sam-vit-base"
sam_processor = SamProcessor.from_pretrained(SAM_ID)
sam_model = SamModel.from_pretrained(SAM_ID).to(DEVICE)
# -------------------------
# Mask + geometry helpers
# -------------------------
def _ensure_2d_mask(mask) -> np.ndarray:
if torch.is_tensor(mask):
mask = mask.detach().cpu().numpy()
mask = np.array(mask)
mask = np.squeeze(mask)
if mask.ndim == 3:
if mask.shape[0] <= 16 and mask.shape[1] > 32 and mask.shape[2] > 32:
mask = mask[0]
else:
mask = mask[:, :, 0]
mask = np.squeeze(mask)
if mask.ndim != 2:
raise ValueError(f"Mask is not 2D after normalization. Got shape: {mask.shape}")
return (mask > 0).astype(np.uint8)
def _clean_mask(mask01: np.ndarray) -> np.ndarray:
    """Morphologically tidy a binary mask: close small holes, then remove
    speckles. Returns a 0/1 uint8 mask."""
    binary = _ensure_2d_mask(mask01)
    work = np.ascontiguousarray((binary * 255).astype(np.uint8))
    se = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    work = cv2.morphologyEx(work, cv2.MORPH_CLOSE, se, iterations=1)
    work = cv2.morphologyEx(work, cv2.MORPH_OPEN, se, iterations=1)
    return (work > 0).astype(np.uint8)
def _order_points(pts4: np.ndarray) -> np.ndarray:
pts4 = np.asarray(pts4, dtype=np.float32)
s = pts4.sum(axis=1)
d = pts4[:, 0] - pts4[:, 1]
tl = pts4[np.argmin(s)]
br = pts4[np.argmax(s)]
tr = pts4[np.argmax(d)]
bl = pts4[np.argmin(d)]
return np.array([tl, tr, br, bl], dtype=np.float32)
def _warp_with_bounds(img: np.ndarray, H: np.ndarray, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR):
    """Warp `img` by homography `H`, translating and sizing the output
    canvas so no warped corner is cropped.

    Returns (warped_image, adjusted_homography), where the adjusted
    homography includes the translation applied.
    """
    rows, cols = img.shape[:2]
    src_corners = np.array(
        [[0, 0], [cols - 1, 0], [cols - 1, rows - 1], [0, rows - 1]],
        dtype=np.float32,
    ).reshape(-1, 1, 2)
    dst_corners = cv2.perspectiveTransform(src_corners, H).reshape(-1, 2)
    min_x, min_y = (float(v) for v in dst_corners.min(axis=0))
    max_x, max_y = (float(v) for v in dst_corners.max(axis=0))
    # Shift only when corners land at negative coordinates.
    tx = max(0.0, -min_x)
    ty = max(0.0, -min_y)
    out_size = (int(np.ceil(max_x + tx)) + 1, int(np.ceil(max_y + ty)) + 1)
    shift = np.array([[1.0, 0.0, tx], [0.0, 1.0, ty], [0.0, 0.0, 1.0]], dtype=np.float32)
    H_adj = (shift @ H).astype(np.float32)
    warped = cv2.warpPerspective(
        img,
        H_adj,
        out_size,
        flags=interp,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border_value,
    )
    return warped, H_adj
# -------------------------
# GroundingDINO post-process compatibility
# -------------------------
def _dino_post_process(outputs, inputs, pil_img: Image.Image):
    """
    Version-tolerant wrapper for `post_process_grounded_object_detection`.

    The transformers API has changed across releases:
      - newer versions accept `threshold` / `text_threshold` keywords,
      - some accept different kw names,
      - older ones accept no thresholds at all.
    Each signature is tried in turn; the first image's result dict
    (containing 'boxes' and 'scores') is returned. Thresholds are passed
    as 0 so the caller can filter scores itself, uniformly across versions.
    """
    target_sizes = [pil_img.size[::-1]]  # PIL size is (w, h); post-process wants (h, w)
    # Try most common signature (newer)
    try:
        return dino_processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            threshold=0.0,  # let us filter ourselves later
            text_threshold=0.0,
            target_sizes=target_sizes,
        )[0]
    except TypeError:
        pass
    # Try without thresholds (older)
    try:
        return dino_processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            target_sizes=target_sizes,
        )[0]
    except TypeError:
        pass
    # Last resort: positional args only
    try:
        return dino_processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            target_sizes,
        )[0]
    except Exception as e:
        raise RuntimeError(f"GroundingDINO post_process API mismatch: {e}")
# -------------------------
# Detection + segmentation
# -------------------------
def _detect_building_box(pil_img: Image.Image, box_threshold=0.35, text_threshold=0.25) -> np.ndarray:
    """Run Grounding DINO and return the best building bbox as xyxy float32.

    Score filtering happens here (not in the processor) for cross-version
    compatibility; `text_threshold` is accepted for API compatibility but
    is not used. Raises ValueError when no boxes come back at all.
    """
    # A single prompt string is the most broadly compatible form.
    prompt = "building. building facade. house. house facade. facade."
    # Some processor versions want a plain string, others a list of strings.
    try:
        inputs = dino_processor(images=pil_img, text=prompt, return_tensors="pt")
    except TypeError:
        inputs = dino_processor(images=pil_img, text=[prompt], return_tensors="pt")
    inputs = inputs.to(DEVICE)
    with torch.no_grad():
        outputs = dino_model(**inputs)
    results = _dino_post_process(outputs, inputs, pil_img)
    if "boxes" not in results or len(results["boxes"]) == 0:
        raise ValueError("No building detected. Try a closer crop or adjust thresholds.")
    boxes = results["boxes"].detach().cpu().numpy().astype(np.float32)
    scores = results["scores"].detach().cpu().numpy().astype(np.float32)
    passing = scores >= float(box_threshold)
    if not np.any(passing):
        # Nothing cleared the threshold: fall back to the single best candidate.
        return boxes[int(np.argmax(scores))]
    candidate_boxes = boxes[passing]
    candidate_scores = scores[passing]
    return candidate_boxes[int(np.argmax(candidate_scores))]
def _segment_box_mask(pil_img: Image.Image, box_xyxy: np.ndarray) -> np.ndarray:
    """Segment the region inside `box_xyxy` with SAM; returns a binary 2D mask."""
    x1, y1, x2, y2 = (float(v) for v in box_xyxy[:4])
    sam_inputs = sam_processor(images=pil_img, input_boxes=[[[x1, y1, x2, y2]]], return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        sam_outputs = sam_model(**sam_inputs, multimask_output=False)
    post = sam_processor.image_processor.post_process_masks(
        sam_outputs.pred_masks.cpu(),
        sam_inputs["original_sizes"].cpu(),
        sam_inputs["reshaped_input_sizes"].cpu(),
    )
    first = post[0]
    if torch.is_tensor(first):
        first = first.detach().cpu().numpy()
    first = np.array(first)
    if first.ndim >= 3:
        # Drop the leading mask/batch axis; _ensure_2d_mask handles the rest.
        first = first[0]
    return _ensure_2d_mask(first)
# -------------------------
# Outline helpers
# -------------------------
def _get_mask_contours(mask01: np.ndarray):
    """Return the external contours of a binary mask (OpenCV contour format)."""
    binary = _ensure_2d_mask(mask01)
    img255 = np.ascontiguousarray((binary * 255).astype(np.uint8))
    contours, _ = cv2.findContours(img255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours
def _draw_outline_on_image(rgb_img: np.ndarray, mask01: np.ndarray, thickness: int = 3) -> np.ndarray:
    """Return a copy of `rgb_img` with the mask boundary drawn in white."""
    result = rgb_img.copy()
    contours = _get_mask_contours(mask01)
    if contours:
        cv2.drawContours(result, contours, contourIdx=-1, color=(255, 255, 255), thickness=int(thickness))
    return result
# -------------------------
# Architectural chart (Option A)
# -------------------------
def architectural_chart(
    rgb_img: np.ndarray,
    mode: str = "blueprint",
    edge1: int = 60,
    edge2: int = 160,
    hough_threshold: int = 80,
    min_line_length: int = 40,
    max_line_gap: int = 8,
    thickness: int = 2,
    add_grid: bool = False,
) -> np.ndarray:
    """Render an architectural-style line drawing from an RGB image.

    Edges are extracted with Canny (`edge1`/`edge2` thresholds), straight
    segments with probabilistic Hough, and both are composited onto either a
    blueprint (white on blue) or black-on-white canvas. `add_grid` overlays
    a faint square grid. Returns an HxWx3 uint8 image sized like `rgb_img`.
    """
    gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
    # Bilateral filter smooths texture while keeping structural edges sharp.
    gray = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
    edges = cv2.Canny(gray, int(edge1), int(edge2))
    lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=int(hough_threshold),
        minLineLength=int(min_line_length),
        maxLineGap=int(max_line_gap),
    )
    h, w = edges.shape[:2]
    if mode == "blueprint":
        canvas = np.zeros((h, w, 3), dtype=np.uint8)
        canvas[:, :] = (20, 40, 90)  # deep blueprint blue
        line_color = (255, 255, 255)
        edge_color = (220, 220, 220)
        grid_color = (255, 255, 255)
    else:
        canvas = np.ones((h, w, 3), dtype=np.uint8) * 255
        line_color = (0, 0, 0)
        edge_color = (30, 30, 30)
        grid_color = (0, 0, 0)
    # Blend the raw edge map faintly under the Hough lines.
    edge_layer = np.zeros_like(canvas)
    ys, xs = np.where(edges > 0)
    edge_layer[ys, xs] = edge_color
    canvas = cv2.addWeighted(canvas, 1.0, edge_layer, 0.35, 0)
    if lines is not None:
        for x1, y1, x2, y2 in lines[:, 0]:
            cv2.line(canvas, (x1, y1), (x2, y2), line_color, int(thickness), cv2.LINE_AA)
    if add_grid:
        step = max(40, min(h, w) // 25)
        grid = canvas.copy()
        for x in range(0, w, step):
            cv2.line(grid, (x, 0), (x, h), grid_color, 1)
        for y in range(0, h, step):
            cv2.line(grid, (0, y), (w, y), grid_color, 1)
        # Bug fix: weights previously were (1.0, 0.08), summing to 1.08 while
        # `grid` already contains the canvas — so enabling the grid uniformly
        # brightened/saturated the whole chart. With (0.92, 0.08) non-grid
        # pixels are unchanged and the grid lines appear at 8% opacity.
        canvas = cv2.addWeighted(canvas, 0.92, grid, 0.08, 0)
    return canvas
# -------------------------
# Vanishing-point-based facade rectification
# -------------------------
def _create_lsd():
    """Build an OpenCV line-segment detector, tolerating API differences
    between builds (the refine-mode constant is missing in some)."""
    try:
        # LSD_REFINE_STD's numeric value is 1; fall back to it when absent.
        mode = getattr(cv2, "LSD_REFINE_STD", 1)
        return cv2.createLineSegmentDetector(mode)
    except Exception:
        return cv2.createLineSegmentDetector()
def _extract_lines_lsd(rgb_img: np.ndarray, mask01: np.ndarray, min_len: float = 40.0):
    """Detect LSD line segments whose midpoints fall inside the mask.

    Returns a list of homogeneous lines (a, b, c) normalized so that
    a^2 + b^2 = 1, making `line @ point` a signed pixel distance.
    """
    mask01 = _ensure_2d_mask(mask01)
    gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    segments = _create_lsd().detect(gray)[0]
    if segments is None:
        return []
    h, w = mask01.shape
    result = []
    for seg in segments.reshape(-1, 4):
        x1, y1, x2, y2 = map(float, seg)
        if float(np.hypot(x2 - x1, y2 - y1)) < min_len:
            continue  # too short to be a reliable structural line
        mid_x = int(round((x1 + x2) * 0.5))
        mid_y = int(round((y1 + y2) * 0.5))
        if not (0 <= mid_x < w and 0 <= mid_y < h):
            continue
        if mask01[mid_y, mid_x] == 0:
            continue  # midpoint outside the building mask
        line = np.cross(
            np.array([x1, y1, 1.0], dtype=np.float32),
            np.array([x2, y2, 1.0], dtype=np.float32),
        )
        scale = float(np.hypot(line[0], line[1]))
        if scale < 1e-6:
            continue  # degenerate (coincident endpoints)
        result.append((line / scale).astype(np.float32))
    return result
def _intersection_of_lines(l1, l2):
p = np.cross(l1, l2)
if abs(float(p[2])) < 1e-6:
return None
return (p / p[2]).astype(np.float32)
def _fit_vanishing_point_ransac(lines, iters=900, dist_thresh=3.0, min_inliers=10):
    """RANSAC-fit a vanishing point from unit-normalized homogeneous lines.

    Samples random line pairs, scores candidate intersections by
    point-to-line distance, then refines the best hypothesis by SVD on its
    inliers. Returns (vp, inlier_indices) or (None, None) on failure.
    """
    if len(lines) < 2:
        return None, None
    lines = [np.asarray(l, dtype=np.float32) for l in lines]
    rng = np.random.default_rng(0)  # fixed seed -> deterministic result
    best_vp = best_inliers = None
    best_count = 0
    for _ in range(iters):
        i, j = rng.integers(0, len(lines), size=2)
        if i == j:
            continue
        # Candidate vanishing point = intersection of the sampled pair
        # (inlined from _intersection_of_lines).
        cand = np.cross(lines[i], lines[j])
        if abs(float(cand[2])) < 1e-6:
            continue  # near-parallel pair
        cand = (cand / cand[2]).astype(np.float32)
        inliers = [k for k, l in enumerate(lines) if abs(float(l @ cand)) < dist_thresh]
        if len(inliers) > best_count:
            best_vp, best_inliers, best_count = cand, inliers, len(inliers)
    if best_vp is None or best_inliers is None or best_count < min_inliers:
        return None, None
    # Least-squares refinement: vp is the null vector of the inlier lines.
    A = np.stack([lines[k] for k in best_inliers], axis=0).astype(np.float32)
    _, _, Vt = np.linalg.svd(A)
    refined = Vt[-1, :]
    if abs(float(refined[2])) < 1e-6:
        return None, None
    return (refined / refined[2]).astype(np.float32), best_inliers
def _split_lines_by_orientation(lines):
horiz, vert = [], []
for l in lines:
a, b, _ = map(float, l)
dx, dy = b, -a
ang = (np.degrees(np.arctan2(dy, dx)) + 180.0) % 180.0
if ang < 25 or ang > 155:
horiz.append(l)
elif 65 < ang < 115:
vert.append(l)
return horiz, vert
def _affine_H_from_vanishing_line(l):
l = np.asarray(l, dtype=np.float32)
if abs(float(l[2])) < 1e-6:
return None
l1, l2, l3 = map(float, l)
return np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [l1 / l3, l2 / l3, 1.0]], dtype=np.float32)
def _dominant_directions_from_lines(lines):
    """Estimate mean unit direction vectors (u, v) for the horizontal and
    vertical line families; returns (None, None) when support is too thin."""
    if len(lines) < 6:
        return None, None
    horiz, vert = _split_lines_by_orientation(lines)

    def mean_dir(line_list, mode):
        # Average unit directions after flipping each into a canonical
        # half-plane (dx >= 0 for "h", dy >= 0 for "v") so that opposite
        # orientations do not cancel.
        vecs = []
        for l in line_list:
            a, b, _ = map(float, l)
            dx, dy = b, -a
            n = float(np.hypot(dx, dy))
            if n < 1e-6:
                continue
            dx, dy = dx / n, dy / n
            flip = dx < 0 if mode == "h" else dy < 0
            if flip:
                dx, dy = -dx, -dy
            vecs.append([dx, dy])
        if len(vecs) < 2:
            return None
        mean = np.array(vecs, dtype=np.float32).mean(axis=0)
        norm = float(np.hypot(mean[0], mean[1]))
        if norm < 1e-6:
            return None
        return (mean / norm).astype(np.float32)

    return mean_dir(horiz, "h"), mean_dir(vert, "v")
def _front_facade_rectify(rgb_img: np.ndarray, mask01: np.ndarray):
    """Two-stage rectification of the masked facade.

    Stage 1 (projective): find horizontal and vertical vanishing points via
    RANSAC on LSD segments inside the mask, then send their vanishing line
    to infinity (affine rectification).
    Stage 2 (affine): re-detect lines in the affinely-rectified image and
    apply the linear map that makes the dominant horizontal/vertical
    directions axis-aligned.

    Returns (rectified_rgb, rectified_mask01, debug_image); the first two
    are None whenever any stage lacks enough line support, leaving the
    caller to fall back to the contour-quad method.
    """
    mask01 = _clean_mask(mask01)
    debug = rgb_img.copy()
    debug = _draw_outline_on_image(debug, mask01, thickness=2)
    lines = _extract_lines_lsd(rgb_img, mask01, min_len=40.0)
    if len(lines) < 10:
        return None, None, debug
    horiz, vert = _split_lines_by_orientation(lines)
    if len(horiz) < 4 or len(vert) < 4:
        return None, None, debug
    vp_h, _ = _fit_vanishing_point_ransac(horiz, iters=900, dist_thresh=3.0, min_inliers=10)
    vp_v, _ = _fit_vanishing_point_ransac(vert, iters=900, dist_thresh=3.0, min_inliers=10)
    if vp_h is None or vp_v is None:
        return None, None, debug
    # The line joining the two vanishing points is the image of the facade
    # plane's line at infinity.
    van_line = np.cross(vp_h, vp_v).astype(np.float32)
    H_aff = _affine_H_from_vanishing_line(van_line)
    if H_aff is None:
        return None, None, debug
    # Warp image and mask with the projective correction (white background;
    # nearest-neighbour keeps the mask binary).
    bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
    aff_bgr, _ = _warp_with_bounds(bgr, H_aff, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
    aff_rgb = cv2.cvtColor(aff_bgr, cv2.COLOR_BGR2RGB)
    mask255 = (mask01 * 255).astype(np.uint8)
    aff_mask255, _ = _warp_with_bounds(mask255, H_aff, border_value=0, interp=cv2.INTER_NEAREST)
    aff_mask01 = (aff_mask255 > 0).astype(np.uint8)
    aff_lines = _extract_lines_lsd(aff_rgb, aff_mask01, min_len=40.0)
    u, v = _dominant_directions_from_lines(aff_lines)
    if u is None or v is None:
        return None, None, debug
    # Affine stage: invert the basis formed by the dominant directions so
    # they map onto the x/y axes.
    M2 = np.array([[u[0], v[0]], [u[1], v[1]]], dtype=np.float32)
    if abs(float(np.linalg.det(M2))) < 1e-6:
        return None, None, debug
    A2 = np.linalg.inv(M2).astype(np.float32)
    H_lin = np.array(
        [[A2[0, 0], A2[0, 1], 0.0], [A2[1, 0], A2[1, 1], 0.0], [0.0, 0.0, 1.0]],
        dtype=np.float32,
    )
    aff_bgr2 = cv2.cvtColor(aff_rgb, cv2.COLOR_RGB2BGR)
    rect_bgr, _ = _warp_with_bounds(aff_bgr2, H_lin, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
    rect_rgb = cv2.cvtColor(rect_bgr, cv2.COLOR_BGR2RGB)
    rect_mask255, _ = _warp_with_bounds(aff_mask255, H_lin, border_value=0, interp=cv2.INTER_NEAREST)
    rect_mask01 = (rect_mask255 > 0).astype(np.uint8)
    return rect_rgb, rect_mask01, debug
# -------------------------
# Fallback: full-building quad from mask contour
# -------------------------
def _fitline_to_abc(points_xy: np.ndarray):
    """Least-squares line through 2D points, returned as (a, b, c) with the
    line equation a*x + b*y = c."""
    reshaped = points_xy.astype(np.float32).reshape(-1, 1, 2)
    vx, vy, x0, y0 = cv2.fitLine(reshaped, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1)
    # The normal of direction (vx, vy) is (-vy, vx); c comes from the
    # fitted point (x0, y0) lying on the line.
    a, b = -vy, vx
    return float(a), float(b), float(a * x0 + b * y0)
def _intersect_lines_abc(l1, l2):
a1, b1, c1 = l1
a2, b2, c2 = l2
det = a1 * b2 - a2 * b1
if abs(det) < 1e-9:
return None
x = (c1 * b2 - c2 * b1) / det
y = (a1 * c2 - a2 * c1) / det
return np.array([x, y], dtype=np.float32)
def _expand_corners(corners: np.ndarray, scale: float = 0.06) -> np.ndarray:
corners = corners.astype(np.float32)
center = corners.mean(axis=0, keepdims=True)
return (center + (corners - center) * (1.0 + float(scale))).astype(np.float32)
def _mask_to_full_building_corners(mask01: np.ndarray, band_frac: float = 0.12, expand: float = 0.06) -> np.ndarray:
    """Estimate four building corners from the mask's outer contour.

    A straight line is fitted to the contour points in each boundary band
    (left/right/top/bottom, `band_frac` of the bbox extent); the lines are
    intersected, the quad is expanded by `expand`, and the corners are
    returned in TL/TR/BR/BL order.

    Raises ValueError when the mask is empty, too small, or a band has too
    few points for a stable fit.
    """
    mask01 = _clean_mask(mask01)
    h, w = mask01.shape
    mask255 = np.ascontiguousarray((mask01 * 255).astype(np.uint8))
    contours, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError("Mask is empty (no contours).")
    largest = max(contours, key=cv2.contourArea)
    if cv2.contourArea(largest) < 500:
        raise ValueError("Mask too small to infer corners.")
    pts = largest.reshape(-1, 2).astype(np.float32)
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)
    span_x = max(float(x_max - x_min), 1.0)
    span_y = max(float(y_max - y_min), 1.0)
    bf = float(band_frac)
    # Contour points falling into each boundary band of the bounding box.
    left_band = pts[pts[:, 0] <= x_min + bf * span_x]
    right_band = pts[pts[:, 0] >= x_max - bf * span_x]
    top_band = pts[pts[:, 1] <= y_min + bf * span_y]
    bottom_band = pts[pts[:, 1] >= y_max - bf * span_y]
    if min(len(left_band), len(right_band), len(top_band), len(bottom_band)) < 30:
        raise ValueError("Not enough contour points for stable corner fitting.")
    L = _fitline_to_abc(left_band)
    R = _fitline_to_abc(right_band)
    T = _fitline_to_abc(top_band)
    B = _fitline_to_abc(bottom_band)
    tl = _intersect_lines_abc(L, T)
    tr = _intersect_lines_abc(R, T)
    br = _intersect_lines_abc(R, B)
    bl = _intersect_lines_abc(L, B)
    if any(p is None for p in (tl, tr, br, bl)):
        raise ValueError("Failed to intersect boundary lines for corners.")
    quad = _expand_corners(np.array([tl, tr, br, bl], dtype=np.float32), scale=expand)
    return _order_points(quad)
def _rectify_by_quad(rgb_img: np.ndarray, mask01: np.ndarray, band_frac=0.12, expand=0.06):
    """Fallback rectification: perspective-warp the building quad to a rectangle.

    Returns (rectified_rgb, rectified_mask01, debug_image); the debug image
    is simply the input RGB.
    """
    tl, tr, br, bl = _mask_to_full_building_corners(mask01, band_frac=band_frac, expand=expand)
    # Output size: the longer of each opposing edge pair, floored at 200 px.
    width = max(int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl))), 200)
    height = max(int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl))), 200)
    src = np.array([tl, tr, br, bl], dtype=np.float32)
    dst = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype=np.float32)
    H = cv2.getPerspectiveTransform(src, dst).astype(np.float32)
    bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
    warped_bgr, _ = _warp_with_bounds(bgr, H, border_value=(255, 255, 255), interp=cv2.INTER_LINEAR)
    mask255 = (mask01 * 255).astype(np.uint8)
    warped_mask255, _ = _warp_with_bounds(mask255, H, border_value=0, interp=cv2.INTER_NEAREST)
    warped_rgb = cv2.cvtColor(warped_bgr, cv2.COLOR_BGR2RGB)
    warped_mask01 = (warped_mask255 > 0).astype(np.uint8)
    return warped_rgb, warped_mask01, rgb_img
# -------------------------
# Main pipeline
# -------------------------
def straighten_and_chart(
    image_np,
    box_threshold=0.35,
    text_threshold=0.25,  # kept for UI compatibility, not strictly used now
    padding=0.03,
    outline_thickness=3,
    chart_mode="blueprint",
    canny_low=60,
    canny_high=160,
    hough_threshold=80,
    min_line_length=40,
    max_line_gap=8,
    line_thickness=2,
    add_grid=False,
):
    """End-to-end pipeline behind the Gradio UI.

    Steps: detect the building bbox (Grounding DINO) -> pad the bbox ->
    segment it (SAM) -> clean the mask -> rectify the facade
    (vanishing-point method, falling back to contour-quad warp) -> render
    the architectural chart.

    Returns (chart, rectified+outline, original+outline, bbox debug image,
    3-channel mask image). Raises ValueError on missing input or when no
    building is detected.
    """
    if image_np is None:
        raise ValueError("Please upload an image.")
    pil = Image.fromarray(image_np).convert("RGB")
    W, H = pil.size
    rgb_full = np.array(pil)
    box = _detect_building_box(pil, box_threshold=box_threshold, text_threshold=text_threshold)
    x1, y1, x2, y2 = box
    # Pad the bbox by a fraction of its own size, clamped to the image bounds.
    pad_x = float(padding) * (x2 - x1)
    pad_y = float(padding) * (y2 - y1)
    x1 = max(0, x1 - pad_x)
    y1 = max(0, y1 - pad_y)
    x2 = min(W - 1, x2 + pad_x)
    y2 = min(H - 1, y2 + pad_y)
    box = np.array([x1, y1, x2, y2], dtype=np.float32)
    mask01 = _segment_box_mask(pil, box)
    mask01 = _clean_mask(mask01)
    original_outlined = _draw_outline_on_image(image_np, mask01, thickness=int(outline_thickness))
    # Preferred path: vanishing-point rectification; fallback: contour-quad warp.
    rect_rgb, rect_mask01, dbg = _front_facade_rectify(rgb_full, mask01)
    if rect_rgb is None or rect_mask01 is None:
        rect_rgb, rect_mask01, dbg2 = _rectify_by_quad(rgb_full, mask01, band_frac=0.12, expand=0.06)
        dbg = dbg if dbg is not None else dbg2
    straightened_outlined = _draw_outline_on_image(rect_rgb, rect_mask01, thickness=int(outline_thickness))
    chart = architectural_chart(
        rect_rgb,
        mode=str(chart_mode),
        edge1=int(canny_low),
        edge2=int(canny_high),
        hough_threshold=int(hough_threshold),
        min_line_length=int(min_line_length),
        max_line_gap=int(max_line_gap),
        thickness=int(line_thickness),
        add_grid=bool(add_grid),
    )
    # Mask as a 3-channel uint8 image so Gradio can display it.
    mask_rgb = np.stack([mask01 * 255] * 3, axis=-1).astype(np.uint8)
    debug = image_np.copy()
    x1i, y1i, x2i, y2i = map(int, box)
    cv2.rectangle(debug, (x1i, y1i), (x2i, y2i), (255, 255, 255), 2)
    return chart, straightened_outlined, original_outlined, debug, mask_rgb
# Gradio UI: the inputs mirror straighten_and_chart's parameters in order.
demo = gr.Interface(
    fn=straighten_and_chart,
    inputs=[
        gr.Image(type="numpy", label="Upload photo"),
        gr.Slider(0.1, 0.8, value=0.35, step=0.05, label="Box threshold (DINO)"),
        gr.Slider(0.05, 0.6, value=0.25, step=0.05, label="Text threshold (unused, kept for UI)"),
        gr.Slider(0.0, 0.15, value=0.03, step=0.01, label="BBox padding"),
        gr.Slider(1, 12, value=3, step=1, label="Outline thickness"),
        gr.Radio(["blueprint", "black_on_white"], value="blueprint", label="Architectural chart style"),
        gr.Slider(1, 200, value=60, step=1, label="Canny low threshold"),
        gr.Slider(1, 300, value=160, step=1, label="Canny high threshold"),
        gr.Slider(10, 200, value=80, step=1, label="Hough threshold"),
        gr.Slider(10, 400, value=40, step=5, label="Min line length"),
        gr.Slider(0, 50, value=8, step=1, label="Max line gap"),
        gr.Slider(1, 8, value=2, step=1, label="Chart line thickness"),
        gr.Checkbox(value=False, label="Add grid"),
    ],
    # Outputs correspond to straighten_and_chart's 5-tuple return, in order.
    outputs=[
        gr.Image(type="numpy", label="Architectural chart (front façade corrected)"),
        gr.Image(type="numpy", label="Front façade (rectified) + outline"),
        gr.Image(type="numpy", label="Original + outline"),
        gr.Image(type="numpy", label="Debug (bbox)"),
        gr.Image(type="numpy", label="Building mask (SAM)"),
    ],
    title="Auto Building Front-Façade Rectifier + Architectural Chart",
    description=(
        "GroundingDINO + SAM: detect and segment a building, correct off-angle views toward a front façade "
        "using vanishing-point rectification (fallback to contour quad), then generate an architectural chart."
    ),
)
if __name__ == "__main__":
    demo.launch()