# Source: HuggingFace Spaces upload by abhivsh — app.py (commit baf8731, verified)
"""
app.py β€” POWERGRID Document Auditor (single-file HuggingFace Spaces build)
=============================================================================
Single-file Gradio app for AI-powered engineering drawing comparison.
Designed for POWERGRID (765/400/132kV AIS/GIS vendor drawing audits).
Pipeline:
Stage 1 β€” Global Alignment : Phase Correlation + ORB/RANSAC homography
Stage 2 β€” Region Extraction : Content-aware morphology (no pretrained detector)
Stage 3 β€” Semantic Matching : ResNet50 embeddings + cosine similarity (position-agnostic)
Stage 4 β€” Siamese Comparison : ResNet50 patch embeddings + GradCAM heatmaps
Run locally:
python app.py
"""
# ══════════════════════════════════════════════════════════════════════
# IMPORTS
# ══════════════════════════════════════════════════════════════════════
import base64
import io
import logging
import os
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
import cv2
import fitz # PyMuPDF
import gradio as gr
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from scipy.optimize import linear_sum_assignment
from skimage.metrics import structural_similarity as ssim
from torchvision import models, transforms
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
# ── Logo: embed as base64 so it works on HuggingFace Spaces (no static folder) ──
def _load_logo_b64(filename: str = "logo_0.png") -> str:
"""Return a data-URI string for the logo, or empty string if file not found."""
logo_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
if os.path.exists(logo_path):
with open(logo_path, "rb") as f:
b64 = base64.b64encode(f.read()).decode("utf-8")
ext = filename.rsplit(".", 1)[-1].lower()
mime = "image/png" if ext == "png" else f"image/{ext}"
return f"data:{mime};base64,{b64}"
return ""
_LOGO_URI = _load_logo_b64("logo_0.png")
# ══════════════════════════════════════════════════════════════════════
# DATA STRUCTURES
# ══════════════════════════════════════════════════════════════════════
@dataclass
class Region:
"""A detected layout region (axis-aligned bounding box)."""
x: int
y: int
w: int
h: int
label: str = "text_block" # text_block | figure | table | margin
confidence: float = 1.0
@property
def bbox(self) -> Tuple[int, int, int, int]:
return (self.x, self.y, self.x + self.w, self.y + self.h)
@property
def area(self) -> int:
return self.w * self.h
@property
def center(self) -> Tuple[float, float]:
return (self.x + self.w / 2.0, self.y + self.h / 2.0)
def iou(self, other: "Region") -> float:
xa = max(self.x, other.x)
ya = max(self.y, other.y)
xb = min(self.x + self.w, other.x + other.w)
yb = min(self.y + self.h, other.y + other.h)
inter = max(0, xb - xa) * max(0, yb - ya)
union = self.area + other.area - inter
return inter / union if union > 0 else 0.0
@dataclass
class MatchedPair:
"""A matched region pair between old and new documents."""
region_old: Region
region_new: Region
match_score: float
position_cost: float
appearance_cost: float
pixel_diff: float = 0.0
ssim_score: float = 1.0
semantic_diff: float = 0.0
total_change: float = 0.0
heatmap: Optional[np.ndarray] = None
@dataclass
class ComparisonResult:
"""Full comparison result for one document page."""
matched_pairs: List[MatchedPair]
unmatched_old: List[Region]
unmatched_new: List[Region]
global_transform: Optional[np.ndarray]
total_change_pct: float
heatmap: np.ndarray
img_old_aligned: Optional[np.ndarray] = None # aligned OLD, same coord-space as NEW
def summary(self) -> str:
lines = [
f" Global Alignment : {'Applied' if self.global_transform is not None else 'Skipped'}",
f" Matched Pairs : {len(self.matched_pairs)}",
f" Deleted Regions : {len(self.unmatched_old)}",
f" Added Regions : {len(self.unmatched_new)}",
f" Total Change : {self.total_change_pct:.1f}%",
]
changed = [p for p in self.matched_pairs if p.total_change > 0.05]
if changed:
avg_chg = np.mean([p.total_change for p in changed])
lines.append(f" Avg Change (modified regions): {avg_chg:.2f}")
return "\n".join(lines)
# ══════════════════════════════════════════════════════════════════════
# STAGE 1 β€” GLOBAL ALIGNER
# ══════════════════════════════════════════════════════════════════════
class GlobalAligner:
def __init__(self, orb_features: int = 2000, ransac_threshold: float = 5.0):
self.orb_features = orb_features
self.ransac_threshold = ransac_threshold
def _phase_correlation_shift(self, gray1: np.ndarray, gray2: np.ndarray) -> Tuple[float, float]:
f1 = np.fft.fft2(gray1.astype(np.float32))
f2 = np.fft.fft2(gray2.astype(np.float32))
denom = np.abs(f1 * np.conj(f2)) + 1e-10
cross = (f1 * np.conj(f2)) / denom
corr = np.fft.ifft2(cross).real
y_shift, x_shift = np.unravel_index(np.argmax(corr), corr.shape)
h, w = gray1.shape
if y_shift > h // 2:
y_shift -= h
if x_shift > w // 2:
x_shift -= w
return float(-x_shift), float(-y_shift)
def _orb_affine(self, gray_old: np.ndarray, gray_new: np.ndarray) -> Optional[np.ndarray]:
orb = cv2.ORB_create(nfeatures=self.orb_features)
kp1, des1 = orb.detectAndCompute(gray_old, None)
kp2, des2 = orb.detectAndCompute(gray_new, None)
if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
return None
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)
if len(matches) < 10:
return None
top_k = min(200, len(matches))
# src = OLD keypoints, dst = NEW keypoints
# → M maps OLD→NEW (forward transform), which is what warpAffine expects:
# warpAffine(img_old, M, size) correctly places OLD pixels at their NEW positions.
# BUG that was here: src/dst were swapped (kp2/NEW as src, kp1/OLD as dst),
# giving M that mapped NEW→OLD. warpAffine then doubled the displacement
# instead of correcting it, causing the full-image red/cyan fringe seen in
# the Alignment Check view.
src_pts = np.float32([kp1[m.queryIdx].pt for m in matches[:top_k]]).reshape(-1, 1, 2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches[:top_k]]).reshape(-1, 1, 2)
M, mask = cv2.estimateAffinePartial2D(
src_pts, dst_pts, method=cv2.RANSAC,
ransacReprojThreshold=self.ransac_threshold,
)
return M
def align(self, img_old: np.ndarray, img_new: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
g_old = cv2.cvtColor(img_old, cv2.COLOR_RGB2GRAY)
g_new = cv2.cvtColor(img_new, cv2.COLOR_RGB2GRAY)
dx, dy = self._phase_correlation_shift(g_old, g_new)
M = self._orb_affine(g_old, g_new)
if M is None:
M = np.array([[1.0, 0.0, dx], [0.0, 1.0, dy]], dtype=np.float32)
h, w = img_old.shape[:2]
aligned = cv2.warpAffine(
img_old, M, (w, h),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(255, 255, 255),
)
return aligned, M
# ══════════════════════════════════════════════════════════════════════
# STAGE 2 β€” LAYOUT REGION EXTRACTOR
# ══════════════════════════════════════════════════════════════════════
class LayoutRegionExtractor:
def __init__(
self,
min_area_ratio: float = 0.0003,
max_area_ratio: float = 0.92,
dilation_kernel: Tuple[int, int] = (8, 2),
dilation_iters: int = 2,
merge_iou_threshold: float = 0.40,
):
self.min_area_ratio = min_area_ratio
self.max_area_ratio = max_area_ratio
self.dilation_kernel = dilation_kernel
self.dilation_iters = dilation_iters
self.merge_iou_threshold = merge_iou_threshold
def _binarise(self, gray: np.ndarray) -> np.ndarray:
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
_, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
return binary
def _dilate(self, binary: np.ndarray) -> np.ndarray:
k = cv2.getStructuringElement(cv2.MORPH_RECT, self.dilation_kernel)
dilated = cv2.dilate(binary, k, iterations=self.dilation_iters)
k_line = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
dilated = cv2.dilate(dilated, k_line, iterations=1)
k_vert = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
return cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, k_vert)
def _classify(self, patch_gray: np.ndarray, w: int, h: int) -> str:
aspect = w / max(h, 1)
_, binary = cv2.threshold(patch_gray, 127, 255, cv2.THRESH_BINARY_INV)
density = np.sum(binary > 0) / max(w * h, 1)
if density < 0.02:
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if len(contours) < 3:
return "margin"
if aspect > 4.0 and density > 0.06:
return "text_block"
if 0.4 < aspect < 2.8 and density < 0.25:
return "figure"
if density > 0.18 and aspect > 1.0:
return "table"
return "text_block"
def _merge_overlapping(self, regions: List[Region]) -> List[Region]:
changed = True
while changed:
changed = False
used = [False] * len(regions)
merged: List[Region] = []
for i, r1 in enumerate(regions):
if used[i]:
continue
x0, y0 = r1.x, r1.y
x1, y1 = r1.x + r1.w, r1.y + r1.h
for j, r2 in enumerate(regions):
if i == j or used[j]:
continue
expanded = Region(x0, y0, x1 - x0, y1 - y0)
if expanded.iou(r2) > self.merge_iou_threshold:
x0 = min(x0, r2.x)
y0 = min(y0, r2.y)
x1 = max(x1, r2.x + r2.w)
y1 = max(y1, r2.y + r2.h)
used[j] = True
changed = True
merged.append(Region(x0, y0, x1 - x0, y1 - y0))
used[i] = True
regions = merged
return regions
def extract(self, img_rgb: np.ndarray) -> List[Region]:
h, w = img_rgb.shape[:2]
page_area = h * w
gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
binary = self._binarise(gray)
dilated = self._dilate(binary)
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
candidates: List[Region] = []
for cnt in contours:
rx, ry, rw, rh = cv2.boundingRect(cnt)
area = rw * rh
if area < page_area * self.min_area_ratio:
continue
if area > page_area * self.max_area_ratio:
continue
patch = gray[ry: ry + rh, rx: rx + rw]
label = self._classify(patch, rw, rh)
if label == "margin":
continue
candidates.append(Region(rx, ry, rw, rh, label=label))
regions = self._merge_overlapping(candidates)
regions.sort(key=lambda r: (r.y // 50, r.x))
logger.info("LayoutExtractor: %d regions detected", len(regions))
return regions
# ══════════════════════════════════════════════════════════════════════
# STAGE 3 β€” HUNGARIAN REGION MATCHER
# ══════════════════════════════════════════════════════════════════════
# ══════════════════════════════════════════════════════════════════════
# STAGE 3 β€” SEMANTIC RETRIEVAL MATCHER (position-agnostic)
# ══════════════════════════════════════════════════════════════════════
class SemanticRetrievalMatcher:
"""
Replaces HungarianRegionMatcher for layout-shift-robust document comparison.
Strategy
--------
For every region in the NEW page:
1. Extract the patch image from the NEW document.
2. Encode it with the shared ResNet50 backbone β†’ 128-d L2-normalised vector.
Simultaneously encode every OLD region patch.
Build an (N_new Γ— N_old) cosine-similarity matrix.
Run scipy.linear_sum_assignment on βˆ’similarity (maximise similarity).
Accept a pair only when similarity β‰₯ min_similarity.
This means a region that has *moved* (different x/y) but is otherwise
identical will still get similarity β‰ˆ 1.0 and be matched correctly.
"""
def __init__(
self,
encoder: "_SiameseEncoder",
device: torch.device,
min_similarity: float = 0.50,
thumbnail_size: Tuple[int, int] = (224, 224),
):
self.encoder = encoder
self.device = device
self.min_similarity = min_similarity
self._transform = transforms.Compose([
transforms.Resize(thumbnail_size),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
])
# ------------------------------------------------------------------
def _patch(self, region: Region, img: np.ndarray) -> np.ndarray:
"""Crop a region from the image; returns white 64Γ—64 if empty."""
p = img[region.y: region.y + region.h, region.x: region.x + region.w]
if p.size == 0:
p = np.full((64, 64, 3), 255, dtype=np.uint8)
return p
def _embed(self, patches: List[np.ndarray]) -> torch.Tensor:
"""
Batch-encode a list of patches β†’ (N, 128) normalised embedding tensor.
Runs entirely on self.device with no gradient.
"""
tensors = [
self._transform(Image.fromarray(p)) for p in patches
]
batch = torch.stack(tensors).to(self.device) # (N, 3, 224, 224)
with torch.no_grad():
embeddings, _ = self.encoder.encode(batch) # (N, 128) β€” already L2-normed
return embeddings
# ------------------------------------------------------------------
def match(
self,
regions_old: List[Region],
regions_new: List[Region],
img_old: np.ndarray,
img_new: np.ndarray,
) -> Tuple[List[MatchedPair], List[Region], List[Region]]:
n_old, n_new = len(regions_old), len(regions_new)
if n_old == 0 or n_new == 0:
return [], list(regions_old), list(regions_new)
# ── 1. Encode both sets of patches ─────────────────────────
patches_old = [self._patch(r, img_old) for r in regions_old]
patches_new = [self._patch(r, img_new) for r in regions_new]
emb_old = self._embed(patches_old) # (n_old, 128)
emb_new = self._embed(patches_new) # (n_new, 128)
# ── 2. Cosine similarity matrix: rows=NEW, cols=OLD ─────────
# L2-normed β†’ dot product == cosine similarity
sim_mat = torch.mm(emb_new, emb_old.T).cpu().numpy() # (n_new, n_old)
# ── 3. Hungarian assignment on βˆ’similarity ──────────────────
row_ind, col_ind = linear_sum_assignment(-sim_mat) # maximise sim
matched_pairs: List[MatchedPair] = []
matched_old_idx: set = set()
matched_new_idx: set = set()
for ri, ci in zip(row_ind, col_ind):
sim = float(sim_mat[ri, ci])
if sim < self.min_similarity:
continue # below threshold β†’ treat as unmatched
matched_pairs.append(MatchedPair(
region_old = regions_old[ci],
region_new = regions_new[ri],
match_score = sim,
position_cost = 0.0, # no position penalty
appearance_cost= max(0.0, 1.0 - sim),
))
matched_old_idx.add(ci)
matched_new_idx.add(ri)
unmatched_old = [regions_old[i] for i in range(n_old) if i not in matched_old_idx]
unmatched_new = [regions_new[j] for j in range(n_new) if j not in matched_new_idx]
logger.info(
"SemanticRetrieval: %d matched | %d deleted | %d added "
"(min_sim=%.2f)",
len(matched_pairs), len(unmatched_old), len(unmatched_new),
self.min_similarity,
)
return matched_pairs, unmatched_old, unmatched_new
# ══════════════════════════════════════════════════════════════════════
# STAGE 4 β€” SIAMESE PATCH COMPARATOR
# ══════════════════════════════════════════════════════════════════════
class _SiameseEncoder(nn.Module):
def __init__(self):
super().__init__()
resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
self.features = nn.Sequential(*list(resnet.children())[:-2])
self.pool = resnet.avgpool
self.embed = nn.Sequential(
nn.Linear(2048, 512), nn.ReLU(),
nn.Linear(512, 128),
)
def encode(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
feat_map = self.features(x)
pooled = torch.flatten(self.pool(feat_map), 1)
embed = F.normalize(self.embed(pooled), p=2, dim=1)
return embed, feat_map
def forward(self, x1: torch.Tensor, x2: torch.Tensor):
e1, f1 = self.encode(x1)
e2, f2 = self.encode(x2)
return e1, e2, f1, f2
class SiamesePatchComparator:
def __init__(
self,
device: Optional[torch.device] = None,
encoder: Optional[_SiameseEncoder] = None, # ← shared encoder
):
if device is None:
if torch.cuda.is_available():
device = torch.device("cuda")
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
device = torch.device("mps")
else:
device = torch.device("cpu")
self.device = device
# Reuse the encoder from SemanticRetrievalMatcher if provided β€”
# avoids loading ResNet50 weights a second time.
if encoder is not None:
self.model = encoder
logger.info("SiamesePatchComparator: reusing shared encoder on %s", device)
else:
self.model = _SiameseEncoder().to(device).eval()
logger.info("SiamesePatchComparator: created new encoder on %s", device)
self.transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
def _to_tensor(self, patch_rgb: np.ndarray) -> torch.Tensor:
return self.transform(Image.fromarray(patch_rgb)).unsqueeze(0).to(self.device)
def _grad_cam(
self,
patch_old: np.ndarray,
patch_new: np.ndarray,
target_hw: Tuple[int, int],
) -> np.ndarray:
"""
Grad-CAM spatial change map β€” WHERE inside the patch the embedding differs.
Method
------
1. Forward patch_old (no grad) β†’ embedding e_old.
2. Forward patch_new (with grad, hooks on last conv block) β†’ embedding e_new
+ feature map F captured by forward hook.
3. Scalar loss = pairwise_distance(e_old.detach(), e_new).
4. loss.backward() β†’ βˆ‚loss/βˆ‚F captured by backward hook.
5. Grad-CAM = ReLU( mean_c(βˆ‚loss/βˆ‚F) Β· F ) β†’ (7Γ—7) β†’ upsample to patch size.
Pixels with HIGH activation changed the embedding the most β†’ the actual edits.
Returns
-------
np.ndarray shape (target_hw[0], target_hw[1]), float32, values in [0, 1].
"""
t_old = self._to_tensor(patch_old)
t_new = self._to_tensor(patch_new)
feat_store: Dict[str, torch.Tensor] = {}
grad_store: Dict[str, torch.Tensor] = {}
# Hook on the last convolutional block of the shared ResNet50
last_block = self.model.features[-1]
def _fwd(module, inp, out):
feat_store["f"] = out # (1, 2048, 7, 7)
def _bwd(module, grad_in, grad_out):
grad_store["g"] = grad_out[0] # (1, 2048, 7, 7)
h_fwd = last_block.register_forward_hook(_fwd)
h_bwd = last_block.register_full_backward_hook(_bwd)
try:
# e_old β€” no gradient needed, just a reference point
with torch.no_grad():
e_old, _ = self.model.encode(t_old)
# e_new β€” gradient flows through this path only
with torch.enable_grad():
self.model.zero_grad()
e_new, _ = self.model.encode(t_new)
dist = F.pairwise_distance(e_old.detach(), e_new)
dist.backward()
finally:
h_fwd.remove()
h_bwd.remove()
if "f" not in feat_store or "g" not in grad_store:
return np.zeros(target_hw, dtype=np.float32)
# Grad-CAM: global-average-pool the gradients, weight feature maps
weights = grad_store["g"].mean(dim=[2, 3], keepdim=True) # (1,2048,1,1)
cam = (weights * feat_store["f"]).sum(dim=1).squeeze() # (7, 7)
cam = F.relu(cam)
cam_max = cam.max()
if cam_max < 1e-8:
return np.zeros(target_hw, dtype=np.float32)
cam = (cam / cam_max).detach().cpu().numpy() # (7, 7) in [0, 1]
# Upsample to original patch resolution
h, w = target_hw
cam_up = cv2.resize(cam, (w, h), interpolation=cv2.INTER_LINEAR)
return np.clip(cam_up, 0.0, 1.0).astype(np.float32)
def compare(self, patch_old: np.ndarray, patch_new: np.ndarray) -> Dict[str, object]:
g_old = cv2.cvtColor(patch_old, cv2.COLOR_RGB2GRAY).astype(np.float32)
g_new = cv2.cvtColor(patch_new, cv2.COLOR_RGB2GRAY).astype(np.float32)
diff_map = np.abs(g_old - g_new)
# Threshold of 8 (was 15) β€” CAD drawings have fine lines and small
# text; a dimension change may shift only a handful of pixels slightly.
changed_pixels = np.sum(diff_map > 8.0)
pixel_diff = float(changed_pixels) / max(g_old.size, 1)
ssim_val = float(ssim(g_old, g_new, data_range=255.0))
ssim_cost = max(0.0, 1.0 - ssim_val)
with torch.no_grad():
t1 = self._to_tensor(patch_old)
t2 = self._to_tensor(patch_new)
e1, e2, _, _ = self.model(t1, t2)
l2_dist = float(F.pairwise_distance(e1, e2).item())
semantic_diff = min(l2_dist / 10.0, 1.0)
total = 0.30 * pixel_diff + 0.40 * ssim_cost + 0.30 * semantic_diff
# Grad-CAM: spatial map showing WHERE inside this patch the change is
h, w = patch_new.shape[:2]
grad_cam_map = self._grad_cam(patch_old, patch_new, (h, w))
return {
"pixel_diff": pixel_diff,
"ssim_score": ssim_val,
"semantic_diff":semantic_diff,
"total_change": min(float(total), 1.0),
"grad_cam": grad_cam_map, # (h, w) float32 [0,1] ← new
}
def compare_pair(self, pair: MatchedPair, img_old: np.ndarray, img_new: np.ndarray) -> MatchedPair:
ro, rn = pair.region_old, pair.region_new
patch_old = img_old[ro.y: ro.y + ro.h, ro.x: ro.x + ro.w]
patch_new = img_new[rn.y: rn.y + rn.h, rn.x: rn.x + rn.w]
if patch_old.size == 0 or patch_new.size == 0:
return pair
target_h = max(patch_old.shape[0], patch_new.shape[0])
target_w = max(patch_old.shape[1], patch_new.shape[1])
def _pad_white(patch: np.ndarray, th: int, tw: int) -> np.ndarray:
canvas = np.full((th, tw, patch.shape[2]), 255, dtype=np.uint8)
canvas[:patch.shape[0], :patch.shape[1]] = patch
return canvas
patch_old_p = _pad_white(patch_old, target_h, target_w)
patch_new_p = _pad_white(patch_new, target_h, target_w)
metrics = self.compare(patch_old_p, patch_new_p)
pair.pixel_diff = metrics["pixel_diff"]
pair.ssim_score = metrics["ssim_score"]
pair.semantic_diff = metrics["semantic_diff"]
pair.total_change = metrics["total_change"]
# Store Grad-CAM map (sized to the new patch, not the padded version)
raw_cam = metrics.get("grad_cam")
if raw_cam is not None:
rn = pair.region_new
pair.heatmap = cv2.resize(raw_cam, (rn.w, rn.h),
interpolation=cv2.INTER_LINEAR)
return pair
# ══════════════════════════════════════════════════════════════════════
# HEATMAP GENERATOR
# ══════════════════════════════════════════════════════════════════════
class HeatmapGenerator:
_COLOUR_CHANGED = np.array([255, 220, 0], dtype=np.float32)
_COLOUR_MAJOR = np.array([230, 30, 30], dtype=np.float32)
_COLOUR_ADDED = np.array([ 30, 200, 60], dtype=np.float32)
_COLOUR_DELETED = np.array([200, 30, 200], dtype=np.float32)
@staticmethod
def _project_region(r: Region, M_inv: Optional[np.ndarray], w: int, h: int) -> Tuple[int, int, int, int]:
if M_inv is not None:
corners = np.array([
[r.x, r.y ],
[r.x + r.w, r.y ],
[r.x, r.y + r.h],
[r.x + r.w, r.y + r.h],
], dtype=np.float32)
ones = np.ones((4, 1), dtype=np.float32)
projected = (M_inv @ np.hstack([corners, ones]).T).T
x0 = int(np.clip(projected[:, 0].min(), 0, w - 1))
y0 = int(np.clip(projected[:, 1].min(), 0, h - 1))
x1 = int(np.clip(projected[:, 0].max(), 0, w - 1))
y1 = int(np.clip(projected[:, 1].max(), 0, h - 1))
else:
x0, y0, x1, y1 = r.x, r.y, r.x + r.w, r.y + r.h
return x0, y0, x1, y1
@staticmethod
def generate(
img_shape: Tuple[int, int],
matched_pairs: List[MatchedPair],
unmatched_old: List[Region],
unmatched_new: List[Region],
smooth_kernel: int = 11,
M_inv: Optional[np.ndarray] = None,
change_threshold: float = 0.05,
) -> np.ndarray:
h, w = img_shape
layers = np.zeros((h, w, 4), dtype=np.float32)
for pair in matched_pairs:
chg = float(pair.total_change)
if chg <= change_threshold:
continue
r = pair.region_new
ch = 0 if chg <= 0.40 else 1 # yellow channel vs red channel
if pair.heatmap is not None:
# ── Grad-CAM path: paint only the pixels that actually changed ──
# pair.heatmap is (r.h, r.w) float32 in [0,1]
# Scale by total_change so brighter = more changed
cam = pair.heatmap
if cam.shape != (r.h, r.w):
cam = cv2.resize(cam, (r.w, r.h),
interpolation=cv2.INTER_LINEAR)
intensity = np.clip(cam * chg, 0.0, 1.0)
layers[r.y:r.y + r.h, r.x:r.x + r.w, ch] = np.maximum(
layers[r.y:r.y + r.h, r.x:r.x + r.w, ch], intensity)
else:
# ── Fallback: flood the whole bounding box (no Grad-CAM available) ──
layers[r.y:r.y + r.h, r.x:r.x + r.w, ch] = np.maximum(
layers[r.y:r.y + r.h, r.x:r.x + r.w, ch], chg)
# Channels 2 (added/green) and 3 (deleted/purple) intentionally omitted.
# The Heatmap tab shows only modification intensity via yellow gradient.
# Added / deleted regions are visible in the Match Canvas thermal view.
if smooth_kernel > 0:
ksize = smooth_kernel if smooth_kernel % 2 == 1 else smooth_kernel + 1
for ch in range(4):
if layers[:, :, ch].max() > 0:
layers[:, :, ch] = cv2.GaussianBlur(layers[:, :, ch], (ksize, ksize), sigmaX=3.0)
for ch in range(2):
if layers[:, :, ch].max() > 0:
layers[:, :, ch] = np.power(layers[:, :, ch], 0.6)
return layers
# ══════════════════════════════════════════════════════════════════════
# VISUALISER
# ══════════════════════════════════════════════════════════════════════
class Visualiser:
COLOURS: Dict[str, Tuple[int, int, int]] = {
"text_block": (30, 144, 255),
"figure": (255, 165, 0),
"table": (50, 205, 50),
"unknown": (180, 180, 180),
"deleted": (220, 50, 50),
"added": (50, 220, 80),
"changed": (255, 200, 0),
"unchanged": (80, 220, 80),
}
@staticmethod
def draw_alignment_check(
img_old_aligned: np.ndarray,
img_new: np.ndarray,
) -> np.ndarray:
"""
Red-cyan overlay β€” Alignment Check tab.
How to read it
--------------
OLD aligned β†’ Red channel
NEW doc β†’ Green + Blue channels (= Cyan)
β€’ Lines present at the SAME pixel in both β†’ gray (Rβ‰ˆGβ‰ˆB)
β€’ Lines in OLD that drifted β†’ RED fringe
β€’ Lines in NEW that drifted β†’ CYAN fringe
β€’ White background on both β†’ white
If the overlay looks mostly gray/white with no fringes, alignment is
good. Red/cyan colour fringes indicate residual misalignment.
"""
g_old = cv2.cvtColor(img_old_aligned, cv2.COLOR_RGB2GRAY)
g_new = cv2.cvtColor(img_new, cv2.COLOR_RGB2GRAY)
# Stack: R = old, G = new, B = new β†’ cyan for new, red for old
return np.stack([g_old, g_new, g_new], axis=2)
# ══════════════════════════════════════════════════════════════════════
# HELPER β€” unmatched region visual-change check
# ══════════════════════════════════════════════════════════════════════
# Mean-abs pixel diff below this threshold β†’ region is visually identical
# despite not being paired by the matcher; excluded from the change score.
_UNMATCHED_PIXEL_THR: float = 12.0 # on 0–255 grayscale scale
def _region_mean_diff(
r: Region,
img_a: np.ndarray,
candidates: List[Region],
img_b: np.ndarray,
thumb: int = 64,
) -> float:
"""
Return the *minimum* mean-abs-diff (grayscale, 0–255) between region `r`
in `img_a` and the spatially closest candidate region in `img_b`.
"Spatially closest" = smallest Euclidean centre-to-centre distance.
If there are no candidates, return 255.0 (maximally different).
"""
if not candidates:
return 255.0
pa = img_a[r.y: r.y + r.h, r.x: r.x + r.w]
if pa.size == 0:
return 255.0
ga = cv2.resize(cv2.cvtColor(pa, cv2.COLOR_RGB2GRAY), (thumb, thumb)).astype(np.float32)
cx_r, cy_r = r.center
# Sort candidates by centre distance β€” only check the 3 nearest for speed
candidates_sorted = sorted(
candidates,
key=lambda c: (c.center[0] - cx_r) ** 2 + (c.center[1] - cy_r) ** 2,
)[:3]
best = 255.0
for cand in candidates_sorted:
pb = img_b[cand.y: cand.y + cand.h, cand.x: cand.x + cand.w]
if pb.size == 0:
continue
gb = cv2.resize(
cv2.cvtColor(pb, cv2.COLOR_RGB2GRAY), (thumb, thumb)
).astype(np.float32)
diff = float(np.mean(np.abs(ga - gb)))
if diff < best:
best = diff
return best
def _is_truly_changed(
r: Region,
candidates: List[Region],
img_a: np.ndarray,
img_b: np.ndarray,
) -> bool:
"""
Return True only when region `r` (from img_a) is visually *different*
from its nearest spatial counterpart in candidates (from img_b).
Used to distinguish "matcher failed to pair identical regions" from
"content was genuinely added or deleted."
"""
return _region_mean_diff(r, img_a, candidates, img_b) >= _UNMATCHED_PIXEL_THR
# ══════════════════════════════════════════════════════════════════════
# MAIN PIPELINE
# ══════════════════════════════════════════════════════════════════════
class CoarseToFinePipeline:
def __init__(
self,
align: bool = True,
device: Optional[torch.device] = None,
region_extractor: Optional[LayoutRegionExtractor] = None,
matcher=None, # SemanticRetrievalMatcher or HungarianRegionMatcher
comparator: Optional[SiamesePatchComparator] = None,
min_similarity: float = 0.50, # used only when matcher=None (auto-build)
):
# Resolve device once here so both sub-modules share it
if device is None:
if torch.cuda.is_available():
device = torch.device("cuda")
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
device = torch.device("mps")
else:
device = torch.device("cpu")
self._device = device
self.aligner = GlobalAligner() if align else None
self.extractor = region_extractor or LayoutRegionExtractor()
if matcher is not None:
# Caller supplied a custom matcher β€” use it as-is
self.matcher = matcher
self.comparator = comparator or SiamesePatchComparator(device=device)
else:
# ── Default path: shared ResNet50 encoder ──────────────
# Build the encoder once; hand the same object to both
# SemanticRetrievalMatcher (Stage 3) and SiamesePatchComparator (Stage 4).
# This halves model-load time and GPU/CPU RAM usage.
shared_encoder = _SiameseEncoder().to(device).eval()
logger.info("Pipeline: shared ResNet50 encoder on %s", device)
self.matcher = SemanticRetrievalMatcher(
encoder = shared_encoder,
device = device,
min_similarity = min_similarity,
)
self.comparator = comparator or SiamesePatchComparator(
device = device,
encoder = shared_encoder, # ← reuse, no second load
)
def compare(self, img_old: np.ndarray, img_new: np.ndarray, verbose: bool = True) -> ComparisonResult:
timings: Dict[str, float] = {}
t = time.time()
M = None
if self.aligner is not None:
img_old_aligned, M = self.aligner.align(img_old, img_new)
else:
img_old_aligned = img_old.copy()
timings["alignment"] = time.time() - t
t = time.time()
regions_old = self.extractor.extract(img_old_aligned)
regions_new = self.extractor.extract(img_new)
timings["extraction"] = time.time() - t
t = time.time()
matched, unmatched_old, unmatched_new = self.matcher.match(
regions_old, regions_new, img_old_aligned, img_new)
timings["matching"] = time.time() - t
t = time.time()
for i, pair in enumerate(matched):
matched[i] = self.comparator.compare_pair(pair, img_old_aligned, img_new)
timings["siamese"] = time.time() - t
if verbose:
logger.info("Timings β†’ align: %.2fs | extract: %.2fs | match: %.2fs | siamese: %.2fs",
timings["alignment"], timings["extraction"],
timings["matching"], timings["siamese"])
h, w = img_new.shape[:2]
# After the ORB fix, M maps OLD→NEW (forward).
# _project_region uses this matrix to map unmatched OLD region corners
# into NEW-page coordinates for heatmap rendering β€” so pass M directly,
# NOT its inverse. (Previously M mapped NEW→OLD so the inverse was
# needed; now the roles are corrected.)
heatmap = HeatmapGenerator.generate(
(h, w), matched, unmatched_old, unmatched_new,
M_inv=M, change_threshold=0.05,
)
# ── Change % calculation (two-part fix) ────────────────────────
#
# Part A β€” pixel-diff gate on unmatched regions
# Unmatched regions are NOT automatically "added/deleted".
# They may simply be regions the matcher failed to pair even though
# the content is identical. We compare each unmatched region to its
# nearest spatial counterpart in the opposite list; only those whose
# pixel diff exceeds _UNMATCHED_PIXEL_THR are counted as truly changed.
#
# Part B β€” normalise against full page area (not just detected regions)
# Using content_area as denominator collapses to 100% when all regions
# are unmatched. Using h*w gives a stable baseline independent of
# how many regions were detected or matched.
truly_deleted = [
r for r in unmatched_old
if _is_truly_changed(r, unmatched_new, img_old_aligned, img_new)
]
truly_added = [
r for r in unmatched_new
if _is_truly_changed(r, unmatched_old, img_new, img_old_aligned)
]
page_area = max(h * w, 1) # Part B denominator
changed_area = sum(p.region_new.area for p in matched if p.total_change > 0.05)
deleted_area = sum(r.area for r in truly_deleted)
added_area = sum(r.area for r in truly_added)
total_pct = min(100.0 * (changed_area + added_area + deleted_area) / page_area, 100.0)
return ComparisonResult(
matched_pairs=matched,
unmatched_old=unmatched_old,
unmatched_new=unmatched_new,
global_transform=M,
total_change_pct=total_pct,
heatmap=heatmap,
img_old_aligned=img_old_aligned, # ← stored for thermal overlay
)
# ══════════════════════════════════════════════════════════════════════
# GRADIO APP β€” HELPERS
# ══════════════════════════════════════════════════════════════════════
def _pick_device() -> torch.device:
if torch.cuda.is_available():
return torch.device("cuda")
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
return torch.device("mps")
return torch.device("cpu")
def _page_to_rgb(doc: fitz.Document, idx: int, dpi: int) -> np.ndarray:
pix = doc[idx].get_pixmap(dpi=dpi)
return np.frombuffer(pix.samples, np.uint8).reshape(pix.height, pix.width, 3)
def _build_summary(
page_results: list,
aligned: bool,
skip_old_p1: bool = False,
skip_new_p1: bool = False,
) -> str:
total_changes = [pr["total_change_pct"] for pr in page_results]
lines = [
"╔══════════════════════════════════════════════════════════╗",
"β•‘ POWERGRID DOCUMENT AUDIT β€” CHANGE REPORT β•‘",
"β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•",
"",
f" Total Pages Analysed : {len(page_results)}",
f" Overall Avg Change : {np.mean(total_changes):.2f}%",
"",
"──────────────────────────────────────────────────────────",
" PAGE-WISE CHANGE SUMMARY",
"──────────────────────────────────────────────────────────",
]
for pr in page_results:
pct = pr["total_change_pct"]
status = "βœ… MINIMAL" if pct < 5 else "⚠️ MODERATE" if pct < 20 else "πŸ”΄ SIGNIFICANT"
lines.append(f" Page {pr['page']:>3} β”‚ {pct:>5.1f}% β”‚ {status}")
significant = [pr["page"] for pr in page_results if pr["total_change_pct"] > 20]
if significant:
lines += [
"",
f" ⚠️ Pages with significant changes (>20%): {significant}",
]
return "\n".join(lines)
def _build_output_pdf(page_results: list, output_path: str,
                      process_dpi: int = 400) -> str:
    """
    Write every page overlay into a lossless PDF at full pixel depth.

    PyMuPDF measures pages in points (1 pt = 1/72 inch).  Each overlay was
    rendered at ``process_dpi``, so sizing the page as

        page_pts = image_px * 72 / process_dpi

    maps one image pixel onto exactly one page unit.  insert_image() then
    places the PNG 1:1 onto the page rect, so no downsampling or upsampling
    occurs — full pixel depth is preserved.
    """
    px_to_pt = 72.0 / process_dpi
    doc_out = fitz.open()
    for pr in page_results:
        overlay = pr["align_check"].convert("RGB")
        width_px, height_px = overlay.size
        # Convert pixel dimensions to PDF points at the process DPI.
        page = doc_out.new_page(width=width_px * px_to_pt,
                                height=height_px * px_to_pt)
        png_buf = io.BytesIO()
        overlay.save(png_buf, format="PNG", optimize=True)  # lossless — no JPEG ringing
        page.insert_image(page.rect, stream=png_buf.getvalue())
    doc_out.save(output_path, deflate=True, garbage=4, clean=True)
    doc_out.close()
    return output_path
# ══════════════════════════════════════════════════════════════════════
# SPECIFIC-REGION HELPER β€” semantic global search in OLD document
# ══════════════════════════════════════════════════════════════════════
# ImageNet normalisation reused from SemanticRetrievalMatcher
# ImageNet preprocessing reused from SemanticRetrievalMatcher: resize any RGB
# patch to the 224x224 input ResNet50 expects, convert to a tensor, then apply
# the standard ImageNet channel mean/std so patch embeddings live in the same
# distribution the encoder was trained on.
_REGION_TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
def _embed_patch(patch_rgb: np.ndarray,
                 encoder: "_SiameseEncoder",
                 device: torch.device) -> torch.Tensor:
    """Encode one RGB numpy patch into a (128,) L2-normalised embedding."""
    batch = _REGION_TRANSFORM(Image.fromarray(patch_rgb)).unsqueeze(0).to(device)
    with torch.no_grad():
        embedding, _ = encoder.encode(batch)  # (1, 128)
    return embedding[0]  # (128,)
def _find_matching_region_in_old(
    new_crop: np.ndarray,
    img_old_full: np.ndarray,
    encoder: "_SiameseEncoder",
    device: torch.device,
) -> Tuple[int, int, int, int]:
    """
    Locate where ``new_crop`` (user-selected patch from the NEW page) sits
    inside ``img_old_full`` (the complete OLD page).

    Method — semantic sliding-window search
    ----------------------------------------
    1. Encode ``new_crop`` with the shared ResNet50 encoder → 128-d embedding.
    2. Slide a window across ``img_old_full`` at multiple scales (±30 % of the
       crop size, preserving aspect ratio).  Step = 50 % of window size, and a
       final flush-to-edge window is always included so the far right/bottom
       strip of the page is never left uncovered.
    3. Encode every window patch and compute cosine similarity with the
       query embedding.  Pick the window with the highest similarity.
    4. Clamp the winning box to page bounds and return it.

    Why semantic (not pixel-level):
      • ResNet50 encodes *what* is in a region (shapes, structure, symbols),
        not pixel values, so two revisions of the same table/panel/diagram
        have near-identical embeddings even if text values changed slightly.
      • Scale-invariant: the multi-scale sweep handles resized content.
      • Position-invariant: the full-page sweep finds content anywhere.

    Returns (x1, y1, x2, y2) in ``img_old_full`` pixel space.
    """
    crop_h, crop_w = new_crop.shape[:2]
    old_h, old_w = img_old_full.shape[:2]

    def _clamp_box(bx: int, by: int, bw: int, bh: int
                   ) -> Tuple[int, int, int, int]:
        # Keep the box inside the OLD page with at least 1 px extent.
        bx = max(0, min(bx, old_w - 1))
        by = max(0, min(by, old_h - 1))
        bw = max(1, min(bw, old_w - bx))
        bh = max(1, min(bh, old_h - by))
        return bx, by, bx + bw, by + bh

    def _starts(limit: int, win: int, step: int) -> List[int]:
        # All window origins along one axis.
        # FIX: the original range(0, limit - win + 1, step) could stop short
        # of the far edge whenever (limit - win) is not a multiple of step,
        # leaving a strip never covered by any window.  Always append the
        # flush-to-edge origin so full coverage is guaranteed.
        origins = list(range(0, limit - win + 1, step))
        last = limit - win
        if origins and origins[-1] != last:
            origins.append(last)
        return origins

    # ── Step 1: encode the query (NEW crop) ──────────────────────────
    q_emb = _embed_patch(new_crop, encoder, device)  # (128,)

    # ── Step 2: build candidate windows across scales ────────────────
    # Scales relative to the crop's own size.  For a 400-DPI page a crop
    # that is, say, 600 px wide is tested at 420 … 780 px widths.
    scales = (0.70, 0.85, 1.00, 1.15, 1.30)
    candidates: List[Tuple[int, int, int, int]] = []  # (x, y, w, h)
    for sc in scales:
        win_w = max(32, int(crop_w * sc))
        win_h = max(32, int(crop_h * sc))
        if win_w > old_w or win_h > old_h:
            continue  # this scale does not fit on the OLD page
        step_x = max(1, win_w // 2)
        step_y = max(1, win_h // 2)
        for y in _starts(old_h, win_h, step_y):
            for x in _starts(old_w, win_w, step_x):
                candidates.append((x, y, win_w, win_h))
    logger.info(
        "_find_matching_region_in_old: %d candidate windows across %d scales",
        len(candidates), len(scales),
    )
    if not candidates:
        # Entire crop is bigger than the old page — return full page
        logger.warning("_find_matching_region_in_old: crop >= page; returning full page box.")
        return _clamp_box(0, 0, old_w, old_h)

    # ── Step 3: batch-encode all windows, find best cosine similarity ─
    # Process in mini-batches of 64 to avoid OOM on large pages.
    BATCH = 64
    best_sim: float = -1.0
    best_box: Tuple[int, int, int, int] = candidates[0]
    for start in range(0, len(candidates), BATCH):
        batch_cands = candidates[start: start + BATCH]
        patches = [img_old_full[cy: cy + ch, cx: cx + cw]
                   for (cx, cy, cw, ch) in batch_cands]
        tensors = [_REGION_TRANSFORM(Image.fromarray(p)) for p in patches]
        batch_t = torch.stack(tensors).to(device)  # (B, 3, 224, 224)
        with torch.no_grad():
            embs, _ = encoder.encode(batch_t)      # (B, 128)
        # q_emb and embs are already L2-normalised, so the plain dot
        # product equals cosine similarity.
        sims = (embs @ q_emb).cpu().numpy()        # (B,)
        idx = int(sims.argmax())
        if sims[idx] > best_sim:
            best_sim = float(sims[idx])
            best_box = batch_cands[idx]
    bx, by, bw, bh = best_box
    x1o, y1o, x2o, y2o = _clamp_box(bx, by, bw, bh)
    logger.info(
        "_find_matching_region_in_old: best cosine=%.4f OLD box (%d,%d)–(%d,%d)",
        best_sim, x1o, y1o, x2o, y2o,
    )
    return (x1o, y1o, x2o, y2o)
# ══════════════════════════════════════════════════════════════════════
# CORE PROCESSING
# ══════════════════════════════════════════════════════════════════════
def run_comparison(
    pdf_old_file,
    pdf_new_file,
    skip_old_p1: bool,
    skip_new_p1: bool,
    enable_align: bool,
    compare_mode: str,
    page_old_input: int,
    page_new_input: int,
    page_compare_mode: str = "Full Page",
    region_coords=None,
    display_dpi: int = 72,
    progress=gr.Progress(),
):
    """Run the full coarse-to-fine audit over the two uploaded PDFs.

    Parameters
    ----------
    pdf_old_file, pdf_new_file : Gradio file objects (``.name`` is the path)
        for the Previous Revision and New Document PDFs.
    skip_old_p1, skip_new_p1 : skip the cover page of each document
        (Full Document mode only).
    enable_align : run global alignment inside the pipeline.
    compare_mode : "Full Document" or "Specific Pages".
    page_old_input, page_new_input : 1-based page numbers (Specific Pages mode).
    page_compare_mode : "Full Page" or "Specific Region" (Specific Pages mode).
    region_coords : {x, y, width, height} dict in preview pixels, or None
        for the full page.
    display_dpi : DPI the region preview was rendered at — used to rescale
        the drag box to process-DPI pixels.
    progress : Gradio progress callback.

    Returns
    -------
    (page_results, summary_text, output_pdf_path, 1, page_slider_update)

    Raises
    ------
    gr.Error : when either PDF is missing.
    """
    dpi = 400  # process DPI — higher = more pixel depth in overlay output
    if pdf_old_file is None or pdf_new_file is None:
        raise gr.Error("Please upload both Previous Revision and New Document PDF files.")
    device = _pick_device()
    pipeline = CoarseToFinePipeline(
        align = enable_align,
        device = device,
        min_similarity = 0.50,
    )
    progress(0, desc="Opening PDF files …")
    doc_old = fitz.open(pdf_old_file.name)
    doc_new = fitz.open(pdf_new_file.name)
    # ── Build the list of (old_page_idx, new_page_idx) pairs to process ──
    if compare_mode == "Specific Pages":
        # Convert 1-based user input to 0-based index
        old_idx_req = int(page_old_input or 1) - 1
        new_idx_req = int(page_new_input or 1) - 1
        # Clamp to valid range
        old_idx_req = max(0, min(old_idx_req, len(doc_old) - 1))
        new_idx_req = max(0, min(new_idx_req, len(doc_new) - 1))
        page_pairs = [(old_idx_req, new_idx_req)]
    else:
        # Full document mode — pair pages positionally after optional
        # cover-page skips, processing only the shared page count.
        old_start = 1 if skip_old_p1 else 0
        new_start = 1 if skip_new_p1 else 0
        old_pages = len(doc_old) - old_start
        new_pages = len(doc_new) - new_start
        num_pages = min(old_pages, new_pages)
        if skip_old_p1:
            gr.Info("Skipping cover page of Previous Revision.")
        if skip_new_p1:
            gr.Info("Skipping cover page of New Document.")
        if old_pages != new_pages:
            gr.Warning(
                f"Page count mismatch: Previous Revision={old_pages}, New Document={new_pages}. "
                f"Processing {num_pages} pages."
            )
        page_pairs = [(pg + old_start, pg + new_start) for pg in range(num_pages)]
    num_pairs = len(page_pairs)
    page_results = []
    for i, (old_idx, new_idx) in enumerate(page_pairs):
        progress(i / num_pairs, desc=f"Processing page {i + 1} / {num_pairs} …")
        img_old = _page_to_rgb(doc_old, old_idx, dpi)
        img_new = _page_to_rgb(doc_new, new_idx, dpi)
        # ── Normalise page dimensions before any cropping ─────────────
        # Both pages must have the same native DPI dimensions so that the
        # same pixel box selects the same physical region in both docs.
        if img_old.shape != img_new.shape:
            img_old = cv2.resize(img_old, (img_new.shape[1], img_new.shape[0]))
        # ── Specific-region crop ──────────────────────────────────────
        # The user drew a box on the NEW-doc preview (at display_dpi).
        # Steps:
        #   1. Scale the drag coordinates from preview pixels → process DPI pixels.
        #   2. Crop the same pixel box from BOTH old and new pages.
        #      (Engineering drawings keep the same layout between revisions —
        #       same position = same physical area.  The ORB aligner inside
        #       pipeline.compare() handles any sub-pixel drift between the two.)
        #   3. Replace img_old / img_new with the two crops → overlay is
        #      scoped to only the selected region.
        if (compare_mode == "Specific Pages"
                and page_compare_mode == "Specific Region"
                and region_coords):
            rx = region_coords.get("x", 0)
            ry = region_coords.get("y", 0)
            rw = region_coords.get("width", img_new.shape[1])
            rh = region_coords.get("height", img_new.shape[0])
            sf = dpi / float(display_dpi or 72)  # preview px → process DPI px
            x1 = max(0, int(rx * sf))
            y1 = max(0, int(ry * sf))
            x2 = min(img_new.shape[1], int((rx + rw) * sf))
            y2 = min(img_new.shape[0], int((ry + rh) * sf))
            logger.info(
                "Specific Region: display_dpi=%d sf=%.3f "
                "preview-box (%d,%d,%d,%d) → process-px (%d,%d)–(%d,%d)",
                display_dpi, sf, rx, ry, rw, rh, x1, y1, x2, y2,
            )
            if x2 > x1 and y2 > y1:
                # Step 1 — crop the selected region from NEW page
                img_new_crop = img_new[y1:y2, x1:x2]
                # Step 2 — semantic global search: encode the NEW crop with
                #          ResNet50, slide windows over the FULL OLD page at
                #          multiple scales, pick the highest cosine-similarity
                #          window as the matching region in OLD.
                ox1, oy1, ox2, oy2 = _find_matching_region_in_old(
                    new_crop     = img_new_crop,
                    img_old_full = img_old,
                    encoder      = pipeline.matcher.encoder,
                    device       = device,
                )
                logger.info(
                    "Specific Region: NEW (%d,%d)–(%d,%d) → OLD (%d,%d)–(%d,%d)",
                    x1, y1, x2, y2, ox1, oy1, ox2, oy2,
                )
                # Step 3 — crop OLD at found location; resize to exactly match
                #          NEW crop so pipeline.compare() gets equal-size inputs
                img_old_raw = img_old[oy1:oy2, ox1:ox2]
                nh, nw = img_new_crop.shape[:2]
                if img_old_raw.shape[:2] != (nh, nw):
                    img_old_crop = cv2.resize(
                        img_old_raw, (nw, nh), interpolation=cv2.INTER_LINEAR,
                    )
                else:
                    img_old_crop = img_old_raw
                # Step 4 — overlay is scoped to the selected region only
                img_old = img_old_crop
                img_new = img_new_crop
        result = pipeline.compare(img_old, img_new)
        # Fall back to the raw OLD page if the pipeline produced no aligned copy.
        old_aligned_for_check = (
            result.img_old_aligned if result.img_old_aligned is not None
            else img_old
        )
        align_check = Visualiser.draw_alignment_check(old_aligned_for_check, img_new)
        page_results.append({
            "page": i + 1,
            "result": result,
            "align_check": Image.fromarray(align_check),
            "original": Image.fromarray(img_old),
            "revised": Image.fromarray(img_new),
            "total_change_pct": result.total_change_pct,
        })
    doc_old.close()
    doc_new.close()
    progress(0.95, desc="Generating report PDF …")
    output_pdf = _build_output_pdf(page_results, "ctf_output.pdf", process_dpi=dpi)
    summary = _build_summary(page_results, enable_align, skip_old_p1, skip_new_p1)
    progress(1.0, desc="Done!")
    return page_results, summary, output_pdf, 1, gr.update(maximum=num_pairs, value=1)
def get_page_view(page_num, pages_data, view_mode, rotation: int = 0,
                  nudge_x: int = 0, nudge_y: int = 0, nudge_scale: float = 1.0):
    """Return the requested page image for the given view mode.

    Applies the manual nudge (Auto-Overlay view only) and any display
    rotation.  Returns None when no results exist or the image is missing.
    """
    if not pages_data:
        return None
    index = min(max(int(page_num) - 1, 0), len(pages_data) - 1)
    record = pages_data[index]
    view_key = {
        "Auto-Overlay": "align_check",
        "Previous Revision": "original",
        "New Document": "revised",
    }.get(view_mode, "align_check")
    img = record.get(view_key)
    if img is None:
        return None
    # Manual fine-tune only affects the Auto-Overlay composite.
    scale = float(nudge_scale) if nudge_scale else 1.0
    has_nudge = nudge_x != 0 or nudge_y != 0 or abs(scale - 1.0) > 1e-4
    if view_mode == "Auto-Overlay" and has_nudge:
        img = _apply_nudge_overlay(record, nudge_x, nudge_y, scale)
    if img is not None and rotation % 360 != 0:
        img = img.rotate(rotation, expand=True)
    return img
def _apply_nudge_overlay(pr: dict, dx: int, dy: int, scale: float = 1.0) -> Image.Image:
    """
    Re-render the Auto-Overlay with the NEW (red) layer shifted by (dx, dy)
    pixels and scaled by ``scale`` about the image centre.

    The cyan channel (Previous Revision, aligned) stays fixed; only the
    red layer (New Doc) receives the translate + scale.
    """
    if pr.get("align_check") is None:
        return None
    # Recover the two source layers from the stored composite:
    # R channel carries the old (cyan) layer, G carries the new (red) layer.
    composite = np.array(pr["align_check"].convert("RGB"))
    layer_old = composite[:, :, 0]  # cyan source (Previous Revision)
    layer_new = composite[:, :, 1]  # red source (New Doc)
    h, w = layer_old.shape
    cx, cy = w / 2.0, h / 2.0
    scale = float(scale) if scale and scale > 0 else 1.0
    # Combined 2×3 affine: scale about the centre, then translate —
    # M = T(cx,cy) · S(scale) · T(-cx,-cy) · T(dx,dy)
    affine = np.float32([
        [scale, 0, dx + cx * (1 - scale)],
        [0, scale, dy + cy * (1 - scale)],
    ])
    layer_shifted = cv2.warpAffine(
        layer_new, affine, (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=255,
    )
    # Stack: R=old (cyan base), G=B=shifted new → red/cyan fringes where they differ.
    merged = np.stack([layer_old, layer_shifted, layer_shifted], axis=2)
    return Image.fromarray(merged.astype(np.uint8))
# ══════════════════════════════════════════════════════════════════════
# GRADIO UI
# ══════════════════════════════════════════════════════════════════════
# Load the UI stylesheet that ships alongside app.py (file must exist).
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "styles.css"),
          encoding="utf-8") as _css_f:
    _CSS = _css_f.read()
# Shared Gradio theme: blue primary on a neutral gray base, Inter typeface.
_THEME = gr.themes.Base(
    primary_hue=gr.themes.colors.blue,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
)
# Gradio 6+: theme & css are passed to launch(), not Blocks()
with gr.Blocks(title="POWERGRID Document Auditor") as demo:
# ── Header ─────────────────────────────────────────────────────────
_logo_tag = (
f'<img src="{_LOGO_URI}" alt="POWERGRID Logo" />'
if _LOGO_URI else
'<span style="font-size:1.4rem;font-weight:900;color:#003087;letter-spacing:-1px;">PG</span>'
)
gr.HTML(f"""
<div id="app-header">
<div id="app-header-inner">
<div id="app-header-logo">{_logo_tag}</div>
<div id="app-header-text">
<h1>POWERGRID Document Auditor</h1>
<p>Power Grid Corporation of India Limited &nbsp;&mdash;&nbsp; AI-Powered Document Comparison</p>
</div>
</div>
</div>
""")
# (JS injected via demo.load below β€” see end of Blocks context)
# ── Shared State ───────────────────────────────────────────────────
pages_state = gr.State(value=None)
rotation_state = gr.State(value=0)
nudge_x_state = gr.State(value=0) # manual X offset for red (New Doc) layer
nudge_y_state = gr.State(value=0) # manual Y offset for red (New Doc) layer
nudge_scale_state = gr.State(value=1.0) # manual scale for red (New Doc) layer
region_coords_state = gr.State(value=None) # {x,y,width,height} in preview px; None = full page
display_dpi_state = gr.State(value=72) # DPI used when rendering the region preview
# ── Layout ─────────────────────────────────────────────────────────
with gr.Row(equal_height=False):
# ════════════════════════════════════════════════════════════
# LEFT PANE β€” inputs
# ════════════════════════════════════════════════════════════
with gr.Column(scale=1, min_width=290, elem_id="left-panel"):
gr.HTML('<div class="section-label">Documents</div>')
pdf_old = gr.File(label="Previous Revision PDF", file_types=[".pdf"])
skip_old_p1 = gr.Checkbox(
value=False,
label="Skip cover page of Previous Revision",
interactive=False,
elem_classes=["skip-cb"],
)
gr.HTML('<div class="section-divider"></div>')
pdf_new = gr.File(label="Revised (New) PDF", file_types=[".pdf"])
skip_new_p1 = gr.Checkbox(
value=False,
label="Skip cover page of New Revision",
interactive=False,
elem_classes=["skip-cb"],
)
gr.HTML('<div class="section-divider"></div>')
gr.HTML('<div class="section-label">Options</div>')
enable_align = gr.Checkbox(
value=True,
label="Auto-align pages before comparing",
info="Enable if documents were scanned or printed at different positions or scales.",
)
gr.HTML('<div class="section-divider"></div>')
gr.HTML('<div class="section-label">Compare Mode</div>')
compare_mode = gr.Radio(
choices=["Full Document", "Specific Pages"],
value="Full Document",
label="Compare Mode",
show_label=False,
elem_id="compare-mode-radio",
)
with gr.Row(visible=False, elem_id="specific-pages-row") as specific_pages_row:
page_old_input = gr.Number(
value=1, minimum=1, step=1, precision=0,
label="Prev. Revision Page",
elem_id="page-old-input",
)
page_new_input = gr.Number(
value=1, minimum=1, step=1, precision=0,
label="New Document Page",
elem_id="page-new-input",
)
# Sub-options shown when "Specific Pages" is selected
with gr.Column(visible=False, elem_id="region-col") as region_col:
page_compare_mode = gr.Radio(
choices=["Full Page", "Specific Region"],
value="Full Page",
label="Page Comparison",
show_label=True,
elem_id="page-compare-mode-radio",
)
# Region selection β€” gr.Image shows the page; canvas overlay captures bbox drag
with gr.Column(visible=False, elem_id="region-preview-col") as region_preview_col:
region_readout = gr.HTML(
value='<div id="region-readout">No region selected β€” full page will be used</div>',
elem_id="region-readout",
)
# gr.Image: Python pushes the page PIL image here (always visible in DOM)
region_page_img = gr.Image(
value=None,
label=None,
show_label=False,
type="pil",
interactive=False,
elem_id="region-page-img",
height=380,
)
# Coords textbox: JS→Python bridge — visible but CSS-collapsed to 0px
region_coords_txt = gr.Textbox(
value="",
label=None,
show_label=False,
elem_id="region-coords-txt",
elem_classes=["region-coords-hidden"],
)
clear_region_btn = gr.Button(
"βœ• Clear Region",
size="sm",
elem_id="clear-region-btn",
)
gr.HTML('<div class="section-divider"></div>')
run_btn = gr.Button("Run Audit", variant="primary", size="lg", elem_id="run-btn")
gr.HTML('<div class="section-divider"></div>')
gr.HTML('<div class="section-label">Fine-Tune Alignment</div>')
# ── MacBook-style arrow key D-pad ─────────────────────────
# Row 1: [ β–² ] (centred, half-row)
with gr.Row(equal_height=True, elem_id="nudge-row-top"):
gr.HTML('<div style="flex:1;min-width:0"></div>')
nudge_up_btn = gr.Button("β–²", elem_id="nudge-up", min_width=44, scale=0)
gr.HTML('<div style="flex:1;min-width:0"></div>')
# Row 2: [ β—€ ][ β–Ό ][ β–Ά ]
with gr.Row(equal_height=True, elem_id="nudge-row-bot"):
nudge_left_btn = gr.Button("β—€", elem_id="nudge-left", min_width=44, scale=0)
nudge_down_btn = gr.Button("β–Ό", elem_id="nudge-down", min_width=44, scale=0)
nudge_right_btn = gr.Button("β–Ά", elem_id="nudge-right", min_width=44, scale=0)
gr.HTML('<p class="nudge-tip">Tip: Run Audit resets alignment</p>')
nudge_step = gr.Number(
value=1, minimum=1, maximum=100, step=1,
label="Step Size (px)", precision=0,
elem_id="nudge-step",
)
nudge_scale = gr.Number(
value=1.0, minimum=0.10, maximum=10.0, step=0.005,
label="Scale β€” Red Layer", precision=3,
elem_id="nudge-scale",
)
nudge_readout = gr.HTML(
value='<div id="nudge-readout-wrap">x&nbsp;=&nbsp;+0 px<br>y&nbsp;=&nbsp;+0 px<br>scale&nbsp;=&nbsp;1.000</div>',
elem_id="nudge-readout",
)
# ════════════════════════════════════════════════════════════
# RIGHT PANE β€” results
# ════════════════════════════════════════════════════════════
with gr.Column(scale=3, elem_id="right-panel"):
# ── Toolbar: view tabs | rotation buttons ──
with gr.Row(elem_id="toolbar-row"):
view_mode = gr.Radio(
choices=["Auto-Overlay", "Previous Revision", "New Document"],
value="Auto-Overlay",
label="View",
show_label=False,
scale=1,
min_width=320,
elem_id="view-mode-radio",
)
gr.HTML('<div class="toolbar-sep"></div>')
rot_left_btn = gr.Button("β†Ί", scale=0, elem_id="rot-left", min_width=38)
rot_right_btn = gr.Button("↻", scale=0, elem_id="rot-right", min_width=38)
# ── Page slider (shown only after audit runs) ──────────────
page_slider = gr.Slider(
minimum=1, maximum=1, value=1, step=1,
label="Page",
visible=False,
elem_id="page-slider",
)
# Hidden state
page_num_state = gr.State(value=1)
total_pages_state = gr.State(value=1)
result_image = gr.Image(
label="",
type="pil",
height=720,
interactive=False,
elem_id="result-image",
)
gr.HTML("""
<div id="legend-bar" style="display:flex; gap:18px; flex-wrap:wrap; align-items:center;">
<span style="font-size:0.60rem;font-weight:700;color:#8BA0BB;text-transform:uppercase;
letter-spacing:0.11em;white-space:nowrap;flex-shrink:0;">Overlay Legend</span>
<span style="display:flex;align-items:center;gap:6px;">
<span style="width:12px;height:12px;border-radius:3px;background:#7A7A7A;
flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
<span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
<b style="color:#0F1C2E;font-weight:600;">Gray</b>&nbsp;&mdash;&nbsp;Unchanged</span>
</span>
<span style="display:flex;align-items:center;gap:6px;">
<span style="width:12px;height:12px;border-radius:3px;background:#00BBBB;
flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
<span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
<b style="color:#007070;font-weight:600;">Cyan</b>&nbsp;&mdash;&nbsp;Previous Revision</span>
</span>
<span style="display:flex;align-items:center;gap:6px;">
<span style="width:12px;height:12px;border-radius:3px;background:#EE3333;
flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
<span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
<b style="color:#BB0000;font-weight:600;">Red</b>&nbsp;&mdash;&nbsp;New Document</span>
</span>
</div>
""")
with gr.Row():
pdf_output = gr.File(label="⬇️ Download Result PDF")
# ══════════════════════════════════════════════════════════════════
# EVENT HANDLERS
# ══════════════════════════════════════════════════════════════════
def on_pdf_upload(pdf_file):
    """Enable the skip-cover-page checkbox only for multi-page uploads.

    Returns a gr.update: disabled + unchecked when no file is present or the
    PDF has a single page; enabled otherwise.  If the PDF cannot be inspected
    the checkbox is enabled so the user is never locked out by a probe failure.
    """
    if pdf_file is None:
        return gr.update(interactive=False, value=False)
    try:
        doc = fitz.open(pdf_file.name)
        try:
            n = len(doc)
        finally:
            # FIX: close even if len() raises — the original leaked the
            # document handle on the exception path.
            doc.close()
        if n <= 1:
            return gr.update(interactive=False, value=False)
        return gr.update(interactive=True)
    except Exception:
        return gr.update(interactive=True)
def _readout_html(nx: int, ny: int, ns: float) -> str:
return (
f'<div id="nudge-readout-wrap">'
f'x&nbsp;=&nbsp;{nx:+d}&thinsp;px<br>'
f'y&nbsp;=&nbsp;{ny:+d}&thinsp;px<br>'
f'scale&nbsp;=&nbsp;{ns:.3f}'
f'</div>'
)
def on_compare_mode_change(mode):
    """Show the page-number inputs and region sub-options only in Specific Pages mode."""
    visible = mode == "Specific Pages"
    return gr.update(visible=visible), gr.update(visible=visible)
def on_load_preview(pdf_new_f, pg_new):
    """Render the requested New Doc page at 72 DPI for the inline region picker.

    Returns (pil_img, coords_txt_reset, coords_state_reset, display_dpi, readout).
    Raises gr.Error when the New Document PDF has not been uploaded yet.
    """
    if pdf_new_f is None:
        raise gr.Error("Please upload the New Document PDF first.")
    preview_dpi = 72
    doc = fitz.open(pdf_new_f.name)
    try:
        idx = max(0, int(pg_new or 1) - 1)
        idx = min(idx, len(doc) - 1)
        arr = _page_to_rgb(doc, idx, preview_dpi)
    finally:
        # FIX: close even if rendering raises — the original leaked the
        # document handle on the exception path.
        doc.close()
    pil_img = Image.fromarray(arr)
    readout = '<div id="region-readout">Draw a box on the image below to select a region</div>'
    # returns: pil_img, coords_txt_reset, coords_state_reset, display_dpi, readout
    return pil_img, "", None, preview_dpi, readout
def on_region_coords_change(coords_txt):
    """Parse the 'x,y,w,h' string written by the JS canvas overlay.

    Returns (coords_dict_or_None, readout_html).  Tiny (<5 px) or malformed
    boxes are rejected so a stray click never selects a sliver.
    """
    if not coords_txt or not coords_txt.strip():
        return None, '<div id="region-readout">No region selected — full page will be used</div>'
    try:
        parts = [float(v) for v in coords_txt.strip().split(",")]
        x, y, w, h = (int(p) for p in parts[:4])
        if w < 5 or h < 5:
            return None, '<div id="region-readout">Region too small — drag a larger area</div>'
        readout = (
            f'<div id="region-readout">'
            f'✅ Region: ({x}, {y}) → ({x+w}, {y+h})'
            f'&nbsp;|&nbsp;{w}&times;{h} px'
            f'</div>'
        )
        return {"x": x, "y": y, "width": w, "height": h}, readout
    except Exception:
        return None, '<div id="region-readout">Invalid region — drag again</div>'
def on_clear_region():
    """Reset the region selection: blank coords textbox, clear state, reset readout.

    The page image stays in place — the JS overlay clears itself on its next poll.
    """
    readout = '<div id="region-readout">Draw a box on the image below to select a region</div>'
    return "", None, readout
def on_run(pdf_old_f, pdf_new_f, skip_old, skip_new, align,
           cmp_mode, pg_old, pg_new,
           pg_cmp_mode, region_coords, display_dpi,
           progress=gr.Progress()):
    """Run the audit, then reset every view control (rotation, nudge, page)."""
    page_results, _summary, pdf_path, _, _ = run_comparison(
        pdf_old_f, pdf_new_f, skip_old, skip_new, align,
        cmp_mode, pg_old, pg_new,
        pg_cmp_mode, region_coords, display_dpi,
        progress
    )
    n_pages = len(page_results)
    first_img = page_results[0]["align_check"] if page_results else None
    slider_update = gr.update(visible=n_pages > 1, minimum=1,
                              maximum=n_pages, value=1)
    return (
        page_results,
        0,              # rotation reset
        0,              # nudge_x reset
        0,              # nudge_y reset
        1.0,            # nudge_scale reset
        1,              # page_num reset to 1
        n_pages,        # total_pages
        pdf_path,
        first_img,
        _readout_html(0, 0, 1.0),
        slider_update,
    )
def on_view_change(view, pg, total, pages_data, rot, nx, ny, ns):
    """Switching view tabs re-renders the current page and resets rotation to 0."""
    img = get_page_view(pg, pages_data, view, 0, nx, ny, ns)
    return img, 0
def on_rot_left(pg, total, pages_data, view, rot, nx, ny, ns):
    """Rotate the view 90° counter-clockwise (PIL rotation is CCW-positive)."""
    new_rot = (rot + 90) % 360
    img = get_page_view(pg, pages_data, view, new_rot, nx, ny, ns)
    return img, new_rot
def on_rot_right(pg, total, pages_data, view, rot, nx, ny, ns):
    """Rotate the view 90° clockwise (negative angle in PIL's CCW convention)."""
    new_rot = (rot - 90) % 360
    img = get_page_view(pg, pages_data, view, new_rot, nx, ny, ns)
    return img, new_rot
def on_pg_slide(pg, total, pages_data, view, rot, nx, ny, ns):
    """Page slider moved — render the selected page with the current view settings."""
    page = int(pg or 1)
    return get_page_view(page, pages_data, view, rot, nx, ny, ns), page
# ── Nudge handlers (arrow buttons + scale change) ─────────────────
def on_nudge(direction: str, pg, total, pages_data, view, rot, nx, ny, ns, step):
    """Shift the red (New Doc) overlay layer by one step in the given direction."""
    delta = int(step or 1)
    moves = {
        "left": (-delta, 0),
        "right": (delta, 0),
        "up": (0, -delta),
        "down": (0, delta),
    }
    # Unknown direction → no movement (matches the original if/elif chain).
    mx, my = moves.get(direction, (0, 0))
    nx += mx
    ny += my
    img = get_page_view(pg, pages_data, view, rot, nx, ny, ns)
    return img, nx, ny, ns, _readout_html(nx, ny, ns)
def on_scale_change(sc, pg, total, pages_data, view, rot, nx, ny):
    """Scale input changed — re-render with the new red-layer scale factor."""
    ns = float(sc) if sc else 1.0
    img = get_page_view(pg, pages_data, view, rot, nx, ny, ns)
    return img, ns, _readout_html(nx, ny, ns)
pdf_old.change(fn=on_pdf_upload, inputs=[pdf_old], outputs=[skip_old_p1])
pdf_new.change(fn=on_pdf_upload, inputs=[pdf_new], outputs=[skip_new_p1])
# Show / hide specific-page inputs and region sub-options when compare mode changes
compare_mode.change(
fn=on_compare_mode_change,
inputs=[compare_mode],
outputs=[specific_pages_row, region_col],
)
# Show / hide the region preview block AND auto-load the preview
# _preview_outputs: [region_page_img, region_coords_txt, coords_state, display_dpi_state, region_readout]
_preview_outputs = [region_page_img, region_coords_txt,
region_coords_state, display_dpi_state, region_readout]
def on_page_compare_mode_change(sub_mode, pdf_new_f, pg_new):
    """Toggle the region-preview block; auto-load the preview when enabling it."""
    show = sub_mode == "Specific Region"
    col_update = gr.update(visible=show)
    if show:
        try:
            pil_img, ctxt, coords, dpi, rdout = on_load_preview(pdf_new_f, pg_new)
            return col_update, pil_img, ctxt, coords, dpi, rdout
        except Exception:
            pass  # best-effort: fall through to the blank preview below
    blank_readout = '<div id="region-readout">No region selected — full page will be used</div>'
    return col_update, None, "", None, 72, blank_readout
page_compare_mode.change(
fn=on_page_compare_mode_change,
inputs=[page_compare_mode, pdf_new, page_new_input],
outputs=[region_preview_col] + _preview_outputs,
)
# Re-load preview when the New Doc page number changes (if Specific Region is active)
def on_page_new_change(pg_new, pdf_new_f, sub_mode):
    """Re-load the region preview when the New Doc page number changes
    (only while Specific Region mode is active and a PDF is present)."""
    if sub_mode == "Specific Region" and pdf_new_f is not None:
        try:
            return on_load_preview(pdf_new_f, pg_new)
        except Exception:
            pass  # best-effort: fall back to the blank preview below
    blank_readout = '<div id="region-readout">No region selected — full page will be used</div>'
    return None, "", None, 72, blank_readout
page_new_input.change(
fn=on_page_new_change,
inputs=[page_new_input, pdf_new, page_compare_mode],
outputs=_preview_outputs,
)
# JS canvas overlay writes "x,y,w,h" into region_coords_txt when drag ends β†’ parse to dict
region_coords_txt.change(
fn=on_region_coords_change,
inputs=[region_coords_txt],
outputs=[region_coords_state, region_readout],
show_progress="hidden",
show_progress_on=[],
)
# Clear region button β€” clear coords, JS overlay self-clears on next poll
clear_region_btn.click(
fn=on_clear_region,
inputs=None,
outputs=[region_coords_txt, region_coords_state, region_readout],
)
run_btn.click(
fn=on_run,
inputs=[pdf_old, pdf_new, skip_old_p1, skip_new_p1, enable_align,
compare_mode, page_old_input, page_new_input,
page_compare_mode, region_coords_state, display_dpi_state],
outputs=[pages_state, rotation_state, nudge_x_state, nudge_y_state, nudge_scale_state,
page_num_state, total_pages_state,
pdf_output, result_image, nudge_readout, page_slider],
)
# View-mode tab change
view_mode.change(
fn=on_view_change,
inputs=[view_mode, page_num_state, total_pages_state, pages_state, rotation_state,
nudge_x_state, nudge_y_state, nudge_scale_state],
outputs=[result_image, rotation_state],
show_progress="hidden",
show_progress_on=[],
)
# Rotation buttons
rot_left_btn.click(
fn=on_rot_left,
inputs=[page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
nudge_x_state, nudge_y_state, nudge_scale_state],
outputs=[result_image, rotation_state],
show_progress="hidden",
show_progress_on=[],
)
rot_right_btn.click(
fn=on_rot_right,
inputs=[page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
nudge_x_state, nudge_y_state, nudge_scale_state],
outputs=[result_image, rotation_state],
show_progress="hidden",
show_progress_on=[],
)
# Page slider
page_slider.change(
fn=on_pg_slide,
inputs=[page_slider, total_pages_state, pages_state, view_mode,
rotation_state, nudge_x_state, nudge_y_state, nudge_scale_state],
outputs=[result_image, page_num_state],
show_progress="hidden",
show_progress_on=[],
)
# ── Nudge arrow buttons ───────────────────────────────────────────
_nudge_inputs = [page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
nudge_x_state, nudge_y_state, nudge_scale_state, nudge_step]
_nudge_outputs = [result_image, nudge_x_state, nudge_y_state,
nudge_scale_state, nudge_readout]
nudge_left_btn.click(
fn=lambda *a: on_nudge("left", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
show_progress="hidden", show_progress_on=[])
nudge_right_btn.click(
fn=lambda *a: on_nudge("right", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
show_progress="hidden", show_progress_on=[])
nudge_up_btn.click(
fn=lambda *a: on_nudge("up", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
show_progress="hidden", show_progress_on=[])
nudge_down_btn.click(
fn=lambda *a: on_nudge("down", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
show_progress="hidden", show_progress_on=[])
# ── Scale number input (live update on change) ────────────────────
nudge_scale.change(
fn=on_scale_change,
inputs=[nudge_scale, page_num_state, total_pages_state, pages_state, view_mode,
rotation_state, nudge_x_state, nudge_y_state],
outputs=[result_image, nudge_scale_state, nudge_readout],
show_progress="hidden",
show_progress_on=[],
)
# ── Inline canvas JS — overlays a transparent draw canvas on the gr.Image ──
# NOTE(review): this string is executed client-side via demo.load(js=...).
# It is runtime data, so it is kept byte-identical here; explanation lives
# in these Python comments and in the // comments inside the script itself.
# What the script does (as written below):
#   * finds the <img> inside the component with elem_id 'region-page-img'
#   * lays an absolutely-positioned <canvas> over it; user drags a rectangle
#   * on mouseup, converts the selection from display pixels to natural-image
#     pixels and writes "x,y,w,h" into the textarea under elem_id
#     'region-coords-txt', dispatching a synthetic 'input' event (via the
#     HTMLTextAreaElement value setter) so the framework registers the change
#   * a 300 ms setInterval keeps the overlay aligned with image resizes and
#     clears the selection when the coords textbox is wiped externally
#   * selections under 5x5 display px are discarded as accidental clicks
_INLINE_CANVAS_JS = """
() => {
let _overlay = null, _ctx = null;
let _dragging = false, _sx = 0, _sy = 0, _sel = null;
let _lastCoords = '';
function getImgEl() {
// The rendered <img> inside the gr.Image component
const wrap = document.getElementById('region-page-img');
return wrap ? wrap.querySelector('img') : null;
}
function getCoordsEl() {
const wrap = document.getElementById('region-coords-txt');
return wrap ? wrap.querySelector('textarea') : null;
}
function syncOverlay() {
if (!_overlay) return;
const img = getImgEl();
if (!img || !img.src || img.src.startsWith('data:image/gif')) return;
const r = img.getBoundingClientRect();
const pr = img.parentElement.getBoundingClientRect();
_overlay.style.left = (r.left - pr.left) + 'px';
_overlay.style.top = (r.top - pr.top) + 'px';
_overlay.style.width = r.width + 'px';
_overlay.style.height = r.height + 'px';
if (_overlay.width !== Math.round(r.width) || _overlay.height !== Math.round(r.height)) {
_overlay.width = Math.round(r.width);
_overlay.height = Math.round(r.height);
redraw();
}
}
function toCanvas(cx, cy) {
const r = _overlay.getBoundingClientRect();
return { x: (cx - r.left) * _overlay.width / r.width,
y: (cy - r.top) * _overlay.height / r.height };
}
function redraw() {
if (!_ctx || !_overlay.width) return;
_ctx.clearRect(0, 0, _overlay.width, _overlay.height);
if (_sel) {
_ctx.strokeStyle = '#00BBBB';
_ctx.lineWidth = Math.max(2, _overlay.width / 400);
_ctx.strokeRect(_sel.x, _sel.y, _sel.w, _sel.h);
_ctx.fillStyle = 'rgba(0,187,187,0.15)';
_ctx.fillRect(_sel.x, _sel.y, _sel.w, _sel.h);
}
}
function pushCoords() {
const el = getCoordsEl();
if (!el || !_sel) return;
// Scale from display px back to natural image px
const img = getImgEl();
if (!img) return;
const scaleX = img.naturalWidth / _overlay.width;
const scaleY = img.naturalHeight / _overlay.height;
const val = Math.round(_sel.x * scaleX) + ',' +
Math.round(_sel.y * scaleY) + ',' +
Math.round(_sel.w * scaleX) + ',' +
Math.round(_sel.h * scaleY);
const setter = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value').set;
setter.call(el, val);
el.dispatchEvent(new Event('input', { bubbles: true }));
}
function setupOverlay() {
const imgWrap = document.getElementById('region-page-img');
if (!imgWrap) return false;
// Make sure parent is positioned
const parent = imgWrap.querySelector('.image-container') || imgWrap;
if (getComputedStyle(parent).position === 'static') parent.style.position = 'relative';
if (!_overlay) {
_overlay = document.createElement('canvas');
_overlay.id = 'region-draw-overlay';
_overlay.style.cssText = 'position:absolute;top:0;left:0;cursor:crosshair;z-index:10;pointer-events:all;';
parent.appendChild(_overlay);
_ctx = _overlay.getContext('2d');
_overlay.addEventListener('mousedown', function(e) {
const p = toCanvas(e.clientX, e.clientY);
_sx = p.x; _sy = p.y; _sel = null; _dragging = true; e.preventDefault();
});
_overlay.addEventListener('mousemove', function(e) {
if (!_dragging) return;
const p = toCanvas(e.clientX, e.clientY);
_sel = { x: Math.min(_sx, p.x), y: Math.min(_sy, p.y),
w: Math.abs(p.x - _sx), h: Math.abs(p.y - _sy) };
redraw(); e.preventDefault();
});
_overlay.addEventListener('mouseup', function(e) {
if (!_dragging) return; _dragging = false;
if (!_sel || _sel.w < 5 || _sel.h < 5) { _sel = null; redraw(); return; }
redraw(); pushCoords(); e.preventDefault();
});
}
return true;
}
// Poll every 300ms: sync overlay size, watch for cleared coords
setInterval(function() {
setupOverlay();
syncOverlay();
// Clear overlay when coords textbox is wiped by Clear button
const el = getCoordsEl();
if (el) {
const cur = el.value;
if (cur !== _lastCoords) {
_lastCoords = cur;
if (cur === '') { _sel = null; redraw(); }
}
}
}, 300);
}
"""
# Run the canvas-overlay script once when the UI loads (fn=None -> JS only).
demo.load(fn=None, js=_INLINE_CANVAS_JS)
# ══════════════════════════════════════════════════════════════════════
# ENTRY POINT
# ══════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
import socket as _socket
def _find_free_port(start: int = 7860, end: int = 7880) -> int:
for p in range(start, end + 1):
with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as s:
try:
s.bind(("", p))
return p
except OSError:
continue
return start # fallback β€” Gradio will error with a clear message
_port = _find_free_port()
print(f"\nπŸš€ POWERGRID Document Auditor β†’ http://localhost:{_port}\n")
demo.queue(default_concurrency_limit=20).launch(
server_name="0.0.0.0",
server_port=_port,
share=False,
show_error=True,
theme=_THEME,
css=_CSS,
)