abhivsh commited on
Commit
baf8731
Β·
verified Β·
1 Parent(s): e46db36

Upload app.py

Browse files

Deleted unnecessary steps and reduced the output size.

Files changed (1) hide show
  1. app.py +2051 -0
app.py ADDED
@@ -0,0 +1,2051 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py β€” POWERGRID Document Auditor (single-file HuggingFace Spaces build)
3
+ =============================================================================
4
+ Single-file Gradio app for AI-powered engineering drawing comparison.
5
+ Designed for POWERGRID (765/400/132kV AIS/GIS vendor drawing audits).
6
+
7
+ Pipeline:
8
+ Stage 1 β€” Global Alignment : Phase Correlation + ORB/RANSAC homography
9
+ Stage 2 β€” Region Extraction : Content-aware morphology (no pretrained detector)
10
+ Stage 3 β€” Semantic Matching : ResNet50 embeddings + cosine similarity (position-agnostic)
11
+ Stage 4 β€” Siamese Comparison : ResNet50 patch embeddings + GradCAM heatmaps
12
+
13
+ Run locally:
14
+ python app.py
15
+ """
16
+
17
+ # ══════════════════════════════════════════════════════════════════════
18
+ # IMPORTS
19
+ # ══════════════════════════════════════════════════════════════════════
20
+
21
+ import base64
22
+ import io
23
+ import logging
24
+ import os
25
+ import time
26
+ from dataclasses import dataclass, field
27
+ from typing import Dict, List, Optional, Tuple
28
+
29
+ import cv2
30
+ import fitz # PyMuPDF
31
+ import gradio as gr
32
+ import numpy as np
33
+ import torch
34
+ import torch.nn as nn
35
+ import torch.nn.functional as F
36
+ from PIL import Image
37
+ from scipy.optimize import linear_sum_assignment
38
+ from skimage.metrics import structural_similarity as ssim
39
+ from torchvision import models, transforms
40
+
41
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
42
+ logger = logging.getLogger(__name__)
43
+
44
+ # ── Logo: embed as base64 so it works on HuggingFace Spaces (no static folder) ──
45
+ def _load_logo_b64(filename: str = "logo_0.png") -> str:
46
+ """Return a data-URI string for the logo, or empty string if file not found."""
47
+ logo_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
48
+ if os.path.exists(logo_path):
49
+ with open(logo_path, "rb") as f:
50
+ b64 = base64.b64encode(f.read()).decode("utf-8")
51
+ ext = filename.rsplit(".", 1)[-1].lower()
52
+ mime = "image/png" if ext == "png" else f"image/{ext}"
53
+ return f"data:{mime};base64,{b64}"
54
+ return ""
55
+
56
+ _LOGO_URI = _load_logo_b64("logo_0.png")
57
+
58
+
59
+ # ══════════════════════════════════════════════════════════════════════
60
+ # DATA STRUCTURES
61
+ # ══════════════════════════════════════════════════════════════════════
62
+
63
@dataclass
class Region:
    """A detected layout region (axis-aligned bounding box)."""
    x: int
    y: int
    w: int
    h: int
    label: str = "text_block"  # text_block | figure | table | margin
    confidence: float = 1.0

    @property
    def bbox(self) -> Tuple[int, int, int, int]:
        """Corner representation (x0, y0, x1, y1) of the box."""
        return (self.x, self.y, self.x + self.w, self.y + self.h)

    @property
    def area(self) -> int:
        """Box area in pixels."""
        return self.w * self.h

    @property
    def center(self) -> Tuple[float, float]:
        """Box centre point as a float pair."""
        return (self.x + self.w / 2.0, self.y + self.h / 2.0)

    def iou(self, other: "Region") -> float:
        """Intersection-over-union with another region (0.0 when disjoint)."""
        left = max(self.x, other.x)
        top = max(self.y, other.y)
        right = min(self.x + self.w, other.x + other.w)
        bottom = min(self.y + self.h, other.y + other.h)
        overlap = max(0, right - left) * max(0, bottom - top)
        combined = self.area + other.area - overlap
        if combined <= 0:
            return 0.0
        return overlap / combined
93
+
94
+
95
@dataclass
class MatchedPair:
    """A matched region pair between old and new documents.

    Produced by the Stage-3 matcher with only ``match_score``,
    ``position_cost`` and ``appearance_cost`` filled in; the Stage-4
    comparator then writes the pixel/SSIM/semantic metrics and the
    optional Grad-CAM heatmap back onto the same instance.
    """
    # Geometry: the paired boxes in OLD and NEW page coordinates.
    region_old: Region
    region_new: Region
    # Matching diagnostics (cosine similarity and its decomposition).
    match_score: float
    position_cost: float
    appearance_cost: float
    # Stage-4 comparison metrics; defaults mean "not yet compared".
    pixel_diff: float = 0.0
    ssim_score: float = 1.0
    semantic_diff: float = 0.0
    total_change: float = 0.0
    # Per-pixel change map resized to region_new (float32 in [0, 1]), or None.
    heatmap: Optional[np.ndarray] = None
108
+
109
+
110
@dataclass
class ComparisonResult:
    """Full comparison result for one document page."""
    matched_pairs: List[MatchedPair]
    unmatched_old: List[Region]
    unmatched_new: List[Region]
    global_transform: Optional[np.ndarray]
    total_change_pct: float
    heatmap: np.ndarray
    img_old_aligned: Optional[np.ndarray] = None  # aligned OLD, same coord-space as NEW

    def summary(self) -> str:
        """Human-readable multi-line report of the page comparison."""
        alignment = 'Applied' if self.global_transform is not None else 'Skipped'
        report = [
            f" Global Alignment : {alignment}",
            f" Matched Pairs : {len(self.matched_pairs)}",
            f" Deleted Regions : {len(self.unmatched_old)}",
            f" Added Regions : {len(self.unmatched_new)}",
            f" Total Change : {self.total_change_pct:.1f}%",
        ]
        modified = [p for p in self.matched_pairs if p.total_change > 0.05]
        if modified:
            mean_change = np.mean([p.total_change for p in modified])
            report.append(f" Avg Change (modified regions): {mean_change:.2f}")
        return "\n".join(report)
134
+
135
+
136
+ # ══════════════════════════════════════════════════════════════════════
137
+ # STAGE 1 β€” GLOBAL ALIGNER
138
+ # ══════════════════════════════════════════════════════════════════════
139
+
140
class GlobalAligner:
    """Stage 1 — estimate and apply a global OLD→NEW page transform.

    FFT phase correlation supplies a pure-translation fallback; ORB
    keypoints + RANSAC partial-affine is the primary estimate.
    """

    def __init__(self, orb_features: int = 2000, ransac_threshold: float = 5.0):
        self.orb_features = orb_features
        self.ransac_threshold = ransac_threshold

    def _phase_correlation_shift(self, gray1: np.ndarray, gray2: np.ndarray) -> Tuple[float, float]:
        """Translation (dx, dy) taking gray1 onto gray2 via FFT phase correlation."""
        spec_a = np.fft.fft2(gray1.astype(np.float32))
        spec_b = np.fft.fft2(gray2.astype(np.float32))
        product = spec_a * np.conj(spec_b)
        normalised = product / (np.abs(product) + 1e-10)
        surface = np.fft.ifft2(normalised).real
        peak_y, peak_x = np.unravel_index(np.argmax(surface), surface.shape)
        rows, cols = gray1.shape
        # Unwrap the circular peak coordinates into signed shifts.
        if peak_y > rows // 2:
            peak_y -= rows
        if peak_x > cols // 2:
            peak_x -= cols
        return float(-peak_x), float(-peak_y)

    def _orb_affine(self, gray_old: np.ndarray, gray_new: np.ndarray) -> Optional[np.ndarray]:
        """ORB + RANSAC partial-affine matrix mapping OLD→NEW, or None on failure."""
        detector = cv2.ORB_create(nfeatures=self.orb_features)
        kp_old, desc_old = detector.detectAndCompute(gray_old, None)
        kp_new, desc_new = detector.detectAndCompute(gray_new, None)
        if desc_old is None or desc_new is None or min(len(kp_old), len(kp_new)) < 10:
            return None
        matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        ranked = sorted(matcher.match(desc_old, desc_new), key=lambda m: m.distance)
        if len(ranked) < 10:
            return None
        keep = ranked[:min(200, len(ranked))]
        # src = OLD keypoints, dst = NEW keypoints, so the estimated matrix is
        # the forward OLD→NEW transform that cv2.warpAffine expects: it places
        # OLD pixels at their NEW positions. (A swapped src/dst here would
        # double the displacement instead of correcting it — the red/cyan
        # fringe failure mode in the Alignment Check view.)
        src = np.float32([kp_old[m.queryIdx].pt for m in keep]).reshape(-1, 1, 2)
        dst = np.float32([kp_new[m.trainIdx].pt for m in keep]).reshape(-1, 1, 2)
        matrix, _ = cv2.estimateAffinePartial2D(
            src, dst, method=cv2.RANSAC,
            ransacReprojThreshold=self.ransac_threshold,
        )
        return matrix

    def align(self, img_old: np.ndarray, img_new: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Warp OLD into NEW's coordinate space; returns (aligned_old, matrix)."""
        gray_old = cv2.cvtColor(img_old, cv2.COLOR_RGB2GRAY)
        gray_new = cv2.cvtColor(img_new, cv2.COLOR_RGB2GRAY)
        shift_x, shift_y = self._phase_correlation_shift(gray_old, gray_new)
        matrix = self._orb_affine(gray_old, gray_new)
        if matrix is None:
            # RANSAC failed or too few features: fall back to pure translation.
            matrix = np.array([[1.0, 0.0, shift_x], [0.0, 1.0, shift_y]], dtype=np.float32)
        rows, cols = img_old.shape[:2]
        warped = cv2.warpAffine(
            img_old, matrix, (cols, rows),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
        return warped, matrix
200
+
201
+
202
+ # ══════════════════════════════════════════════════════════════════════
203
+ # STAGE 2 β€” LAYOUT REGION EXTRACTOR
204
+ # ══════════════════════════════════════════════════════════════════════
205
+
206
class LayoutRegionExtractor:
    """Stage 2 — content-aware region proposal via binarisation + morphology.

    No pretrained detector: Otsu-binarise the page, dilate ink into blobs,
    take contour bounding boxes, classify each by aspect/density heuristics,
    then merge heavily-overlapping boxes.
    """

    def __init__(
        self,
        min_area_ratio: float = 0.0003,   # discard boxes smaller than this fraction of the page
        max_area_ratio: float = 0.92,     # discard near-page-sized boxes (frames, borders)
        dilation_kernel: Tuple[int, int] = (8, 2),
        dilation_iters: int = 2,
        merge_iou_threshold: float = 0.40,
    ):
        self.min_area_ratio = min_area_ratio
        self.max_area_ratio = max_area_ratio
        self.dilation_kernel = dilation_kernel
        self.dilation_iters = dilation_iters
        self.merge_iou_threshold = merge_iou_threshold

    def _binarise(self, gray: np.ndarray) -> np.ndarray:
        """Otsu-threshold a blurred grayscale page; ink becomes white (255)."""
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        return binary

    def _dilate(self, binary: np.ndarray) -> np.ndarray:
        """Grow ink pixels into connected blobs so nearby marks merge into regions."""
        k = cv2.getStructuringElement(cv2.MORPH_RECT, self.dilation_kernel)
        dilated = cv2.dilate(binary, k, iterations=self.dilation_iters)
        # Wide horizontal kernel: bridge the gaps between marks on the same line.
        k_line = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
        dilated = cv2.dilate(dilated, k_line, iterations=1)
        # Small vertical close: join adjacent lines into one block.
        k_vert = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
        return cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, k_vert)

    def _classify(self, patch_gray: np.ndarray, w: int, h: int) -> str:
        """Heuristic label from aspect ratio and ink density.

        Returns one of "margin" (near-empty), "text_block", "figure", "table".
        """
        aspect = w / max(h, 1)
        _, binary = cv2.threshold(patch_gray, 127, 255, cv2.THRESH_BINARY_INV)
        density = np.sum(binary > 0) / max(w * h, 1)
        if density < 0.02:
            # Almost no ink — but a few isolated strokes still count as content,
            # so only near-empty patches with < 3 contours become "margin".
            contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) < 3:
                return "margin"
        if aspect > 4.0 and density > 0.06:
            return "text_block"
        if 0.4 < aspect < 2.8 and density < 0.25:
            return "figure"
        if density > 0.18 and aspect > 1.0:
            return "table"
        return "text_block"

    def _merge_overlapping(self, regions: List[Region]) -> List[Region]:
        """Union boxes whose IoU with a growing box exceeds the threshold.

        Fixpoint loop: each pass greedily grows a seed box by absorbing
        overlapping neighbours; repeats until a pass merges nothing.
        NOTE(review): merged boxes are re-created without a label, so the
        composite falls back to the default "text_block" and loses any
        figure/table classification — confirm this is intended.
        """
        changed = True
        while changed:
            changed = False
            used = [False] * len(regions)
            merged: List[Region] = []
            for i, r1 in enumerate(regions):
                if used[i]:
                    continue
                x0, y0 = r1.x, r1.y
                x1, y1 = r1.x + r1.w, r1.y + r1.h
                for j, r2 in enumerate(regions):
                    if i == j or used[j]:
                        continue
                    # Test against the box as grown so far, not the original seed.
                    expanded = Region(x0, y0, x1 - x0, y1 - y0)
                    if expanded.iou(r2) > self.merge_iou_threshold:
                        x0 = min(x0, r2.x)
                        y0 = min(y0, r2.y)
                        x1 = max(x1, r2.x + r2.w)
                        y1 = max(y1, r2.y + r2.h)
                        used[j] = True
                        changed = True
                merged.append(Region(x0, y0, x1 - x0, y1 - y0))
                used[i] = True
            regions = merged
        return regions

    def extract(self, img_rgb: np.ndarray) -> List[Region]:
        """Detect layout regions on an RGB page image.

        Returns regions sorted roughly top-to-bottom (50-px row bands) then
        left-to-right; "margin" candidates are dropped before merging.
        """
        h, w = img_rgb.shape[:2]
        page_area = h * w
        gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
        binary = self._binarise(gray)
        dilated = self._dilate(binary)
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        candidates: List[Region] = []
        for cnt in contours:
            rx, ry, rw, rh = cv2.boundingRect(cnt)
            area = rw * rh
            if area < page_area * self.min_area_ratio:
                continue
            if area > page_area * self.max_area_ratio:
                continue
            patch = gray[ry: ry + rh, rx: rx + rw]
            label = self._classify(patch, rw, rh)
            if label == "margin":
                continue
            candidates.append(Region(rx, ry, rw, rh, label=label))
        regions = self._merge_overlapping(candidates)
        regions.sort(key=lambda r: (r.y // 50, r.x))
        logger.info("LayoutExtractor: %d regions detected", len(regions))
        return regions
301
+
302
+
303
+ # ══════════════════════════════════════════════════════════════════════
304
+ # STAGE 3 β€” HUNGARIAN REGION MATCHER
305
+ # ══════════════════════════════════════════════════════════════════════
306
+
307
+ # ══════════════════════════════════════════════════════════════════════
308
+ # STAGE 3 β€” SEMANTIC RETRIEVAL MATCHER (position-agnostic)
309
+ # ══════════════════════════════════════════════════════════════════════
310
+
311
class SemanticRetrievalMatcher:
    """Position-agnostic region matcher (replaces HungarianRegionMatcher).

    Every OLD and NEW region patch is encoded with the shared ResNet50
    backbone into a 128-d L2-normalised vector. An (N_new x N_old) cosine
    similarity matrix is then solved with the Hungarian algorithm
    (maximising similarity), and a pair is accepted only when its
    similarity reaches ``min_similarity``. A region that merely *moved*
    (different x/y, same content) still scores ~1.0 and matches correctly.
    """

    def __init__(
        self,
        encoder: "_SiameseEncoder",
        device: torch.device,
        min_similarity: float = 0.50,
        thumbnail_size: Tuple[int, int] = (224, 224),
    ):
        self.encoder = encoder
        self.device = device
        self.min_similarity = min_similarity
        self._transform = transforms.Compose([
            transforms.Resize(thumbnail_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    # ------------------------------------------------------------------
    def _patch(self, region: Region, img: np.ndarray) -> np.ndarray:
        """Crop a region from the image; returns a white 64x64 patch if empty."""
        crop = img[region.y: region.y + region.h, region.x: region.x + region.w]
        if crop.size == 0:
            return np.full((64, 64, 3), 255, dtype=np.uint8)
        return crop

    def _embed(self, patches: List[np.ndarray]) -> torch.Tensor:
        """Batch-encode patches into (N, 128) L2-normalised embeddings (no grad)."""
        stacked = torch.stack(
            [self._transform(Image.fromarray(p)) for p in patches]
        ).to(self.device)  # (N, 3, 224, 224)
        with torch.no_grad():
            vectors, _ = self.encoder.encode(stacked)  # (N, 128), already normalised
        return vectors

    # ------------------------------------------------------------------
    def match(
        self,
        regions_old: List[Region],
        regions_new: List[Region],
        img_old: np.ndarray,
        img_new: np.ndarray,
    ) -> Tuple[List[MatchedPair], List[Region], List[Region]]:
        """Pair NEW regions with OLD ones by appearance only.

        Returns (matched_pairs, deleted_old_regions, added_new_regions).
        """
        n_old, n_new = len(regions_old), len(regions_new)
        if not regions_old or not regions_new:
            return [], list(regions_old), list(regions_new)

        # 1. Embed every patch from both pages.
        emb_old = self._embed([self._patch(r, img_old) for r in regions_old])
        emb_new = self._embed([self._patch(r, img_new) for r in regions_new])

        # 2. Cosine similarity = dot product of unit vectors; rows=NEW, cols=OLD.
        sim_mat = torch.mm(emb_new, emb_old.T).cpu().numpy()  # (n_new, n_old)

        # 3. Hungarian assignment on the negated matrix maximises similarity.
        new_idx, old_idx = linear_sum_assignment(-sim_mat)

        pairs: List[MatchedPair] = []
        taken_old: set = set()
        taken_new: set = set()
        for ni, oi in zip(new_idx, old_idx):
            score = float(sim_mat[ni, oi])
            if score < self.min_similarity:
                continue  # below threshold -> leave both regions unmatched
            pairs.append(MatchedPair(
                region_old=regions_old[oi],
                region_new=regions_new[ni],
                match_score=score,
                position_cost=0.0,  # appearance-only matching: no position penalty
                appearance_cost=max(0.0, 1.0 - score),
            ))
            taken_old.add(oi)
            taken_new.add(ni)

        deleted = [regions_old[i] for i in range(n_old) if i not in taken_old]
        added = [regions_new[j] for j in range(n_new) if j not in taken_new]

        logger.info(
            "SemanticRetrieval: %d matched | %d deleted | %d added "
            "(min_sim=%.2f)",
            len(pairs), len(deleted), len(added),
            self.min_similarity,
        )
        return pairs, deleted, added
421
+
422
+
423
+ # ══════════════════════════════════════════════════════════════════════
424
+ # STAGE 4 β€” SIAMESE PATCH COMPARATOR
425
+ # ══════════════════════════════════════════════════════════════════════
426
+
427
class _SiameseEncoder(nn.Module):
    """Shared ResNet50 backbone with a 128-d L2-normalised embedding head.

    ``features`` (conv trunk), ``pool`` and ``embed`` stay as public
    attributes because Grad-CAM hooks attach to ``features[-1]`` externally.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        # Keep everything up to (not including) avgpool + fc: a spatial feature map.
        self.features = nn.Sequential(*list(backbone.children())[:-2])
        self.pool = backbone.avgpool
        self.embed = nn.Sequential(
            nn.Linear(2048, 512), nn.ReLU(),
            nn.Linear(512, 128),
        )

    def encode(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (L2-normalised 128-d embedding, raw conv feature map)."""
        fmap = self.features(x)
        vec = torch.flatten(self.pool(fmap), 1)
        vec = F.normalize(self.embed(vec), p=2, dim=1)
        return vec, fmap

    def forward(self, x1: torch.Tensor, x2: torch.Tensor):
        """Encode both inputs; returns (e1, e2, fmap1, fmap2)."""
        emb_a, map_a = self.encode(x1)
        emb_b, map_b = self.encode(x2)
        return emb_a, emb_b, map_a, map_b
448
+
449
+
450
class SiamesePatchComparator:
    """Stage 4 — per-pair patch comparison.

    Combines three signals into one change score (pixel diff 0.30,
    SSIM cost 0.40, embedding distance 0.30) and produces a Grad-CAM
    map locating WHERE inside the patch the change occurred.
    """

    def __init__(
        self,
        device: Optional[torch.device] = None,
        encoder: Optional[_SiameseEncoder] = None,  # ← shared encoder
    ):
        # Device preference: CUDA → Apple MPS → CPU.
        if device is None:
            if torch.cuda.is_available():
                device = torch.device("cuda")
            elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                device = torch.device("mps")
            else:
                device = torch.device("cpu")
        self.device = device
        # Reuse the encoder from SemanticRetrievalMatcher if provided —
        # avoids loading ResNet50 weights a second time.
        if encoder is not None:
            self.model = encoder
            logger.info("SiamesePatchComparator: reusing shared encoder on %s", device)
        else:
            self.model = _SiameseEncoder().to(device).eval()
            logger.info("SiamesePatchComparator: created new encoder on %s", device)
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def _to_tensor(self, patch_rgb: np.ndarray) -> torch.Tensor:
        """RGB uint8 patch → normalised (1, 3, 224, 224) tensor on self.device."""
        return self.transform(Image.fromarray(patch_rgb)).unsqueeze(0).to(self.device)

    def _grad_cam(
        self,
        patch_old: np.ndarray,
        patch_new: np.ndarray,
        target_hw: Tuple[int, int],
    ) -> np.ndarray:
        """
        Grad-CAM spatial change map — WHERE inside the patch the embedding differs.

        Method
        ------
        1. Forward patch_old (no grad) → embedding e_old.
        2. Forward patch_new (with grad, hooks on last conv block) → embedding
           e_new + feature map F captured by forward hook.
        3. Scalar loss = pairwise_distance(e_old.detach(), e_new).
        4. loss.backward() → dloss/dF captured by backward hook.
        5. Grad-CAM = ReLU( mean_c(dloss/dF) * F ) → (7x7) → upsample to patch size.

        Pixels with HIGH activation changed the embedding the most → the actual edits.

        Returns
        -------
        np.ndarray shape (target_hw[0], target_hw[1]), float32, values in [0, 1].
        """
        t_old = self._to_tensor(patch_old)
        t_new = self._to_tensor(patch_new)

        feat_store: Dict[str, torch.Tensor] = {}
        grad_store: Dict[str, torch.Tensor] = {}

        # Hook on the last convolutional block of the shared ResNet50.
        last_block = self.model.features[-1]

        def _fwd(module, inp, out):
            feat_store["f"] = out  # (1, 2048, 7, 7)

        def _bwd(module, grad_in, grad_out):
            grad_store["g"] = grad_out[0]  # (1, 2048, 7, 7)

        h_fwd = last_block.register_forward_hook(_fwd)
        h_bwd = last_block.register_full_backward_hook(_bwd)

        try:
            # e_old — no gradient needed, just a reference point.
            with torch.no_grad():
                e_old, _ = self.model.encode(t_old)

            # e_new — gradient flows through this path only.
            with torch.enable_grad():
                self.model.zero_grad()
                e_new, _ = self.model.encode(t_new)
                dist = F.pairwise_distance(e_old.detach(), e_new)
                dist.backward()
        finally:
            # Always detach the hooks, even if the backward pass raises.
            h_fwd.remove()
            h_bwd.remove()

        if "f" not in feat_store or "g" not in grad_store:
            return np.zeros(target_hw, dtype=np.float32)

        # Grad-CAM: global-average-pool the gradients, weight the feature maps.
        weights = grad_store["g"].mean(dim=[2, 3], keepdim=True)  # (1,2048,1,1)
        cam = (weights * feat_store["f"]).sum(dim=1).squeeze()  # (7, 7)
        cam = F.relu(cam)

        cam_max = cam.max()
        if cam_max < 1e-8:
            # Embeddings identical (or numerically so): no change to localise.
            return np.zeros(target_hw, dtype=np.float32)

        cam = (cam / cam_max).detach().cpu().numpy()  # (7, 7) in [0, 1]

        # Upsample to the original patch resolution.
        h, w = target_hw
        cam_up = cv2.resize(cam, (w, h), interpolation=cv2.INTER_LINEAR)
        return np.clip(cam_up, 0.0, 1.0).astype(np.float32)

    def compare(self, patch_old: np.ndarray, patch_new: np.ndarray) -> Dict[str, object]:
        """Compare two same-sized RGB patches.

        Returns a dict with keys "pixel_diff", "ssim_score", "semantic_diff",
        "total_change" (all floats) and "grad_cam" (float32 map in [0, 1]
        sized like patch_new).
        """
        g_old = cv2.cvtColor(patch_old, cv2.COLOR_RGB2GRAY).astype(np.float32)
        g_new = cv2.cvtColor(patch_new, cv2.COLOR_RGB2GRAY).astype(np.float32)
        diff_map = np.abs(g_old - g_new)
        # Threshold of 8 (was 15) — CAD drawings have fine lines and small
        # text; a dimension change may shift only a handful of pixels slightly.
        changed_pixels = np.sum(diff_map > 8.0)
        pixel_diff = float(changed_pixels) / max(g_old.size, 1)
        ssim_val = float(ssim(g_old, g_new, data_range=255.0))
        ssim_cost = max(0.0, 1.0 - ssim_val)
        with torch.no_grad():
            t1 = self._to_tensor(patch_old)
            t2 = self._to_tensor(patch_new)
            e1, e2, _, _ = self.model(t1, t2)
            l2_dist = float(F.pairwise_distance(e1, e2).item())
        # Embedding L2 distance squashed to [0, 1] (10.0 = saturation point).
        semantic_diff = min(l2_dist / 10.0, 1.0)
        total = 0.30 * pixel_diff + 0.40 * ssim_cost + 0.30 * semantic_diff

        # Grad-CAM: spatial map showing WHERE inside this patch the change is.
        h, w = patch_new.shape[:2]
        grad_cam_map = self._grad_cam(patch_old, patch_new, (h, w))

        return {
            "pixel_diff": pixel_diff,
            "ssim_score": ssim_val,
            "semantic_diff": semantic_diff,
            "total_change": min(float(total), 1.0),
            "grad_cam": grad_cam_map,  # (h, w) float32 [0,1]
        }

    def compare_pair(self, pair: MatchedPair, img_old: np.ndarray, img_new: np.ndarray) -> MatchedPair:
        """Crop both patches of a matched pair, pad them to a common size,
        run compare(), and write the metrics + Grad-CAM heatmap back onto
        the pair (mutated in place and returned)."""
        ro, rn = pair.region_old, pair.region_new
        patch_old = img_old[ro.y: ro.y + ro.h, ro.x: ro.x + ro.w]
        patch_new = img_new[rn.y: rn.y + rn.h, rn.x: rn.x + rn.w]
        if patch_old.size == 0 or patch_new.size == 0:
            # Degenerate crop (region outside the image): leave defaults.
            return pair
        target_h = max(patch_old.shape[0], patch_new.shape[0])
        target_w = max(patch_old.shape[1], patch_new.shape[1])

        def _pad_white(patch: np.ndarray, th: int, tw: int) -> np.ndarray:
            # Paste onto a white canvas so both patches share one size.
            canvas = np.full((th, tw, patch.shape[2]), 255, dtype=np.uint8)
            canvas[:patch.shape[0], :patch.shape[1]] = patch
            return canvas

        patch_old_p = _pad_white(patch_old, target_h, target_w)
        patch_new_p = _pad_white(patch_new, target_h, target_w)
        metrics = self.compare(patch_old_p, patch_new_p)
        pair.pixel_diff = metrics["pixel_diff"]
        pair.ssim_score = metrics["ssim_score"]
        pair.semantic_diff = metrics["semantic_diff"]
        pair.total_change = metrics["total_change"]
        # Store Grad-CAM map (sized to the new patch, not the padded version).
        raw_cam = metrics.get("grad_cam")
        if raw_cam is not None:
            rn = pair.region_new
            pair.heatmap = cv2.resize(raw_cam, (rn.w, rn.h),
                                      interpolation=cv2.INTER_LINEAR)
        return pair
615
+
616
+
617
+ # ══════════════════════════════════════════════════════════════════════
618
+ # HEATMAP GENERATOR
619
+ # ══════════════════════════════════════════════════════════════════════
620
+
621
class HeatmapGenerator:
    """Render per-region change intensities into a 4-channel float overlay.

    Channel 0 = moderate change (yellow), channel 1 = major change (red);
    channels 2 (added) and 3 (deleted) are reserved but left empty — those
    regions appear in the Match Canvas thermal view instead.
    """

    _COLOUR_CHANGED = np.array([255, 220, 0], dtype=np.float32)
    _COLOUR_MAJOR = np.array([230, 30, 30], dtype=np.float32)
    _COLOUR_ADDED = np.array([ 30, 200, 60], dtype=np.float32)
    _COLOUR_DELETED = np.array([200, 30, 200], dtype=np.float32)

    @staticmethod
    def _project_region(r: Region, M_inv: Optional[np.ndarray], w: int, h: int) -> Tuple[int, int, int, int]:
        """Project a region's corners through the inverse transform (if any)
        and return the clipped axis-aligned (x0, y0, x1, y1)."""
        if M_inv is None:
            return r.x, r.y, r.x + r.w, r.y + r.h
        pts = np.array([
            [r.x,       r.y      ],
            [r.x + r.w, r.y      ],
            [r.x,       r.y + r.h],
            [r.x + r.w, r.y + r.h],
        ], dtype=np.float32)
        homog = np.hstack([pts, np.ones((4, 1), dtype=np.float32)])
        mapped = (M_inv @ homog.T).T
        xs, ys = mapped[:, 0], mapped[:, 1]
        return (
            int(np.clip(xs.min(), 0, w - 1)),
            int(np.clip(ys.min(), 0, h - 1)),
            int(np.clip(xs.max(), 0, w - 1)),
            int(np.clip(ys.max(), 0, h - 1)),
        )

    @staticmethod
    def generate(
        img_shape: Tuple[int, int],
        matched_pairs: List[MatchedPair],
        unmatched_old: List[Region],
        unmatched_new: List[Region],
        smooth_kernel: int = 11,
        M_inv: Optional[np.ndarray] = None,
        change_threshold: float = 0.05,
    ) -> np.ndarray:
        """Build the (h, w, 4) float32 intensity layers for the Heatmap tab.

        Only modified matched pairs are painted: with the Grad-CAM map when
        available (brighter = more changed), otherwise a flat bounding box.
        """
        height, width = img_shape
        overlay = np.zeros((height, width, 4), dtype=np.float32)
        for pair in matched_pairs:
            strength = float(pair.total_change)
            if strength <= change_threshold:
                continue
            box = pair.region_new
            channel = 1 if strength > 0.40 else 0  # red for major, yellow otherwise
            window = overlay[box.y:box.y + box.h, box.x:box.x + box.w, channel]
            if pair.heatmap is None:
                # No Grad-CAM available: flood the whole bounding box.
                np.maximum(window, strength, out=window)
            else:
                # Grad-CAM path: paint only the pixels that actually changed,
                # scaled by the overall change so brighter = more changed.
                cam = pair.heatmap
                if cam.shape != (box.h, box.w):
                    cam = cv2.resize(cam, (box.w, box.h),
                                     interpolation=cv2.INTER_LINEAR)
                np.maximum(window, np.clip(cam * strength, 0.0, 1.0), out=window)
        # Channels 2 (added/green) and 3 (deleted/purple) intentionally omitted:
        # the Heatmap tab shows only modification intensity.
        if smooth_kernel > 0:
            ksize = smooth_kernel | 1  # force an odd kernel size
            for channel in range(4):
                if overlay[:, :, channel].max() > 0:
                    overlay[:, :, channel] = cv2.GaussianBlur(
                        overlay[:, :, channel], (ksize, ksize), sigmaX=3.0)
        for channel in range(2):
            if overlay[:, :, channel].max() > 0:
                # Gamma lift so faint changes stay visible after blurring.
                overlay[:, :, channel] = np.power(overlay[:, :, channel], 0.6)
        return overlay
692
+
693
+
694
+ # ══════════════════════════════════════════════════════════════════════
695
+ # VISUALISER
696
+ # ══════════════════════════════════════════════════════════════════════
697
+
698
class Visualiser:
    """Rendering helpers for the comparison UI."""

    # Fixed RGB palette keyed by region category / change status.
    COLOURS: Dict[str, Tuple[int, int, int]] = {
        "text_block": (30, 144, 255),
        "figure": (255, 165, 0),
        "table": (50, 205, 50),
        "unknown": (180, 180, 180),
        "deleted": (220, 50, 50),
        "added": (50, 220, 80),
        "changed": (255, 200, 0),
        "unchanged": (80, 220, 80),
    }

    @staticmethod
    def draw_alignment_check(
        img_old_aligned: np.ndarray,
        img_new: np.ndarray,
    ) -> np.ndarray:
        """
        Build the red-cyan overlay for the Alignment Check tab.

        The aligned OLD page feeds the red channel; the NEW page feeds both
        the green and blue channels (cyan).  Reading the result:

        β€’ Ink present at the same pixel in both pages β†’ gray (R β‰ˆ G β‰ˆ B)
        β€’ Ink only in OLD (drifted)                   β†’ RED fringe
        β€’ Ink only in NEW (drifted)                   β†’ CYAN fringe
        β€’ White background in both                    β†’ white

        A mostly gray/white overlay therefore means alignment is good;
        red/cyan fringes indicate residual misalignment.
        """
        gray_prev = cv2.cvtColor(img_old_aligned, cv2.COLOR_RGB2GRAY)
        gray_curr = cv2.cvtColor(img_new, cv2.COLOR_RGB2GRAY)
        # R = old, G = B = new  β†’  red vs cyan fringes where they disagree
        return np.dstack([gray_prev, gray_curr, gray_curr])
735
+
736
+
737
+ # ══════════════════════════════════════════════════════════════════════
738
+ # HELPER β€” unmatched region visual-change check
739
+ # ══════════════════════════════════════════════════════════════════════
740
+
741
# Mean-abs pixel diff below this threshold β†’ region is visually identical
# despite not being paired by the matcher; excluded from the change score.
# NOTE(review): value appears hand-tuned for the 400-DPI render path β€” confirm
# before reusing at other DPIs.
_UNMATCHED_PIXEL_THR: float = 12.0  # on 0–255 grayscale scale
744
+
745
+
746
def _region_mean_diff(
    r: Region,
    img_a: np.ndarray,
    candidates: List[Region],
    img_b: np.ndarray,
    thumb: int = 64,
) -> float:
    """
    Minimum mean-absolute grayscale difference (0–255 scale) between region
    `r` of `img_a` and the spatially nearest candidate regions of `img_b`.

    "Spatially nearest" means smallest Euclidean centre-to-centre distance;
    only the three closest candidates are examined, for speed.  When there
    are no candidates (or a crop is empty), 255.0 β€” maximally different β€”
    is returned.
    """
    if not candidates:
        return 255.0

    patch_a = img_a[r.y: r.y + r.h, r.x: r.x + r.w]
    if patch_a.size == 0:
        return 255.0

    def _thumbnail(patch: np.ndarray) -> np.ndarray:
        # Grayscale + fixed-size thumbnail makes patches directly comparable.
        gray = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY)
        return cv2.resize(gray, (thumb, thumb)).astype(np.float32)

    ga = _thumbnail(patch_a)

    cx, cy = r.center
    nearest = sorted(
        candidates,
        key=lambda c: (c.center[0] - cx) ** 2 + (c.center[1] - cy) ** 2,
    )[:3]

    best = 255.0
    for cand in nearest:
        patch_b = img_b[cand.y: cand.y + cand.h, cand.x: cand.x + cand.w]
        if patch_b.size == 0:
            continue
        best = min(best, float(np.mean(np.abs(ga - _thumbnail(patch_b)))))
    return best
786
+
787
+
788
def _is_truly_changed(
    r: Region,
    candidates: List[Region],
    img_a: np.ndarray,
    img_b: np.ndarray,
) -> bool:
    """
    True when region `r` (from img_a) looks visually *different* from its
    nearest spatial counterpart among `candidates` (from img_b).

    This separates "the matcher failed to pair two identical regions" from
    "content was genuinely added or deleted".
    """
    diff = _region_mean_diff(r, img_a, candidates, img_b)
    return diff >= _UNMATCHED_PIXEL_THR
802
+
803
+
804
+ # ══════════════════════════════════════════════════════════════════════
805
+ # MAIN PIPELINE
806
+ # ══════════════════════════════════════════════════════════════════════
807
+
808
class CoarseToFinePipeline:
    """
    End-to-end page comparison: align β†’ extract regions β†’ match β†’ Siamese
    fine-compare β†’ heatmap + page change score.

    Stages
    ------
    1. GlobalAligner (optional) warps the OLD page onto the NEW page.
    2. LayoutRegionExtractor detects layout regions on both pages.
    3. A matcher pairs OLD/NEW regions (SemanticRetrievalMatcher by default).
    4. SiamesePatchComparator scores each matched pair in detail.
    """

    def __init__(
        self,
        align: bool = True,
        device: Optional[torch.device] = None,
        region_extractor: Optional[LayoutRegionExtractor] = None,
        matcher=None,  # SemanticRetrievalMatcher or HungarianRegionMatcher
        comparator: Optional[SiamesePatchComparator] = None,
        min_similarity: float = 0.50,  # used only when matcher=None (auto-build)
    ):
        """
        Parameters
        ----------
        align : run global (ORB-based) alignment before comparing when True.
        device : torch device; auto-detected (cuda β†’ mps β†’ cpu) when None.
        region_extractor : custom extractor; defaults to LayoutRegionExtractor().
        matcher : custom matcher used as-is; when None a SemanticRetrievalMatcher
            is built that shares one ResNet50 encoder with the comparator.
        comparator : custom comparator; defaults to SiamesePatchComparator.
        min_similarity : match-acceptance threshold for the auto-built matcher.
        """
        # Resolve device once here so both sub-modules share it
        if device is None:
            if torch.cuda.is_available():
                device = torch.device("cuda")
            elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                device = torch.device("mps")
            else:
                device = torch.device("cpu")
        self._device = device

        self.aligner = GlobalAligner() if align else None
        self.extractor = region_extractor or LayoutRegionExtractor()

        if matcher is not None:
            # Caller supplied a custom matcher β€” use it as-is
            self.matcher = matcher
            self.comparator = comparator or SiamesePatchComparator(device=device)
        else:
            # ── Default path: shared ResNet50 encoder ──────────────
            # Build the encoder once; hand the same object to both
            # SemanticRetrievalMatcher (Stage 3) and SiamesePatchComparator (Stage 4).
            # This halves model-load time and GPU/CPU RAM usage.
            shared_encoder = _SiameseEncoder().to(device).eval()
            logger.info("Pipeline: shared ResNet50 encoder on %s", device)

            self.matcher = SemanticRetrievalMatcher(
                encoder        = shared_encoder,
                device         = device,
                min_similarity = min_similarity,
            )
            self.comparator = comparator or SiamesePatchComparator(
                device  = device,
                encoder = shared_encoder,  # ← reuse, no second load
            )

    def compare(self, img_old: np.ndarray, img_new: np.ndarray, verbose: bool = True) -> ComparisonResult:
        """
        Compare two RGB page images and return a ComparisonResult.

        Parameters
        ----------
        img_old, img_new : RGB numpy arrays.  Assumed to be the same size β€”
            the caller (run_comparison) resizes OLD to NEW beforehand;
            TODO confirm for other call sites.
        verbose : log per-stage timings when True.
        """
        timings: Dict[str, float] = {}
        t = time.time()
        M = None
        if self.aligner is not None:
            img_old_aligned, M = self.aligner.align(img_old, img_new)
        else:
            img_old_aligned = img_old.copy()
        timings["alignment"] = time.time() - t

        t = time.time()
        regions_old = self.extractor.extract(img_old_aligned)
        regions_new = self.extractor.extract(img_new)
        timings["extraction"] = time.time() - t

        t = time.time()
        matched, unmatched_old, unmatched_new = self.matcher.match(
            regions_old, regions_new, img_old_aligned, img_new)
        timings["matching"] = time.time() - t

        t = time.time()
        # compare_pair returns an updated pair (with change scores) β€” write back in place
        for i, pair in enumerate(matched):
            matched[i] = self.comparator.compare_pair(pair, img_old_aligned, img_new)
        timings["siamese"] = time.time() - t

        if verbose:
            logger.info("Timings β†’ align: %.2fs | extract: %.2fs | match: %.2fs | siamese: %.2fs",
                        timings["alignment"], timings["extraction"],
                        timings["matching"], timings["siamese"])

        h, w = img_new.shape[:2]
        # After the ORB fix, M maps OLD→NEW (forward).
        # _project_region uses this matrix to map unmatched OLD region corners
        # into NEW-page coordinates for heatmap rendering β€” so pass M directly,
        # NOT its inverse. (Previously M mapped NEW→OLD so the inverse was
        # needed; now the roles are corrected.)
        heatmap = HeatmapGenerator.generate(
            (h, w), matched, unmatched_old, unmatched_new,
            M_inv=M, change_threshold=0.05,
        )
        # ── Change % calculation (two-part fix) ────────────────────────
        #
        # Part A β€” pixel-diff gate on unmatched regions
        #   Unmatched regions are NOT automatically "added/deleted".
        #   They may simply be regions the matcher failed to pair even though
        #   the content is identical. We compare each unmatched region to its
        #   nearest spatial counterpart in the opposite list; only those whose
        #   pixel diff exceeds _UNMATCHED_PIXEL_THR are counted as truly changed.
        #
        # Part B β€” normalise against full page area (not just detected regions)
        #   Using content_area as denominator collapses to 100% when all regions
        #   are unmatched. Using h*w gives a stable baseline independent of
        #   how many regions were detected or matched.

        truly_deleted = [
            r for r in unmatched_old
            if _is_truly_changed(r, unmatched_new, img_old_aligned, img_new)
        ]
        truly_added = [
            r for r in unmatched_new
            if _is_truly_changed(r, unmatched_old, img_new, img_old_aligned)
        ]

        page_area = max(h * w, 1)  # Part B denominator
        changed_area = sum(p.region_new.area for p in matched if p.total_change > 0.05)
        deleted_area = sum(r.area for r in truly_deleted)
        added_area = sum(r.area for r in truly_added)
        total_pct = min(100.0 * (changed_area + added_area + deleted_area) / page_area, 100.0)

        return ComparisonResult(
            matched_pairs=matched,
            unmatched_old=unmatched_old,
            unmatched_new=unmatched_new,
            global_transform=M,
            total_change_pct=total_pct,
            heatmap=heatmap,
            img_old_aligned=img_old_aligned,  # ← stored for thermal overlay
        )
931
+
932
+
933
+ # ══════════════════════════════════════════════════════════════════════
934
+ # GRADIO APP β€” HELPERS
935
+ # ══════════════════════════════════════════════════════════════════════
936
+
937
+ def _pick_device() -> torch.device:
938
+ if torch.cuda.is_available():
939
+ return torch.device("cuda")
940
+ if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
941
+ return torch.device("mps")
942
+ return torch.device("cpu")
943
+
944
+
945
def _page_to_rgb(doc: fitz.Document, idx: int, dpi: int) -> np.ndarray:
    """Rasterise page `idx` of `doc` at `dpi` and return an (H, W, 3) uint8 RGB array."""
    pixmap = doc[idx].get_pixmap(dpi=dpi)
    flat = np.frombuffer(pixmap.samples, np.uint8)
    return flat.reshape(pixmap.height, pixmap.width, 3)
948
+
949
+
950
+ def _build_summary(
951
+ page_results: list,
952
+ aligned: bool,
953
+ skip_old_p1: bool = False,
954
+ skip_new_p1: bool = False,
955
+ ) -> str:
956
+ total_changes = [pr["total_change_pct"] for pr in page_results]
957
+
958
+ lines = [
959
+ "╔══════════════════════════════════════════════════════════╗",
960
+ "β•‘ POWERGRID DOCUMENT AUDIT β€” CHANGE REPORT β•‘",
961
+ "β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•",
962
+ "",
963
+ f" Total Pages Analysed : {len(page_results)}",
964
+ f" Overall Avg Change : {np.mean(total_changes):.2f}%",
965
+ "",
966
+ "──────────────────────────────────────────────────────────",
967
+ " PAGE-WISE CHANGE SUMMARY",
968
+ "──────────────────────────────────────────────────────────",
969
+ ]
970
+
971
+ for pr in page_results:
972
+ pct = pr["total_change_pct"]
973
+ status = "βœ… MINIMAL" if pct < 5 else "⚠️ MODERATE" if pct < 20 else "πŸ”΄ SIGNIFICANT"
974
+ lines.append(f" Page {pr['page']:>3} β”‚ {pct:>5.1f}% β”‚ {status}")
975
+
976
+ significant = [pr["page"] for pr in page_results if pr["total_change_pct"] > 20]
977
+ if significant:
978
+ lines += [
979
+ "",
980
+ f" ⚠️ Pages with significant changes (>20%): {significant}",
981
+ ]
982
+
983
+ return "\n".join(lines)
984
+
985
+
986
def _build_output_pdf(page_results: list, output_path: str,
                      process_dpi: int = 400) -> str:
    """
    Write the per-page overlay images into a PDF at full pixel depth.

    PyMuPDF page sizes are in points (1 pt = 1/72 inch) while the overlay
    images were rendered at `process_dpi`.  Sizing each page to
    pixels * 72 / process_dpi makes insert_image() map the image 1:1 onto
    the page rect, so no resampling occurs and every pixel is preserved.
    PNG encoding keeps the embed lossless (no JPEG ringing).

    Returns `output_path`.
    """
    pts_per_px = 72.0 / process_dpi  # point size of one image pixel
    doc_out = fitz.open()
    for entry in page_results:
        overlay = entry["align_check"].convert("RGB")
        width_px, height_px = overlay.size
        page = doc_out.new_page(width=width_px * pts_per_px,
                                height=height_px * pts_per_px)
        buf = io.BytesIO()
        overlay.save(buf, format="PNG", optimize=True)
        page.insert_image(page.rect, stream=buf.getvalue())
    doc_out.save(output_path, deflate=True, garbage=4, clean=True)
    doc_out.close()
    return output_path
1015
+
1016
+
1017
+ # ══════════════════════════════════════════════════════════════════════
1018
+ # SPECIFIC-REGION HELPER β€” semantic global search in OLD document
1019
+ # ══════════════════════════════════════════════════════════════════════
1020
+
1021
# ImageNet normalisation reused from SemanticRetrievalMatcher: 224Γ—224 resize,
# tensor conversion, then the standard ImageNet channel statistics.  Must stay
# in sync with the matcher's preprocessing so embeddings are comparable.
_REGION_TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
1028
+
1029
+
1030
def _embed_patch(patch_rgb: np.ndarray,
                 encoder: "_SiameseEncoder",
                 device: torch.device) -> torch.Tensor:
    """Encode one RGB numpy patch into a (128,) L2-normalised embedding."""
    pil_patch = Image.fromarray(patch_rgb)
    batch = _REGION_TRANSFORM(pil_patch).unsqueeze(0).to(device)  # (1, 3, 224, 224)
    with torch.no_grad():
        embeddings, _ = encoder.encode(batch)  # (1, 128)
    return embeddings[0]
1038
+
1039
+
1040
def _find_matching_region_in_old(
    new_crop: np.ndarray,
    img_old_full: np.ndarray,
    encoder: "_SiameseEncoder",
    device: torch.device,
) -> Tuple[int, int, int, int]:
    """
    Locate where new_crop (user-selected patch from NEW page) sits inside
    img_old_full (the complete OLD page).

    Method β€” Semantic sliding-window search
    ----------------------------------------
    1. Encode new_crop with the shared ResNet50 encoder β†’ 128-d embedding.
    2. Slide a window across img_old_full at multiple scales (Β±30 % of the
       crop size). Step = 50 % of window size so adjacent windows overlap
       and the true location is never missed.
    3. Encode every window patch and compute cosine similarity with the
       query embedding. Pick the window with the highest similarity.
    4. Clamp the winning box to page bounds and return it.

    Why semantic (not pixel-level):
    β€’ ResNet50 encodes *what* is in a region (shapes, structure, symbols),
      not pixel values. Two revisions of the same table/panel/diagram will
      have near-identical embeddings even if text values changed slightly.
    β€’ Scale-invariant: the multi-scale sweep handles content that was
      enlarged or shrunk between revisions.
    β€’ Position-invariant: the full-page sweep finds content anywhere on the
      OLD page regardless of how far it moved.

    Returns (x1, y1, x2, y2) in img_old_full pixel space.

    NOTE(review): there is no minimum-similarity floor β€” the best window is
    returned even when its cosine score is poor; confirm that is acceptable.
    """
    crop_h, crop_w = new_crop.shape[:2]
    old_h, old_w = img_old_full.shape[:2]

    def _clamp_box(bx: int, by: int, bw: int, bh: int
                   ) -> Tuple[int, int, int, int]:
        # Clamp the origin into the page, then clamp the size to what remains.
        bx = max(0, min(bx, old_w - 1))
        by = max(0, min(by, old_h - 1))
        bw = max(1, min(bw, old_w - bx))
        bh = max(1, min(bh, old_h - by))
        return bx, by, bx + bw, by + bh

    # ── Step 1: encode the query (NEW crop) ──────────────────────────
    q_emb = _embed_patch(new_crop, encoder, device)  # (128,)

    # ── Step 2: build candidate windows across scales ────────────────
    # Scales relative to the crop's own size. For a 400-DPI page a crop
    # that is, say, 600 px wide is tested at 420 … 780 px widths.
    scales = (0.70, 0.85, 1.00, 1.15, 1.30)
    aspect = crop_h / max(crop_w, 1)  # NOTE(review): computed but never used below

    candidates: List[Tuple[int, int, int, int]] = []  # (x, y, w, h)

    for sc in scales:
        win_w = max(32, int(crop_w * sc))
        win_h = max(32, int(crop_h * sc))
        if win_w > old_w or win_h > old_h:
            continue
        # 50 % overlap between neighbouring windows
        step_x = max(1, win_w // 2)
        step_y = max(1, win_h // 2)
        for y in range(0, old_h - win_h + 1, step_y):
            for x in range(0, old_w - win_w + 1, step_x):
                candidates.append((x, y, win_w, win_h))

    logger.info(
        "_find_matching_region_in_old: %d candidate windows across %d scales",
        len(candidates), len(scales),
    )

    if not candidates:
        # Entire crop is bigger than the old page β€” return full page
        logger.warning("_find_matching_region_in_old: crop >= page; returning full page box.")
        return _clamp_box(0, 0, old_w, old_h)

    # ── Step 3: batch-encode all windows, find best cosine similarity ─
    # Process in mini-batches of 64 to avoid OOM on large pages.
    BATCH = 64
    best_sim: float = -1.0
    best_box: Tuple[int, int, int, int] = candidates[0]

    for start in range(0, len(candidates), BATCH):
        batch_cands = candidates[start: start + BATCH]
        patches = []
        for (cx, cy, cw, ch) in batch_cands:
            patch = img_old_full[cy: cy + ch, cx: cx + cw]
            patches.append(patch)

        tensors = [
            _REGION_TRANSFORM(Image.fromarray(p)) for p in patches
        ]
        batch_t = torch.stack(tensors).to(device)  # (B, 3, 224, 224)
        with torch.no_grad():
            embs, _ = encoder.encode(batch_t)  # (B, 128)

        # Cosine similarity: q_emb is already L2-normed, embs are L2-normed
        sims = (embs @ q_emb).cpu().numpy()  # (B,)

        idx = int(sims.argmax())
        if sims[idx] > best_sim:
            best_sim = float(sims[idx])
            best_box = batch_cands[idx]

    bx, by, bw, bh = best_box
    x1o, y1o, x2o, y2o = _clamp_box(bx, by, bw, bh)

    logger.info(
        "_find_matching_region_in_old: best cosine=%.4f OLD box (%d,%d)–(%d,%d)",
        best_sim, x1o, y1o, x2o, y2o,
    )
    return (x1o, y1o, x2o, y2o)
1150
+
1151
+
1152
+ # ══════════════════════════════════════════════════════════════════════
1153
+ # CORE PROCESSING
1154
+ # ══════════════════════════════════════════════════════════════════════
1155
+
1156
def run_comparison(
    pdf_old_file,
    pdf_new_file,
    skip_old_p1: bool,
    skip_new_p1: bool,
    enable_align: bool,
    compare_mode: str,
    page_old_input: int,
    page_new_input: int,
    page_compare_mode: str = "Full Page",
    region_coords=None,
    display_dpi: int = 72,
    progress=gr.Progress(),
):
    """
    Gradio callback: compare two uploaded PDF revisions page by page.

    Parameters
    ----------
    pdf_old_file, pdf_new_file : gr.File values; only `.name` (temp-file path)
        is used.  Missing uploads raise gr.Error.
    skip_old_p1, skip_new_p1 : skip each document's cover page
        (Full Document mode only).
    enable_align : run ORB auto-alignment inside the pipeline.
    compare_mode : "Full Document" or "Specific Pages".
    page_old_input, page_new_input : 1-based page numbers (Specific Pages mode).
    page_compare_mode : "Full Page" or "Specific Region" (Specific Pages only).
    region_coords : {x, y, width, height} of the box drawn on the NEW-doc
        preview, in preview pixels; None β†’ full page.
    display_dpi : DPI the region preview was rendered at; used to scale the
        drawn box up to process DPI.
    progress : Gradio progress tracker (injected by the framework).

    Returns
    -------
    (page_results, summary_text, output_pdf_path, 1,
     gr.update(maximum=num_pairs, value=1))
    """
    dpi = 400  # process DPI β€” higher = more pixel depth in overlay output

    if pdf_old_file is None or pdf_new_file is None:
        raise gr.Error("Please upload both Previous Revision and New Document PDF files.")

    device = _pick_device()

    pipeline = CoarseToFinePipeline(
        align          = enable_align,
        device         = device,
        min_similarity = 0.50,
    )

    progress(0, desc="Opening PDF files …")
    doc_old = fitz.open(pdf_old_file.name)
    doc_new = fitz.open(pdf_new_file.name)

    # ── Build the list of (old_page_idx, new_page_idx) pairs to process ──
    if compare_mode == "Specific Pages":
        # Convert 1-based user input to 0-based index
        old_idx_req = int(page_old_input or 1) - 1
        new_idx_req = int(page_new_input or 1) - 1
        # Clamp to valid range
        old_idx_req = max(0, min(old_idx_req, len(doc_old) - 1))
        new_idx_req = max(0, min(new_idx_req, len(doc_new) - 1))
        page_pairs = [(old_idx_req, new_idx_req)]
    else:
        # Full document mode
        old_start = 1 if skip_old_p1 else 0
        new_start = 1 if skip_new_p1 else 0
        old_pages = len(doc_old) - old_start
        new_pages = len(doc_new) - new_start
        num_pages = min(old_pages, new_pages)

        if skip_old_p1:
            gr.Info("Skipping cover page of Previous Revision.")
        if skip_new_p1:
            gr.Info("Skipping cover page of New Document.")
        if old_pages != new_pages:
            gr.Warning(
                f"Page count mismatch: Previous Revision={old_pages}, New Document={new_pages}. "
                f"Processing {num_pages} pages."
            )
        page_pairs = [(pg + old_start, pg + new_start) for pg in range(num_pages)]

    num_pairs = len(page_pairs)
    page_results = []

    for i, (old_idx, new_idx) in enumerate(page_pairs):
        progress(i / num_pairs, desc=f"Processing page {i + 1} / {num_pairs} …")
        img_old = _page_to_rgb(doc_old, old_idx, dpi)
        img_new = _page_to_rgb(doc_new, new_idx, dpi)

        # ── Normalise page dimensions before any cropping ─────────────
        # Both pages must have the same native DPI dimensions so that the
        # same pixel box selects the same physical region in both docs.
        if img_old.shape != img_new.shape:
            img_old = cv2.resize(img_old, (img_new.shape[1], img_new.shape[0]))

        # ── Specific-region crop ──────────────────────────────────────
        # The user drew a box on the NEW-doc preview (at display_dpi).
        # Steps:
        #   1. Scale the drag coordinates from preview pixels β†’ process DPI pixels.
        #   2. Crop the same pixel box from BOTH old and new pages.
        #      (Engineering drawings keep the same layout between revisions β€”
        #      same position = same physical area. The ORB aligner inside
        #      pipeline.compare() handles any sub-pixel drift between the two.)
        #   3. Replace img_old / img_new with the two crops β†’ overlay is
        #      scoped to only the selected region.
        if (compare_mode == "Specific Pages"
                and page_compare_mode == "Specific Region"
                and region_coords):
            rx = region_coords.get("x", 0)
            ry = region_coords.get("y", 0)
            rw = region_coords.get("width", img_new.shape[1])
            rh = region_coords.get("height", img_new.shape[0])
            sf = dpi / float(display_dpi or 72)  # preview px β†’ process DPI px
            x1 = max(0, int(rx * sf))
            y1 = max(0, int(ry * sf))
            x2 = min(img_new.shape[1], int((rx + rw) * sf))
            y2 = min(img_new.shape[0], int((ry + rh) * sf))

            logger.info(
                "Specific Region: display_dpi=%d sf=%.3f "
                "preview-box (%d,%d,%d,%d) β†’ process-px (%d,%d)–(%d,%d)",
                display_dpi, sf, rx, ry, rw, rh, x1, y1, x2, y2,
            )

            if x2 > x1 and y2 > y1:
                # Step 1 β€” crop the selected region from NEW page
                img_new_crop = img_new[y1:y2, x1:x2]

                # Step 2 β€” semantic global search: encode the NEW crop with
                #          ResNet50, slide windows over the FULL OLD page at
                #          multiple scales, pick the highest cosine-similarity
                #          window as the matching region in OLD.
                ox1, oy1, ox2, oy2 = _find_matching_region_in_old(
                    new_crop     = img_new_crop,
                    img_old_full = img_old,
                    encoder      = pipeline.matcher.encoder,
                    device       = device,
                )
                logger.info(
                    "Specific Region: NEW (%d,%d)–(%d,%d) β†’ OLD (%d,%d)–(%d,%d)",
                    x1, y1, x2, y2, ox1, oy1, ox2, oy2,
                )

                # Step 3 β€” crop OLD at found location; resize to exactly match
                #          NEW crop so pipeline.compare() gets equal-size inputs
                img_old_raw = img_old[oy1:oy2, ox1:ox2]
                nh, nw = img_new_crop.shape[:2]
                if img_old_raw.shape[:2] != (nh, nw):
                    img_old_crop = cv2.resize(
                        img_old_raw, (nw, nh), interpolation=cv2.INTER_LINEAR,
                    )
                else:
                    img_old_crop = img_old_raw

                # Step 4 β€” overlay is scoped to the selected region only
                img_old = img_old_crop
                img_new = img_new_crop

        result = pipeline.compare(img_old, img_new)

        old_aligned_for_check = (
            result.img_old_aligned if result.img_old_aligned is not None
            else img_old
        )
        align_check = Visualiser.draw_alignment_check(old_aligned_for_check, img_new)

        page_results.append({
            "page": i + 1,
            "result": result,
            "align_check": Image.fromarray(align_check),
            "original": Image.fromarray(img_old),
            "revised": Image.fromarray(img_new),
            "total_change_pct": result.total_change_pct,
        })

    doc_old.close()
    doc_new.close()

    progress(0.95, desc="Generating report PDF …")
    output_pdf = _build_output_pdf(page_results, "ctf_output.pdf", process_dpi=dpi)
    summary = _build_summary(page_results, enable_align, skip_old_p1, skip_new_p1)

    progress(1.0, desc="Done!")
    return page_results, summary, output_pdf, 1, gr.update(maximum=num_pairs, value=1)
1318
+
1319
+
1320
def get_page_view(page_num, pages_data, view_mode, rotation: int = 0,
                  nudge_x: int = 0, nudge_y: int = 0, nudge_scale: float = 1.0):
    """
    Fetch the PIL image for one result page in the requested view mode.

    The manual nudge (translate/scale of the red layer) applies only to the
    "Auto-Overlay" view; `rotation` (degrees) applies to every view.
    Returns None when there is no data or no image for the chosen view.
    """
    if not pages_data:
        return None

    index = max(0, min(int(page_num) - 1, len(pages_data) - 1))
    entry = pages_data[index]

    view_keys = {
        "Auto-Overlay": "align_check",
        "Previous Revision": "original",
        "New Document": "revised",
    }
    img = entry.get(view_keys.get(view_mode, "align_check"))
    if img is None:
        return None

    # Manual fine-tune is only meaningful on the overlay view
    scale = float(nudge_scale) if nudge_scale else 1.0
    has_nudge = nudge_x != 0 or nudge_y != 0 or abs(scale - 1.0) > 1e-4
    if view_mode == "Auto-Overlay" and has_nudge:
        img = _apply_nudge_overlay(entry, nudge_x, nudge_y, scale)

    if img is not None and rotation % 360 != 0:
        img = img.rotate(rotation, expand=True)
    return img
1344
+
1345
+
1346
def _apply_nudge_overlay(pr: dict, dx: int, dy: int, scale: float = 1.0) -> Image.Image:
    """
    Re-render the Auto-Overlay with the NEW (red) layer translated by
    (dx, dy) pixels and scaled by `scale` about the image centre.

    The cyan layer (Previous Revision, already aligned) stays fixed; only
    the New Document layer moves, letting the user manually cancel any
    residual misalignment.  Returns None when no overlay is stored.
    """
    if pr.get("align_check") is None:
        return None

    # Recover the two grayscale layers from the stored overlay:
    # channel 0 carried the aligned Previous Revision, channel 1 the New Doc.
    rgb = np.array(pr["align_check"].convert("RGB"))
    prev_gray = rgb[:, :, 0]
    new_gray = rgb[:, :, 1]

    h, w = prev_gray.shape
    cx, cy = w / 2.0, h / 2.0

    s = float(scale) if scale and scale > 0 else 1.0
    # Single 2Γ—3 affine = scale about (cx, cy) followed by translate (dx, dy):
    #   M = T(dx,dy) Β· T(cx,cy) Β· S(s) Β· T(-cx,-cy)
    affine = np.float32([
        [s, 0, dx + cx * (1 - s)],
        [0, s, dy + cy * (1 - s)],
    ])

    new_shifted = cv2.warpAffine(
        new_gray, affine, (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=255,  # pad with white so borders don't read as ink
    )

    # R = fixed old layer, G = B = nudged new layer (β†’ red/cyan fringes)
    composite = np.dstack([prev_gray, new_shifted, new_shifted])
    return Image.fromarray(composite.astype(np.uint8))
1384
+
1385
+
1386
+ # ══════════════════════════════════════════════════════════════════════
1387
+ # GRADIO UI
1388
+ # ══════════════════════════════════════════════════════════════════════
1389
+
1390
# Load the external stylesheet that skins the Gradio UI.  styles.css must sit
# next to this file; a missing stylesheet is a deployment error, so the
# FileNotFoundError is deliberately allowed to surface at startup.
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "styles.css"),
          encoding="utf-8") as _css_f:
    _CSS = _css_f.read()

# UI theme: blue primary palette, gray neutrals, Inter typeface.
_THEME = gr.themes.Base(
    primary_hue=gr.themes.colors.blue,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
)
1399
+
1400
+ # Gradio 6+: theme & css are passed to launch(), not Blocks()
1401
+ with gr.Blocks(title="POWERGRID Document Auditor") as demo:
1402
+
1403
+ # ── Header ─────────────────────────────────────────────────────────
1404
+ _logo_tag = (
1405
+ f'<img src="{_LOGO_URI}" alt="POWERGRID Logo" />'
1406
+ if _LOGO_URI else
1407
+ '<span style="font-size:1.4rem;font-weight:900;color:#003087;letter-spacing:-1px;">PG</span>'
1408
+ )
1409
+ gr.HTML(f"""
1410
+ <div id="app-header">
1411
+ <div id="app-header-inner">
1412
+ <div id="app-header-logo">{_logo_tag}</div>
1413
+ <div id="app-header-text">
1414
+ <h1>POWERGRID Document Auditor</h1>
1415
+ <p>Power Grid Corporation of India Limited &nbsp;&mdash;&nbsp; AI-Powered Document Comparison</p>
1416
+ </div>
1417
+ </div>
1418
+ </div>
1419
+ """)
1420
+
1421
+ # (JS injected via demo.load below β€” see end of Blocks context)
1422
+
1423
+ # ── Shared State ───────────────────────────────────────────────────
1424
+ pages_state = gr.State(value=None)
1425
+ rotation_state = gr.State(value=0)
1426
+ nudge_x_state = gr.State(value=0) # manual X offset for red (New Doc) layer
1427
+ nudge_y_state = gr.State(value=0) # manual Y offset for red (New Doc) layer
1428
+ nudge_scale_state = gr.State(value=1.0) # manual scale for red (New Doc) layer
1429
+ region_coords_state = gr.State(value=None) # {x,y,width,height} in preview px; None = full page
1430
+ display_dpi_state = gr.State(value=72) # DPI used when rendering the region preview
1431
+
1432
+ # ── Layout ─────────────────────────────────────────────────────────
1433
+ with gr.Row(equal_height=False):
1434
+
1435
+ # ════════════════════════════════════════════════════════════
1436
+ # LEFT PANE β€” inputs
1437
+ # ════════════════════════════════════════════════════════════
1438
+ with gr.Column(scale=1, min_width=290, elem_id="left-panel"):
1439
+
1440
+ gr.HTML('<div class="section-label">Documents</div>')
1441
+ pdf_old = gr.File(label="Previous Revision PDF", file_types=[".pdf"])
1442
+ skip_old_p1 = gr.Checkbox(
1443
+ value=False,
1444
+ label="Skip cover page of Previous Revision",
1445
+ interactive=False,
1446
+ elem_classes=["skip-cb"],
1447
+ )
1448
+
1449
# ── Left panel (continued): New-PDF input, options, compare mode, nudge pad ──
# NOTE(review): this run of statements sits inside a left-panel column whose
# `with` opening is above this chunk — indentation reconstructed; verify depth.
gr.HTML('<div class="section-divider"></div>')
pdf_new = gr.File(label="Revised (New) PDF", file_types=[".pdf"])
skip_new_p1 = gr.Checkbox(
    value=False,
    label="Skip cover page of New Revision",
    interactive=False,  # enabled later by on_pdf_upload for multi-page PDFs
    elem_classes=["skip-cb"],
)

gr.HTML('<div class="section-divider"></div>')
gr.HTML('<div class="section-label">Options</div>')
enable_align = gr.Checkbox(
    value=True,
    label="Auto-align pages before comparing",
    info="Enable if documents were scanned or printed at different positions or scales.",
)

gr.HTML('<div class="section-divider"></div>')
gr.HTML('<div class="section-label">Compare Mode</div>')
compare_mode = gr.Radio(
    choices=["Full Document", "Specific Pages"],
    value="Full Document",
    label="Compare Mode",
    show_label=False,
    elem_id="compare-mode-radio",
)
# Page-number inputs, only shown in "Specific Pages" mode (see on_compare_mode_change)
with gr.Row(visible=False, elem_id="specific-pages-row") as specific_pages_row:
    page_old_input = gr.Number(
        value=1, minimum=1, step=1, precision=0,
        label="Prev. Revision Page",
        elem_id="page-old-input",
    )
    page_new_input = gr.Number(
        value=1, minimum=1, step=1, precision=0,
        label="New Document Page",
        elem_id="page-new-input",
    )

# Sub-options shown when "Specific Pages" is selected
with gr.Column(visible=False, elem_id="region-col") as region_col:
    page_compare_mode = gr.Radio(
        choices=["Full Page", "Specific Region"],
        value="Full Page",
        label="Page Comparison",
        show_label=True,
        elem_id="page-compare-mode-radio",
    )

# Region selection: gr.Image shows the page; a JS canvas overlay captures the bbox drag
with gr.Column(visible=False, elem_id="region-preview-col") as region_preview_col:
    region_readout = gr.HTML(
        value='<div id="region-readout">No region selected β€” full page will be used</div>',
        elem_id="region-readout",
    )
    # gr.Image: Python pushes the page PIL image here (always visible in DOM)
    region_page_img = gr.Image(
        value=None,
        label=None,
        show_label=False,
        type="pil",
        interactive=False,
        elem_id="region-page-img",
        height=380,
    )
    # Coords textbox: JS-to-Python bridge — visible but CSS-collapsed to 0px
    region_coords_txt = gr.Textbox(
        value="",
        label=None,
        show_label=False,
        elem_id="region-coords-txt",
        elem_classes=["region-coords-hidden"],
    )
    clear_region_btn = gr.Button(
        "βœ• Clear Region",
        size="sm",
        elem_id="clear-region-btn",
    )

gr.HTML('<div class="section-divider"></div>')
run_btn = gr.Button("Run Audit", variant="primary", size="lg", elem_id="run-btn")

gr.HTML('<div class="section-divider"></div>')
gr.HTML('<div class="section-label">Fine-Tune Alignment</div>')

# ── MacBook-style arrow key D-pad ─────────────────────────
# Row 1: [ β–² ] (centred, half-row; the empty flex divs centre the button)
with gr.Row(equal_height=True, elem_id="nudge-row-top"):
    gr.HTML('<div style="flex:1;min-width:0"></div>')
    nudge_up_btn = gr.Button("β–²", elem_id="nudge-up", min_width=44, scale=0)
    gr.HTML('<div style="flex:1;min-width:0"></div>')

# Row 2: [ β—€ ][ β–Ό ][ β–Ά ]
with gr.Row(equal_height=True, elem_id="nudge-row-bot"):
    nudge_left_btn = gr.Button("β—€", elem_id="nudge-left", min_width=44, scale=0)
    nudge_down_btn = gr.Button("β–Ό", elem_id="nudge-down", min_width=44, scale=0)
    nudge_right_btn = gr.Button("β–Ά", elem_id="nudge-right", min_width=44, scale=0)

gr.HTML('<p class="nudge-tip">Tip: Run Audit resets alignment</p>')

# Step size (pixels) applied per arrow-button click
nudge_step = gr.Number(
    value=1, minimum=1, maximum=100, step=1,
    label="Step Size (px)", precision=0,
    elem_id="nudge-step",
)
# Scale factor applied to the red (new-document) overlay layer
nudge_scale = gr.Number(
    value=1.0, minimum=0.10, maximum=10.0, step=0.005,
    label="Scale β€” Red Layer", precision=3,
    elem_id="nudge-scale",
)
# Live x/y/scale readout, refreshed by the nudge/scale handlers via _readout_html
nudge_readout = gr.HTML(
    value='<div id="nudge-readout-wrap">x&nbsp;=&nbsp;+0 px<br>y&nbsp;=&nbsp;+0 px<br>scale&nbsp;=&nbsp;1.000</div>',
    elem_id="nudge-readout",
)
1563
+ # ════════════════════════════════════════════════════════════
1564
+ # RIGHT PANE β€” results
1565
+ # ════════════════════════════════════════════════════════════
1566
# ════════════════════════════════════════════════════════════
# RIGHT PANE β€” results
# ════════════════════════════════════════════════════════════
with gr.Column(scale=3, elem_id="right-panel"):

    # ── Toolbar: view tabs | rotation buttons ──
    with gr.Row(elem_id="toolbar-row"):
        view_mode = gr.Radio(
            choices=["Auto-Overlay", "Previous Revision", "New Document"],
            value="Auto-Overlay",
            label="View",
            show_label=False,
            scale=1,
            min_width=320,
            elem_id="view-mode-radio",
        )
        gr.HTML('<div class="toolbar-sep"></div>')
        rot_left_btn = gr.Button("β†Ί", scale=0, elem_id="rot-left", min_width=38)
        rot_right_btn = gr.Button("↻", scale=0, elem_id="rot-right", min_width=38)

    # ── Page slider (shown only after audit runs) ──────────────
    page_slider = gr.Slider(
        minimum=1, maximum=1, value=1, step=1,
        label="Page",
        visible=False,  # revealed by on_run when the result has >1 page
        elem_id="page-slider",
    )

    # Hidden per-session state: current page and page count of the result
    page_num_state = gr.State(value=1)
    total_pages_state = gr.State(value=1)

    # Main result viewer (overlay / prev / new, per view_mode)
    result_image = gr.Image(
        label="",
        type="pil",
        height=720,
        interactive=False,
        elem_id="result-image",
    )

    # Static colour legend for the overlay view (runtime HTML, kept verbatim)
    gr.HTML("""
    <div id="legend-bar" style="display:flex; gap:18px; flex-wrap:wrap; align-items:center;">
      <span style="font-size:0.60rem;font-weight:700;color:#8BA0BB;text-transform:uppercase;
            letter-spacing:0.11em;white-space:nowrap;flex-shrink:0;">Overlay Legend</span>
      <span style="display:flex;align-items:center;gap:6px;">
        <span style="width:12px;height:12px;border-radius:3px;background:#7A7A7A;
              flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
        <span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
          <b style="color:#0F1C2E;font-weight:600;">Gray</b>&nbsp;&mdash;&nbsp;Unchanged</span>
      </span>
      <span style="display:flex;align-items:center;gap:6px;">
        <span style="width:12px;height:12px;border-radius:3px;background:#00BBBB;
              flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
        <span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
          <b style="color:#007070;font-weight:600;">Cyan</b>&nbsp;&mdash;&nbsp;Previous Revision</span>
      </span>
      <span style="display:flex;align-items:center;gap:6px;">
        <span style="width:12px;height:12px;border-radius:3px;background:#EE3333;
              flex-shrink:0;display:inline-block;box-shadow:0 1px 2px rgba(0,0,0,0.15);"></span>
        <span style="font-size:0.75rem;color:#4A6585;white-space:nowrap;">
          <b style="color:#BB0000;font-weight:600;">Red</b>&nbsp;&mdash;&nbsp;New Document</span>
      </span>
    </div>
    """)

    with gr.Row():
        pdf_output = gr.File(label="⬇️ Download Result PDF")
1630
+
1631
+ # ══════════════════════════════════════════════════════════════════
1632
+ # EVENT HANDLERS
1633
+ # ══════════════════════════════════════════════════════════════════
1634
+
1635
def on_pdf_upload(pdf_file):
    """Enable the skip-cover-page checkbox only for multi-page PDFs.

    Args:
        pdf_file: The gr.File upload value (object with a ``.name`` path),
            or None when the upload is cleared.

    Returns:
        gr.update for the checkbox: disabled + unchecked for no file or a
        single-page PDF, enabled otherwise.
    """
    if pdf_file is None:
        return gr.update(interactive=False, value=False)
    try:
        doc = fitz.open(pdf_file.name)
        try:
            n = len(doc)
        finally:
            doc.close()  # always release the handle, even if len() raises
    except Exception:
        # Unreadable/corrupt file: leave the checkbox usable; the real error
        # surfaces later when the audit actually opens the PDF.
        return gr.update(interactive=True)
    if n <= 1:
        return gr.update(interactive=False, value=False)
    return gr.update(interactive=True)
1649
+
1650
+ def _readout_html(nx: int, ny: int, ns: float) -> str:
1651
+ return (
1652
+ f'<div id="nudge-readout-wrap">'
1653
+ f'x&nbsp;=&nbsp;{nx:+d}&thinsp;px<br>'
1654
+ f'y&nbsp;=&nbsp;{ny:+d}&thinsp;px<br>'
1655
+ f'scale&nbsp;=&nbsp;{ns:.3f}'
1656
+ f'</div>'
1657
+ )
1658
+
1659
def on_compare_mode_change(mode):
    """Reveal the page-number row and region column only in 'Specific Pages' mode.

    Returns gr.update(visible=...) twice: for specific_pages_row and region_col.
    """
    visible = mode == "Specific Pages"
    return gr.update(visible=visible), gr.update(visible=visible)
1663
+
1664
def on_load_preview(pdf_new_f, pg_new):
    """Render one page of the New Document PDF as a low-DPI preview image.

    Args:
        pdf_new_f: The gr.File value for the new PDF (must not be None).
        pg_new: 1-based page number chosen by the user; clamped into range.

    Returns:
        Tuple of (PIL image, coords-textbox reset "", coords-state reset None,
        preview DPI, readout HTML) matching ``_preview_outputs``.

    Raises:
        gr.Error: If no PDF is uploaded or the PDF has no pages.
    """
    if pdf_new_f is None:
        raise gr.Error("Please upload the New Document PDF first.")
    preview_dpi = 72  # low DPI keeps the inline preview small and fast
    doc = fitz.open(pdf_new_f.name)
    try:
        if len(doc) == 0:
            raise gr.Error("The uploaded PDF contains no pages.")
        # Clamp the 1-based user page number into the valid 0-based index range.
        idx = max(0, int(pg_new or 1) - 1)
        idx = min(idx, len(doc) - 1)
        arr = _page_to_rgb(doc, idx, preview_dpi)
    finally:
        doc.close()  # release the document even if rendering fails
    pil_img = Image.fromarray(arr)
    readout = '<div id="region-readout">Draw a box on the image below to select a region</div>'
    # returns: pil_img, coords_txt_reset, coords_state_reset, display_dpi, readout
    return pil_img, "", None, preview_dpi, readout
1678
+
1679
def on_region_coords_change(coords_txt):
    """Parse the 'x,y,w,h' string pushed by the JS canvas into a region dict.

    Returns:
        (coords_dict_or_None, readout_html). The dict has keys
        x / y / width / height; None means no usable region.
    """
    if not coords_txt or not coords_txt.strip():
        return None, '<div id="region-readout">No region selected β€” full page will be used</div>'
    try:
        # Convert every comma-separated field; any non-numeric field is invalid.
        parts = [float(v) for v in coords_txt.strip().split(",")]
        x, y = int(parts[0]), int(parts[1])
        w, h = int(parts[2]), int(parts[3])
        if min(w, h) < 5:
            return None, '<div id="region-readout">Region too small β€” drag a larger area</div>'
        readout = (
            f'<div id="region-readout">'
            f'βœ… Region: ({x}, {y}) β†’ ({x+w}, {y+h})'
            f'&nbsp;|&nbsp;{w}&times;{h} px'
            f'</div>'
        )
        return {"x": x, "y": y, "width": w, "height": h}, readout
    except Exception:
        return None, '<div id="region-readout">Invalid region β€” drag again</div>'
1698
+
1699
def on_clear_region():
    """Reset the selected region: wipe the coords textbox and coords state.

    The preview image itself is left untouched; the JS overlay notices the
    emptied textbox on its next poll and erases the drawn rectangle.
    """
    prompt = '<div id="region-readout">Draw a box on the image below to select a region</div>'
    return "", None, prompt
1702
+
1703
def on_run(pdf_old_f, pdf_new_f, skip_old, skip_new, align,
           cmp_mode, pg_old, pg_new,
           pg_cmp_mode, region_coords, display_dpi,
           progress=gr.Progress()):
    """Run the comparison and reset all per-view state for the fresh result.

    Output order matches run_btn.click: pages, rotation, nudge x/y/scale,
    page number, total pages, result PDF path, first image, readout HTML,
    and the page-slider visibility/range update.
    """
    (page_results, _summary, pdf_path, _unused_a, _unused_b) = run_comparison(
        pdf_old_f, pdf_new_f, skip_old, skip_new, align,
        cmp_mode, pg_old, pg_new,
        pg_cmp_mode, region_coords, display_dpi,
        progress
    )
    n_pages = len(page_results)
    first_img = page_results[0]["align_check"] if page_results else None
    slider_update = gr.update(visible=n_pages > 1, minimum=1, maximum=n_pages, value=1)
    return (
        page_results,            # pages_state
        0,                       # rotation reset
        0,                       # nudge_x reset
        0,                       # nudge_y reset
        1.0,                     # nudge_scale reset
        1,                       # current page back to 1
        n_pages,                 # total_pages
        pdf_path,                # downloadable result PDF
        first_img,               # first page shown immediately
        _readout_html(0, 0, 1.0),
        slider_update,
    )
1728
+
1729
def on_view_change(view, pg, total, pages_data, rot, nx, ny, ns):
    """Redraw the current page for the newly selected view.

    Note: rotation is deliberately reset to 0 (the incoming ``rot`` is ignored).
    """
    img = get_page_view(pg, pages_data, view, 0, nx, ny, ns)
    return img, 0
1731
+
1732
def on_rot_left(pg, total, pages_data, view, rot, nx, ny, ns):
    """Left-rotate button: advance the rotation state by +90Β° (mod 360) and redraw."""
    updated = (rot + 90) % 360
    img = get_page_view(pg, pages_data, view, updated, nx, ny, ns)
    return img, updated
1735
+
1736
def on_rot_right(pg, total, pages_data, view, rot, nx, ny, ns):
    """Right-rotate button: step the rotation state by -90Β° (mod 360) and redraw."""
    updated = (rot - 90) % 360
    img = get_page_view(pg, pages_data, view, updated, nx, ny, ns)
    return img, updated
1739
+
1740
def on_pg_slide(pg, total, pages_data, view, rot, nx, ny, ns):
    """Jump to the slider's page and record it in page_num_state."""
    page = int(pg or 1)  # None/0 falls back to page 1
    return get_page_view(page, pages_data, view, rot, nx, ny, ns), page
1744
+
1745
+ # ── Nudge handlers (arrow buttons + scale change) ─────────────────
1746
def on_nudge(direction: str, pg, total, pages_data, view, rot, nx, ny, ns, step):
    """Shift the red (new-document) layer by one step in the given direction.

    ``direction`` is one of "left" / "right" / "up" / "down"; anything else
    leaves the offsets unchanged. Returns the redrawn image, the updated
    nudge state, and the refreshed readout HTML.
    """
    step = int(step or 1)
    dx, dy = {
        "left":  (-step, 0),
        "right": (step, 0),
        "up":    (0, -step),
        "down":  (0, step),
    }.get(direction, (0, 0))
    nx += dx
    ny += dy
    img = get_page_view(pg, pages_data, view, rot, nx, ny, ns)
    return img, nx, ny, ns, _readout_html(nx, ny, ns)
1754
+
1755
def on_scale_change(sc, pg, total, pages_data, view, rot, nx, ny):
    """Apply a new red-layer scale factor and redraw (falsy input β†’ 1.0)."""
    ns = 1.0 if not sc else float(sc)
    redrawn = get_page_view(pg, pages_data, view, rot, nx, ny, ns)
    return redrawn, ns, _readout_html(nx, ny, ns)
1759
+
1760
# Re-check page count (and the skip-cover checkbox) whenever either upload changes
pdf_old.change(fn=on_pdf_upload, inputs=[pdf_old], outputs=[skip_old_p1])
pdf_new.change(fn=on_pdf_upload, inputs=[pdf_new], outputs=[skip_new_p1])

# Show / hide specific-page inputs and region sub-options when compare mode changes
compare_mode.change(
    fn=on_compare_mode_change,
    inputs=[compare_mode],
    outputs=[specific_pages_row, region_col],
)

# Shared output list for every handler that (re)loads the region preview.
# _preview_outputs: [region_page_img, region_coords_txt, coords_state, display_dpi_state, region_readout]
_preview_outputs = [region_page_img, region_coords_txt,
                    region_coords_state, display_dpi_state, region_readout]
1774
+
1775
def on_page_compare_mode_change(sub_mode, pdf_new_f, pg_new):
    """Toggle the region-preview column; auto-load the preview when it is shown.

    Returns the column visibility update followed by the five
    ``_preview_outputs`` values (image, coords text, coords state, DPI, readout).
    """
    show = sub_mode == "Specific Region"
    col_update = gr.update(visible=show)
    if show:
        try:
            preview = on_load_preview(pdf_new_f, pg_new)
            return (col_update,) + tuple(preview)
        except Exception:
            pass  # e.g. no PDF uploaded yet β€” fall through to the blank state
    blank = '<div id="region-readout">No region selected β€” full page will be used</div>'
    return col_update, None, "", None, 72, blank
1786
+
1787
# Sub-mode radio: toggles the region preview column and loads the preview
page_compare_mode.change(
    fn=on_page_compare_mode_change,
    inputs=[page_compare_mode, pdf_new, page_new_input],
    outputs=[region_preview_col] + _preview_outputs,
)
1792
+
1793
+ # Re-load preview when the New Doc page number changes (if Specific Region is active)
1794
def on_page_new_change(pg_new, pdf_new_f, sub_mode):
    """Refresh the region preview when the New Document page number changes.

    Only acts while 'Specific Region' mode is active and a PDF is uploaded;
    otherwise (or on any load failure) the blank preview state is returned.
    """
    if sub_mode == "Specific Region" and pdf_new_f is not None:
        try:
            return on_load_preview(pdf_new_f, pg_new)
        except Exception:
            pass  # fall back to the blank preview below
    blank = '<div id="region-readout">No region selected β€” full page will be used</div>'
    return None, "", None, 72, blank
1802
+
1803
# Re-load the preview when the New Doc page number changes (if region mode is on)
page_new_input.change(
    fn=on_page_new_change,
    inputs=[page_new_input, pdf_new, page_compare_mode],
    outputs=_preview_outputs,
)

# JS canvas overlay writes "x,y,w,h" into region_coords_txt when drag ends β†’ parse to dict
region_coords_txt.change(
    fn=on_region_coords_change,
    inputs=[region_coords_txt],
    outputs=[region_coords_state, region_readout],
    show_progress="hidden",
    show_progress_on=[],
)

# Clear region button β€” clear coords, JS overlay self-clears on next poll
clear_region_btn.click(
    fn=on_clear_region,
    inputs=None,
    outputs=[region_coords_txt, region_coords_state, region_readout],
)

# Main action: run the comparison and reset all viewing state
run_btn.click(
    fn=on_run,
    inputs=[pdf_old, pdf_new, skip_old_p1, skip_new_p1, enable_align,
            compare_mode, page_old_input, page_new_input,
            page_compare_mode, region_coords_state, display_dpi_state],
    outputs=[pages_state, rotation_state, nudge_x_state, nudge_y_state, nudge_scale_state,
             page_num_state, total_pages_state,
             pdf_output, result_image, nudge_readout, page_slider],
)

# View-mode tab change (also resets rotation to 0 β€” see on_view_change)
view_mode.change(
    fn=on_view_change,
    inputs=[view_mode, page_num_state, total_pages_state, pages_state, rotation_state,
            nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, rotation_state],
    show_progress="hidden",
    show_progress_on=[],
)

# Rotation buttons
rot_left_btn.click(
    fn=on_rot_left,
    inputs=[page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
            nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, rotation_state],
    show_progress="hidden",
    show_progress_on=[],
)
rot_right_btn.click(
    fn=on_rot_right,
    inputs=[page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
            nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, rotation_state],
    show_progress="hidden",
    show_progress_on=[],
)

# Page slider
page_slider.change(
    fn=on_pg_slide,
    inputs=[page_slider, total_pages_state, pages_state, view_mode,
            rotation_state, nudge_x_state, nudge_y_state, nudge_scale_state],
    outputs=[result_image, page_num_state],
    show_progress="hidden",
    show_progress_on=[],
)

# ── Nudge arrow buttons ───────────────────────────────────────────
# All four arrows share the same input/output lists; the lambda pins direction.
_nudge_inputs = [page_num_state, total_pages_state, pages_state, view_mode, rotation_state,
                 nudge_x_state, nudge_y_state, nudge_scale_state, nudge_step]
_nudge_outputs = [result_image, nudge_x_state, nudge_y_state,
                  nudge_scale_state, nudge_readout]

nudge_left_btn.click(
    fn=lambda *a: on_nudge("left", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])
nudge_right_btn.click(
    fn=lambda *a: on_nudge("right", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])
nudge_up_btn.click(
    fn=lambda *a: on_nudge("up", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])
nudge_down_btn.click(
    fn=lambda *a: on_nudge("down", *a), inputs=_nudge_inputs, outputs=_nudge_outputs,
    show_progress="hidden", show_progress_on=[])

# ── Scale number input (live update on change) ────────────────────
nudge_scale.change(
    fn=on_scale_change,
    inputs=[nudge_scale, page_num_state, total_pages_state, pages_state, view_mode,
            rotation_state, nudge_x_state, nudge_y_state],
    outputs=[result_image, nudge_scale_state, nudge_readout],
    show_progress="hidden",
    show_progress_on=[],
)
1901
+
1902
+ # ── Inline canvas JS β€” overlays a transparent draw canvas on the gr.Image ──
1903
# Browser-side JS injected via demo.load(): overlays a transparent <canvas>
# on the region preview gr.Image, captures a mouse-drag bounding box, and
# writes "x,y,w,h" (in natural image pixels) into the hidden coords textbox
# so Python can read it via region_coords_txt.change. The string below is
# runtime code executed in the client and is kept verbatim.
_INLINE_CANVAS_JS = """
() => {
  let _overlay = null, _ctx = null;
  let _dragging = false, _sx = 0, _sy = 0, _sel = null;
  let _lastCoords = '';

  function getImgEl() {
    // The rendered <img> inside the gr.Image component
    const wrap = document.getElementById('region-page-img');
    return wrap ? wrap.querySelector('img') : null;
  }

  function getCoordsEl() {
    const wrap = document.getElementById('region-coords-txt');
    return wrap ? wrap.querySelector('textarea') : null;
  }

  function syncOverlay() {
    if (!_overlay) return;
    const img = getImgEl();
    if (!img || !img.src || img.src.startsWith('data:image/gif')) return;
    const r = img.getBoundingClientRect();
    const pr = img.parentElement.getBoundingClientRect();
    _overlay.style.left = (r.left - pr.left) + 'px';
    _overlay.style.top = (r.top - pr.top) + 'px';
    _overlay.style.width = r.width + 'px';
    _overlay.style.height = r.height + 'px';
    if (_overlay.width !== Math.round(r.width) || _overlay.height !== Math.round(r.height)) {
      _overlay.width = Math.round(r.width);
      _overlay.height = Math.round(r.height);
      redraw();
    }
  }

  function toCanvas(cx, cy) {
    const r = _overlay.getBoundingClientRect();
    return { x: (cx - r.left) * _overlay.width / r.width,
             y: (cy - r.top) * _overlay.height / r.height };
  }

  function redraw() {
    if (!_ctx || !_overlay.width) return;
    _ctx.clearRect(0, 0, _overlay.width, _overlay.height);
    if (_sel) {
      _ctx.strokeStyle = '#00BBBB';
      _ctx.lineWidth = Math.max(2, _overlay.width / 400);
      _ctx.strokeRect(_sel.x, _sel.y, _sel.w, _sel.h);
      _ctx.fillStyle = 'rgba(0,187,187,0.15)';
      _ctx.fillRect(_sel.x, _sel.y, _sel.w, _sel.h);
    }
  }

  function pushCoords() {
    const el = getCoordsEl();
    if (!el || !_sel) return;
    // Scale from display px back to natural image px
    const img = getImgEl();
    if (!img) return;
    const scaleX = img.naturalWidth / _overlay.width;
    const scaleY = img.naturalHeight / _overlay.height;
    const val = Math.round(_sel.x * scaleX) + ',' +
                Math.round(_sel.y * scaleY) + ',' +
                Math.round(_sel.w * scaleX) + ',' +
                Math.round(_sel.h * scaleY);
    const setter = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value').set;
    setter.call(el, val);
    el.dispatchEvent(new Event('input', { bubbles: true }));
  }

  function setupOverlay() {
    const imgWrap = document.getElementById('region-page-img');
    if (!imgWrap) return false;
    // Make sure parent is positioned
    const parent = imgWrap.querySelector('.image-container') || imgWrap;
    if (getComputedStyle(parent).position === 'static') parent.style.position = 'relative';

    if (!_overlay) {
      _overlay = document.createElement('canvas');
      _overlay.id = 'region-draw-overlay';
      _overlay.style.cssText = 'position:absolute;top:0;left:0;cursor:crosshair;z-index:10;pointer-events:all;';
      parent.appendChild(_overlay);
      _ctx = _overlay.getContext('2d');

      _overlay.addEventListener('mousedown', function(e) {
        const p = toCanvas(e.clientX, e.clientY);
        _sx = p.x; _sy = p.y; _sel = null; _dragging = true; e.preventDefault();
      });
      _overlay.addEventListener('mousemove', function(e) {
        if (!_dragging) return;
        const p = toCanvas(e.clientX, e.clientY);
        _sel = { x: Math.min(_sx, p.x), y: Math.min(_sy, p.y),
                 w: Math.abs(p.x - _sx), h: Math.abs(p.y - _sy) };
        redraw(); e.preventDefault();
      });
      _overlay.addEventListener('mouseup', function(e) {
        if (!_dragging) return; _dragging = false;
        if (!_sel || _sel.w < 5 || _sel.h < 5) { _sel = null; redraw(); return; }
        redraw(); pushCoords(); e.preventDefault();
      });
    }
    return true;
  }

  // Poll every 300ms: sync overlay size, watch for cleared coords
  setInterval(function() {
    setupOverlay();
    syncOverlay();

    // Clear overlay when coords textbox is wiped by Clear button
    const el = getCoordsEl();
    if (el) {
      const cur = el.value;
      if (cur !== _lastCoords) {
        _lastCoords = cur;
        if (cur === '') { _sel = null; redraw(); }
      }
    }
  }, 300);
}
"""
# Install the canvas script once the UI loads; fn=None means JS-only.
demo.load(fn=None, js=_INLINE_CANVAS_JS)
2024
+
2025
+
2026
+ # ══════════════════════════════════════════════════════════════════════
2027
+ # ENTRY POINT
2028
+ # ══════════════════════════════════════════════════════════════════════
2029
+
2030
# ══════════════════════════════════════════════════════════════════════
# ENTRY POINT
# ══════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    import socket as _socket

    def _find_free_port(start: int = 7860, end: int = 7880) -> int:
        """Return the first TCP port in [start, end] that can be bound.

        Falls back to ``start`` when nothing is free β€” Gradio will then
        error with a clear message.
        """
        for p in range(start, end + 1):
            with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as s:
                try:
                    s.bind(("", p))
                    return p  # socket closes on `with` exit, freeing the port
                except OSError:
                    continue
        return start  # fallback β€” Gradio will error with a clear message

    _port = _find_free_port()
    print(f"\nπŸš€ POWERGRID Document Auditor β†’ http://localhost:{_port}\n")
    # BUGFIX: `theme` and `css` are gr.Blocks() constructor arguments, not
    # Blocks.launch() arguments β€” passing them to launch() raises TypeError
    # on Gradio 4.x. They must be (and presumably already are) supplied where
    # `demo` is constructed with _THEME / _CSS.
    demo.queue(default_concurrency_limit=20).launch(
        server_name="0.0.0.0",
        server_port=_port,
        share=False,
        show_error=True,
    )