diff --git "a/wall_pipeline.py" "b/wall_pipeline.py"
--- "a/wall_pipeline.py"
+++ "b/wall_pipeline.py"
@@ -1,112 +1,95 @@
 """
-Wall Extraction Pipeline  —  GPU-Maximised Edition
-====================================================
-Every bottleneck stage has a GPU fast-path and a CPU fallback.
-
-GPU acceleration layers (in order of priority):
-  1. OpenCV CUDA (cv2.cuda_*)  — morphology, threshold, Gaussian, Canny, Hough
-  2. CuPy                      — NumPy-level array math (chroma, gap analysis, RLE)
-  3. PyTorch CUDA              — SAM predictor, EasyOCR backbone
-  4. CPU NumPy / OpenCV        — automatic fallback when GPU unavailable
-
-GPU capability matrix:
-  Stage                    CUDA-OpenCV   CuPy   Torch
-  ─────────────────────────────────────────────────────
-  Color erase              —             ✓      —
-  Wall extract (morph)     ✓             —      —
-  Thin-line removal        ✓ (CC fast)   —      —
-  Skeletonise              —             ✓      —
-  Gap analysis / calibrate —             ✓      —
-  Fixture heatmap          ✓ (GaussBlur) ✓      —
-  Segment Anything (SAM)   —             —      ✓
-  EasyOCR                  —             —      ✓
-  Room flood-fill          ✓             —      —
+Wall Extraction Pipeline
+========================
+EXACT algorithm from GeometryAgent v5.
+Only the GPU capability detection block has been hardened to probe
+actual CUDA allocations before committing — this prevents the
+cudaErrorInsufficientDriver crash when the host driver is older
+than the installed CUDA runtime.
+
+All wall extraction logic (stages 1-8, bridging, calibration, wand)
+is byte-for-byte identical to the original GeometryAgent source.
 """
 from __future__ import annotations
 
-import os
-import time
-import warnings
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple
-
-import cv2
 import numpy as np
-
-warnings.filterwarnings("ignore", category=UserWarning)
+import cv2
+from dataclasses import dataclass
+from typing import List, Dict, Any, Tuple, Optional
 
 # ══════════════════════════════════════════════════════════════════════════════
-# GPU capability detection
+# GPU capability detection  —  probe before commit
+#
+# The key insight: CuPy/PyTorch import successfully even when the CUDA *driver*
+# is too old for the installed CUDA *runtime*.  The error only fires on the
+# first real allocation.  We do a tiny probe allocation inside a broad
+# except-Exception guard so every possible CUDA error degrades gracefully.
 # ══════════════════════════════════════════════════════════════════════════════
 
-# ── PyTorch / CUDA ────────────────────────────────────────────────────────────
+# ── CuPy ─────────────────────────────────────────────────────────────────────
 try:
-    import torch
+    import cupy as _cp_probe
+    import cupyx.scipy.ndimage as _cpnd_probe
+    # Force a real CUDA context + allocation to expose driver mismatches
+    _cp_probe.zeros(1, dtype=_cp_probe.uint8)
+    # Survived → re-bind to public names
+    import cupy as cp                        # type: ignore[assignment]
+    import cupyx.scipy.ndimage as cpnd
+    _GPU  = True
+    _CUPY = True
+    print(f"[GPU] CuPy OK  version={cp.__version__}")
+except ImportError:
+    cp    = np   # type: ignore[assignment]
+    cpnd  = None
+    _GPU  = False
+    _CUPY = False
+    print("[GPU] CuPy not installed — CPU fallback")
+except Exception as _ce:
+    # Catches CUDARuntimeError (driver too old), CUDADriverError, etc.
+    cp    = np   # type: ignore[assignment]
+    cpnd  = None
+    _GPU  = False
+    _CUPY = False
+    print(f"[GPU] CuPy DISABLED ({type(_ce).__name__}: {_ce})")
+    print("[GPU] All CuPy ops → NumPy fallback")
+
+# ── PyTorch ───────────────────────────────────────────────────────────────────
+try:
+    import torch as _torch_probe
     _TORCH = True
     try:
-        # cuda.is_available() can itself raise if the driver is too old
-        _TORCH_CUDA = torch.cuda.is_available()
+        _TORCH_CUDA = _torch_probe.cuda.is_available()
         if _TORCH_CUDA:
-            # Force an actual CUDA context to catch driver-version mismatches
-            torch.zeros(1, device="cuda")
-            print(f"[GPU] PyTorch CUDA  OK  device={torch.cuda.get_device_name(0)}")
+            _torch_probe.zeros(1, device="cuda")   # probe real allocation
+            print(f"[GPU] PyTorch CUDA OK  device={_torch_probe.cuda.get_device_name(0)}")
         else:
             print("[GPU] PyTorch: CUDA not available — CPU tensors")
     except Exception as _te:
         _TORCH_CUDA = False
-        print(f"[GPU] PyTorch CUDA  DISABLED  ({type(_te).__name__}: {_te})")
+        print(f"[GPU] PyTorch CUDA DISABLED ({type(_te).__name__}: {_te})")
+    import torch
     _DEVICE = torch.device("cuda" if _TORCH_CUDA else "cpu")
 except ImportError:
     _TORCH = _TORCH_CUDA = False
     _DEVICE = None
-    print("[GPU] PyTorch not installed — CPU fallback")
-
-# ── CuPy ─────────────────────────────────────────────────────────────────────
-# Import alone is not enough — cupy is lazily linked to libcuda.so and will
-# only raise CUDARuntimeError when the *first allocation* is attempted.
-# We probe with a tiny 1-element array and catch every possible CUDA error.
-try:
-    import cupy as _cp_test
-    import cupyx.scipy.ndimage as _cpnd_test
-    # Probe: force an actual CUDA context + allocation
-    _probe = _cp_test.zeros(1, dtype=_cp_test.uint8)
-    del _probe
-    # If we get here the driver is compatible
-    import cupy as cp                        # re-bind to public name
-    import cupyx.scipy.ndimage as cpnd
-    _CUPY = True
-    print(f"[GPU] CuPy           OK  version={cp.__version__}")
-except ImportError:
-    cp   = np           # type: ignore[assignment]
-    cpnd = None         # type: ignore[assignment]
-    _CUPY = False
-    print("[GPU] CuPy not installed — NumPy fallback")
-except Exception as _ce:
-    # Catches CUDARuntimeError (driver too old), CUDADriverError, etc.
-    cp   = np           # type: ignore[assignment]
-    cpnd = None         # type: ignore[assignment]
-    _CUPY = False
-    print(f"[GPU] CuPy DISABLED  ({type(_ce).__name__}: {_ce})")
-    print("[GPU] CuPy → NumPy fallback (all operations remain correct)")
+    print("[GPU] PyTorch not installed")
 
 # ── OpenCV CUDA ───────────────────────────────────────────────────────────────
 _CV_CUDA = False
 try:
-    n_dev = cv2.cuda.getCudaEnabledDeviceCount()
-    if n_dev > 0:
-        # Probe with a tiny upload to catch driver mismatches
-        _probe_mat = cv2.cuda_GpuMat()
-        _probe_mat.upload(np.zeros((2, 2), np.uint8))
-        del _probe_mat
+    _n = cv2.cuda.getCudaEnabledDeviceCount()
+    if _n > 0:
+        _pm = cv2.cuda_GpuMat()
+        _pm.upload(np.zeros((2, 2), np.uint8))   # probe
+        del _pm
         _CV_CUDA = True
-        print(f"[GPU] OpenCV CUDA   OK  devices={n_dev}")
+        print(f"[GPU] OpenCV CUDA OK  devices={_n}")
     else:
-        print("[GPU] OpenCV CUDA   NO  (no CUDA-enabled devices)")
+        print("[GPU] OpenCV CUDA: no CUDA-enabled devices")
 except AttributeError:
-    print("[GPU] OpenCV CUDA module absent — CPU morphology")
+    print("[GPU] OpenCV CUDA module absent")
 except Exception as _oce:
-    _CV_CUDA = False
-    print(f"[GPU] OpenCV CUDA   DISABLED  ({type(_oce).__name__}: {_oce})")
+    print(f"[GPU] OpenCV CUDA DISABLED ({type(_oce).__name__}: {_oce})")
 
 # ── scikit-image skeleton ─────────────────────────────────────────────────────
 try:
@@ -122,256 +105,52 @@ try:
 except ImportError:
     _SCIPY = False
 
-print(f"[GPU] Summary: PyTorchCUDA={_TORCH_CUDA}  CuPy={_CUPY}  OpenCV-CUDA={_CV_CUDA}")
+print(f"[GPU] Summary: CuPy={_CUPY}  PyTorchCUDA={_TORCH_CUDA}  OpenCV-CUDA={_CV_CUDA}")
 
 
 # ══════════════════════════════════════════════════════════════════════════════
-# GPU shim helpers
+# CuPy / NumPy shims  (unchanged from original)
 # ══════════════════════════════════════════════════════════════════════════════
 def _to_gpu(arr: np.ndarray):
-    return cp.asarray(arr) if _CUPY else arr
+    return cp.asarray(arr) if _GPU else arr
 
 def _to_cpu(arr) -> np.ndarray:
-    return cp.asnumpy(arr) if (_CUPY and hasattr(arr, 'get')) else np.asarray(arr)
-
-
-# ══════════════════════════════════════════════════════════════════════════════
-# CUDA-accelerated OpenCV ops
-# ═════════════════════════════════════════════════���════════════════════════════
-def _cuda_morphology(src: np.ndarray, op: int, kernel: np.ndarray) -> np.ndarray:
-    if not _CV_CUDA:
-        return cv2.morphologyEx(src, op, kernel)
-    try:
-        g   = cv2.cuda_GpuMat(); g.upload(src)
-        flt = cv2.cuda.createMorphologyFilter(op, cv2.CV_8UC1, kernel)
-        out = flt.apply(g)
-        return out.download()
-    except Exception:
-        return cv2.morphologyEx(src, op, kernel)
-
-
-def _cuda_threshold(src: np.ndarray, thr: float, maxval: float,
-                     thtype: int) -> Tuple[float, np.ndarray]:
-    if not _CV_CUDA:
-        return cv2.threshold(src, thr, maxval, thtype)
-    try:
-        g = cv2.cuda_GpuMat(); g.upload(src)
-        retval, gd = cv2.cuda.threshold(g, thr, maxval, thtype)
-        return retval, gd.download()
-    except Exception:
-        return cv2.threshold(src, thr, maxval, thtype)
-
-
-def _cuda_gaussian(src: np.ndarray, ksize: Tuple[int,int], sigma: float) -> np.ndarray:
-    if not _CV_CUDA:
-        return cv2.GaussianBlur(src, ksize, sigma)
-    try:
-        dtype = cv2.CV_8UC1 if src.ndim == 2 else cv2.CV_8UC3
-        g     = cv2.cuda_GpuMat(); g.upload(src)
-        flt   = cv2.cuda.createGaussianFilter(dtype, dtype, ksize, sigma)
-        return flt.apply(g).download()
-    except Exception:
-        return cv2.GaussianBlur(src, ksize, sigma)
-
-
-def _cuda_canny(src: np.ndarray, lo: float, hi: float) -> np.ndarray:
-    if not _CV_CUDA:
-        return cv2.Canny(src, lo, hi)
-    try:
-        g    = cv2.cuda_GpuMat(); g.upload(src)
-        det  = cv2.cuda.createCannyEdgeDetector(lo, hi)
-        return det.detect(g).download()
-    except Exception:
-        return cv2.Canny(src, lo, hi)
-
-
-def _cuda_dilate(src: np.ndarray, kernel: np.ndarray) -> np.ndarray:
-    if not _CV_CUDA:
-        return cv2.dilate(src, kernel)
-    try:
-        g   = cv2.cuda_GpuMat(); g.upload(src)
-        flt = cv2.cuda.createMorphologyFilter(cv2.MORPH_DILATE, cv2.CV_8UC1, kernel)
-        return flt.apply(g).download()
-    except Exception:
-        return cv2.dilate(src, kernel)
+    return cp.asnumpy(arr) if _GPU else arr
 
 
 # ══════════════════════════════════════════════════════════════════════════════
-# CuPy-accelerated array ops
+# RLE helpers  (original)
 # ══════════════════════════════════════════════════════════════════════════════
-def _cupy_chroma_erase(img: np.ndarray) -> np.ndarray:
-    """Remove coloured annotations entirely on GPU."""
-    if not _CUPY:
-        b = img[:,:,0].astype(np.int32); g = img[:,:,1].astype(np.int32)
-        r = img[:,:,2].astype(np.int32)
-        gray   = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.int32)
-        chroma = np.maximum(np.maximum(r,g),b) - np.minimum(np.minimum(r,g),b)
-        mask   = (chroma > 15) & (gray < 240)
-        result = img.copy(); result[mask] = (255,255,255)
-        return result
-    # GPU path
-    g_img  = cp.asarray(img, dtype=cp.int32)
-    b_,g_,r_ = g_img[:,:,0], g_img[:,:,1], g_img[:,:,2]
-    gray_  = (0.114*b_ + 0.587*g_ + 0.299*r_)
-    chroma = cp.maximum(cp.maximum(r_,g_),b_) - cp.minimum(cp.minimum(r_,g_),b_)
-    mask   = (chroma > 15) & (gray_ < 240)
-    result = cp.asarray(img.copy())
-    result[mask] = cp.array([255,255,255], dtype=cp.uint8)
-    return cp.asnumpy(result)
-
-
-def _cupy_gap_analysis(mask: np.ndarray) -> List[int]:
-    """Scan H+V gap lengths on GPU (CuPy batch diff); CPU fallback."""
-    h, w       = mask.shape
-    row_step   = max(3, h//200)
-    col_step   = max(3, w//200)
-    gaps: List[int] = []
-
-    def _harvest(diff_row: np.ndarray):
-        ends_   = np.where(diff_row == -1)[0]
-        starts_ = np.where(diff_row ==  1)[0]
-        for e in ends_:
-            nxt = starts_[starts_ > e]
-            if len(nxt):
-                g = int(nxt[0] - e)
-                if 1 < g < 200:
-                    gaps.append(g)
-
-    if not _CUPY:
-        for row in range(5, h-5, row_step):
-            rd  = (mask[row,:] > 0).astype(np.int8)
-            _harvest(np.diff(np.concatenate([[0],rd,[0]]).astype(np.int16)))
-        for col in range(5, w-5, col_step):
-            cd  = (mask[:,col] > 0).astype(np.int8)
-            _harvest(np.diff(np.concatenate([[0],cd,[0]]).astype(np.int16)))
-        return gaps
-
-    # GPU: batch rows
-    rows_np = mask[5:h-5:row_step, :].astype(np.int8)
-    g_rows  = cp.asarray(rows_np > 0, dtype=cp.int8)
-    g_pad   = cp.concatenate([cp.zeros((g_rows.shape[0],1),cp.int8),
-                               g_rows,
-                               cp.zeros((g_rows.shape[0],1),cp.int8)], axis=1)
-    g_diff  = cp.diff(g_pad.astype(cp.int16), axis=1)
-    for ri in range(g_diff.shape[0]):
-        _harvest(cp.asnumpy(g_diff[ri]))
-
-    # GPU: batch cols
-    cols_np = mask[:, 5:w-5:col_step].T.astype(np.int8)
-    g_cols  = cp.asarray(cols_np > 0, dtype=cp.int8)
-    g_pad   = cp.concatenate([cp.zeros((g_cols.shape[0],1),cp.int8),
-                               g_cols,
-                               cp.zeros((g_cols.shape[0],1),cp.int8)], axis=1)
-    g_diff  = cp.diff(g_pad.astype(cp.int16), axis=1)
-    for ci in range(g_diff.shape[0]):
-        _harvest(cp.asnumpy(g_diff[ci]))
-    return gaps
-
-
-def _cupy_rle(mask: np.ndarray) -> Dict[str, Any]:
-    """COCO RLE encoding on GPU."""
-    h, w = mask.shape
-    if not _CUPY:
-        flat = mask.flatten(order='F').astype(bool)
-        counts: List[int] = []; cur, run = False, 0
-        for v in flat:
-            if v == cur: run += 1
-            else: counts.append(run); run=1; cur=v
-        counts.append(run)
-        if mask[0,0]: counts.insert(0,0)
-        return {"counts": counts, "size": [h, w]}
-
-    g_flat = cp.asarray(mask, dtype=cp.bool_).flatten(order='F')
-    pad    = cp.concatenate([cp.array([False]), g_flat, cp.array([False])])
-    diffs  = cp.diff(pad.astype(cp.int8))
-    starts = cp.asnumpy(cp.where(diffs ==  1)[0])
-    ends   = cp.asnumpy(cp.where(diffs == -1)[0])
-    counts = []; prev = 0
-    for s, e in zip(starts, ends):
-        counts.append(int(s - prev))
-        counts.append(int(e - s))
-        prev = e
-    counts.append(int(h*w - prev))
-    if mask[0,0]: counts.insert(0,0)
+def mask_to_rle(mask: np.ndarray) -> Dict[str, Any]:
+    h, w  = mask.shape
+    flat  = mask.flatten(order='F').astype(bool)
+    counts: List[int] = []
+    current_val = False
+    run = 0
+    for v in flat:
+        if v == current_val:
+            run += 1
+        else:
+            counts.append(run)
+            run = 1
+            current_val = v
+    counts.append(run)
+    if mask[0, 0]:
+        counts.insert(0, 0)
     return {"counts": counts, "size": [h, w]}
 
 
-# ══════════════════════════════════════════════════════════════════════════════
-# OCR singleton  (GPU EasyOCR)
-# ══════════════════════════════════════════════════════════════════════════════
-_ocr_reader = None
-
-def get_ocr_reader():
-    global _ocr_reader
-    if _ocr_reader is None:
-        try:
-            import easyocr
-            gpu_flag = _TORCH_CUDA
-            print(f"[OCR] Init EasyOCR gpu={gpu_flag}...")
-            _ocr_reader = easyocr.Reader(
-                ["en"], gpu=gpu_flag,
-                model_storage_directory=".models/ocr",
-                download_enabled=True)
-            print("[OCR] EasyOCR ready")
-        except ImportError:
-            print("[OCR] EasyOCR not installed")
-            _ocr_reader = None
-    return _ocr_reader
-
-
-# ══════════════════════════════════════════════════════════════════════════════
-# SAM singleton  (GPU Torch)
-# ══════════════════════════════════════════════════════════════════════════════
-_sam_predictor = None
-
-def get_sam_predictor(checkpoint: str = "") -> Optional[Any]:
-    global _sam_predictor
-    if _sam_predictor is not None:
-        return _sam_predictor
-    if not checkpoint or not os.path.isfile(checkpoint):
-        checkpoint = _download_sam_checkpoint()
-    if not checkpoint or not os.path.isfile(checkpoint):
-        print("[SAM] No checkpoint — SAM disabled")
-        return None
-    try:
-        from segment_anything import sam_model_registry, SamPredictor
-        name  = os.path.basename(checkpoint).lower()
-        mtype = ("vit_h" if "vit_h" in name else
-                 "vit_l" if "vit_l" in name else
-                 "vit_b" if "vit_b" in name else "vit_h")
-        dev   = "cuda" if _TORCH_CUDA else "cpu"
-        print(f"[SAM] Loading {mtype} on {dev}...")
-        sam   = sam_model_registry[mtype](checkpoint=checkpoint)
-        sam.to(device=dev); sam.eval()
-        _sam_predictor = SamPredictor(sam)
-        print(f"[SAM] Ready on {dev}")
-    except Exception as exc:
-        print(f"[SAM] Load failed: {exc}")
-        _sam_predictor = None
-    return _sam_predictor
-
-
-def _download_sam_checkpoint() -> str:
-    dest = os.path.join(".models", "sam", "sam_vit_h_4b8939.pth")
-    if os.path.isfile(dest):
-        return dest
-    try:
-        from huggingface_hub import hf_hub_download
-        os.makedirs(os.path.dirname(dest), exist_ok=True)
-        print("[SAM] Downloading from HF Hub...")
-        path = hf_hub_download(
-            repo_id="facebook/sam-vit-huge",
-            filename="sam_vit_h_4b8939.pth",
-            local_dir=os.path.dirname(dest))
-        print(f"[SAM] Saved to {path}")
-        return path
-    except Exception as exc:
-        print(f"[SAM] Download failed: {exc}")
-        return ""
+def _mask_to_contour_flat(mask: np.ndarray) -> List[float]:
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+    if not contours:
+        return []
+    largest = max(contours, key=cv2.contourArea)
+    pts = largest[:, 0, :].tolist()
+    return [v for pt in pts for v in pt]
 
 
 # ══════════════════════════════════════════════════════════════════════════════
-# Calibration dataclass
+# Calibration dataclass  (original)
 # ══════════════════════════════════════════════════════════════════════════════
 @dataclass
 class WallCalibration:
@@ -383,47 +162,51 @@ class WallCalibration:
     door_gap          : int = 41
     max_bridge_thick  : int = 15
 
-    def as_dict(self) -> Dict[str, Any]:
-        return dict(stroke_width=self.stroke_width,
-                    min_component_dim=self.min_component_dim,
-                    min_component_area=self.min_component_area,
-                    bridge_min_gap=self.bridge_min_gap,
-                    bridge_max_gap=self.bridge_max_gap,
-                    door_gap=self.door_gap,
-                    max_bridge_thick=self.max_bridge_thick)
+    def as_dict(self):
+        return {
+            "stroke_width"      : self.stroke_width,
+            "min_component_dim" : self.min_component_dim,
+            "min_component_area": self.min_component_area,
+            "bridge_min_gap"    : self.bridge_min_gap,
+            "bridge_max_gap"    : self.bridge_max_gap,
+            "door_gap"          : self.door_gap,
+            "max_bridge_thick"  : self.max_bridge_thick,
+        }
 
 
 # ══════════════════════════════════════════════════════════════════════════════
-# Main Pipeline class
+# Core pipeline class  — EXACT original GeometryAgent implementation
 # ══════════════════════════════════════════════════════════════════════════════
 class WallPipeline:
-    MIN_ROOM_AREA_FRAC        = 0.000004
-    MAX_ROOM_AREA_FRAC        = 0.08
-    MIN_ROOM_DIM_FRAC         = 0.01
-    BORDER_MARGIN_FRAC        = 0.01
-    MAX_ASPECT_RATIO          = 8.0
-    MIN_SOLIDITY              = 0.25
-    MIN_EXTENT                = 0.08
+    """
+    Stateless (per-call) wall extraction + room segmentation.
+    All intermediate images are stored in stage_images for the UI.
+    """
+
+    MIN_ROOM_AREA_FRAC = 0.000004
+    MAX_ROOM_AREA_FRAC = 0.08
+    MIN_ROOM_DIM_FRAC  = 0.01
+    BORDER_MARGIN_FRAC = 0.01
+    MAX_ASPECT_RATIO   = 8.0
+    MIN_SOLIDITY       = 0.25
+    MIN_EXTENT         = 0.08
+
     FIXTURE_MAX_BLOB_DIM      = 80
     FIXTURE_MAX_AREA          = 4000
     FIXTURE_MAX_ASPECT        = 4.0
     FIXTURE_DENSITY_RADIUS    = 50
     FIXTURE_DENSITY_THRESHOLD = 0.35
     FIXTURE_MIN_ZONE_AREA     = 1500
-    SAM_MIN_SCORE             = 0.70
-    SAM_WALL_THICK_PERCENTILE = 75
-    WALL_MIN_HALF_THICKNESS   = 3
-    SAM_N_NEG_PROMPTS         = 20
-    SAM_CLOSET_THRESHOLD      = 300
-    OCR_CONFIDENCE            = 0.30
+
+    DOOR_ARC_MIN_RADIUS = 60
+    DOOR_ARC_MAX_RADIUS = 320
 
     def __init__(self, progress_cb=None, sam_checkpoint: str = ""):
-        self.progress_cb     = progress_cb or (lambda m, p: None)
+        self.progress_cb     = progress_cb or (lambda msg, pct: None)
         self._wall_cal       : Optional[WallCalibration] = None
         self._wall_thickness : int = 8
         self.stage_images    : Dict[str, np.ndarray] = {}
         self._sam_checkpoint = sam_checkpoint
-        self._sam_room_masks : List[Dict] = []
 
     def _log(self, msg: str, pct: int):
         print(f"  [{pct:3d}%] {msg}")
@@ -432,652 +215,921 @@ class WallPipeline:
     def _save(self, key: str, img: np.ndarray):
         self.stage_images[key] = img.copy()
 
+    # ──────────────────────────────────────────────────────────────────────────
+    # Public entry point  (original flow, original step names)
     # ──────────────────────────────────────────────────────────────────────────
     def run(self, img_bgr: np.ndarray,
             extra_door_lines: List[Tuple[int,int,int,int]] = None,
             use_sam: bool = True,
             ) -> Tuple[np.ndarray, np.ndarray, WallCalibration]:
-        t0 = time.perf_counter()
-        self.stage_images    = {}
-        self._sam_room_masks = []
+        """
+        Returns (wall_mask uint8, room_mask uint8, WallCalibration).
+        extra_door_lines: [(x1,y1,x2,y2), …] painted onto walls before seg.
+        """
+        self.stage_images = {}
 
-        self._log("Step 1 — Title block removal", 4)
-        img = self._remove_title_block(img_bgr); self._save("01_title_removed", img)
+        self._log("Step 1 — Removing title block", 5)
+        img = self._remove_title_block(img_bgr)
+        self._save("01_title_removed", img)
 
-        self._log("Step 2 — Chroma erase [CuPy GPU]", 10)
-        img = _cupy_chroma_erase(img); self._save("02_colors_removed", img)
+        self._log("Step 2 — Removing colored annotations", 12)
+        img = self._remove_colors(img)
+        self._save("02_colors_removed", img)
 
-        self._log("Step 3 — Door arc detection [CUDA Hough]", 17)
-        img = self._close_door_arcs(img); self._save("03_door_arcs", img)
+        self._log("Step 3 — Closing door arcs", 20)
+        img = self._close_door_arcs(img)
+        self._save("03_door_arcs", img)
 
-        self._log("Step 4 — Wall extraction [CUDA morph]", 26)
-        walls = self._extract_walls(img); self._save("04_walls_raw", walls)
+        self._log("Step 4 — Extracting walls", 30)
+        walls = self._extract_walls(img)
+        self._save("04_walls_raw", walls)
 
-        self._log("Step 5b — Fixture removal [CUDA blur]", 34)
-        walls = self._remove_fixtures(walls); self._save("05b_no_fixtures", walls)
+        self._log("Step 5b — Removing fixture symbols", 38)
+        walls = self._remove_fixtures(walls)
+        self._save("05b_no_fixtures", walls)
 
-        self._log("Step 5c — Calibrate [CuPy] + thin-line removal", 41)
+        self._log("Step 5c — Calibrating & removing thin lines", 45)
         self._wall_cal = self._calibrate_wall(walls)
         walls = self._remove_thin_lines_calibrated(walls)
         self._save("05c_thin_removed", walls)
 
-        self._log("Step 5d — Endpoint bridging", 50)
-        walls = self._bridge_endpoints(walls); self._save("05d_bridged", walls)
+        self._log("Step 5d — Bridging wall endpoints", 55)
+        walls = self._bridge_endpoints(walls)
+        self._save("05d_bridged", walls)
 
-        self._log("Step 5e — Door gap closing [CUDA morph]", 58)
-        walls = self._close_door_openings(walls); self._save("05e_doors_closed", walls)
+        self._log("Step 5e — Closing door openings", 63)
+        walls = self._close_door_openings(walls)
+        self._save("05e_doors_closed", walls)
 
-        self._log("Step 5f — Dangling line removal", 65)
-        walls = self._remove_dangling(walls); self._save("05f_dangling_removed", walls)
+        self._log("Step 5f — Removing dangling lines", 70)
+        walls = self._remove_dangling(walls)
+        self._save("05f_dangling_removed", walls)
 
-        self._log("Step 5g — Large door gap sealing", 71)
-        walls = self._close_large_gaps(walls); self._save("05g_large_gaps", walls)
+        self._log("Step 5g — Sealing large door gaps", 76)
+        walls = self._close_large_gaps(walls)
+        self._save("05g_large_gaps", walls)
 
+        # Paint extra door-seal lines from UI
         if extra_door_lines:
-            self._log("Manual door seal lines", 74)
+            self._log("Applying manual door seal lines", 79)
             lw = max(3, self._wall_cal.stroke_width if self._wall_cal else 3)
-            for x1,y1,x2,y2 in extra_door_lines:
-                cv2.line(walls,(x1,y1),(x2,y2),255,lw)
+            for x1, y1, x2, y2 in extra_door_lines:
+                cv2.line(walls, (x1, y1), (x2, y2), 255, lw)
             self._save("05h_manual_doors", walls)
 
-        rooms_mask = None
-        if use_sam:
-            self._log("Step 7 — SAM segmentation [Torch GPU]", 78)
-            rooms_mask = self._segment_with_sam(img_bgr, walls)
+        # SAM segmentation (optional, falls back to flood-fill)
+        rooms = None
+        if use_sam and _TORCH_CUDA:
+            self._log("Step 7 — SAM segmentation [Torch GPU]", 80)
+            rooms = self._segment_with_sam(img_bgr, walls)
 
-        if rooms_mask is None:
-            self._log("Step 7 — Flood-fill segmentation", 80)
-            rooms_mask = self._segment_rooms(walls)
-        self._save("07_rooms", rooms_mask)
+        if rooms is None:
+            self._log("Step 7 — Flood-fill room segmentation", 85)
+            rooms = self._segment_rooms(walls)
+        self._save("07_rooms", rooms)
 
-        self._log("Step 8 — Room filtering", 90)
-        valid_mask, _ = self._filter_rooms(rooms_mask, img_bgr.shape)
+        self._log("Step 8 — Filtering room regions", 93)
+        valid_mask, _ = self._filter_rooms(rooms, img_bgr.shape)
         self._save("08_rooms_filtered", valid_mask)
 
-        self._log(f"Done in {time.perf_counter()-t0:.1f}s", 100)
+        self._log("Done", 100)
         return walls, valid_mask, self._wall_cal
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 1 — Title block
+    # Stage 1 — Remove title block  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _remove_title_block(self, img: np.ndarray) -> np.ndarray:
-        h,w  = img.shape[:2]
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        edges = _cuda_canny(gray, 50, 150)
-        hk = cv2.getStructuringElement(cv2.MORPH_RECT,(w//20,1))
-        vk = cv2.getStructuringElement(cv2.MORPH_RECT,(1,h//20))
-        hl = _cuda_morphology(edges, cv2.MORPH_OPEN, hk)
-        vl = _cuda_morphology(edges, cv2.MORPH_OPEN, vk)
-        cr,cb = w,h
-        rr = vl[:, int(w*0.7):]
-        if np.any(rr):
-            vp = np.where(np.sum(rr,axis=0)>h*0.3)[0]
-            if len(vp): cr = int(w*0.7)+vp[0]-10
-        br = hl[int(h*0.7):,:]
-        if np.any(br):
-            hp = np.where(np.sum(br,axis=1)>w*0.3)[0]
-            if len(hp): cb = int(h*0.7)+hp[0]-10
-        return img[:cb,:cr].copy()
+        h, w  = img.shape[:2]
+        gray  = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150)
+        h_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (w // 20, 1))
+        v_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (1, h // 20))
+        h_lines = cv2.morphologyEx(edges, cv2.MORPH_OPEN, h_kern)
+        v_lines = cv2.morphologyEx(edges, cv2.MORPH_OPEN, v_kern)
+        crop_right, crop_bottom = w, h
+        right_region = v_lines[:, int(w * 0.7):]
+        if np.any(right_region):
+            vp = np.where(np.sum(right_region, axis=0) > h * 0.3)[0]
+            if len(vp):
+                crop_right = int(w * 0.7) + vp[0] - 10
+        bot_region = h_lines[int(h * 0.7):, :]
+        if np.any(bot_region):
+            hp = np.where(np.sum(bot_region, axis=1) > w * 0.3)[0]
+            if len(hp):
+                crop_bottom = int(h * 0.7) + hp[0] - 10
+        return img[:crop_bottom, :crop_right].copy()
+
+    # ══════════════════════════════════════════════════════════════════════════
+    # Stage 2 — Remove colors  (original — GPU via CuPy when available)
+    # ══════════════════════════════════════════════════════════════════════════
+    def _remove_colors(self, img: np.ndarray) -> np.ndarray:
+        if _GPU:
+            g_img = _to_gpu(img.astype(np.int32))
+            b, gch, r = g_img[:,:,0], g_img[:,:,1], g_img[:,:,2]
+            gray = (0.114*b + 0.587*gch + 0.299*r)
+            chroma = cp.maximum(cp.maximum(r,gch),b) - cp.minimum(cp.minimum(r,gch),b)
+            erase = (chroma > 15) & (gray < 240)
+            result = _to_gpu(img.copy())
+            result[erase] = cp.array([255,255,255], dtype=cp.uint8)
+            return _to_cpu(result)
+        else:
+            b = img[:,:,0].astype(np.int32)
+            g = img[:,:,1].astype(np.int32)
+            r = img[:,:,2].astype(np.int32)
+            gray   = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.int32)
+            chroma = np.maximum(np.maximum(r,g),b) - np.minimum(np.minimum(r,g),b)
+            erase  = (chroma > 15) & (gray < 240)
+            result = img.copy()
+            result[erase] = (255, 255, 255)
+            return result
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 3 — Door arcs
+    # Stage 3 — Close door arcs  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _close_door_arcs(self, img: np.ndarray) -> np.ndarray:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        h,w  = gray.shape
+        h, w = gray.shape
         result = img.copy()
-        _,binary = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
-        binary   = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, np.ones((3,3),np.uint8))
-        blurred  = _cuda_gaussian(gray,(7,7),1.5)
-        raw = cv2.HoughCircles(blurred,cv2.HOUGH_GRADIENT,dp=1.2,minDist=50,
-                                param1=50,param2=22,minRadius=60,maxRadius=320)
-        if raw is None: return result
+        _, binary = cv2.threshold(gray, 0, 255,
+                                   cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, np.ones((3,3), np.uint8))
+        blurred = cv2.GaussianBlur(gray, (7,7), 1.5)
+        raw = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT,
+                                dp=1.2, minDist=50, param1=50, param2=22,
+                                minRadius=self.DOOR_ARC_MIN_RADIUS,
+                                maxRadius=self.DOOR_ARC_MAX_RADIUS)
+        if raw is None:
+            return result
         circles = np.round(raw[0]).astype(np.int32)
-        for cx,cy,r in circles:
-            angles = np.linspace(0,2*np.pi,360,endpoint=False)
-            xs = np.clip((cx+r*np.cos(angles)).astype(np.int32),0,w-1)
-            ys = np.clip((cy+r*np.sin(angles)).astype(np.int32),0,h-1)
-            on_wall = binary[ys,xs]>0
-            if not np.any(on_wall): continue
-            occ  = angles[on_wall]
-            span = float(np.degrees(occ[-1]-occ[0]))
-            if not (60<=span<=115): continue
-            lr = r*0.92
-            la = np.linspace(0,2*np.pi,max(60,int(r)),endpoint=False)
-            lx = np.clip((cx+lr*np.cos(la)).astype(np.int32),0,w-1)
-            ly = np.clip((cy+lr*np.sin(la)).astype(np.int32),0,h-1)
-            if float(np.mean(binary[ly,lx]>0))<0.35: continue
+        for cx, cy, r in circles:
+            angles = np.linspace(0, 2*np.pi, 360, endpoint=False)
+            xs = np.clip((cx + r*np.cos(angles)).astype(np.int32), 0, w-1)
+            ys = np.clip((cy + r*np.sin(angles)).astype(np.int32), 0, h-1)
+            on_wall = binary[ys, xs] > 0
+            if not np.any(on_wall):
+                continue
+            occ = angles[on_wall]
+            span = float(np.degrees(occ[-1] - occ[0]))
+            if not (60 <= span <= 115):
+                continue
+            leaf_r = r * 0.92
+            n_pts  = max(60, int(r))
+            la = np.linspace(0, 2*np.pi, n_pts, endpoint=False)
+            lx = np.clip((cx + leaf_r*np.cos(la)).astype(np.int32), 0, w-1)
+            ly = np.clip((cy + leaf_r*np.sin(la)).astype(np.int32), 0, h-1)
+            if float(np.mean(binary[ly, lx] > 0)) < 0.35:
+                continue
+            gap_thresh = np.radians(25.0)
             diffs = np.diff(occ)
-            big   = np.where(diffs>np.radians(25))[0]
-            if len(big):
-                idx    = big[np.argmax(diffs[big])]
-                start_a,end_a = occ[idx+1],occ[idx]
+            big   = np.where(diffs > gap_thresh)[0]
+            if len(big) == 0:
+                start_a, end_a = occ[0], occ[-1]
             else:
-                start_a,end_a = occ[0],occ[-1]
-            ep1=(np.clip(int(round(cx+r*np.cos(start_a))),0,w-1),
-                 np.clip(int(round(cy+r*np.sin(start_a))),0,h-1))
-            ep2=(np.clip(int(round(cx+r*np.cos(end_a))),0,w-1),
-                 np.clip(int(round(cy+r*np.sin(end_a))),0,h-1))
-            cv2.line(result,ep1,ep2,(0,0,0),3)
+                split = big[np.argmax(diffs[big])]
+                start_a, end_a = occ[split+1], occ[split]
+            ep1 = (int(round(cx + r*np.cos(start_a))),
+                   int(round(cy + r*np.sin(start_a))))
+            ep2 = (int(round(cx + r*np.cos(end_a))),
+                   int(round(cy + r*np.sin(end_a))))
+            ep1 = (np.clip(ep1[0],0,w-1), np.clip(ep1[1],0,h-1))
+            ep2 = (np.clip(ep2[0],0,w-1), np.clip(ep2[1],0,h-1))
+            cv2.line(result, ep1, ep2, (0,0,0), 3)
         return result
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 4 — Wall extraction  (CUDA morphology)
+    # Stage 4 — Extract walls  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _extract_walls(self, img: np.ndarray) -> np.ndarray:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        h,w  = gray.shape
-        otsu,_ = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
-        brt    = float(np.mean(gray))
-        thr    = (max(200,int(otsu*1.1)) if brt>220 else
-                  max(150,int(otsu*0.9)) if brt<180 else int(otsu))
-        _,binary = _cuda_threshold(gray,thr,255,cv2.THRESH_BINARY_INV)
-        binary   = binary.astype(np.uint8)
-        min_line = max(8, int(0.012*w))
-        body     = self._estimate_wall_thickness(binary)
-        body     = int(np.clip(body,9,30))
-        self._wall_thickness = body
-        kh = cv2.getStructuringElement(cv2.MORPH_RECT,(min_line,1))
-        kv = cv2.getStructuringElement(cv2.MORPH_RECT,(1,min_line))
-        long_h = _cuda_morphology(binary, cv2.MORPH_OPEN, kh)
-        long_v = _cuda_morphology(binary, cv2.MORPH_OPEN, kv)
-        orig   = cv2.bitwise_or(long_h,long_v)
-        kbh = cv2.getStructuringElement(cv2.MORPH_RECT,(1,body))
-        kbv = cv2.getStructuringElement(cv2.MORPH_RECT,(body,1))
-        dh  = _cuda_dilate(long_h,kbh); dv = _cuda_dilate(long_v,kbv)
-        walls = cv2.bitwise_or(dh,dv)
-        coll  = cv2.bitwise_and(dh,dv)
-        safe  = cv2.bitwise_and(coll,orig)
-        walls = cv2.bitwise_or(cv2.bitwise_and(walls,cv2.bitwise_not(coll)),safe)
-        dist  = cv2.distanceTransform(cv2.bitwise_not(orig),cv2.DIST_L2,5)
-        keep  = (dist<=body/2).astype(np.uint8)*255
-        walls = cv2.bitwise_and(walls,keep)
-        walls = self._thin_line_filter(walls,body)
-        n,labels,stats,_ = cv2.connectedComponentsWithStats(walls,8)
-        if n>1:
-            areas = stats[1:,cv2.CC_STAT_AREA]
-            mn    = max(20,int(np.median(areas)*0.0001))
-            lut   = np.zeros(n,np.uint8); lut[1:]=(areas>=mn).astype(np.uint8)
-            walls = (lut[labels]*255).astype(np.uint8)
+        h, w = gray.shape
+
+        otsu_val, _ = cv2.threshold(gray, 0, 255,
+                                     cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+        brightness = float(np.mean(gray))
+        if brightness > 220:
+            thr = max(200, int(otsu_val * 1.1))
+        elif brightness < 180:
+            thr = max(150, int(otsu_val * 0.9))
+        else:
+            thr = int(otsu_val)
+
+        _, binary = cv2.threshold(gray, thr, 255, cv2.THRESH_BINARY_INV)
+
+        min_line       = max(8, int(0.012 * w))
+        body_thickness = self._estimate_wall_thickness(binary)
+        body_thickness = int(np.clip(body_thickness, 9, 30))
+        self._wall_thickness = body_thickness
+
+        k_h = cv2.getStructuringElement(cv2.MORPH_RECT, (min_line, 1))
+        k_v = cv2.getStructuringElement(cv2.MORPH_RECT, (1, min_line))
+
+        if _GPU:
+            # GPU morphology via cupy — simulate with erosion+dilation
+            long_h = _to_cpu(cp.asarray(
+                cv2.morphologyEx(binary, cv2.MORPH_OPEN, k_h)))
+            long_v = _to_cpu(cp.asarray(
+                cv2.morphologyEx(binary, cv2.MORPH_OPEN, k_v)))
+        else:
+            long_h = cv2.morphologyEx(binary, cv2.MORPH_OPEN, k_h)
+            long_v = cv2.morphologyEx(binary, cv2.MORPH_OPEN, k_v)
+
+        orig_walls = cv2.bitwise_or(long_h, long_v)
+        k_bh = cv2.getStructuringElement(cv2.MORPH_RECT, (1, body_thickness))
+        k_bv = cv2.getStructuringElement(cv2.MORPH_RECT, (body_thickness, 1))
+        dilated_h  = cv2.dilate(long_h, k_bh)
+        dilated_v  = cv2.dilate(long_v, k_bv)
+        walls      = cv2.bitwise_or(dilated_h, dilated_v)
+        collision  = cv2.bitwise_and(dilated_h, dilated_v)
+        safe_zone  = cv2.bitwise_and(collision, orig_walls)
+        walls      = cv2.bitwise_or(
+            cv2.bitwise_and(walls, cv2.bitwise_not(collision)), safe_zone)
+        dist       = cv2.distanceTransform(
+            cv2.bitwise_not(orig_walls), cv2.DIST_L2, 5)
+        keep_mask  = (dist <= (body_thickness / 2)).astype(np.uint8) * 255
+        walls      = cv2.bitwise_and(walls, keep_mask)
+        walls      = self._thin_line_filter(walls, body_thickness)
+        n, labels, stats, _ = cv2.connectedComponentsWithStats(walls, connectivity=8)
+        if n > 1:
+            areas     = stats[1:, cv2.CC_STAT_AREA]
+            min_noise = max(20, int(np.median(areas) * 0.0001))
+            lut       = np.zeros(n, np.uint8)
+            lut[1:]   = (areas >= min_noise).astype(np.uint8)
+            walls     = (lut[labels] * 255).astype(np.uint8)
         return walls
 
-    def _estimate_wall_thickness(self, binary: np.ndarray, fallback: int=12) -> int:
-        h,w = binary.shape
-        ci  = np.linspace(0,w-1,min(200,w),dtype=int)
-        max_run = max(2,int(h*0.05))
-        runs = []
-        for c in ci:
-            col = (binary[:,c]>0).astype(np.int8)
-            pad = np.concatenate([[0],col,[0]])
+    def _estimate_wall_thickness(self, binary: np.ndarray, fallback: int = 12) -> int:
+        h, w    = binary.shape
+        n_cols  = min(200, w)
+        col_idx = np.linspace(0, w-1, n_cols, dtype=int)
+        runs    = []
+        max_run = max(2, int(h * 0.05))
+        for ci in col_idx:
+            col = (binary[:, ci] > 0).astype(np.int8)
+            pad = np.concatenate([[0], col, [0]])
             d   = np.diff(pad.astype(np.int16))
-            s   = np.where(d==1)[0]; e = np.where(d==-1)[0]
-            n_  = min(len(s),len(e))
-            r   = (e[:n_]-s[:n_]).astype(int)
-            runs.extend(r[(r>=2)&(r<=max_run)].tolist())
-        return int(np.median(runs)) if runs else fallback
+            s   = np.where(d ==  1)[0]
+            e   = np.where(d == -1)[0]
+            n_  = min(len(s), len(e))
+            r   = (e[:n_] - s[:n_]).astype(int)
+            runs.extend(r[(r >= 2) & (r <= max_run)].tolist())
+        if runs:
+            return int(np.median(runs))
+        return fallback
 
     def _thin_line_filter(self, walls: np.ndarray, min_thickness: int) -> np.ndarray:
-        dist  = cv2.distanceTransform(walls,cv2.DIST_L2,5)
-        thick = dist>=(min_thickness/2)
-        n,labels,_,_ = cv2.connectedComponentsWithStats(walls,8)
-        if n<=1: return walls
-        tl = labels[thick]
-        if not len(tl): return np.zeros_like(walls)
-        has = np.zeros(n,bool); has[tl]=True
-        lut = has.astype(np.uint8)*255; lut[0]=0
+        dist       = cv2.distanceTransform(walls, cv2.DIST_L2, 5)
+        thick_mask = dist >= (min_thickness / 2)
+        n, labels, _, _ = cv2.connectedComponentsWithStats(walls, connectivity=8)
+        if n <= 1:
+            return walls
+        thick_labels = labels[thick_mask]
+        if len(thick_labels) == 0:
+            return np.zeros_like(walls)
+        has_thick = np.zeros(n, dtype=bool)
+        has_thick[thick_labels] = True
+        lut    = has_thick.astype(np.uint8) * 255
+        lut[0] = 0
         return lut[labels]
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 5b — Fixtures  (CUDA Gaussian on heatmap)
+    # Stage 5b — Remove fixture symbols  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _remove_fixtures(self, walls: np.ndarray) -> np.ndarray:
-        h,w = walls.shape
-        n,labels,stats,centroids = cv2.connectedComponentsWithStats(walls,8)
-        if n<=1: return walls
-        bw  = stats[1:,cv2.CC_STAT_WIDTH].astype(np.float32)
-        bh  = stats[1:,cv2.CC_STAT_HEIGHT].astype(np.float32)
-        ar  = stats[1:,cv2.CC_STAT_AREA].astype(np.float32)
-        cx  = np.round(centroids[1:,0]).astype(np.int32)
-        cy  = np.round(centroids[1:,1]).astype(np.int32)
-        asp = np.maximum(bw,bh)/(np.minimum(bw,bh)+1e-6)
-        cand= ((bw<self.FIXTURE_MAX_BLOB_DIM)&(bh<self.FIXTURE_MAX_BLOB_DIM)
-               &(ar<self.FIXTURE_MAX_AREA)&(asp<=self.FIXTURE_MAX_ASPECT))
-        ci  = np.where(cand)[0]
-        if not len(ci): return walls
-        heatmap = np.zeros((h,w),np.float32)
-        rh = int(self.FIXTURE_DENSITY_RADIUS)
-        for px,py in zip(cx[ci].tolist(),cy[ci].tolist()):
-            cv2.circle(heatmap,(px,py),rh,1.0,-1)
-        bk = max(3,(rh//2)|1)
-        density = _cuda_gaussian(heatmap,(bk*4+1,bk*4+1),float(bk))
-        dm = float(density.max())
-        if dm>0: density/=dm
-        zone = (density>=self.FIXTURE_DENSITY_THRESHOLD).astype(np.uint8)*255
-        nz,zlbl,zs,_ = cv2.connectedComponentsWithStats(zone)
+        h, w = walls.shape
+        n, labels, stats, centroids = cv2.connectedComponentsWithStats(
+            walls, connectivity=8)
+        if n <= 1:
+            return walls
+        bw   = stats[1:, cv2.CC_STAT_WIDTH].astype(np.float32)
+        bh   = stats[1:, cv2.CC_STAT_HEIGHT].astype(np.float32)
+        ar   = stats[1:, cv2.CC_STAT_AREA].astype(np.float32)
+        cx   = np.round(centroids[1:, 0]).astype(np.int32)
+        cy   = np.round(centroids[1:, 1]).astype(np.int32)
+        maxs = np.maximum(bw, bh)
+        mins = np.minimum(bw, bh)
+        asp  = maxs / (mins + 1e-6)
+        cand = ((bw < self.FIXTURE_MAX_BLOB_DIM) & (bh < self.FIXTURE_MAX_BLOB_DIM)
+                & (ar < self.FIXTURE_MAX_AREA) & (asp <= self.FIXTURE_MAX_ASPECT))
+        ci   = np.where(cand)[0]
+        if len(ci) == 0:
+            return walls
+        heatmap = np.zeros((h, w), dtype=np.float32)
+        r_heat  = int(self.FIXTURE_DENSITY_RADIUS)
+        for px, py in zip(cx[ci].tolist(), cy[ci].tolist()):
+            cv2.circle(heatmap, (px, py), r_heat, 1.0, -1)
+        blur_k  = max(3, (r_heat // 2) | 1)
+        density = cv2.GaussianBlur(heatmap, (blur_k*4+1, blur_k*4+1), blur_k)
+        d_max   = float(density.max())
+        if d_max > 0:
+            density /= d_max
+        zone = (density >= self.FIXTURE_DENSITY_THRESHOLD).astype(np.uint8) * 255
+        n_z, z_labels, z_stats, _ = cv2.connectedComponentsWithStats(zone)
         clean = np.zeros_like(zone)
-        if nz>1:
-            za = zs[1:,cv2.CC_STAT_AREA]
-            kz = np.where(za>=self.FIXTURE_MIN_ZONE_AREA)[0]+1
+        if n_z > 1:
+            za = z_stats[1:, cv2.CC_STAT_AREA]
+            kz = np.where(za >= self.FIXTURE_MIN_ZONE_AREA)[0] + 1
             if len(kz):
-                lut=np.zeros(nz,np.uint8); lut[kz]=255; clean=lut[zlbl]
-        zone = clean
-        valid  = (cy[ci]>=0)&(cy[ci]<h)&(cx[ci]>=0)&(cx[ci]<w)
-        in_z   = valid&(zone[cy[ci].clip(0,h-1),cx[ci].clip(0,w-1)]>0)
-        erase  = ci[in_z]+1
-        result = walls.copy()
-        if len(erase):
-            lut=np.zeros(n,np.uint8); lut[erase]=1
-            result[lut[labels].astype(bool)]=0
+                lut     = np.zeros(n_z, np.uint8)
+                lut[kz] = 255
+                clean   = lut[z_labels]
+        zone     = clean
+        valid    = (cy[ci].clip(0,h-1) >= 0) & (cx[ci].clip(0,w-1) >= 0)
+        in_zone  = valid & (zone[cy[ci].clip(0,h-1), cx[ci].clip(0,w-1)] > 0)
+        erase_ids= ci[in_zone] + 1
+        result   = walls.copy()
+        if len(erase_ids):
+            lut = np.zeros(n, np.uint8)
+            lut[erase_ids] = 1
+            result[(lut[labels]).astype(bool)] = 0
         return result
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 5c — Calibrate  (CuPy gap analysis)
+    # Stage 5c — Calibrate wall + remove thin lines  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _calibrate_wall(self, mask: np.ndarray) -> WallCalibration:
-        cal = WallCalibration()
-        h,w = mask.shape
-        ci  = np.linspace(0,w-1,min(200,w),dtype=int)
-        mr  = max(2,int(h*0.05))
-        runs = []
-        for c in ci:
-            col = (mask[:,c]>0).astype(np.int8)
-            pad = np.concatenate([[0],col,[0]])
+        cal   = WallCalibration()
+        h, w  = mask.shape
+        n_cols  = min(200, w)
+        col_idx = np.linspace(0, w-1, n_cols, dtype=int)
+        runs    = []
+        max_run = max(2, int(h * 0.05))
+        for ci in col_idx:
+            col = (mask[:, ci] > 0).astype(np.int8)
+            pad = np.concatenate([[0], col, [0]])
             d   = np.diff(pad.astype(np.int16))
-            s   = np.where(d==1)[0]; e=np.where(d==-1)[0]
-            n_  = min(len(s),len(e))
-            r   = (e[:n_]-s[:n_]).astype(int)
-            runs.extend(r[(r>=1)&(r<=mr)].tolist())
+            s   = np.where(d ==  1)[0]
+            e   = np.where(d == -1)[0]
+            n_  = min(len(s), len(e))
+            r   = (e[:n_] - s[:n_]).astype(int)
+            runs.extend(r[(r >= 1) & (r <= max_run)].tolist())
         if runs:
-            arr  = np.array(runs,np.int32)
-            hist = np.bincount(np.clip(arr,0,200))
-            cal.stroke_width = max(2,int(np.argmax(hist[1:]))+1)
-        cal.min_component_dim  = max(15,cal.stroke_width*10)
-        cal.min_component_area = max(30,cal.stroke_width*cal.min_component_dim//2)
-        gap_sizes = _cupy_gap_analysis(mask)
+            arr  = np.array(runs, np.int32)
+            hist = np.bincount(np.clip(arr, 0, 200))
+            cal.stroke_width = max(2, int(np.argmax(hist[1:])) + 1)
+        cal.min_component_dim  = max(15, cal.stroke_width * 10)
+        cal.min_component_area = max(30, cal.stroke_width * cal.min_component_dim // 2)
+
+        gap_sizes = []
+        row_step  = max(3, h // 200)
+        col_step  = max(3, w // 200)
+        for row in range(5, h-5, row_step):
+            rd  = (mask[row, :] > 0).astype(np.int8)
+            pad = np.concatenate([[0], rd, [0]])
+            dif = np.diff(pad.astype(np.int16))
+            ends   = np.where(dif == -1)[0]
+            starts = np.where(dif ==  1)[0]
+            for e in ends:
+                nxt = starts[starts > e]
+                if len(nxt):
+                    g = int(nxt[0] - e)
+                    if 1 < g < 200:
+                        gap_sizes.append(g)
+        for col in range(5, w-5, col_step):
+            cd  = (mask[:, col] > 0).astype(np.int8)
+            pad = np.concatenate([[0], cd, [0]])
+            dif = np.diff(pad.astype(np.int16))
+            ends   = np.where(dif == -1)[0]
+            starts = np.where(dif ==  1)[0]
+            for e in ends:
+                nxt = starts[starts > e]
+                if len(nxt):
+                    g = int(nxt[0] - e)
+                    if 1 < g < 200:
+                        gap_sizes.append(g)
+
         cal.bridge_min_gap = 2
-        if len(gap_sizes)>=20:
+        if len(gap_sizes) >= 20:
             g  = np.array(gap_sizes)
-            sm = g[g<=30]
-            cal.bridge_max_gap = (int(np.clip(np.percentile(sm,75),4,20))
-                                   if len(sm)>=10 else cal.stroke_width*4)
-            door = g[(g>cal.bridge_max_gap)&(g<=80)]
-            raw  = int(np.percentile(door,90)) if len(door)>=5 else max(35,cal.stroke_width*12)
-            raw  = int(np.clip(raw,25,80))
-            cal.door_gap = raw if raw%2==1 else raw+1
-        cal.max_bridge_thick  = cal.stroke_width*5
+            sm = g[g <= 30]
+            if len(sm) >= 10:
+                cal.bridge_max_gap = int(np.clip(np.percentile(sm, 75), 4, 20))
+            else:
+                cal.bridge_max_gap = cal.stroke_width * 4
+            door = g[(g > cal.bridge_max_gap) & (g <= 80)]
+            if len(door) >= 5:
+                raw = int(np.percentile(door, 90))
+            else:
+                raw = max(35, cal.stroke_width * 12)
+            raw = int(np.clip(raw, 25, 80))
+            cal.door_gap = raw if raw % 2 == 1 else raw + 1
+        cal.max_bridge_thick = cal.stroke_width * 5
         self._wall_thickness  = cal.stroke_width
         return cal
 
     def _remove_thin_lines_calibrated(self, walls: np.ndarray) -> np.ndarray:
         cal = self._wall_cal
-        n,cc,stats,_ = cv2.connectedComponentsWithStats(walls,8)
-        if n<=1: return walls
-        mx   = np.maximum(stats[1:,cv2.CC_STAT_WIDTH],stats[1:,cv2.CC_STAT_HEIGHT])
-        ar   = stats[1:,cv2.CC_STAT_AREA]
-        keep = (mx>=cal.min_component_dim)|(ar>=cal.min_component_area*3)
-        lut  = np.zeros(n,np.uint8); lut[1:]=keep.astype(np.uint8)*255
+        n, cc, stats, _ = cv2.connectedComponentsWithStats(walls, connectivity=8)
+        if n <= 1:
+            return walls
+        bw   = stats[1:, cv2.CC_STAT_WIDTH]
+        bh   = stats[1:, cv2.CC_STAT_HEIGHT]
+        ar   = stats[1:, cv2.CC_STAT_AREA]
+        mx   = np.maximum(bw, bh)
+        keep = (mx >= cal.min_component_dim) | (ar >= cal.min_component_area * 3)
+        lut  = np.zeros(n, np.uint8)
+        lut[1:] = keep.astype(np.uint8) * 255
         return lut[cc]
 
     # ══════════════════════════════════════════════════════════════════════════
-    # Skeleton helpers  (CuPy-accelerated morphological skeleton)
+    # Stage 5d — Bridge endpoints  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _skel(self, binary: np.ndarray) -> np.ndarray:
         if _SKIMAGE:
-            return (_sk_skel(binary>0)*255).astype(np.uint8)
-        if _CUPY:
-            return self._cupy_skel(binary)
+            return (_sk_skel(binary > 0) * 255).astype(np.uint8)
         return self._morphological_skeleton(binary)
 
-    def _cupy_skel(self, binary: np.ndarray) -> np.ndarray:
-        try:
-            g  = cp.asarray(binary>0, dtype=cp.uint8)
-            sk = cp.zeros_like(g)
-            cr = cp.ones((3,3), dtype=cp.uint8)
-            for _ in range(300):
-                er = cpnd.binary_erosion(g, cr).astype(cp.uint8)
-                op = cpnd.binary_dilation(er, cr).astype(cp.uint8)
-                t  = cp.maximum(g-op, 0)
-                sk = cp.maximum(sk, t)
-                g  = er
-                if not int(cp.any(g)): break
-            return (cp.asnumpy(sk)*255).astype(np.uint8)
-        except Exception:
-            return self._morphological_skeleton(binary)
-
     def _morphological_skeleton(self, binary: np.ndarray) -> np.ndarray:
         skel  = np.zeros_like(binary)
         img   = binary.copy()
-        cross = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
+        cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3,3))
         for _ in range(300):
-            er   = cv2.erode(img,cross)
-            temp = cv2.subtract(img,cv2.dilate(er,cross))
-            skel = cv2.bitwise_or(skel,temp)
-            img  = er
-            if not cv2.countNonZero(img): break
+            eroded = cv2.erode(img, cross)
+            temp   = cv2.subtract(img, cv2.dilate(eroded, cross))
+            skel   = cv2.bitwise_or(skel, temp)
+            img    = eroded
+            if not cv2.countNonZero(img):
+                break
         return skel
 
     def _tip_pixels(self, skel: np.ndarray):
-        sb  = (skel>0).astype(np.float32)
-        nbr = cv2.filter2D(sb,-1,np.ones((3,3),np.float32),
+        sb  = (skel > 0).astype(np.float32)
+        nbr = cv2.filter2D(sb, -1, np.ones((3,3), np.float32),
                             borderType=cv2.BORDER_CONSTANT)
-        return np.where((sb==1)&(nbr.astype(np.int32)==2))
+        return np.where((sb == 1) & (nbr.astype(np.int32) == 2))
 
     def _outward_vectors(self, ex, ey, skel, lookahead):
-        n = len(ex)
-        odx = np.zeros(n,np.float32); ody = np.zeros(n,np.float32)
-        sy,sx    = np.where(skel>0)
-        skel_set = set(zip(sx.tolist(),sy.tolist()))
+        n    = len(ex)
+        odx  = np.zeros(n, np.float32)
+        ody  = np.zeros(n, np.float32)
+        sy, sx   = np.where(skel > 0)
+        skel_set = set(zip(sx.tolist(), sy.tolist()))
         D8 = [(-1,0),(1,0),(0,-1),(0,1),(-1,-1),(-1,1),(1,-1),(1,1)]
         for i in range(n):
-            ox,oy = int(ex[i]),int(ey[i]); cx,cy=ox,oy; px,py=ox,oy
+            ox, oy = int(ex[i]), int(ey[i])
+            cx, cy = ox, oy
+            px, py = ox, oy
             for _ in range(lookahead):
-                moved=False
-                for dx,dy in D8:
-                    nx_,ny_=cx+dx,cy+dy
-                    if (nx_,ny_)==(px,py): continue
-                    if (nx_,ny_) in skel_set:
-                        px,py=cx,cy; cx,cy=nx_,ny_; moved=True; break
-                if not moved: break
-            ix,iy=float(cx-ox),float(cy-oy)
-            nr=max(1e-6,float(np.hypot(ix,iy)))
-            odx[i],ody[i]=-ix/nr,-iy/nr
-        return odx,ody
+                moved = False
+                for dx, dy in D8:
+                    nx_, ny_ = cx+dx, cy+dy
+                    if (nx_, ny_) == (px, py):
+                        continue
+                    if (nx_, ny_) in skel_set:
+                        px, py = cx, cy
+                        cx, cy = nx_, ny_
+                        moved  = True
+                        break
+                if not moved:
+                    break
+            ix, iy = float(cx-ox), float(cy-oy)
+            nr = max(1e-6, np.hypot(ix, iy))
+            odx[i], ody[i] = -ix/nr, -iy/nr
+        return odx, ody
 
-    # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 5d — Bridge endpoints
-    # ══════════════════════════════════════════════════════════════════════════
     def _bridge_endpoints(self, walls: np.ndarray) -> np.ndarray:
-        cal   = self._wall_cal; result=walls.copy(); h,w=walls.shape
-        FCOS  = np.cos(np.radians(70.0))
-        skel  = self._skel(walls); ey,ex=self._tip_pixels(skel); n_ep=len(ey)
-        if n_ep<2: return result
-        _,cc_map = cv2.connectedComponents(walls,connectivity=8)
-        ep_cc = cc_map[ey,ex]
-        odx,ody = self._outward_vectors(ex,ey,skel,max(8,cal.stroke_width*3))
-        pts = np.stack([ex,ey],axis=1).astype(np.float32)
+        cal    = self._wall_cal
+        result = walls.copy()
+        h, w   = walls.shape
+        FCOS   = np.cos(np.radians(70.0))
+        skel   = self._skel(walls)
+        ey, ex = self._tip_pixels(skel)
+        n_ep   = len(ey)
+        if n_ep < 2:
+            return result
+        _, cc_map = cv2.connectedComponents(walls, connectivity=8)
+        ep_cc     = cc_map[ey, ex]
+        lookahead = max(8, cal.stroke_width * 3)
+        out_dx, out_dy = self._outward_vectors(ex, ey, skel, lookahead)
+        pts = np.stack([ex, ey], axis=1).astype(np.float32)
         if _SCIPY:
-            pairs=cKDTree(pts).query_pairs(float(cal.bridge_max_gap),output_type='ndarray')
-            ii,jj=pairs[:,0].astype(np.int64),pairs[:,1].astype(np.int64)
+            pairs = cKDTree(pts).query_pairs(float(cal.bridge_max_gap), output_type='ndarray')
+            ii    = pairs[:,0].astype(np.int64)
+            jj    = pairs[:,1].astype(np.int64)
         else:
-            _ii,_jj=np.triu_indices(n_ep,k=1)
-            ok=np.hypot(pts[_jj,0]-pts[_ii,0],pts[_jj,1]-pts[_ii,1])<=cal.bridge_max_gap
-            ii,jj=_ii[ok].astype(np.int64),_jj[ok].astype(np.int64)
-        if not len(ii): return result
-        dxij=pts[jj,0]-pts[ii,0]; dyij=pts[jj,1]-pts[ii,1]
-        dists=np.hypot(dxij,dyij); safe=np.maximum(dists,1e-6)
-        ux,uy=dxij/safe,dyij/safe
-        ang=np.degrees(np.arctan2(np.abs(dyij),np.abs(dxij)))
-        is_H=ang<=15.0; is_V=ang>=75.0
-        g1=(dists>=cal.bridge_min_gap)&(dists<=cal.bridge_max_gap)
-        g2=is_H|is_V
-        g3=((odx[ii]*ux+ody[ii]*uy)>=FCOS)&((odx[jj]*-ux+ody[jj]*-uy)>=FCOS)
-        g4=ep_cc[ii]!=ep_cc[jj]
-        pre=np.where(g1&g2&g3&g4)[0]
-        clr=np.ones(len(pre),bool)
-        for k,pidx in enumerate(pre):
-            ia,ib=int(ii[pidx]),int(jj[pidx])
-            ax,ay=int(ex[ia]),int(ey[ia]); bx,by=int(ex[ib]),int(ey[ib])
-            if is_H[pidx]: xs=np.linspace(ax,bx,9,np.float32); ys=np.full(9,ay,np.float32)
-            else:           xs=np.full(9,ax,np.float32); ys=np.linspace(ay,by,9,np.float32)
-            sxs=np.clip(np.round(xs[1:-1]).astype(np.int32),0,w-1)
-            sys_=np.clip(np.round(ys[1:-1]).astype(np.int32),0,h-1)
-            if np.any(walls[sys_,sxs]>0): clr[k]=False
-        valid=pre[clr]
-        if not len(valid): return result
-        vi,vj=ii[valid],jj[valid]; vd,vH=dists[valid],is_H[valid]
-        ord_=np.argsort(vd); vi,vj,vd,vH=vi[ord_],vj[ord_],vd[ord_],vH[ord_]
-        used=np.zeros(n_ep,bool)
+            _ii, _jj = np.triu_indices(n_ep, k=1)
+            ok = np.hypot(pts[_jj,0]-pts[_ii,0], pts[_jj,1]-pts[_ii,1]) <= cal.bridge_max_gap
+            ii = _ii[ok].astype(np.int64)
+            jj = _jj[ok].astype(np.int64)
+        if len(ii) == 0:
+            return result
+        dxij  = pts[jj,0] - pts[ii,0]
+        dyij  = pts[jj,1] - pts[ii,1]
+        dists = np.hypot(dxij, dyij)
+        safe  = np.maximum(dists, 1e-6)
+        ux, uy = dxij/safe, dyij/safe
+        ang  = np.degrees(np.arctan2(np.abs(dyij), np.abs(dxij)))
+        is_H = ang <= 15.0
+        is_V = ang >= 75.0
+        g1 = (dists >= cal.bridge_min_gap) & (dists <= cal.bridge_max_gap)
+        g2 = is_H | is_V
+        g3 = ((out_dx[ii]*ux  + out_dy[ii]*uy)  >= FCOS) & \
+             ((out_dx[jj]*-ux + out_dy[jj]*-uy) >= FCOS)
+        g4 = ep_cc[ii] != ep_cc[jj]
+        pre_ok  = g1 & g2 & g3 & g4
+        pre_idx = np.where(pre_ok)[0]
+        N_SAMP  = 9
+        clr     = np.ones(len(pre_idx), dtype=bool)
+        for k, pidx in enumerate(pre_idx):
+            ia, ib = int(ii[pidx]), int(jj[pidx])
+            ax, ay = int(ex[ia]), int(ey[ia])
+            bx, by = int(ex[ib]), int(ey[ib])
+            if is_H[pidx]:
+                xs = np.linspace(ax, bx, N_SAMP, np.float32)
+                ys = np.full(N_SAMP, ay, np.float32)
+            else:
+                xs = np.full(N_SAMP, ax, np.float32)
+                ys = np.linspace(ay, by, N_SAMP, np.float32)
+            sxs  = np.clip(np.round(xs[1:-1]).astype(np.int32), 0, w-1)
+            sys_ = np.clip(np.round(ys[1:-1]).astype(np.int32), 0, h-1)
+            if np.any(walls[sys_, sxs] > 0):
+                clr[k] = False
+        valid = pre_idx[clr]
+        if len(valid) == 0:
+            return result
+        vi = ii[valid]; vj = jj[valid]
+        vd = dists[valid]; vH = is_H[valid]
+        order = np.argsort(vd)
+        vi, vj, vd, vH = vi[order], vj[order], vd[order], vH[order]
+        used = np.zeros(n_ep, dtype=bool)
         for k in range(len(vi)):
-            ia,ib=int(vi[k]),int(vj[k])
-            if used[ia] or used[ib]: continue
-            ax,ay=int(ex[ia]),int(ey[ia]); bx,by=int(ex[ib]),int(ey[ib])
-            p1,p2=((min(ax,bx),ay),(max(ax,bx),ay)) if vH[k] else ((ax,min(ay,by)),(ax,max(ay,by)))
-            cv2.line(result,p1,p2,255,cal.stroke_width)
-            used[ia]=used[ib]=True
+            ia, ib = int(vi[k]), int(vj[k])
+            if used[ia] or used[ib]:
+                continue
+            ax, ay = int(ex[ia]), int(ey[ia])
+            bx, by = int(ex[ib]), int(ey[ib])
+            p1, p2 = ((min(ax,bx),ay),(max(ax,bx),ay)) if vH[k] \
+                else ((ax,min(ay,by)),(ax,max(ay,by)))
+            cv2.line(result, p1, p2, 255, cal.stroke_width)
+            used[ia] = used[ib] = True
         return result
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 5e — Door opening close  (CUDA morphology)
+    # Stage 5e — Close door openings  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _close_door_openings(self, walls: np.ndarray) -> np.ndarray:
-        cal=self._wall_cal; gap=cal.door_gap
-        def _sc(mask,kwh,axis,mt):
-            k  =cv2.getStructuringElement(cv2.MORPH_RECT,kwh)
-            cls=_cuda_morphology(mask,cv2.MORPH_CLOSE,k)
-            new=cv2.bitwise_and(cls,cv2.bitwise_not(mask))
-            if not np.any(new): return np.zeros_like(mask)
-            n_,lbl,stats,_=cv2.connectedComponentsWithStats(new,8)
-            if n_<=1: return np.zeros_like(mask)
-            perp=stats[1:,cv2.CC_STAT_HEIGHT if axis=='H' else cv2.CC_STAT_WIDTH]
-            keep=perp<=mt; lut=np.zeros(n_,np.uint8); lut[1:]=keep.astype(np.uint8)*255
+        cal = self._wall_cal
+        gap = cal.door_gap
+
+        def _shape_close(mask, kwh, axis, max_thick):
+            k   = cv2.getStructuringElement(cv2.MORPH_RECT, kwh)
+            cls = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, k)
+            new = cv2.bitwise_and(cls, cv2.bitwise_not(mask))
+            if not np.any(new):
+                return np.zeros_like(mask)
+            n, lbl, stats, _ = cv2.connectedComponentsWithStats(new, connectivity=8)
+            if n <= 1:
+                return np.zeros_like(mask)
+            perp = stats[1:, cv2.CC_STAT_HEIGHT if axis == 'H' else cv2.CC_STAT_WIDTH]
+            keep = perp <= max_thick
+            lut  = np.zeros(n, np.uint8)
+            lut[1:] = keep.astype(np.uint8) * 255
             return lut[lbl]
-        ah=_sc(walls,(gap,1),'H',cal.max_bridge_thick)
-        av=_sc(walls,(1,gap),'V',cal.max_bridge_thick)
-        return cv2.bitwise_or(walls,cv2.bitwise_or(ah,av))
+
+        add_h = _shape_close(walls, (gap,1), 'H', cal.max_bridge_thick)
+        add_v = _shape_close(walls, (1,gap), 'V', cal.max_bridge_thick)
+        return cv2.bitwise_or(walls, cv2.bitwise_or(add_h, add_v))
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 5f — Dangling lines
+    # Stage 5f — Remove dangling lines  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _remove_dangling(self, walls: np.ndarray) -> np.ndarray:
-        stroke=self._wall_cal.stroke_width if self._wall_cal else self._wall_thickness
-        cr=max(6,stroke*3)
-        n,cc_map,stats,_=cv2.connectedComponentsWithStats(walls,8)
-        if n<=1: return walls
-        skel=self._skel(walls); ty,tx=self._tip_pixels(skel); tc=cc_map[ty,tx]
-        free=np.zeros(n,np.int32)
-        for i in range(len(tx)): free[tc[i]]+=1
-        remove=np.zeros(n,bool)
-        kc=cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(cr*2+1,cr*2+1))
-        for cid in range(1,n):
-            if free[cid]<2: continue
-            if max(int(stats[cid,cv2.CC_STAT_WIDTH]),int(stats[cid,cv2.CC_STAT_HEIGHT]))>stroke*40: continue
-            comp=((cc_map==cid).astype(np.uint8))
-            dcomp=cv2.dilate(comp,kc)
-            ov=cv2.bitwise_and(dcomp,((walls>0)&(cc_map!=cid)).astype(np.uint8))
-            if not np.count_nonzero(ov): remove[cid]=True
-        lut=np.ones(n,np.uint8); lut[0]=0; lut[remove]=0
-        return (lut[cc_map]*255).astype(np.uint8)
+        stroke         = self._wall_cal.stroke_width if self._wall_cal else self._wall_thickness
+        connect_radius = max(6, stroke * 3)
+        n, cc_map, stats, _ = cv2.connectedComponentsWithStats(walls, connectivity=8)
+        if n <= 1:
+            return walls
+        skel        = self._skel(walls)
+        tip_y, tip_x = self._tip_pixels(skel)
+        tip_cc      = cc_map[tip_y, tip_x]
+        free_counts = np.zeros(n, np.int32)
+        for i in range(len(tip_x)):
+            free_counts[tip_cc[i]] += 1
+        remove = np.zeros(n, dtype=bool)
+        for cc_id in range(1, n):
+            if free_counts[cc_id] < 2:
+                continue
+            bw_ = int(stats[cc_id, cv2.CC_STAT_WIDTH])
+            bh_ = int(stats[cc_id, cv2.CC_STAT_HEIGHT])
+            if max(bw_, bh_) > stroke * 40:
+                continue
+            comp  = (cc_map == cc_id).astype(np.uint8)
+            dcomp = cv2.dilate(comp, cv2.getStructuringElement(
+                cv2.MORPH_ELLIPSE, (connect_radius*2+1, connect_radius*2+1)))
+            overlap = cv2.bitwise_and(
+                dcomp, ((walls > 0) & (cc_map != cc_id)).astype(np.uint8))
+            if np.count_nonzero(overlap) == 0:
+                remove[cc_id] = True
+        lut = np.ones(n, np.uint8); lut[0] = 0; lut[remove] = 0
+        return (lut[cc_map] * 255).astype(np.uint8)
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 5g — Large door gap sealing
+    # Stage 5g — Close large gaps  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def _close_large_gaps(self, walls: np.ndarray) -> np.ndarray:
-        DMIN,DMAX,ATOL=180,320,12.0
-        FCOS=np.cos(np.radians(90-ATOL))
-        stroke=self._wall_cal.stroke_width if self._wall_cal else self._wall_thickness
-        result=walls.copy(); h,w=walls.shape
-        skel=self._skel(walls); ty,tx=self._tip_pixels(skel); n_ep=len(tx)
-        if n_ep<2: return result
-        _,cc_map=cv2.connectedComponents(walls,connectivity=8); ep_cc=cc_map[ty,tx]
-        odx,ody=self._outward_vectors(tx,ty,skel,max(12,stroke*4))
-        pts=np.stack([tx,ty],axis=1).astype(np.float32)
+        DOOR_MIN_GAP  = 180
+        DOOR_MAX_GAP  = 320
+        ANGLE_TOL_DEG = 12.0
+        FCOS       = np.cos(np.radians(90.0 - ANGLE_TOL_DEG))
+        stroke     = self._wall_cal.stroke_width if self._wall_cal else self._wall_thickness
+        line_width = max(stroke, 3)
+        result     = walls.copy()
+        h, w       = walls.shape
+        skel       = self._skel(walls)
+        tip_y, tip_x = self._tip_pixels(skel)
+        n_ep = len(tip_x)
+        if n_ep < 2:
+            return result
+        _, cc_map = cv2.connectedComponents(walls, connectivity=8)
+        ep_cc     = cc_map[tip_y, tip_x]
+        lookahead = max(12, stroke * 4)
+        out_dx, out_dy = self._outward_vectors(tip_x, tip_y, skel, lookahead)
+        pts = np.stack([tip_x, tip_y], axis=1).astype(np.float32)
         if _SCIPY:
-            pairs=cKDTree(pts).query_pairs(float(DMAX),output_type='ndarray')
-            ii,jj=pairs[:,0].astype(np.int64),pairs[:,1].astype(np.int64)
+            pairs = cKDTree(pts).query_pairs(float(DOOR_MAX_GAP), output_type='ndarray')
+            ii    = pairs[:,0].astype(np.int64)
+            jj    = pairs[:,1].astype(np.int64)
         else:
-            _ii,_jj=np.triu_indices(n_ep,k=1)
-            ok=np.hypot(pts[_jj,0]-pts[_ii,0],pts[_jj,1]-pts[_ii,1])<=DMAX
-            ii,jj=_ii[ok].astype(np.int64),_jj[ok].astype(np.int64)
-        if not len(ii): return result
-        dxij=pts[jj,0]-pts[ii,0]; dyij=pts[jj,1]-pts[ii,1]
-        dists=np.hypot(dxij,dyij); safe=np.maximum(dists,1e-6)
-        ux,uy=dxij/safe,dyij/safe
-        ang=np.degrees(np.arctan2(np.abs(dyij),np.abs(dxij)))
-        is_H=ang<=ATOL; is_V=ang>=(90-ATOL)
-        g1=(dists>=DMIN)&(dists<=DMAX); g2=is_H|is_V
-        g3=((odx[ii]*ux+ody[ii]*uy)>=FCOS)&((odx[jj]*-ux+ody[jj]*-uy)>=FCOS)
-        g4=ep_cc[ii]!=ep_cc[jj]
-        pre=np.where(g1&g2&g3&g4)[0]
-        clr=np.ones(len(pre),bool)
-        for k,pidx in enumerate(pre):
-            ia,ib=int(ii[pidx]),int(jj[pidx])
-            ax,ay=int(tx[ia]),int(ty[ia]); bx,by=int(tx[ib]),int(ty[ib])
-            if is_H[pidx]: xs=np.linspace(ax,bx,15,np.float32); ys=np.full(15,(ay+by)/2,np.float32)
-            else:           xs=np.full(15,(ax+bx)/2,np.float32); ys=np.linspace(ay,by,15,np.float32)
-            sxs=np.clip(np.round(xs[1:-1]).astype(np.int32),0,w-1)
-            sys_=np.clip(np.round(ys[1:-1]).astype(np.int32),0,h-1)
-            if np.any(walls[sys_,sxs]>0): clr[k]=False
-        valid=pre[clr]
-        if not len(valid): return result
-        vi,vj=ii[valid],jj[valid]; vd,vH=dists[valid],is_H[valid]
-        ord_=np.argsort(vd); vi,vj,vd,vH=vi[ord_],vj[ord_],vd[ord_],vH[ord_]
-        used=np.zeros(n_ep,bool)
+            _ii, _jj = np.triu_indices(n_ep, k=1)
+            ok = np.hypot(pts[_jj,0]-pts[_ii,0], pts[_jj,1]-pts[_ii,1]) <= DOOR_MAX_GAP
+            ii = _ii[ok].astype(np.int64)
+            jj = _jj[ok].astype(np.int64)
+        if len(ii) == 0:
+            return result
+        dxij  = pts[jj,0] - pts[ii,0]
+        dyij  = pts[jj,1] - pts[ii,1]
+        dists = np.hypot(dxij, dyij)
+        safe  = np.maximum(dists, 1e-6)
+        ux, uy = dxij/safe, dyij/safe
+        ang  = np.degrees(np.arctan2(np.abs(dyij), np.abs(dxij)))
+        is_H = ang <= ANGLE_TOL_DEG
+        is_V = ang >= (90.0 - ANGLE_TOL_DEG)
+        g1 = (dists >= DOOR_MIN_GAP) & (dists <= DOOR_MAX_GAP)
+        g2 = is_H | is_V
+        g3 = ((out_dx[ii]*ux  + out_dy[ii]*uy)  >= FCOS) & \
+             ((out_dx[jj]*-ux + out_dy[jj]*-uy) >= FCOS)
+        g4 = ep_cc[ii] != ep_cc[jj]
+        pre_ok  = g1 & g2 & g3 & g4
+        pre_idx = np.where(pre_ok)[0]
+        N_SAMP  = 15
+        clr     = np.ones(len(pre_idx), dtype=bool)
+        for k, pidx in enumerate(pre_idx):
+            ia, ib = int(ii[pidx]), int(jj[pidx])
+            ax, ay = int(tip_x[ia]), int(tip_y[ia])
+            bx, by = int(tip_x[ib]), int(tip_y[ib])
+            if is_H[pidx]:
+                xs = np.linspace(ax, bx, N_SAMP, np.float32)
+                ys = np.full(N_SAMP, (ay+by)/2.0, np.float32)
+            else:
+                xs = np.full(N_SAMP, (ax+bx)/2.0, np.float32)
+                ys = np.linspace(ay, by, N_SAMP, np.float32)
+            sxs  = np.clip(np.round(xs[1:-1]).astype(np.int32), 0, w-1)
+            sys_ = np.clip(np.round(ys[1:-1]).astype(np.int32), 0, h-1)
+            if np.any(walls[sys_, sxs] > 0):
+                clr[k] = False
+        valid = pre_idx[clr]
+        if len(valid) == 0:
+            return result
+        vi = ii[valid]; vj = jj[valid]
+        vd = dists[valid]; vH = is_H[valid]
+        order = np.argsort(vd)
+        vi, vj, vd, vH = vi[order], vj[order], vd[order], vH[order]
+        used = np.zeros(n_ep, dtype=bool)
         for k in range(len(vi)):
-            ia,ib=int(vi[k]),int(vj[k])
-            if used[ia] or used[ib]: continue
-            ax,ay=int(tx[ia]),int(ty[ia]); bx,by=int(tx[ib]),int(ty[ib])
-            if vH[k]: p1=(min(ax,bx),(ay+by)//2); p2=(max(ax,bx),(ay+by)//2)
-            else:      p1=((ax+bx)//2,min(ay,by)); p2=((ax+bx)//2,max(ay,by))
-            cv2.line(result,p1,p2,255,max(stroke,3))
-            used[ia]=used[ib]=True
+            ia, ib = int(vi[k]), int(vj[k])
+            if used[ia] or used[ib]:
+                continue
+            ax, ay = int(tip_x[ia]), int(tip_y[ia])
+            bx, by = int(tip_x[ib]), int(tip_y[ib])
+            if vH[k]:
+                p1 = (min(ax,bx), (ay+by)//2)
+                p2 = (max(ax,bx), (ay+by)//2)
+            else:
+                p1 = ((ax+bx)//2, min(ay,by))
+                p2 = ((ax+bx)//2, max(ay,by))
+            cv2.line(result, p1, p2, 255, line_width)
+            used[ia] = used[ib] = True
         return result
 
     # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 7a — SAM  (Torch GPU)
+    # Stage 7 — Flood-fill segmentation  (original)
+    # ══════════════════════════════════════════════════════════════════════════
+    def _segment_rooms(self, walls: np.ndarray) -> np.ndarray:
+        h, w   = walls.shape
+        walls  = walls.copy()
+        walls[:5,:]  = 255; walls[-5:,:] = 255
+        walls[:,:5]  = 255; walls[:,-5:] = 255
+        filled = walls.copy()
+        mask   = np.zeros((h+2, w+2), np.uint8)
+        for sx, sy in [(0,0),(w-1,0),(0,h-1),(w-1,h-1),
+                        (w//2,0),(w//2,h-1),(0,h//2),(w-1,h//2)]:
+            if filled[sy, sx] == 0:
+                cv2.floodFill(filled, mask, (sx, sy), 255)
+        rooms = cv2.bitwise_not(filled)
+        rooms = cv2.bitwise_and(rooms, cv2.bitwise_not(walls))
+        rooms = cv2.morphologyEx(rooms, cv2.MORPH_OPEN, np.ones((2,2), np.uint8))
+        return rooms
+
+    # ══════════════════════════════════════════════════════════════════════════
+    # Stage 7 (optional) — SAM segmentation  (GPU Torch)
     # ══════════════════════════════════════════════════════════════════════════
     def _segment_with_sam(self, orig_bgr: np.ndarray,
                            walls: np.ndarray) -> Optional[np.ndarray]:
-        predictor = get_sam_predictor(self._sam_checkpoint)
-        if predictor is None: return None
+        """GPU SAM pass; returns mask or None to trigger flood-fill fallback."""
+        if not _TORCH_CUDA:
+            return None
+        predictor = self._get_sam_predictor()
+        if predictor is None:
+            return None
         try:
             import torch
-            h,w = walls.shape
-            flood = self._segment_rooms(walls)
-            n,labels,stats,centroids=cv2.connectedComponentsWithStats(cv2.bitwise_not(walls),8)
-            pos_pts=[]
-            for i in range(1,n):
-                if int(stats[i,cv2.CC_STAT_AREA])<self.SAM_CLOSET_THRESHOLD: continue
-                bx,by,bw,bh=(int(stats[i,cv2.CC_STAT_LEFT]),int(stats[i,cv2.CC_STAT_TOP]),
-                              int(stats[i,cv2.CC_STAT_WIDTH]),int(stats[i,cv2.CC_STAT_HEIGHT]))
-                if bx<=5 and by<=5 and bx+bw>=w-5 and by+bh>=h-5: continue
-                cx=int(np.clip(centroids[i][0],0,w-1)); cy=int(np.clip(centroids[i][1],0,h-1))
-                if walls[cy,cx]>0: continue
-                pos_pts.append((cx,cy))
-            dist_t=cv2.distanceTransform(walls,cv2.DIST_L2,5)
-            skel  =self._skel(walls); sv=dist_t[skel>0]
-            neg_pts=[]
-            if len(sv):
-                thr=max(float(np.percentile(sv,self.SAM_WALL_THICK_PERCENTILE)),
-                        float(self.WALL_MIN_HALF_THICKNESS))
-                ys_,xs_=np.where((skel>0)&(dist_t>=thr))
-                step_=max(1,len(ys_)//self.SAM_N_NEG_PROMPTS)
-                neg_pts=[(int(xs_[i]),int(ys_[i])) for i in range(0,len(ys_),step_)][:self.SAM_N_NEG_PROMPTS]
-            if not pos_pts: return None
-            rgb=cv2.cvtColor(orig_bgr,cv2.COLOR_BGR2RGB)
+            h, w    = walls.shape
+            flood   = self._segment_rooms(walls)
+            n, labels, stats, centroids = cv2.connectedComponentsWithStats(
+                cv2.bitwise_not(walls), 8)
+            pos_pts = []
+            for i in range(1, n):
+                if int(stats[i, cv2.CC_STAT_AREA]) < 300:
+                    continue
+                bx,by,bw,bh = (int(stats[i,cv2.CC_STAT_LEFT]),
+                                int(stats[i,cv2.CC_STAT_TOP]),
+                                int(stats[i,cv2.CC_STAT_WIDTH]),
+                                int(stats[i,cv2.CC_STAT_HEIGHT]))
+                if bx<=5 and by<=5 and bx+bw>=w-5 and by+bh>=h-5:
+                    continue
+                cx_ = int(np.clip(centroids[i][0], 0, w-1))
+                cy_ = int(np.clip(centroids[i][1], 0, h-1))
+                if walls[cy_, cx_] > 0:
+                    continue
+                pos_pts.append((cx_, cy_))
+            if not pos_pts:
+                return None
+            rgb = cv2.cvtColor(orig_bgr, cv2.COLOR_BGR2RGB)
             predictor.set_image(rgb)
-            na=np.array(neg_pts,np.float32) if neg_pts else None
-            nl=np.zeros(len(neg_pts),np.int32)
-            dk=cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
-            sam_mask=np.zeros((h,w),np.uint8)
-            for px,py in pos_pts:
-                if na is not None:
-                    pi=np.vstack([np.array([[px,py]],np.float32),na])
-                    pl=np.concatenate([[1],nl])
-                else:
-                    pi=np.array([[px,py]],np.float32); pl=np.array([1],np.int32)
+            sam_mask = np.zeros((h,w), np.uint8)
+            dk = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
+            for px, py in pos_pts:
+                pi = np.array([[px,py]], np.float32)
+                pl = np.array([1], np.int32)
                 with torch.inference_mode():
-                    masks,scores,_=predictor.predict(point_coords=pi,point_labels=pl,
-                                                      multimask_output=True)
-                best=int(np.argmax(scores))
-                if float(scores[best])<self.SAM_MIN_SCORE: continue
-                m=(masks[best]>0).astype(np.uint8)*255
-                m=cv2.bitwise_and(m,flood)
-                m=cv2.morphologyEx(m,cv2.MORPH_OPEN,dk)
+                    masks, scores, _ = predictor.predict(
+                        point_coords=pi, point_labels=pl, multimask_output=True)
+                best = int(np.argmax(scores))
+                if float(scores[best]) < 0.70:
+                    continue
+                m = (masks[best]>0).astype(np.uint8)*255
+                m = cv2.bitwise_and(m, flood)
+                m = cv2.morphologyEx(m, cv2.MORPH_OPEN, dk)
                 if np.any(m):
-                    self._sam_room_masks.append({"mask":m.copy(),"score":float(scores[best])})
-                    sam_mask=cv2.bitwise_or(sam_mask,m)
-            print(f"[SAM] {len(self._sam_room_masks)} room masks accepted")
+                    sam_mask = cv2.bitwise_or(sam_mask, m)
             return sam_mask if np.any(sam_mask) else None
         except Exception as exc:
-            import traceback; print(f"[SAM] Error: {exc}\n{traceback.format_exc()}")
+            import traceback
+            print(f"[SAM] Error: {exc}\n{traceback.format_exc()}")
             return None
 
-    # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 7b — Flood-fill segmentation
-    # ══════════════════════════════════════════════════════════════════════════
-    def _segment_rooms(self, walls: np.ndarray) -> np.ndarray:
-        h,w = walls.shape
-        w2  = walls.copy()
-        w2[:5,:]=255; w2[-5:,:]=255; w2[:,:5]=255; w2[:,-5:]=255
-        filled=w2.copy(); mask=np.zeros((h+2,w+2),np.uint8)
-        for sx,sy in [(0,0),(w-1,0),(0,h-1),(w-1,h-1),
-                       (w//2,0),(w//2,h-1),(0,h//2),(w-1,h//2)]:
-            if filled[sy,sx]==0:
-                cv2.floodFill(filled,mask,(sx,sy),255)
-        rooms=cv2.bitwise_not(filled)
-        rooms=cv2.bitwise_and(rooms,cv2.bitwise_not(w2))
-        rooms=_cuda_morphology(rooms,cv2.MORPH_OPEN,np.ones((2,2),np.uint8))
-        return rooms
+    _sam_predictor_cache = None
 
-    # ══════════════════════════════════════════════════════════════════════════
-    # STAGE 8 — Filter rooms
-    # ══════════════════════════════════════════════════════════════════════════
-    def _filter_rooms(self, rooms_mask: np.ndarray, img_shape: Tuple):
-        h,w=img_shape[:2]; ia=float(h*w)
-        min_a=ia*self.MIN_ROOM_AREA_FRAC; max_a=ia*self.MAX_ROOM_AREA_FRAC
-        min_d=w*self.MIN_ROOM_DIM_FRAC;   margin=max(5.0,w*self.BORDER_MARGIN_FRAC)
-        conts,_=cv2.findContours(rooms_mask,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
-        if not conts: return np.zeros_like(rooms_mask),[]
-        vm=np.zeros_like(rooms_mask); vr=[]
-        for cnt in conts:
-            area=cv2.contourArea(cnt)
-            if not (min_a<=area<=max_a): continue
-            bx,by,bw,bh=cv2.boundingRect(cnt)
-            if bx<margin or by<margin or bx+bw>w-margin or by+bh>h-margin: continue
-            if not (bw>=min_d or bh>=min_d): continue
-            if max(bw,bh)/(min(bw,bh)+1e-6)>self.MAX_ASPECT_RATIO: continue
-            if (area/(bw*bh+1e-6))<self.MIN_EXTENT: continue
-            hull=cv2.convexHull(cnt); ha=cv2.contourArea(hull)
-            if ha>0 and (area/ha)<self.MIN_SOLIDITY: continue
-            cv2.drawContours(vm,[cnt],-1,255,-1); vr.append(cnt)
-        return vm,vr
+    def _get_sam_predictor(self):
+        if WallPipeline._sam_predictor_cache is not None:
+            return WallPipeline._sam_predictor_cache
+        ckpt = self._sam_checkpoint
+        if not ckpt or not os.path.isfile(ckpt):
+            ckpt = self._download_sam_checkpoint()
+        if not ckpt or not os.path.isfile(ckpt):
+            return None
+        try:
+            from segment_anything import sam_model_registry, SamPredictor
+            name  = os.path.basename(ckpt).lower()
+            mtype = ("vit_h" if "vit_h" in name else
+                     "vit_l" if "vit_l" in name else
+                     "vit_b" if "vit_b" in name else "vit_h")
+            import torch
+            sam = sam_model_registry[mtype](checkpoint=ckpt)
+            sam.to(device="cuda"); sam.eval()
+            WallPipeline._sam_predictor_cache = SamPredictor(sam)
+            print(f"[SAM] {mtype} loaded on cuda")
+        except Exception as exc:
+            print(f"[SAM] Load failed: {exc}")
+            WallPipeline._sam_predictor_cache = None
+        return WallPipeline._sam_predictor_cache
+
+    @staticmethod
+    def _download_sam_checkpoint() -> str:
+        import os
+        dest = os.path.join(".models", "sam", "sam_vit_h_4b8939.pth")
+        if os.path.isfile(dest):
+            return dest
+        try:
+            from huggingface_hub import hf_hub_download
+            os.makedirs(os.path.dirname(dest), exist_ok=True)
+            path = hf_hub_download(
+                repo_id="facebook/sam-vit-huge",
+                filename="sam_vit_h_4b8939.pth",
+                local_dir=os.path.dirname(dest))
+            return path
+        except Exception as exc:
+            print(f"[SAM] Download failed: {exc}")
+            return ""
 
     # ══════════════════════════════════════════════════════════════════════════
-    # OCR  (GPU EasyOCR)
+    # Stage 8 — Filter room regions  (original)
     # ══════════════════════════════════════════════════════════════════════════
-    def extract_label(self, img_bgr: np.ndarray, contour: np.ndarray) -> Optional[str]:
-        reader=get_ocr_reader()
-        if reader is None: return None
-        x,y,w,h=cv2.boundingRect(contour); pad=20
-        roi=img_bgr[max(0,y-pad):min(img_bgr.shape[0],y+h+pad),
-                    max(0,x-pad):min(img_bgr.shape[1],x+w+pad)]
-        if roi.size==0: return None
-        gray=cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY)
-        clahe=cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
-        proc=cv2.threshold(clahe.apply(gray),0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
-        rgb=cv2.cvtColor(cv2.medianBlur(proc,3),cv2.COLOR_GRAY2RGB)
-        try:
-            res=reader.readtext(rgb,detail=1,paragraph=False)
-            cands=[(t.strip().upper(),c) for _,t,c in res
-                    if c>=self.OCR_CONFIDENCE and len(t.strip())>=2
-                    and any(ch.isalpha() for ch in t)]
-            return max(cands,key=lambda x:x[1])[0] if cands else None
-        except Exception: return None
+    def _filter_rooms(self, rooms_mask: np.ndarray,
+                       img_shape: Tuple) -> Tuple[np.ndarray, List]:
+        h, w     = img_shape[:2]
+        img_area = float(h * w)
+        min_area = img_area * self.MIN_ROOM_AREA_FRAC
+        max_area = img_area * self.MAX_ROOM_AREA_FRAC
+        min_dim  = w * self.MIN_ROOM_DIM_FRAC
+        margin   = max(5.0, w * self.BORDER_MARGIN_FRAC)
+        contours, _ = cv2.findContours(rooms_mask, cv2.RETR_EXTERNAL,
+                                        cv2.CHAIN_APPROX_SIMPLE)
+        if not contours:
+            return np.zeros_like(rooms_mask), []
+        valid_mask  = np.zeros_like(rooms_mask)
+        valid_rooms = []
+        for cnt in contours:
+            area = cv2.contourArea(cnt)
+            if not (min_area <= area <= max_area):
+                continue
+            bx, by, bw, bh = cv2.boundingRect(cnt)
+            if bx < margin or by < margin or bx+bw > w-margin or by+bh > h-margin:
+                continue
+            if not (bw >= min_dim or bh >= min_dim):
+                continue
+            asp = max(bw,bh) / (min(bw,bh) + 1e-6)
+            if asp > self.MAX_ASPECT_RATIO:
+                continue
+            if (area / (bw*bh + 1e-6)) < self.MIN_EXTENT:
+                continue
+            hull = cv2.convexHull(cnt)
+            ha   = cv2.contourArea(hull)
+            if ha > 0 and (area / ha) < self.MIN_SOLIDITY:
+                continue
+            cv2.drawContours(valid_mask, [cnt], -1, 255, -1)
+            valid_rooms.append(cnt)
+        return valid_mask, valid_rooms
 
     # ══════════════════════════════════════════════════════════════════════════
-    # Wand click-to-segment
+    # Wand — click-to-segment  (original)
     # ══════════════════════════════════════════════════════════════════════════
     def wand_segment(self, walls: np.ndarray, click_x: int, click_y: int,
                       existing_rooms: List[Dict]) -> Optional[Dict]:
-        h,w=walls.shape
-        if not (0<=click_x<w and 0<=click_y<h): return None
-        if walls[click_y,click_x]>0: return None
-        rooms=self._segment_rooms(walls)
-        if rooms[click_y,click_x]==0: return None
-        ff=rooms.copy(); fm=np.zeros((h+2,w+2),np.uint8)
-        cv2.floodFill(ff,fm,(click_x,click_y),128)
-        rmask=((ff==128).astype(np.uint8)*255)
-        area=float(np.count_nonzero(rmask))
-        if area<100: return None
-        conts,_=cv2.findContours(rmask,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
-        if not conts: return None
-        cnt=max(conts,key=cv2.contourArea)
-        bx,by,bw,bh=cv2.boundingRect(cnt)
-        M=cv2.moments(cnt)
-        cx=int(M["m10"]/M["m00"]) if M["m00"] else bx+bw//2
-        cy=int(M["m01"]/M["m00"]) if M["m00"] else by+bh//2
-        seg=cnt[:,0,:].tolist(); seg=[v for pt in seg for v in pt]
-        nid=max((r["id"] for r in existing_rooms),default=0)+1
-        return {"id":nid,"label":f"Room {nid}","segmentation":[seg],
-                "area":area,"bbox":[bx,by,bw,bh],"centroid":[cx,cy],
-                "confidence":0.90,"isWand":True}
\ No newline at end of file
+        """Flood-fill from click point → return new room dict or None."""
+        h, w = walls.shape
+        if not (0 <= click_x < w and 0 <= click_y < h):
+            return None
+        if walls[click_y, click_x] > 0:
+            return None  # clicked on a wall
+
+        tmp = walls.copy()
+        tmp[:5,:]  = 255; tmp[-5:,:] = 255
+        tmp[:,:5]  = 255; tmp[:,-5:] = 255
+        filled = tmp.copy()
+        mask   = np.zeros((h+2, w+2), np.uint8)
+        for sx, sy in [(0,0),(w-1,0),(0,h-1),(w-1,h-1),
+                        (w//2,0),(w//2,h-1),(0,h//2),(w-1,h//2)]:
+            if filled[sy, sx] == 0:
+                cv2.floodFill(filled, mask, (sx, sy), 255)
+        rooms = cv2.bitwise_not(filled)
+        rooms = cv2.bitwise_and(rooms, cv2.bitwise_not(tmp))
+
+        if rooms[click_y, click_x] == 0:
+            return None
+
+        ff_mask   = rooms.copy()
+        fill_mask = np.zeros((h+2, w+2), np.uint8)
+        cv2.floodFill(ff_mask, fill_mask, (click_x, click_y), 128)
+        room_mask = (ff_mask == 128).astype(np.uint8) * 255
+
+        area = float(np.count_nonzero(room_mask))
+        if area < 100:
+            return None
+
+        contours, _ = cv2.findContours(room_mask, cv2.RETR_EXTERNAL,
+                                        cv2.CHAIN_APPROX_SIMPLE)
+        if not contours:
+            return None
+        cnt = max(contours, key=cv2.contourArea)
+        bx, by, bw, bh = cv2.boundingRect(cnt)
+        M  = cv2.moments(cnt)
+        cx = int(M["m10"]/M["m00"]) if M["m00"] else bx+bw//2
+        cy = int(M["m01"]/M["m00"]) if M["m00"] else by+bh//2
+
+        flat_seg = cnt[:,0,:].tolist()
+        flat_seg = [v for pt in flat_seg for v in pt]
+
+        new_id = max((r["id"] for r in existing_rooms), default=0) + 1
+        return {
+            "id"          : new_id,
+            "label"       : f"Room {new_id}",
+            "segmentation": [flat_seg],
+            "area"        : area,
+            "bbox"        : [bx, by, bw, bh],
+            "centroid"    : [cx, cy],
+            "confidence"  : 0.90,
+            "isWand"      : True,
+        }
+
+
+import os
\ No newline at end of file