Spaces:

yakvrz
/

drone-landing-safety

Runtime error

App Files Files Community

yakvrz committed 25 days ago

Commit

deeabb9

1 Parent(s): a463107

Tune defaults for masks and clarify warnings

Browse files

Files changed (3) hide show

app/config.py +61 -0
app/safety.py +463 -0
app/segmentation.py +177 -0

app/config.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
# --- Input locations (relative paths) ---------------------------------------
VISLOC_DIR = Path("data", "Image", "VISLOC")
HAGDAVS_DIR = Path("data", "Image", "HAGDAVS")
VIDEO_DIR = Path("data", "Video")

# --- Recognised file suffixes ------------------------------------------------
# Image suffixes are listed in both lower and upper case; video suffixes are
# lower case only.
IMAGE_EXTS = (
    ".jpg",
    ".jpeg",
    ".png",
    ".JPG",
    ".JPEG",
    ".PNG",
)
VIDEO_EXTS = {
    ".mp4",
    ".avi",
    ".mov",
    ".mkv",
    ".flv",
    ".wmv",
    ".webm",
    ".m4v",
}

# --- Camera geometry defaults (used as AnalyzerSettings defaults) ------------
DEFAULT_ALTITUDE_M = 450.0
ASSUMED_FOV_DEG = 90.0

# --- Model identifiers ---------------------------------------------------------
DEFAULT_MODEL_ID = "depth-anything/DA3MONO-LARGE"
SEGMENTATION_MODEL_ID = "facebook/sam3"

# --- Segmentation defaults -----------------------------------------------------
SEGMENTATION_MAX_SIDE = 384
SEGMENTATION_SCORE_THRESH = 0.5
SEGMENTATION_MASK_THRESH = 0.5

# Text prompts: one comma-separated string per hazard class.
WATER_PROMPT = ", ".join(("water", "river", "lake", "ocean", "sea"))
ROAD_PROMPT = ", ".join(("road", "highway", "street", "runway"))
@dataclass(frozen=True)
class AnalyzerSettings:
    """Immutable default values for the analysis parameters.

    One instance carries the settings that both the UI and the processing
    pipeline read. Segmentation, altitude, field-of-view and model defaults
    are taken from the module-level constants of this file.
    """

    # Landing footprint and flatness thresholds.
    footprint_m: float = 15.0
    std_thresh: float = 0.005
    grad_thresh: float = 0.1
    clearance_factor: float = 0.0

    # Depth inference.
    process_res_cap: int = 1024
    depth_smoothing_base: float = 0.8

    # Segmentation (defaults come from the SEGMENTATION_* / *_PROMPT constants).
    segmentation_max_side: int = SEGMENTATION_MAX_SIDE
    segmentation_score_thresh: float = SEGMENTATION_SCORE_THRESH
    segmentation_mask_thresh: float = SEGMENTATION_MASK_THRESH
    water_prompt: str = WATER_PROMPT
    road_prompt: str = ROAD_PROMPT

    # Landing-site selection.
    coverage_strictness: float = 0.95
    openness_weight: float = 0.3
    texture_threshold: float = 0.5

    # Camera geometry and depth model.
    altitude_m: float = DEFAULT_ALTITUDE_M
    fov_deg: float = ASSUMED_FOV_DEG
    model_id: str = DEFAULT_MODEL_ID
# Public names of the configuration module.
__all__ = [
    # input locations
    "VISLOC_DIR",
    "HAGDAVS_DIR",
    "VIDEO_DIR",
    # file suffixes
    "IMAGE_EXTS",
    "VIDEO_EXTS",
    # camera geometry and depth model
    "DEFAULT_ALTITUDE_M",
    "ASSUMED_FOV_DEG",
    "DEFAULT_MODEL_ID",
    # segmentation
    "SEGMENTATION_MODEL_ID",
    "SEGMENTATION_MAX_SIDE",
    "SEGMENTATION_SCORE_THRESH",
    "SEGMENTATION_MASK_THRESH",
    "WATER_PROMPT",
    "ROAD_PROMPT",
    # settings bundle
    "AnalyzerSettings",
]

app/safety.py ADDED Viewed

	@@ -0,0 +1,463 @@

+from __future__ import annotations
+from dataclasses import dataclass, replace
+from pathlib import Path
+from typing import Dict, Optional
+import time
+import cv2
+import numpy as np
+from PIL import Image
+from .config import IMAGE_EXTS
+from .depth_pipeline import DepthEngine, compute_roof_mask_depth, crop_nonblack, pick_flat_patch, smooth_depth
+from .segmentation import SegmenterRequest, SegmenterService
+from .visualization import build_result_layers
@dataclass
class AnalysisRequest:
    """Parameters for a single landing-safety analysis run.

    Every field except ``source_path`` is required. ``source_path`` is the
    optional origin of the image on disk; it is forwarded to the segmenter
    together with the image.
    """

    # Footprint size and flatness thresholds.
    footprint_m: float
    std_thresh: float
    grad_thresh: float

    # Which hazard masks to apply.
    use_water_mask: bool
    use_road_mask: bool
    use_roof_mask: bool

    # Text prompts handed to the segmenter.
    water_prompt: str
    road_prompt: str

    # Camera geometry used to convert the footprint from metres to pixels.
    altitude_m: float
    fov_deg: float

    # Processing knobs.
    clearance_factor: float
    process_res_cap: int
    depth_smoothing_base: float
    segmentation_max_side: int
    segmentation_score_thresh: float
    segmentation_mask_thresh: float
    coverage_strictness: float
    model_id: str
    openness_weight: float
    texture_threshold: float

    # Path the image was loaded from, when known.
    source_path: Optional[str] = None
@dataclass
class AnalysisSummary:
    """Numeric outcome of one analysis run, reported alongside the images."""

    # Depth model and timing.
    model_id: str
    process_resolution: int
    runtime_ms: float

    # Landing footprint, in metres and in pixels of the depth map / input image.
    footprint_m: float
    footprint_depth_px: int
    footprint_image_px: int

    # Chosen landing centre as (x, y) in depth-map and input-image coordinates.
    landing_center_depth: tuple[int, int]
    landing_center_image: tuple[int, int]

    # Percentages of the frame shown as safe / hazardous.
    safe_area_pct: float
    hazard_pct: float

    # Per-mask coverage in percent; None when the mask is disabled or absent.
    water_mask_pct: Optional[float]
    road_mask_pct: Optional[float]
    roof_mask_pct: Optional[float]

    # Whether each mask was requested.
    water_mask_enabled: bool
    road_mask_enabled: bool
    roof_mask_enabled: bool

    # True when the centre came from a fully safe footprint placement.
    used_valid_center: bool

    # Human-readable notes about the run.
    warnings: list[str]
@dataclass
class AnalysisResult:
    """Return value of an analysis: rendered layers plus the numeric summary."""

    # Named output images as produced by build_result_layers.
    images: Dict[str, Image.Image]
    # Metrics and warnings for the same run.
    summary: AnalysisSummary
class SafetyAnalyzer:
    """Locate a landing spot in an aerial image from depth and hazard masks.

    The analyzer combines a depth engine (depth map prediction) with a
    segmenter service (water / road masks) and produces an ``AnalysisResult``
    holding rendered layers and an ``AnalysisSummary``.
    """

    def __init__(self, depth_engine: DepthEngine | None = None, segmenter: SegmenterService | None = None):
        """Keep the given collaborators, creating defaults for omitted ones."""
        self.depth_engine = depth_engine if depth_engine is not None else DepthEngine()
        self.segmenter = segmenter if segmenter is not None else SegmenterService()

    # ------------------------------------------------------------------ helpers
    @staticmethod
    def _union_masks(masks) -> np.ndarray | None:
        """Return the element-wise OR of all non-None masks, or None if there are none.

        The result is a fresh array; the inputs are never modified.
        """
        union = None
        for mask in masks:
            if mask is None:
                continue
            if union is None:
                union = mask.copy()
            else:
                union |= mask
        return union

    @staticmethod
    def _resize_mask(mask: np.ndarray, size: tuple[int, int]) -> np.ndarray:
        """Nearest-neighbour resize of a boolean mask to ``size`` given as (width, height)."""
        resized = Image.fromarray(mask.astype(np.uint8) * 255).resize(size, resample=Image.NEAREST)
        return np.array(resized) > 0

    @staticmethod
    def _dilate_to_footprint(mask: np.ndarray | None, patch_px: int) -> np.ndarray | None:
        """Grow ``mask`` by a ``patch_px`` square so every footprint touching it is blocked."""
        if mask is None:
            return None
        if patch_px <= 1:
            return mask.copy()
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (patch_px, patch_px))
        return cv2.dilate(mask.astype(np.uint8), kernel, iterations=1).astype(bool)

    @staticmethod
    def _box_mean(arr: np.ndarray, k: int) -> np.ndarray:
        """Mean of ``arr`` over a ``k`` x ``k`` window, using reflect padding at the borders."""
        # Function-scope import keeps torch out of module import time.
        import torch
        import torch.nn.functional as F

        pad = k // 2
        t = torch.from_numpy(arr).unsqueeze(0).unsqueeze(0)
        t = F.pad(t, (pad, pad, pad, pad), mode="reflect")
        mean = F.avg_pool2d(t, kernel_size=k, stride=1, padding=0, count_include_pad=False)
        return mean.squeeze(0).squeeze(0).numpy()

    @staticmethod
    def _footprint_coverage(mask: np.ndarray, patch_px: int) -> np.ndarray:
        """Fraction of set pixels inside the ``patch_px`` window centred on each pixel."""
        return cv2.boxFilter(
            mask.astype(np.float32),
            ddepth=-1,
            ksize=(patch_px, patch_px),
            normalize=True,
            anchor=(patch_px // 2, patch_px // 2),
        )

    @staticmethod
    def _clamp_center(value, size: int, half_span: int) -> int:
        """Clamp a centre coordinate so the footprint stays inside ``size`` when it can."""
        if half_span > 0 and size > 2 * half_span:
            return min(max(int(value), half_span), size - half_span - 1)
        return min(max(int(value), 0), size - 1)

    @staticmethod
    def _mask_pct(mask: np.ndarray | None) -> Optional[float]:
        """Percentage of set pixels in ``mask``; None when there is no mask."""
        if mask is None:
            return None
        return float(mask.mean()) * 100.0

    # ---------------------------------------------------------------- roof mask
    @staticmethod
    def build_depth_roof_mask(
        depth: np.ndarray,
        grad_norm: np.ndarray,
        footprint_px: int,
        aggressiveness: float = 1.2,
        grad_threshold: float = 0.35,
        max_area_frac: float = 0.2,
    ) -> np.ndarray | None:
        """Build a roof mask from the depth map alone.

        The mask returned by ``compute_roof_mask_depth`` is restricted to
        pixels whose ``grad_norm`` is below ``grad_threshold``, cleaned with a
        morphological close and open, and split into connected components.
        Components smaller than a quarter of the footprint area (at least 64
        pixels) are dropped, as are components larger than ``max_area_frac``
        of the frame (no upper limit when ``max_area_frac`` is 0 or negative).

        Returns:
            Boolean mask of the kept components, or None when nothing is kept.
        """
        depth_mask = compute_roof_mask_depth(
            depth,
            aggressiveness=aggressiveness,
            morph_kernel=max(3, int(round(max(3, footprint_px * 0.15))) | 1),
        )
        roof_mask = (depth_mask & (grad_norm < grad_threshold)).astype(np.uint8)

        # `| 1` forces an odd kernel size.
        ksize = max(3, int(round(footprint_px * 0.1)) | 1)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))
        roof_mask = cv2.morphologyEx(roof_mask, cv2.MORPH_CLOSE, kernel)
        roof_mask = cv2.morphologyEx(roof_mask, cv2.MORPH_OPEN, kernel)

        min_area = max(footprint_px * footprint_px // 4, 64)
        max_area = max_area_frac * depth_mask.size if max_area_frac > 0 else None
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(roof_mask, connectivity=8)
        refined = np.zeros_like(roof_mask, dtype=bool)
        for label in range(1, num_labels):
            area = stats[label, cv2.CC_STAT_AREA]
            if area < min_area:
                continue
            if max_area is not None and area > max_area:
                # Skip overly large blobs (e.g., entire fields) to avoid over-masking
                continue
            refined |= labels == label
        return refined if refined.any() else None

    # ----------------------------------------------------------------- analysis
    def analyze_image(self, image: Image.Image, request: AnalysisRequest) -> AnalysisResult:
        """Run the full landing-safety analysis on one image.

        Args:
            image: Input picture. Non-RGB images are converted to RGB first so
                that the grayscale conversion below always gets three channels.
            request: Thresholds, mask switches and camera geometry for the run.

        Returns:
            ``AnalysisResult`` with the layers from ``build_result_layers`` and
            a populated ``AnalysisSummary``. Stages that fail and are skipped
            (clearance buffering, coverage filtering, distance transform,
            display dilation) are reported in ``summary.warnings``.
        """
        t0 = time.perf_counter()
        stage_warnings: list[str] = []

        if image.mode != "RGB":
            image = image.convert("RGB")
        rgb_np = np.array(image)

        # --- depth -------------------------------------------------------------
        depth_raw, depth, process_res = self.depth_engine.predict_depth(
            rgb_np, request.model_id, request.process_res_cap
        )
        res_scale = max(0.5, min(2.5, process_res / 1024))
        sigma = max(0.0, request.depth_smoothing_base) * res_scale
        depth = smooth_depth(depth, sigma)
        depth_h, depth_w = depth.shape[0], depth.shape[1]
        depth_size = (depth_w, depth_h)  # (width, height) order for PIL / cv2

        # --- footprint size in depth-map pixels (pinhole model) ----------------
        fov = max(10.0, min(170.0, float(request.fov_deg)))
        altitude = max(1.0, float(request.altitude_m))
        fx = (depth_w / 2.0) / np.tan(np.radians(fov) / 2.0)
        patch_px = int(round(request.footprint_m * fx / altitude))
        patch_px = max(3, min(patch_px, min(depth.shape) - 1))
        if patch_px % 2 == 0:
            patch_px += 1
        half_span = patch_px // 2  # always >= 1 because patch_px >= 3

        # --- local depth standard deviation, for visualisation only ------------
        depth_norm = (depth - depth.min()) / (np.ptp(depth) + 1e-6)
        vis_patch = max(
            5,
            min(
                patch_px,
                max(7, min(depth.shape) // 8),
                min(depth.shape) - 1,
            ),
        )
        if vis_patch % 2 == 0:
            vis_patch += 1
        mean_of_sq = self._box_mean(depth_norm * depth_norm, vis_patch)
        sq_of_mean = self._box_mean(depth_norm, vis_patch) ** 2
        std_map_vis = np.sqrt(np.maximum(mean_of_sq - sq_of_mean, 0.0))

        # --- image texture: blurred Sobel magnitude, normalised to [0, 1] ------
        gray = cv2.cvtColor(rgb_np, cv2.COLOR_RGB2GRAY).astype(np.float32) / 255.0
        gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
        gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
        texture = np.sqrt(gx * gx + gy * gy)
        sigma_tex = max(1.0, patch_px / 40.0)
        texture = cv2.GaussianBlur(texture, (0, 0), sigmaX=sigma_tex, sigmaY=sigma_tex)
        if texture.max() > texture.min():
            texture_norm = (texture - texture.min()) / (np.ptp(texture) + 1e-6)
        else:
            texture_norm = np.zeros_like(texture)
        texture_norm = cv2.resize(texture_norm, depth_size, interpolation=cv2.INTER_LINEAR)

        # --- water / road masks from the segmenter -----------------------------
        water_mask_resized = None
        road_mask_resized = None
        roof_mask_resized = None
        if request.use_water_mask or request.use_road_mask:
            masks = self.segmenter.get_masks(
                SegmenterRequest(
                    image=image,
                    source_path=request.source_path,
                    want_water=request.use_water_mask,
                    want_road=request.use_road_mask,
                    max_side=int(max(128, request.segmentation_max_side)),
                    water_prompt=request.water_prompt,
                    road_prompt=request.road_prompt,
                    score_threshold=float(request.segmentation_score_thresh),
                    mask_threshold=float(request.segmentation_mask_thresh),
                )
            )
            if request.use_water_mask and masks.get("water") is not None:
                water_mask_resized = self._resize_mask(masks["water"], depth_size)
            if request.use_road_mask and masks.get("road") is not None:
                road_mask_resized = self._resize_mask(masks["road"], depth_size)
        # "*_block" masks are grown by the footprint: a centre inside them would
        # place part of the footprint on the hazard.
        water_mask_block = self._dilate_to_footprint(water_mask_resized, patch_px)
        road_mask_block = self._dilate_to_footprint(road_mask_resized, patch_px)

        box, std_map, grad_norm, grad_mask, landing_mask = pick_flat_patch(
            depth,
            patch=patch_px,
            std_thresh=request.std_thresh,
            grad_thresh=request.grad_thresh,
            water_mask=water_mask_block,
        )

        # --- roof mask derived from depth --------------------------------------
        roof_mask_block = None
        if request.use_roof_mask:
            roof_mask_resized = self.build_depth_roof_mask(
                depth=depth,
                grad_norm=grad_norm,
                footprint_px=patch_px,
                max_area_frac=0.2,
            )
            roof_mask_block = self._dilate_to_footprint(roof_mask_resized, patch_px)

        seg_block_mask = self._union_masks((water_mask_block, road_mask_block, roof_mask_block))
        if seg_block_mask is not None:
            landing_mask = landing_mask & (~seg_block_mask)

        # Only centres at least half a footprint away from the border are allowed.
        interior_mask = np.zeros_like(landing_mask, dtype=bool)
        if landing_mask.shape[0] > 2 * half_span and landing_mask.shape[1] > 2 * half_span:
            interior_mask[
                half_span : landing_mask.shape[0] - half_span,
                half_span : landing_mask.shape[1] - half_span,
            ] = True
        landing_mask = landing_mask & interior_mask

        texture_mask = texture_norm <= max(0.0, min(1.0, request.texture_threshold))
        safe_mask = (std_map < request.std_thresh) & (grad_norm < request.grad_thresh) & landing_mask & texture_mask

        # --- clearance buffer around hazards (best effort) ---------------------
        try:
            clearance_px = max(1, int(round(request.clearance_factor * patch_px)))
            if clearance_px % 2 == 0:
                clearance_px += 1
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (clearance_px, clearance_px))
            hazard = ~safe_mask
            if seg_block_mask is not None:
                # Segmentation blocks are already footprint-expanded; do not buffer them again.
                hazard = hazard & (~seg_block_mask)
            buffered = cv2.dilate(hazard.astype(np.uint8), kernel, iterations=1).astype(bool)
            safe_mask = safe_mask & (~buffered)
            if seg_block_mask is not None:
                safe_mask = safe_mask & (~seg_block_mask)
        except Exception as exc:
            stage_warnings.append(f"Clearance buffering skipped: {exc}")

        # --- require a minimum safe fraction under the footprint (best effort) -
        try:
            coverage = self._footprint_coverage(safe_mask, patch_px)
            safe_mask = coverage >= max(0.0, min(1.0, request.coverage_strictness))
        except Exception as exc:
            stage_warnings.append(f"Coverage filtering skipped: {exc}")

        # --- drop safe regions smaller than one footprint ----------------------
        area_thresh = max(1, int(patch_px * patch_px))
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(safe_mask.astype(np.uint8), connectivity=8)
        if num_labels > 1:
            keep = np.zeros_like(labels, dtype=bool)
            for label in range(1, num_labels):
                if stats[label, cv2.CC_STAT_AREA] >= area_thresh:
                    keep |= labels == label
            safe_mask = keep

        risk_std = np.clip((std_map - request.std_thresh) / (request.std_thresh + 1e-6), 0.0, 1.0)
        risk_grad = np.clip((grad_norm - request.grad_thresh) / (request.grad_thresh + 1e-6), 0.0, 1.0)
        risk_map = np.maximum(risk_std, risk_grad) * (~safe_mask)

        # --- choose the landing centre -----------------------------------------
        safe_fit = safe_mask.astype(np.float32)
        try:
            distance = cv2.distanceTransform(safe_mask.astype(np.uint8), cv2.DIST_L2, 3)
        except Exception as exc:
            stage_warnings.append(f"Distance transform unavailable: {exc}")
            distance = np.zeros_like(safe_fit)
        try:
            coverage = self._footprint_coverage(safe_fit, patch_px)
            # A fully safe window averages to 1; allow half a pixel of slack so
            # floating-point rounding in the filter cannot reject it, while one
            # unsafe pixel (1 / patch_px**2 below 1) is still rejected.
            valid_centers = coverage >= 1.0 - 0.5 / float(patch_px * patch_px)
        except Exception as exc:
            stage_warnings.append(f"Footprint fit check fell back to the raw safe mask: {exc}")
            valid_centers = safe_fit > 0.5

        used_valid_center = bool(valid_centers.any())
        if used_valid_center:
            num_c, labels_c, stats_c, _ = cv2.connectedComponentsWithStats(
                valid_centers.astype(np.uint8), connectivity=8
            )
            target_mask = valid_centers
            if num_c > 1:
                # Restrict candidates to the largest connected region of valid centres.
                largest_idx = 1 + int(np.argmax(stats_c[1:, cv2.CC_STAT_AREA]))
                target_mask = labels_c == largest_idx
            # target_mask is never empty here because valid_centers has a set pixel.
            cand = np.where(target_mask)
            dist_cand = distance[cand]
            std_cand = std_map[cand]
            dist_norm = dist_cand / (dist_cand.max() + 1e-6)
            std_norm = (std_cand - std_cand.min()) / (np.ptp(std_cand) + 1e-6)
            weight = max(0.0, min(1.0, request.openness_weight))
            # Prefer points far from unsafe pixels, penalised by local depth variation.
            idx = int(np.argmax(dist_norm - weight * std_norm))
            cy, cx = cand[0][idx], cand[1][idx]
        else:
            fallback_mask = landing_mask.copy()
            if not fallback_mask.any():
                fallback_mask = np.ones_like(landing_mask, dtype=bool)
                if seg_block_mask is not None:
                    fallback_mask &= ~seg_block_mask
                fallback_mask &= interior_mask
            if fallback_mask.any():
                # Flattest remaining candidate.
                cand = np.where(fallback_mask)
                idx = int(np.argmin(std_map[cand]))
                cy, cx = cand[0][idx], cand[1][idx]
            else:
                # Last resort: centre of the box from pick_flat_patch, kept inside the frame.
                y0, x0, y1, x1 = box[1], box[0], box[3], box[2]
                cy = self._clamp_center((y0 + y1) // 2, depth_h, half_span)
                cx = self._clamp_center((x0 + x1) // 2, depth_w, half_span)
        # Plain ints so the summary does not carry numpy scalars.
        cx, cy = int(cx), int(cy)

        scale_x = image.width / depth_w
        scale_y = image.height / depth_h
        footprint_img_px = max(3, int(round(patch_px * scale_x)))
        center_img = (int(round(cx * scale_x)), int(round(cy * scale_y)))
        center_depth = (cx, cy)

        # --- display mask: safe centres grown to the area the footprint covers -
        try:
            footprint_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (patch_px, patch_px))
            safe_display_mask = cv2.dilate(safe_mask.astype(np.uint8), footprint_kernel, iterations=1).astype(bool)
        except Exception as exc:
            stage_warnings.append(f"Safe-area display expansion skipped: {exc}")
            safe_display_mask = safe_mask

        mask_union = self._union_masks((water_mask_resized, road_mask_resized, roof_mask_resized))
        seg_mask_union = mask_union.copy() if mask_union is not None else None
        if mask_union is not None:
            safe_display_mask = safe_display_mask & (~mask_union)
        hazard_mask = ~safe_display_mask

        layers = build_result_layers(
            image=image,
            depth_raw=depth_raw,
            std_map_vis=std_map_vis,
            grad_norm=grad_norm,
            grad_thresh=request.grad_thresh,
            safe_mask=safe_display_mask,
            risk_map=risk_map,
            footprint_img_px=footprint_img_px,
            center_img=center_img,
            water_mask=water_mask_resized,
            road_mask=road_mask_resized,
            roof_mask=roof_mask_resized,
            seg_mask_union=seg_mask_union,
            hazard_mask=hazard_mask,
        )
        runtime_ms = (time.perf_counter() - t0) * 1000.0
        safe_area_pct = float(safe_display_mask.mean()) * 100.0
        hazard_pct = 100.0 - safe_area_pct

        warnings: list[str] = []
        if not safe_mask.any():
            warnings.append("No regions satisfied safety thresholds; showing flattest candidate.")
        if not request.use_water_mask:
            warnings.append("Water mask disabled.")
        elif water_mask_resized is None:
            warnings.append("No water detected; continuing without a water mask.")
        if not request.use_road_mask:
            warnings.append("Road mask disabled.")
        elif road_mask_resized is None:
            warnings.append("Road segmentation unavailable; continuing without mask.")
        if not request.use_roof_mask:
            warnings.append("Roof mask disabled.")
        elif roof_mask_resized is None:
            warnings.append("Roof segmentation unavailable; continuing without mask.")
        warnings.extend(stage_warnings)

        summary = AnalysisSummary(
            model_id=request.model_id,
            process_resolution=process_res,
            runtime_ms=runtime_ms,
            footprint_m=request.footprint_m,
            footprint_depth_px=patch_px,
            footprint_image_px=footprint_img_px,
            landing_center_depth=center_depth,
            landing_center_image=center_img,
            safe_area_pct=safe_area_pct,
            hazard_pct=hazard_pct,
            water_mask_pct=self._mask_pct(water_mask_resized) if request.use_water_mask else None,
            road_mask_pct=self._mask_pct(road_mask_resized) if request.use_road_mask else None,
            roof_mask_pct=self._mask_pct(roof_mask_resized) if request.use_roof_mask else None,
            water_mask_enabled=request.use_water_mask,
            road_mask_enabled=request.use_road_mask,
            roof_mask_enabled=request.use_roof_mask,
            used_valid_center=used_valid_center,
            warnings=warnings,
        )
        return AnalysisResult(images=layers, summary=summary)

    def process_path(self, path: Path, request: AnalysisRequest) -> AnalysisResult:
        """Load an image file and analyse it.

        Args:
            path: Location of the image; a plain string is accepted as well.
            request: Analysis parameters. A copy with ``source_path`` set to
                ``path`` is used; the caller's object is left untouched.

        Raises:
            ValueError: If ``path`` does not exist or its suffix is not listed
                in ``IMAGE_EXTS``.
        """
        path = Path(path)
        if not path.exists():
            raise ValueError(f"Input path not found: {path}")
        if path.suffix.lower() not in IMAGE_EXTS:
            raise ValueError(f"Unsupported image type for path: {path}")
        # convert() returns an independent, fully loaded copy, so the file
        # handle can be released as soon as the with-block ends.
        with Image.open(path) as handle:
            image = crop_nonblack(handle.convert("RGB"))
        request_with_source = replace(request, source_path=str(path))
        return self.analyze_image(image, request_with_source)
def build_request(**kwargs) -> AnalysisRequest:
    """Create an ``AnalysisRequest`` from keyword arguments.

    The keywords are passed to the dataclass constructor unchanged, so a
    missing or unknown field name raises ``TypeError`` there.
    """
    request = AnalysisRequest(**kwargs)
    return request
# Public names of the safety module.
__all__ = [
    "SafetyAnalyzer",
    "AnalysisRequest",
    "AnalysisResult",
    "AnalysisSummary",
    "build_request",
]

app/segmentation.py ADDED Viewed

	@@ -0,0 +1,177 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, Optional
+import re
+import numpy as np
+import torch
+from PIL import Image
+from .config import (
+    ROAD_PROMPT,
+    SEGMENTATION_MASK_THRESH,
+    SEGMENTATION_MAX_SIDE,
+    SEGMENTATION_MODEL_ID,
+    SEGMENTATION_SCORE_THRESH,
+    WATER_PROMPT,
+)
class SemanticSegmenter:
    """Promptable segmenter backed by SAM3."""

    # Prompts may be separated by ";" , "," or a newline. (The previous pattern
    # r"[;,\\n]" was a character class containing a backslash and the letter
    # "n", so words such as "ocean" or "runway" were cut apart.)
    _PROMPT_SEPARATORS = re.compile(r"[;,\n]")

    def __init__(self, model_id: str):
        """Load processor and model for ``model_id`` and move the model to a device.

        CUDA is used when available; if moving the model there raises
        ``RuntimeError`` the model is placed on the CPU instead.

        Raises:
            ImportError: If the installed ``transformers`` exposes none of the
                processor or model classes this wrapper knows how to use.
        """
        import transformers  # type: ignore

        processor_cls = (
            getattr(transformers, "Sam3Processor", None)
            or getattr(transformers, "AutoProcessor", None)
            or getattr(transformers, "AutoImageProcessor", None)
        )
        model_cls = getattr(transformers, "Sam3Model", None) or getattr(
            transformers, "AutoModelForMaskGeneration", None
        )
        if processor_cls is None or model_cls is None:
            # Fail with a clear message instead of "'NoneType' has no attribute 'from_pretrained'".
            raise ImportError(
                "The installed transformers package provides no usable SAM3 processor/model class; "
                "install a build that includes Sam3Processor and Sam3Model."
            )

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        processor = processor_cls.from_pretrained(model_id)
        model = model_cls.from_pretrained(model_id)
        try:
            model = model.to(device)
        except RuntimeError as exc:
            # Fall back to CPU if the GPU move fails (e.g., OOM or missing device)
            device = torch.device("cpu")
            model = model.to(device)
            print(f"[WARN] SAM3 fell back to CPU after .to(device) error: {exc}")
        model.eval()

        self.processor = processor
        self.model = model
        self.device = device
        if torch.cuda.is_available() and self.device.type != "cuda":
            print("[WARN] CUDA is available but SAM3 is running on CPU; mask generation will be slow.")
        else:
            print(f"[INFO] SAM3 loaded on {self.device}")

    @classmethod
    def _split_prompts(cls, text: str) -> list[str]:
        """Split a prompt string into individual, stripped, non-empty prompts.

        Separators are ";", "," and newline. If that leaves nothing but the
        stripped text is not empty (the text consists of separators only), the
        stripped text is returned as a single prompt.
        """
        parts = [piece.strip() for piece in cls._PROMPT_SEPARATORS.split(text) if piece.strip()]
        if parts:
            return parts
        stripped = text.strip()
        return [stripped] if stripped else []

    def segment(
        self,
        img: Image.Image,
        max_side: int,
        prompts: Dict[str, str],
        score_threshold: float,
        mask_threshold: float,
    ) -> dict[str, np.ndarray]:
        """Segment ``img`` once per text prompt and merge the hits per key.

        Args:
            img: Image to segment. It is downscaled (bilinear) so that its
                longer side is at most ``max_side`` before inference.
            max_side: Upper bound for the longer image side during inference.
            prompts: Mapping of output key to prompt text. Each text may hold
                several prompts separated by ";", "," or newlines.
            score_threshold: Passed as ``threshold`` to the processor's
                instance-segmentation post-processing.
            mask_threshold: Passed as ``mask_threshold`` to the same call.

        Returns:
            Mapping of key to a boolean mask at the original image size (the
            union over all prompts and instances of that key). Keys without
            any positive pixel are omitted.

        Raises:
            ImportError: If the loaded processor does not accept text prompts.
        """
        if not prompts:
            return {}

        orig_size = img.size  # (W, H)
        img_proc = img
        longest = max(img.size)
        if longest > max_side:
            scale = max_side / longest
            new_size = (
                max(1, int(round(img.size[0] * scale))),
                max(1, int(round(img.size[1] * scale))),
            )
            img_proc = img.resize(new_size, resample=Image.BILINEAR)

        masks: dict[str, np.ndarray] = {}
        for key, prompt in prompts.items():
            prompt_texts = self._split_prompts(prompt or "")
            if not prompt_texts:
                continue
            mask_union = None
            for text in prompt_texts:
                try:
                    inputs = self.processor(images=img_proc, text=text, return_tensors="pt").to(self.device)
                except TypeError as exc:
                    raise ImportError(
                        "Loaded processor does not accept text prompts; install a transformers build with SAM3 text prompting support (e.g., pip install --upgrade transformers or a nightly that includes Sam3Processor)."
                    ) from exc
                with torch.inference_mode():
                    outputs = self.model(**inputs)
                # target_sizes is (height, width), hence the swapped order.
                results = self.processor.post_process_instance_segmentation(
                    outputs,
                    threshold=score_threshold,
                    mask_threshold=mask_threshold,
                    target_sizes=[(orig_size[1], orig_size[0])],
                )[0]
                inst_masks = results.get("masks")
                if inst_masks is None or len(inst_masks) == 0:
                    continue
                if torch.is_floating_point(inst_masks):
                    inst_masks = inst_masks > 0.5
                # Collapse all instances of this prompt into one mask.
                mask_tensor = torch.any(inst_masks, dim=0)
                mask_union = mask_tensor if mask_union is None else (mask_union | mask_tensor)
            if mask_union is None:
                continue
            mask_np = mask_union.detach().cpu().numpy().astype(bool)
            if mask_np.any():
                masks[key] = mask_np
        return masks
@dataclass
class SegmenterRequest:
    """Input for ``SegmenterService.get_masks``.

    Only ``image`` is required; everything else falls back to the defaults
    imported from the configuration module.
    """

    # Image to segment.
    image: Image.Image
    # Where the image came from; used as part of the mask cache key.
    source_path: Optional[str] = None

    # Which masks the caller wants back.
    want_water: bool = False
    want_road: bool = False

    # Inference settings.
    max_side: int = SEGMENTATION_MAX_SIDE
    water_prompt: str = WATER_PROMPT
    road_prompt: str = ROAD_PROMPT
    score_threshold: float = SEGMENTATION_SCORE_THRESH
    mask_threshold: float = SEGMENTATION_MASK_THRESH
class SegmenterService:
    """Caches segmenters and mask outputs across UI interactions."""

    def __init__(self, model_id: str = SEGMENTATION_MODEL_ID):
        """Remember the model id; the model itself is loaded on first use."""
        self.model_id = model_id
        self._segmenters: Dict[str, SemanticSegmenter] = {}
        # One entry per (model, source, size, label, prompt, thresholds).
        self._mask_cache: Dict[tuple[str, str, int, str, str, float, float], np.ndarray] = {}

    def _get_segmenter(self, model_id: str) -> SemanticSegmenter:
        """Return the segmenter for ``model_id``, constructing it on first request."""
        if model_id not in self._segmenters:
            self._segmenters[model_id] = SemanticSegmenter(model_id)
        return self._segmenters[model_id]

    def _cache_key(
        self, request: SegmenterRequest, label: str, prompt: str
    ) -> tuple[str, str, int, str, str, float, float]:
        """Build the cache key for one mask label of ``request``."""
        return (
            self.model_id,
            request.source_path or "",
            request.max_side,
            label,
            prompt.strip(),
            float(request.score_threshold),
            float(request.mask_threshold),
        )

    def get_masks(self, request: SegmenterRequest) -> dict[str, np.ndarray]:
        """Return the requested hazard masks for ``request.image``.

        Masks are cached per label, so a mask computed for an earlier request
        is reused, and a label that was not requested before is still computed
        when it is asked for later. (Previously one cache entry covered both
        labels without recording which ones had been requested, so a cached
        water-only result hid the road mask for the same file.) Only masks of
        requests that carry a ``source_path`` are cached, and only labels for
        which the segmenter returned a mask.

        A ``RuntimeError`` raised during segmentation is logged and treated as
        "no masks"; other exceptions propagate.

        Returns:
            Dict with a "water" and/or "road" boolean mask for every requested
            label that produced one; an empty dict when nothing was requested
            or found.
        """
        wanted: dict[str, str] = {}
        if request.want_water and request.water_prompt:
            wanted["water"] = request.water_prompt
        if request.want_road and request.road_prompt:
            wanted["road"] = request.road_prompt
        if not wanted:
            # Nothing to segment, so do not load the model either.
            return {}

        found: dict[str, np.ndarray] = {}
        missing: dict[str, str] = {}
        for label, prompt in wanted.items():
            cached = self._mask_cache.get(self._cache_key(request, label, prompt))
            if cached is not None:
                found[label] = cached
            else:
                missing[label] = prompt

        if missing:
            segmenter = self._get_segmenter(self.model_id)
            try:
                fresh = segmenter.segment(
                    request.image,
                    request.max_side,
                    prompts=missing,
                    score_threshold=float(request.score_threshold),
                    mask_threshold=float(request.mask_threshold),
                )
            except RuntimeError as exc:
                print(f"[WARN] Segmentation failed; skipping masks: {exc}")
                fresh = {}
            for label, prompt in missing.items():
                mask = fresh.get(label)
                if mask is None:
                    continue
                found[label] = mask
                if request.source_path:
                    self._mask_cache[self._cache_key(request, label, prompt)] = mask

        # Report in the fixed order water, road.
        return {label: found[label] for label in wanted if label in found}
# Public names of the segmentation module.
__all__ = [
    "SegmenterService",
    "SegmenterRequest",
    "SemanticSegmenter",
]