yakvrz committed on
Commit
05c6078
·
1 Parent(s): 34a328a

Add app module and helpers (depth pipeline, data sources, viz)

Browse files
app/__init__.py ADDED
File without changes
app/data_sources.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+
6
+ from .config import HAGDAVS_DIR, IMAGE_EXTS, VIDEO_DIR, VIDEO_EXTS, VISLOC_DIR
7
+
8
+
9
@lru_cache(maxsize=1)
def list_visloc_images() -> list[Path]:
    """Return sorted VisLoc image paths; empty when the directory is absent."""
    if not VISLOC_DIR.exists():
        return []
    return sorted(entry for entry in VISLOC_DIR.iterdir() if entry.suffix in IMAGE_EXTS)
15
+
16
+
17
@lru_cache(maxsize=1)
def list_hagdavs_images() -> list[Path]:
    """Return sorted HAGDAVS image paths; empty when the directory is absent."""
    if not HAGDAVS_DIR.exists():
        return []
    return sorted(entry for entry in HAGDAVS_DIR.iterdir() if entry.suffix in IMAGE_EXTS)
23
+
24
+
25
@lru_cache(maxsize=1)
def list_videos() -> list[Path]:
    """Return sorted video paths; extension match is case-insensitive."""
    if not VIDEO_DIR.exists():
        return []
    return sorted(entry for entry in VIDEO_DIR.iterdir() if entry.suffix.lower() in VIDEO_EXTS)
31
+
32
+
33
@lru_cache(maxsize=1)
def list_all_data_inputs() -> list[str]:
    """Return every selectable input path as a string.

    NOTE(review): despite the name, only VisLoc images are exposed here —
    HAGDAVS images and videos are not included; confirm this is intentional.
    """
    return [str(image_path) for image_path in list_visloc_images()]
36
+
37
+
38
def clear_caches() -> None:
    """Drop all cached directory listings so the next call re-scans disk."""
    for cached_listing in (
        list_visloc_images,
        list_hagdavs_images,
        list_videos,
        list_all_data_inputs,
    ):
        cached_listing.cache_clear()
43
+
44
+
45
+ __all__ = [
46
+ "list_visloc_images",
47
+ "list_hagdavs_images",
48
+ "list_videos",
49
+ "list_all_data_inputs",
50
+ "clear_caches",
51
+ ]
app/depth_pipeline.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import math
5
+ from pathlib import Path
6
+ from typing import Tuple
7
+
8
+ import cv2
9
+ import numpy as np
10
+ import torch
11
+ from PIL import Image
12
+
13
# Prefer an installed `depth_anything_3` package; if it is not importable,
# fall back to the in-repo copy by putting <repo>/src on sys.path.
try:  # pragma: no cover - optional dependency resolution
    from depth_anything_3.api import DepthAnything3  # type: ignore
    from depth_anything_3.utils.visualize import visualize_depth  # type: ignore
except ModuleNotFoundError:  # pragma: no cover
    import sys

    # This file lives in <repo>/app/, so parents[1] is the repository root.
    ROOT = Path(__file__).resolve().parents[1]
    sys.path.append(str(ROOT / "src"))
    from depth_anything_3.api import DepthAnything3  # type: ignore # noqa: E402
    from depth_anything_3.utils.visualize import visualize_depth  # type: ignore # noqa: E402
23
+
24
+
25
def crop_nonblack(img: Image.Image, frac: float = 0.05) -> Image.Image:
    """Trim a *frac*-wide border from every side of *img*.

    NOTE(review): despite the name, this crops a fixed fraction of the image
    rather than detecting black borders — confirm that is the intent.
    """
    width, height = img.size
    margin_x = int(round(width * frac))
    margin_y = int(round(height * frac))
    return img.crop((margin_x, margin_y, width - margin_x, height - margin_y))
30
+
31
+
32
def compute_roof_mask_depth(depth: np.ndarray, aggressiveness: float = 1.3, morph_kernel: int = 5) -> np.ndarray:
    """Flag pixels well below the median depth value.

    Thresholds at ``median - aggressiveness * MAD`` and cleans the result with
    morphological open/close. Returns a boolean array shaped like *depth*.
    NOTE(review): assumes smaller depth values correspond to roofs/elevated
    surfaces — confirm against the depth model's convention.
    """
    values = depth.astype(np.float32)
    center = np.median(values)
    spread = np.median(np.abs(values - center)) + 1e-6  # MAD with zero-guard
    raw_mask = (values < center - aggressiveness * spread).astype(np.uint8)

    # Structuring element size: odd and at least 1.
    size = max(1, int(morph_kernel))
    if size % 2 == 0:
        size += 1
    element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))
    try:
        raw_mask = cv2.morphologyEx(raw_mask, cv2.MORPH_OPEN, element)
        raw_mask = cv2.morphologyEx(raw_mask, cv2.MORPH_CLOSE, element)
    except Exception:
        # Best-effort cleanup: keep the unsmoothed mask if OpenCV balks.
        pass
    return raw_mask > 0
49
+
50
+
51
+ def fit_plane_ransac(points: np.ndarray, values: np.ndarray, iterations: int = 200, threshold: float = 0.01):
52
+ best_coef = None
53
+ best_inliers = -1
54
+ n_samples = points.shape[0]
55
+ if n_samples < 3:
56
+ return None
57
+ for _ in range(iterations):
58
+ idx = np.random.choice(n_samples, 3, replace=False)
59
+ A = np.concatenate([points[idx], np.ones((3, 1))], axis=1)
60
+ try:
61
+ coef = np.linalg.lstsq(A, values[idx], rcond=None)[0]
62
+ except np.linalg.LinAlgError:
63
+ continue
64
+ residuals = np.abs(points[:, 0] * coef[0] + points[:, 1] * coef[1] + coef[2] - values.flatten())
65
+ inliers = np.sum(residuals < threshold)
66
+ if inliers > best_inliers:
67
+ best_inliers = inliers
68
+ best_coef = coef
69
+ return best_coef
70
+
71
+
72
def remove_global_plane(depth: np.ndarray) -> np.ndarray:
    """Subtract a RANSAC-fitted global plane from a 2D depth map.

    Non-2D input and failed fits are returned unchanged.
    """
    if depth.ndim != 2:
        return depth
    height, width = depth.shape
    yy, xx = np.mgrid[0:height, 0:width].astype(np.float32)
    coords = np.stack((xx.flatten(), yy.flatten()), axis=1)
    samples = depth.astype(np.float32).reshape(-1, 1)
    # Inlier threshold scales with the depth range of this particular map.
    coef = fit_plane_ransac(coords, samples, iterations=300, threshold=0.01 * np.ptp(depth))
    if coef is None:
        return depth
    plane = (coords @ coef[:2] + coef[2]).reshape(height, width)
    return depth - plane
84
+
85
+
86
+ def pick_flat_patch(
87
+ depth: np.ndarray,
88
+ patch: int = 96,
89
+ std_thresh: float = 0.03,
90
+ grad_thresh: float = 0.35,
91
+ water_mask: np.ndarray | None = None,
92
+ ):
93
+ depth = depth.astype(np.float32)
94
+ if depth.ndim != 2:
95
+ raise ValueError("Depth map must be 2D (H, W)")
96
+
97
+ patch = max(3, min(patch, min(depth.shape)))
98
+ if patch % 2 == 0:
99
+ patch += 1
100
+ depth_norm = (depth - depth.min()) / (np.ptp(depth) + 1e-6)
101
+
102
+ import torch.nn.functional as F
103
+
104
+ def box_mean(arr, k):
105
+ pad = k // 2
106
+ t = torch.from_numpy(arr).unsqueeze(0).unsqueeze(0)
107
+ t = F.pad(t, (pad, pad, pad, pad), mode="reflect")
108
+ mean = F.avg_pool2d(t, kernel_size=k, stride=1, padding=0, count_include_pad=False)
109
+ return mean.squeeze(0).squeeze(0).numpy()
110
+
111
+ mean = box_mean(depth_norm, patch)
112
+ mean_sq = box_mean(depth_norm * depth_norm, patch)
113
+ var = np.maximum(mean_sq - mean * mean, 0.0)
114
+ std_map = np.sqrt(var)
115
+
116
+ dy, dx = np.gradient(depth_norm)
117
+ grad = np.sqrt(dx * dx + dy * dy)
118
+ grad_ref = np.percentile(grad, 95) + 1e-6
119
+ grad_norm = np.clip(grad / grad_ref, 0.0, 1.0)
120
+ grad_mask = grad_norm < grad_thresh
121
+
122
+ landing_mask = grad_mask
123
+ if water_mask is not None and water_mask.shape == grad_mask.shape:
124
+ landing_mask = landing_mask & (~water_mask)
125
+
126
+ masked_std = np.where(landing_mask, std_map, np.inf)
127
+ if not np.isfinite(masked_std).any():
128
+ masked_std = std_map
129
+ y, x = np.unravel_index(np.argmin(masked_std), masked_std.shape)
130
+ half = patch // 2
131
+ y0, y1 = max(y - half, 0), min(y + half, depth.shape[0] - 1)
132
+ x0, x1 = max(x - half, 0), min(x + half, depth.shape[1] - 1)
133
+ return (x0, y0, x1, y1), std_map, grad_norm, grad_mask, landing_mask
134
+
135
+
136
class DepthEngine:
    """Caches DepthAnything models and runs inference at bounded resolution."""

    def __init__(self):
        # model_id -> (loaded model, device it lives on)
        self._model_cache: dict[str, tuple[DepthAnything3, torch.device]] = {}

    def _load_model(self, model_id: str) -> tuple[DepthAnything3, torch.device]:
        """Instantiate *model_id* in eval mode on GPU when available, else CPU."""
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = DepthAnything3.from_pretrained(model_id).to(device)
        model.eval()
        return model, device

    def get_model(self, model_id: str) -> tuple[DepthAnything3, torch.device]:
        """Return the cached (model, device) pair, loading on first request."""
        try:
            return self._model_cache[model_id]
        except KeyError:
            pair = self._load_model(model_id)
            self._model_cache[model_id] = pair
            return pair

    def predict_depth(
        self, image: np.ndarray, model_id: str, process_res_cap: int
    ) -> tuple[np.ndarray, np.ndarray, int]:
        """Run depth inference on *image*, capping the processing resolution.

        Returns ``(raw depth, plane-removed depth, resolution actually used)``.
        """
        model, device = self.get_model(model_id)
        largest_side = max(image.shape[0], image.shape[1])
        process_res = min(largest_side, int(process_res_cap))
        with torch.inference_mode():
            prediction = model.inference(
                image=[image],
                process_res=process_res,
                process_res_method="upper_bound_resize",
                export_dir=None,
            )
        depth_raw = np.array(prediction.depth[0])
        depth = remove_global_plane(depth_raw)
        return depth_raw, depth, process_res
168
+
169
+
170
def smooth_depth(depth: np.ndarray, sigma: float) -> np.ndarray:
    """Gaussian-smooth *depth*; a non-positive *sigma* is a no-op."""
    if sigma <= 0:
        return depth
    # Odd kernel size covering roughly +/- 3 sigma, never below 3.
    kernel_size = max(3, int(round(sigma * 3)) * 2 + 1)
    try:
        return cv2.GaussianBlur(depth, (kernel_size, kernel_size), sigmaX=sigma, sigmaY=sigma)
    except Exception:
        # Best effort: hand the input back unchanged if OpenCV rejects the call.
        return depth
179
+
180
+
181
+ __all__ = [
182
+ "DepthEngine",
183
+ "compute_roof_mask_depth",
184
+ "crop_nonblack",
185
+ "fit_plane_ransac",
186
+ "pick_flat_patch",
187
+ "remove_global_plane",
188
+ "smooth_depth",
189
+ "visualize_depth",
190
+ ]
app/visualization.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Tuple
4
+
5
+ import numpy as np
6
+ from PIL import Image, ImageDraw
7
+
8
+ from .depth_pipeline import visualize_depth
9
+
10
# Fixed overlay opacities (0-1) for the depth-gradient and flatness layers.
GRAD_ALPHA = 0.35
FLAT_ALPHA = 0.25
12
+
13
def make_safety_heatmap(
    rgb: Image.Image,
    safe_mask: np.ndarray,
    hazard_mask: np.ndarray,
    risk_map: np.ndarray,
    risk_threshold: float = 0.35,
):
    """Build RGBA safe/hazard overlays and a grayscale safety-score image.

    Returns ``(safe overlay, hazard overlay, score grayscale)``, each resized
    to ``rgb.size`` with nearest-neighbour sampling.
    """
    safe = np.clip(safe_mask.astype(np.float32), 0.0, 1.0)
    hazard = hazard_mask.astype(bool)
    risk = np.clip(risk_map.astype(np.float32), 0.0, 1.0)

    height, width = safe.shape
    # Opaque green wherever the safe score is non-zero.
    safe_overlay = np.zeros((height, width, 4), dtype=np.uint8)
    safe_pixels = safe > 0.0
    safe_overlay[safe_pixels, 1] = 255
    safe_overlay[safe_pixels, 3] = 255

    # Red overlay whose alpha tracks risk above the threshold; explicit hazard
    # pixels are forced fully opaque.
    risk_focus = np.where(risk > risk_threshold, risk, 0.0).astype(np.float32)
    hazard_intensity = np.where(hazard, np.maximum(risk_focus, 1.0), risk_focus)
    hazard_alpha = (np.clip(hazard_intensity, 0.0, 1.0) * 255).astype(np.uint8)
    hazard_overlay = np.zeros((height, width, 4), dtype=np.uint8)
    hazard_overlay[..., 0] = 255
    hazard_overlay[..., 3] = hazard_alpha

    safe_img = Image.fromarray(safe_overlay, mode="RGBA").resize(rgb.size, resample=Image.NEAREST)
    hazard_img = Image.fromarray(hazard_overlay, mode="RGBA").resize(rgb.size, resample=Image.NEAREST)
    score_gray = Image.fromarray((safe * 255).astype(np.uint8)).resize(rgb.size, resample=Image.NEAREST)
    return safe_img, hazard_img, score_gray
42
+
43
+
44
def build_result_layers(
    image: Image.Image,
    depth_raw: np.ndarray,
    std_map_vis: np.ndarray,
    grad_norm: np.ndarray,
    grad_thresh: float,
    safe_mask: np.ndarray,
    risk_map: np.ndarray,
    footprint_img_px: int,
    center_img: Tuple[int, int],
    water_mask: np.ndarray | None,
    road_mask: np.ndarray | None,
    roof_mask: np.ndarray | None,
    seg_mask_union: np.ndarray | None,
    hazard_mask: np.ndarray,
) -> Dict[str, Image.Image]:
    """Render every named visualization layer for one analyzed frame.

    Returns a dict mapping UI view names to PIL images, all sized to
    ``image.size``. Continuous maps are resampled bilinearly; masks use
    nearest-neighbour so their edges stay crisp.
    """
    depth_vis = Image.fromarray(visualize_depth(depth_raw, cmap="Spectral")).resize(
        image.size, resample=Image.BILINEAR
    )
    flatness_img = Image.fromarray((std_map_vis / (std_map_vis.max() + 1e-6) * 255).astype(np.uint8)).resize(
        image.size, resample=Image.NEAREST
    )
    grad_img = Image.fromarray((grad_norm * 255).astype(np.uint8)).resize(image.size, resample=Image.BILINEAR)
    grad_mask_img = Image.fromarray(((grad_norm < grad_thresh).astype(np.uint8) * 255)).resize(
        image.size, resample=Image.NEAREST
    )

    def _mask_to_image(mask: np.ndarray | None) -> Image.Image:
        # 0/1 (or boolean) mask -> full-resolution 8-bit grayscale image.
        if mask is None:
            return Image.new("L", image.size, 0)
        return Image.fromarray((mask.astype(np.uint8) * 255)).resize(image.size, resample=Image.NEAREST)

    water_mask_img = _mask_to_image(water_mask)
    road_mask_img = _mask_to_image(road_mask)
    roof_mask_img = _mask_to_image(roof_mask)
    seg_mask_img = _mask_to_image(seg_mask_union) if seg_mask_union is not None else Image.new("L", image.size, 0)

    safe_overlay, hazard_overlay, heat_gray = make_safety_heatmap(image, safe_mask, hazard_mask, risk_map)

    # Landing-spot box: an odd-sided square centred on center_img, shifted
    # back inside the frame when it would spill over an edge.
    # Fix: removed an unused `ImageDraw.Draw(spot_overlay)` binding that was
    # created and never drawn on.
    spot_overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
    cx_img, cy_img = center_img
    side_img = max(3, footprint_img_px | 1)  # force an odd side length >= 3
    half_img = side_img // 2
    bx0 = cx_img - half_img
    by0 = cy_img - half_img
    bx1 = bx0 + side_img - 1
    by1 = by0 + side_img - 1
    clipped_x = False
    clipped_y = False
    if bx0 < 0:
        shift = -bx0
        bx0 = 0
        bx1 += shift
        clipped_x = True
    if bx1 >= image.width:
        shift = bx1 - (image.width - 1)
        bx1 = image.width - 1
        bx0 = max(0, bx0 - shift)
        clipped_x = True
    if by0 < 0:
        shift = -by0
        by0 = 0
        by1 += shift
        clipped_y = True
    if by1 >= image.height:
        shift = by1 - (image.height - 1)
        by1 = image.height - 1
        by0 = max(0, by0 - shift)
        clipped_y = True
    # Crosshair centre: box midpoint when the box was shifted, otherwise the
    # requested centre clamped into the box.
    if clipped_x:
        cx_draw = int(round((bx0 + bx1) / 2.0))
    else:
        cx_draw = int(round(min(max(cx_img, bx0), bx1)))
    if clipped_y:
        cy_draw = int(round((by0 + by1) / 2.0))
    else:
        cy_draw = int(round(min(max(cy_img, by0), by1)))
    overlay_box = Image.new("RGBA", image.size, (0, 0, 0, 0))
    box_draw = ImageDraw.Draw(overlay_box)
    fill = (0, 102, 255, 60)
    outline = (0, 102, 255, 255)
    box_draw.rectangle((bx0, by0, bx1, by1), fill=fill, outline=outline, width=4)
    box_draw.line((cx_draw, by0, cx_draw, by1), fill=outline, width=2)
    box_draw.line((bx0, cy_draw, bx1, cy_draw), fill=outline, width=2)
    radius = 8
    box_draw.ellipse((cx_draw - radius, cy_draw - radius, cx_draw + radius, cy_draw + radius), fill=outline)

    return {
        "RGB": image,
        "Depth": depth_vis,
        "Flatness map (std)": flatness_img,
        "Depth gradient": grad_img,
        "Gradient mask": grad_mask_img,
        "Water mask": water_mask_img,
        "Road mask": road_mask_img,
        "Roof mask": roof_mask_img,
        "Segmentation hazards": seg_mask_img,
        "Safety heatmap overlay": safe_overlay,
        "Hazard overlay": hazard_overlay,
        "Safety score": heat_gray,
        "Landing spot overlay": Image.alpha_composite(spot_overlay, overlay_box),
    }
147
+
148
+
149
def compose_view(
    images_dict: dict,
    base_view: str,
    heat_on: bool,
    heat_alpha: float,
    hazard_on: bool,
    hazard_alpha: float,
    grad_on: bool,
    flat_on: bool,
    spot_on: bool,
) -> Image.Image:
    """Composite the selected base view with the enabled overlay layers.

    Overlays are applied in a fixed order: safety heatmap, segmentation
    hazards, depth gradient, flatness map, landing-spot box. Raises
    ``gr.Error`` when no results exist or the requested view is missing.
    """
    import gradio as gr

    if not images_dict:
        raise gr.Error("Run inference first, then select a view.")
    if base_view not in images_dict:
        raise gr.Error(f"Unknown view: {base_view}")

    base = images_dict.get(base_view)
    if base is None:
        raise gr.Error(f"No image for view: {base_view}")
    out = base.convert("RGBA")

    if heat_on:
        heat = images_dict.get("Safety heatmap overlay")
        if heat is not None:
            # Scale the heatmap's own alpha channel by the requested opacity.
            heat_rgba = heat.convert("RGBA")
            opacity = max(0.0, min(1.0, heat_alpha))
            alpha = np.array(heat_rgba.getchannel("A"), dtype=np.uint8)
            alpha = (alpha.astype(np.float32) * opacity).astype(np.uint8)
            heat_rgba.putalpha(Image.fromarray(alpha, mode="L"))
            out = Image.alpha_composite(out, heat_rgba)

    if hazard_on:
        hazard = images_dict.get("Segmentation hazards")
        if hazard is not None:
            # Paint flat red through the hazard mask at the requested opacity.
            mask = hazard.convert("L")
            level = int(max(0.0, min(1.0, hazard_alpha)) * 255)
            blank = Image.new("RGBA", hazard.size, (0, 0, 0, 0))
            red = Image.new("RGBA", hazard.size, (255, 0, 0, level))
            out = Image.alpha_composite(out, Image.composite(red, blank, mask))

    if grad_on:
        grad_layer = images_dict.get("Depth gradient")
        if grad_layer is not None:
            grad_rgba = grad_layer.convert("RGBA")
            grad_rgba.putalpha(int(GRAD_ALPHA * 255))
            out = Image.alpha_composite(out, grad_rgba)

    if flat_on:
        flat_layer = images_dict.get("Flatness map (std)")
        if flat_layer is not None:
            flat_rgba = flat_layer.convert("RGBA")
            flat_rgba.putalpha(int(FLAT_ALPHA * 255))
            out = Image.alpha_composite(out, flat_rgba)

    if spot_on:
        spot = images_dict.get("Landing spot overlay")
        if spot is not None:
            out = Image.alpha_composite(out, spot.convert("RGBA"))

    return out.convert("RGB")
212
+
213
+
214
+ __all__ = ["build_result_layers", "compose_view", "make_safety_heatmap"]