Spaces:

yakvrz
/

drone-landing-safety

Runtime error

App Files Files Community

drone-landing-safety / app /depth_pipeline.py

yakvrz

Switch rooftop masking to SAM3 and refresh demos

c5794e7 18 days ago

raw

history blame

5.77 kB

	from __future__ import annotations

	from pathlib import Path

	import cv2
	import numpy as np
	import torch
	from PIL import Image

	try: # pragma: no cover - optional dependency resolution
	from depth_anything_3.api import DepthAnything3 # type: ignore
	from depth_anything_3.utils.visualize import visualize_depth # type: ignore
	except ModuleNotFoundError: # pragma: no cover
	import sys

	ROOT = Path(__file__).resolve().parents[1]
	sys.path.append(str(ROOT / "src"))
	from depth_anything_3.api import DepthAnything3 # type: ignore # noqa: E402
	from depth_anything_3.utils.visualize import visualize_depth # type: ignore # noqa: E402


	def crop_nonblack(img: Image.Image, frac: float = 0.05) -> Image.Image:
	    """Trim a fixed fractional border from every side of *img*.

	    Despite the name, no pixel values are inspected: the crop box is derived
	    purely from the image size and ``frac``.

	    Args:
	        img: Image to crop; only its ``size`` and ``crop`` members are used.
	        frac: Fraction of the width removed from the left and right edges,
	            and of the height removed from the top and bottom edges.

	    Returns:
	        Whatever ``img.crop`` returns for the inset box.
	    """
	    width, height = img.size
	    margin_x = int(round(width * frac))
	    margin_y = int(round(height * frac))
	    left, upper = margin_x, margin_y
	    right, lower = width - margin_x, height - margin_y
	    return img.crop((left, upper, right, lower))


	def remove_global_plane(depth: np.ndarray, method: str = "least_squares") -> np.ndarray:
	    """Subtract the least-squares plane ``a*x + b*y + c`` from a 2-D depth map.

	    ``x`` is the column index and ``y`` the row index. The plane is fitted to
	    the finite pixels only, so isolated NaN/inf values no longer corrupt the
	    fit; it is then evaluated on (and subtracted from) every pixel, which
	    leaves non-finite pixels non-finite in the output.

	    Args:
	        depth: Depth map. Anything that is not 2-D is returned untouched.
	        method: ``"ls"``, ``"least_squares"`` or ``"lstsq"`` enable the fit
	            (case-insensitive; a falsy value means ``"least_squares"``).
	            ``"none"``, ``"off"`` and any unrecognised name return ``depth``
	            unchanged.

	    Returns:
	        ``depth`` minus the fitted plane, or ``depth`` itself when no fit is
	        performed (wrong rank, disabled/unknown method, no finite pixel, or
	        the solver raising ``LinAlgError``).
	    """
	    if depth.ndim != 2:
	        return depth
	    method = (method or "least_squares").lower()
	    if method not in {"ls", "least_squares", "lstsq"}:
	        # Covers the explicit "none"/"off" switches as well as unknown names.
	        return depth

	    h, w = depth.shape
	    yy, xx = np.mgrid[0:h, 0:w].astype(np.float32)
	    # One row per pixel: [x, y, 1].
	    design = np.stack(
	        (xx.ravel(), yy.ravel(), np.ones(h * w, dtype=np.float32)),
	        axis=1,
	    )
	    values = depth.astype(np.float32).reshape(-1, 1)

	    finite = np.isfinite(values[:, 0])
	    if not finite.any():
	        # Nothing usable to fit against.
	        return depth
	    if finite.all():
	        fit_design, fit_values = design, values
	    else:
	        # Exclude invalid pixels from the fit only, not from the output.
	        fit_design, fit_values = design[finite], values[finite]

	    try:
	        coef, *_ = np.linalg.lstsq(fit_design, fit_values, rcond=None)
	    except np.linalg.LinAlgError:
	        return depth

	    plane = (design @ coef).reshape(h, w)
	    return depth - plane


	def pick_flat_patch(
	    depth: np.ndarray,
	    patch: int = 96,
	    std_thresh: float = 0.03,
	    grad_thresh: float = 0.35,
	    water_mask: np.ndarray | None = None,
	):
	    """Locate the window with the lowest local depth variation.

	    The depth map is min-max normalised, a per-pixel standard deviation over
	    a ``patch`` x ``patch`` window is computed with a reflect-padded box
	    filter, and the pixel with the smallest value among the allowed pixels
	    becomes the centre of the returned box.

	    Args:
	        depth: 2-D depth map (H, W); converted to ``float32``.
	        patch: Requested window size. Clamped to ``[3, min(H, W)]`` and then
	            bumped to the next odd number so the window has a centre pixel.
	        std_thresh: Currently unused by this function; kept so existing
	            callers keep working.
	        grad_thresh: Upper bound on the normalised gradient magnitude for a
	            pixel to be considered as a candidate centre.
	        water_mask: Optional array, truthy where landing is forbidden. Any
	            dtype is accepted and interpreted via ``astype(bool)``. A mask
	            whose shape differs from ``depth`` is ignored with a
	            ``RuntimeWarning``.

	    Returns:
	        ``((x0, y0, x1, y1), std_map, grad_norm, grad_mask, landing_mask)``
	        where the box is the chosen centre +/- ``patch // 2`` clamped to
	        ``[0, W - 1]`` / ``[0, H - 1]``.

	    Raises:
	        ValueError: If ``depth`` is not 2-D.
	    """
	    import warnings

	    import torch.nn.functional as F

	    depth = depth.astype(np.float32)
	    if depth.ndim != 2:
	        raise ValueError("Depth map must be 2D (H, W)")

	    patch = max(3, min(patch, min(depth.shape)))
	    if patch % 2 == 0:
	        patch += 1
	    # The epsilon keeps a constant depth map from dividing by zero.
	    depth_norm = (depth - depth.min()) / (np.ptp(depth) + 1e-6)

	    def box_mean(arr, k):
	        """Return the k x k moving average of *arr* with reflected borders."""
	        pad = k // 2
	        t = torch.from_numpy(arr).unsqueeze(0).unsqueeze(0)
	        t = F.pad(t, (pad, pad, pad, pad), mode="reflect")
	        mean = F.avg_pool2d(t, kernel_size=k, stride=1, padding=0, count_include_pad=False)
	        return mean.squeeze(0).squeeze(0).numpy()

	    # Var[X] = E[X^2] - E[X]^2; clamp tiny negatives caused by rounding.
	    mean = box_mean(depth_norm, patch)
	    mean_sq = box_mean(depth_norm * depth_norm, patch)
	    var = np.maximum(mean_sq - mean * mean, 0.0)
	    std_map = np.sqrt(var)

	    # Gradient magnitude scaled by its 95th percentile and clipped to [0, 1].
	    dy, dx = np.gradient(depth_norm)
	    grad = np.sqrt(dx * dx + dy * dy)
	    grad_ref = np.percentile(grad, 95) + 1e-6
	    grad_norm = np.clip(grad / grad_ref, 0.0, 1.0)
	    grad_mask = grad_norm < grad_thresh

	    landing_mask = grad_mask
	    if water_mask is not None:
	        water = np.asarray(water_mask)
	        if water.shape == grad_mask.shape:
	            # Coerce to bool first: ``~`` on an integer mask is a bitwise NOT
	            # and on a float mask it raises TypeError.
	            landing_mask = grad_mask & ~water.astype(bool)
	        else:
	            warnings.warn(
	                f"pick_flat_patch: water_mask shape {water.shape} does not match "
	                f"depth shape {grad_mask.shape}; ignoring water_mask",
	                RuntimeWarning,
	                stacklevel=2,
	            )

	    masked_std = np.where(landing_mask, std_map, np.inf)
	    if not np.isfinite(masked_std).any():
	        # No candidate survived the masks; fall back to the unmasked map.
	        masked_std = std_map
	    y, x = np.unravel_index(np.argmin(masked_std), masked_std.shape)
	    half = patch // 2
	    y0, y1 = max(y - half, 0), min(y + half, depth.shape[0] - 1)
	    x0, x1 = max(x - half, 0), min(x + half, depth.shape[1] - 1)
	    return (x0, y0, x1, y1), std_map, grad_norm, grad_mask, landing_mask


	class DepthEngine:
	    """Caches DepthAnything models and runs inference at bounded resolution."""

	    def __init__(self):
	        # Maps a model id to the (model, device) pair produced by _load_model.
	        self._model_cache: dict[str, tuple[DepthAnything3, torch.device]] = {}

	    def _load_model(self, model_id: str) -> tuple[DepthAnything3, torch.device]:
	        """Instantiate *model_id*, move it to CUDA when available, set eval mode."""
	        if torch.cuda.is_available():
	            device = torch.device("cuda")
	        else:
	            device = torch.device("cpu")
	        model = DepthAnything3.from_pretrained(model_id).to(device)
	        model.eval()
	        return model, device

	    def get_model(self, model_id: str) -> tuple[DepthAnything3, torch.device]:
	        """Return the cached (model, device) pair, loading it on first request."""
	        cache = self._model_cache
	        if model_id in cache:
	            return cache[model_id]
	        entry = cache[model_id] = self._load_model(model_id)
	        return entry

	    def predict_depth(
	        self, image: np.ndarray, model_id: str, process_res_cap: int, plane_method: str = "least_squares"
	    ) -> tuple[np.ndarray, np.ndarray, int, dict[str, float]]:
	        """Run depth inference on one image and remove the global plane.

	        Args:
	            image: Input image array; its first two dimensions are used to
	                pick the processing resolution.
	            model_id: Identifier handed to ``get_model``.
	            process_res_cap: Upper limit for the processing resolution.
	            plane_method: Forwarded to ``remove_global_plane``.

	        Returns:
	            ``(depth_raw, depth, process_res, timings)`` where ``depth_raw``
	            is the model output, ``depth`` is the plane-corrected map,
	            ``process_res`` is ``min(longest image side, process_res_cap)``
	            and ``timings`` holds ``prep_ms`` (model lookup, including the
	            first load), ``model_ms`` and ``plane_ms`` in milliseconds.
	        """
	        from time import perf_counter

	        started = perf_counter()
	        model, _device = self.get_model(model_id)
	        longest_side = max(image.shape[0], image.shape[1])
	        process_res = min(longest_side, int(process_res_cap))
	        ready = perf_counter()

	        with torch.inference_mode():
	            prediction = model.inference(
	                image=[image],
	                process_res=process_res,
	                process_res_method="upper_bound_resize",
	                export_dir=None,
	            )
	        inferred = perf_counter()

	        depth_raw = np.array(prediction.depth[0])
	        depth = remove_global_plane(depth_raw, method=plane_method)
	        finished = perf_counter()

	        # Each stage is the gap between two consecutive time stamps.
	        stages = {
	            "prep_ms": (started, ready),
	            "model_ms": (ready, inferred),
	            "plane_ms": (inferred, finished),
	        }
	        timings = {label: (end - begin) * 1000.0 for label, (begin, end) in stages.items()}
	        return depth_raw, depth, process_res, timings


	def smooth_depth(depth: np.ndarray, sigma: float) -> np.ndarray:
	    """Gaussian-blur a depth map on a best-effort basis.

	    Args:
	        depth: Depth map handed to ``cv2.GaussianBlur``.
	        sigma: Standard deviation used for both axes. Values ``<= 0``
	            disable smoothing.

	    Returns:
	        The blurred map, or ``depth`` itself when smoothing is disabled or
	        the blur fails. A failure is reported through a ``RuntimeWarning``
	        rather than being silently discarded.
	    """
	    if sigma <= 0:
	        return depth
	    # Odd kernel spanning roughly +/- 3 sigma, never smaller than 3x3.
	    k = max(3, int(round(sigma * 3)) * 2 + 1)
	    try:
	        return cv2.GaussianBlur(depth, (k, k), sigmaX=sigma, sigmaY=sigma)
	    except Exception as exc:
	        # Smoothing is optional, so keep going with the raw map, but make
	        # the failure visible instead of hiding it.
	        import warnings

	        warnings.warn(
	            f"smooth_depth: Gaussian blur failed ({exc!r}); returning unsmoothed depth",
	            RuntimeWarning,
	            stacklevel=2,
	        )
	        return depth


	# Names exported by ``from <this module> import *``. ``visualize_depth`` is
	# not defined here; it is re-exported from ``depth_anything_3.utils.visualize``.
	__all__ = [
	"DepthEngine",
	"crop_nonblack",
	"pick_flat_patch",
	"remove_global_plane",
	"smooth_depth",
	"visualize_depth",
	]