File size: 7,380 Bytes
58aefd4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | """Pinhole camera β ground projection for aerial detections (nadir + oblique).
Geometry (local ENU frame, camera at the origin):
world axes X = East, Y = North, Z = Up
camera position C = (0, 0, H) H = altitude above ground (AGL, metres)
ground plane Z = 0 (flat-earth; terrain DEM is a later refinement)
A detection at pixel (u, v) defines a ray from the camera; we rotate it into
the world by the camera pose and intersect it with the ground plane to get the
animal's ground position. The camera's *own* ground position is (0, 0) β the
nadir point β so every returned coordinate is metres East / North of the point
directly below the aircraft.
Pose convention (degrees):
pitch = 0 -> nadir (optical axis straight down)
pitch > 0 -> tilted FORWARD toward the horizon (oblique); the image centre
then lands H*tan(pitch) ahead of the nadir point
yaw -> heading / rotation of the ground pattern about vertical
roll -> rotation about the optical axis (banking); usually ~0
The maths is validated against closed-form cases in tests/test_geolocation.py
(nadir centre -> (0,0); nadir edge -> GSD-scaled offset; oblique centre ->
H*tan(pitch) forward).
"""
from __future__ import annotations
import math
from dataclasses import dataclass
import numpy as np
@dataclass
class GroundPoint:
"""A detection projected to the ground, in the local ENU frame (metres)."""
east: float
north: float
range_m: float # slant range camera->point (for detection-function covariates)
perp_distance: float | None = None # |offset| from the flight line (set by project_detections)
def _rot_z(deg: float) -> np.ndarray:
a = math.radians(deg)
c, s = math.cos(a), math.sin(a)
return np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])
def _rot_x(deg: float) -> np.ndarray:
a = math.radians(deg)
c, s = math.cos(a), math.sin(a)
return np.array([[1.0, 0.0, 0.0], [0.0, c, -s], [0.0, s, c]])
# Base nadir orientation: camera optical axis (+Z_cam) -> world Down (0,0,-1);
# image-right (+X_cam) -> East; image-down (+Y_cam) -> South (top of frame = North).
_R0 = np.array([[1.0, 0.0, 0.0],
[0.0, -1.0, 0.0],
[0.0, 0.0, -1.0]])
@dataclass
class CameraModel:
"""Aerial camera pose + intrinsics, enough to project pixels to the ground.
Provide focal length either as ``hfov_deg`` (horizontal field of view) or
``focal_px`` directly. ``altitude`` is height above the ground plane.
"""
altitude: float
image_width: int
image_height: int
hfov_deg: float | None = None
focal_px: float | None = None
pitch_deg: float = 0.0 # 0 = nadir
roll_deg: float = 0.0
yaw_deg: float = 0.0
def focal_pixels(self) -> float:
if self.focal_px is not None:
return float(self.focal_px)
if self.hfov_deg is None:
raise ValueError("Provide either hfov_deg or focal_px.")
return (self.image_width / 2.0) / math.tan(math.radians(self.hfov_deg) / 2.0)
def _world_from_cam(self) -> np.ndarray:
# R = yaw(world) @ base_nadir @ pitch(cam-X) @ roll(cam-Z)
return _rot_z(self.yaw_deg) @ _R0 @ _rot_x(self.pitch_deg) @ _rot_z(self.roll_deg)
def ground_sample_distance(self) -> float:
"""Metres-per-pixel at the image centre for a nadir view (altitude/focal)."""
return self.altitude / self.focal_pixels()
def pixel_to_ground(self, u: float, v: float) -> GroundPoint:
"""Project image pixel (u, v) to a ground point in ENU metres.
Raises ValueError if the ray points at or above the horizon (no ground
intersection) β which happens for oblique views looking too high.
"""
f = self.focal_pixels()
cx, cy = self.image_width / 2.0, self.image_height / 2.0
# Ray in camera frame: x right, y down, z along optical axis.
ray_cam = np.array([u - cx, v - cy, f])
ray_world = self._world_from_cam() @ ray_cam
dz = ray_world[2]
if dz >= -1e-9:
raise ValueError(
"Pixel ray does not intersect the ground (at/above horizon) β "
"check pitch/altitude; this pixel is sky, not ground."
)
t = -self.altitude / dz # camera Z is +altitude; solve C_z + t*dz = 0
east = float(t * ray_world[0])
north = float(t * ray_world[1])
# Slant range from camera (0,0,H) to ground point (east, north, 0).
rng = float(math.sqrt(east**2 + north**2 + self.altitude**2))
return GroundPoint(east=east, north=north, range_m=rng)
def ground_to_pixel(self, east: float, north: float) -> tuple[float, float] | None:
"""Inverse projection: ground point (east, north, 0) -> pixel (u, v).
Returns None if the point is behind the camera. The caller checks whether
(u, v) falls within the image bounds. Used to simulate what a camera at a
given pose would actually capture (round-trips pixel_to_ground exactly).
"""
R = self._world_from_cam()
d_world = np.array([east, north, -self.altitude]) # camera (0,0,H) -> point
d_cam = R.T @ d_world # world -> camera frame
if d_cam[2] <= 1e-9:
return None # behind the image plane
f = self.focal_pixels()
u = self.image_width / 2.0 + f * d_cam[0] / d_cam[2]
v = self.image_height / 2.0 + f * d_cam[1] / d_cam[2]
return (float(u), float(v))
def perpendicular_distance(east: float, north: float, heading_deg: float) -> float:
"""Distance from a ground point to the flight line.
The flight line passes through the nadir point (0,0) along the aircraft
heading. Perpendicular distance is the across-track offset β exactly the
quantity line-transect distance sampling consumes.
"""
# Heading unit vector in ENU (0deg = North, 90deg = East).
h = math.radians(heading_deg)
dir_e, dir_n = math.sin(h), math.cos(h)
# Perpendicular (across-track) component = cross product magnitude in 2D.
return abs(east * dir_n - north * dir_e)
def project_detections(
detections,
camera: CameraModel,
heading_deg: float | None = None,
) -> list[GroundPoint]:
"""Project a batch of pixel detections to ground points (+ perpendicular distance).
Args:
detections: iterable of (u, v) image points β typically each detection's
ground-contact point (e.g. bottom-centre of its box).
camera: the CameraModel for the frame.
heading_deg: aircraft heading; if given, each GroundPoint gets its
``perp_distance`` (across-track offset) filled in for distance
sampling. Defaults to the camera yaw.
Returns:
list of GroundPoint. Points whose rays miss the ground (sky) are skipped.
"""
hd = camera.yaw_deg if heading_deg is None else heading_deg
out: list[GroundPoint] = []
for u, v in detections:
try:
gp = camera.pixel_to_ground(u, v)
except ValueError:
continue # ray above horizon β not a ground detection
gp.perp_distance = perpendicular_distance(gp.east, gp.north, hd)
out.append(gp)
return out
|