| """Pose extraction for traditional controllers from clean images.""" |
|
|
| from __future__ import annotations |
|
|
| import numpy as np |
|
|
|
|
# Distance between the body reference point and the heading marker, before it
# is multiplied by ``visual_scale``.
# NOTE(review): presumably mirrors the renderer's marker geometry -- confirm.
_MARKER_OFFSET = 0.22


def _pixel_to_world(px, py, image_size, pad, workspace):
    """Map a pixel position to workspace coordinates (the image y axis points down)."""
    xmin, xmax, ymin, ymax = workspace
    span = image_size - 2 * pad
    world_x = xmin + (px - pad) / span * (xmax - xmin)
    world_y = ymin + (image_size - pad - py) / span * (ymax - ymin)
    return world_x, world_y


def estimate_pose_from_clean_image(
    image: np.ndarray,
    workspace: tuple[float, float, float, float] = (0.0, 10.0, 0.0, 10.0),
    pad: int = 4,
    visual_scale: float = 2.5,
) -> np.ndarray:
    """Estimate ``[x, y, cos(theta), sin(theta)]`` from a colour-coded image.

    Body pixels are selected by fixed thresholds on channels 0/1/2 and marker
    pixels by a second set of thresholds (presumably RGB order, i.e. a blue
    body with a yellow heading marker -- confirm against the renderer).

    * If marker pixels exist, the heading is the direction from the body
      centroid to the marker centroid, and the position is the marker
      position moved back along that heading by ``0.22 * visual_scale``.
    * Otherwise the heading is the principal axis of the body pixels and the
      position is the body centroid. An eigenvector's sign is arbitrary, so
      this fallback heading is only defined up to a 180 degree flip.

    Args:
        image: Array indexed as ``[row, column, channel]`` with at least
            three channels. ``image.shape[0]`` is used as the size of both
            axes, so the image is assumed to be square.
        workspace: ``(xmin, xmax, ymin, ymax)`` world extents that the padded
            image interior maps onto.
        pad: Border width in pixels excluded from the world mapping.
        visual_scale: Multiplier applied to the marker offset.

    Returns:
        ``float32`` array of shape ``(4,)``: ``[x, y, cos(theta), sin(theta)]``.

    Raises:
        ValueError: If no pixel matches the body thresholds.
    """
    img = image.astype(np.float32)
    ch0, ch1, ch2 = img[..., 0], img[..., 1], img[..., 2]

    body = (ch2 > 110.0) & (ch0 < 100.0) & (ch1 < 140.0)
    ys, xs = np.nonzero(body)
    if xs.size == 0:
        # Without body pixels the centroid and covariance are undefined; fail
        # loudly instead of propagating NaNs into the returned pose.
        raise ValueError("no body pixels found in image; cannot estimate pose")
    centroid_x = xs.mean()
    centroid_y = ys.mean()

    marker = (ch0 > 180.0) & (ch1 > 140.0) & (ch2 < 130.0)
    my, mx = np.nonzero(marker)

    image_size = image.shape[0]

    if mx.size:
        marker_x = float(mx.mean())
        marker_y = float(my.mean())
        # Negate the row difference because image rows grow downwards while
        # the world y axis grows upwards.
        theta_px = np.arctan2(-(marker_y - centroid_y), marker_x - centroid_x)
        marker_world_x, marker_world_y = _pixel_to_world(
            marker_x, marker_y, image_size, pad, workspace
        )
        marker_offset = _MARKER_OFFSET * float(visual_scale)
        x = marker_world_x - marker_offset * np.cos(theta_px)
        y = marker_world_y - marker_offset * np.sin(theta_px)
    else:
        # Fallback: principal axis of the body pixel cloud (PCA on the 2x2
        # covariance). Only computed when no marker overrides the heading.
        coords = np.stack([xs - centroid_x, ys - centroid_y], axis=1)
        cov = coords.T @ coords / coords.shape[0]
        vals, vecs = np.linalg.eigh(cov)
        axis = vecs[:, int(np.argmax(vals))]
        theta_px = np.arctan2(-axis[1], axis[0])
        x, y = _pixel_to_world(centroid_x, centroid_y, image_size, pad, workspace)

    return np.array([x, y, np.cos(theta_px), np.sin(theta_px)], dtype=np.float32)
|
|