|
|
|
|
|
""" |
|
|
Coordinate Transformer - Pixel to Robot Coordinate System |
|
|
Handles camera calibration, stereo vision, and coordinate transformations |
|
|
|
|
|
Author: AI Assistant |
|
|
Date: 2025-12-15 |
|
|
""" |
|
|
|
|
|
import numpy as np |
|
|
import cv2 |
|
|
import logging |
|
|
from pathlib import Path |
|
|
from typing import Tuple, Optional, Dict, List |
|
|
from dataclasses import dataclass |
|
|
import json |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
@dataclass
class CameraCalibration:
    """Single-camera (monocular) calibration parameters."""
    # 3x3 intrinsic matrix: fx at [0,0], fy at [1,1], principal point at [0,2]/[1,2].
    camera_matrix: np.ndarray
    # Lens distortion coefficient vector (consumed by cv2.undistortPoints).
    distortion_coeffs: np.ndarray
    # Image dimensions in pixels.
    image_width: int
    image_height: int
    # Representative focal length in pixels (mean of fx and fy).
    focal_length: float
    # Principal point (cx, cy) in pixels.
    principal_point: Tuple[float, float]
|
|
|
|
|
@dataclass
class StereoCalibration:
    """Stereo camera calibration parameters."""
    # Intrinsic 3x3 matrices of the left and right cameras.
    left_camera_matrix: np.ndarray
    right_camera_matrix: np.ndarray
    # Distortion coefficient vectors for each camera.
    left_distortion: np.ndarray
    right_distortion: np.ndarray
    # Relative pose between the two cameras (presumably OpenCV
    # stereoCalibrate convention — confirm against the producer).
    rotation_matrix: np.ndarray
    translation_vector: np.ndarray
    # Epipolar geometry between the two views.
    essential_matrix: np.ndarray
    fundamental_matrix: np.ndarray
    # Rectification rotations and rectified projection matrices.
    rectification_matrix_left: np.ndarray
    rectification_matrix_right: np.ndarray
    projection_matrix_left: np.ndarray
    projection_matrix_right: np.ndarray
    # Disparity-to-depth mapping; presumably the 4x4 Q matrix from
    # cv2.stereoRectify — TODO confirm against the calibration producer.
    disparity_to_depth_map: np.ndarray
|
|
|
|
|
class CoordinateTransformer:
    """
    Handles coordinate transformations between pixel coordinates and robot world coordinates

    Supports both monocular and stereo camera systems. Monocular conversions
    fall back to a height-in-image depth heuristic when no depth is supplied.
    """

    def __init__(self,
                 camera_matrix_path: Optional[str] = None,
                 distortion_coeffs_path: Optional[str] = None,
                 stereo_calibration_path: Optional[str] = None):
        """Initialize coordinate transformer.

        Args:
            camera_matrix_path: Path to a .npy file with the 3x3 intrinsic matrix.
            distortion_coeffs_path: Path to a .npy file with distortion coefficients.
            stereo_calibration_path: Path to an archive of stereo calibration
                arrays (see load_stereo_calibration for the expected keys).
        """
        self.camera_calibration: Optional[CameraCalibration] = None
        self.stereo_calibration: Optional[StereoCalibration] = None
        self.stereo_matcher: Optional[cv2.StereoSGBM] = None

        # Robot base-frame origin and the homogeneous 4x4 transform mapping
        # camera-frame points into the robot/world frame (identity until calibrated).
        self.robot_origin = (0.0, 0.0, 0.0)
        self.camera_to_robot_transform = np.eye(4)

        if camera_matrix_path and distortion_coeffs_path:
            self.load_camera_calibration(camera_matrix_path, distortion_coeffs_path)

        if stereo_calibration_path:
            self.load_stereo_calibration(stereo_calibration_path)

        logger.info("Coordinate Transformer initialized")

    def load_camera_calibration(self, camera_matrix_path: str, distortion_coeffs_path: str,
                                image_size: Optional[Tuple[int, int]] = None):
        """Load single camera calibration from .npy files.

        Args:
            camera_matrix_path: File holding the 3x3 intrinsic matrix.
            distortion_coeffs_path: File holding the distortion coefficient vector.
            image_size: Optional (width, height) in pixels. When omitted the
                size is approximated as twice the principal point, which is
                only exact when the principal point is the image centre.

        Raises:
            Exception: any load failure is logged and re-raised.
        """
        try:
            camera_matrix = np.load(camera_matrix_path)
            distortion_coeffs = np.load(distortion_coeffs_path)

            fx, fy = camera_matrix[0, 0], camera_matrix[1, 1]
            cx, cy = camera_matrix[0, 2], camera_matrix[1, 2]

            if image_size is not None:
                width, height = image_size
            else:
                # Fallback approximation: principal point assumed at image centre.
                width, height = int(cx * 2), int(cy * 2)

            self.camera_calibration = CameraCalibration(
                camera_matrix=camera_matrix,
                distortion_coeffs=distortion_coeffs,
                image_width=int(width),
                image_height=int(height),
                focal_length=(fx + fy) / 2,
                principal_point=(cx, cy)
            )

            logger.info("Camera calibration loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load camera calibration: {e}")
            raise

    def load_stereo_calibration(self, stereo_calibration_path: str):
        """Load stereo camera calibration from an npz-style archive.

        The archive must expose one array per StereoCalibration field,
        keyed by the field name.

        Raises:
            Exception: any load failure is logged and re-raised.
        """
        try:
            calibration_data = np.load(stereo_calibration_path)

            self.stereo_calibration = StereoCalibration(
                left_camera_matrix=calibration_data['left_camera_matrix'],
                right_camera_matrix=calibration_data['right_camera_matrix'],
                left_distortion=calibration_data['left_distortion'],
                right_distortion=calibration_data['right_distortion'],
                rotation_matrix=calibration_data['rotation_matrix'],
                translation_vector=calibration_data['translation_vector'],
                essential_matrix=calibration_data['essential_matrix'],
                fundamental_matrix=calibration_data['fundamental_matrix'],
                rectification_matrix_left=calibration_data['rectification_matrix_left'],
                rectification_matrix_right=calibration_data['rectification_matrix_right'],
                projection_matrix_left=calibration_data['projection_matrix_left'],
                projection_matrix_right=calibration_data['projection_matrix_right'],
                disparity_to_depth_map=calibration_data['disparity_to_depth_map']
            )

            # Depth-from-disparity needs a matcher; build it now.
            self._initialize_stereo_matcher()

            logger.info("Stereo calibration loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load stereo calibration: {e}")
            raise

    def _initialize_stereo_matcher(self):
        """Initialize the semi-global block matcher used for depth calculation."""
        if not self.stereo_calibration:
            return

        block_size = 9
        self.stereo_matcher = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=64,  # search range in pixels; must be divisible by 16
            blockSize=block_size,
            P1=8 * block_size * block_size,   # smoothness penalty for +/-1 disparity steps
            P2=32 * block_size * block_size,  # penalty for larger jumps (must exceed P1)
            disp12MaxDiff=1,
            uniquenessRatio=10,
            speckleWindowSize=100,
            speckleRange=32
        )

        logger.info("Stereo matcher initialized")

    def pixel_to_world(self,
                       pixel_x: int,
                       pixel_y: int,
                       image_shape: Tuple[int, int, int],
                       depth: Optional[float] = None) -> Tuple[float, float, float]:
        """
        Convert pixel coordinates to world coordinates

        Args:
            pixel_x, pixel_y: Pixel coordinates
            image_shape: Shape of the image (height, width, channels)
            depth: Depth in meters (if None, a heuristic estimate is used)

        Returns:
            Tuple of (x, y, z) world coordinates in meters

        Raises:
            ValueError: if no camera calibration has been loaded.
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")

        if depth is None:
            depth = self._estimate_depth(pixel_x, pixel_y, image_shape)

        fx, fy = self.camera_calibration.camera_matrix[0, 0], self.camera_calibration.camera_matrix[1, 1]
        cx, cy = self.camera_calibration.principal_point

        # Back-project through the pinhole model into the camera frame.
        x_cam = (pixel_x - cx) * depth / fx
        y_cam = (pixel_y - cy) * depth / fy
        z_cam = depth

        # Lift to homogeneous coordinates and map into the robot/world frame.
        camera_point = np.array([x_cam, y_cam, z_cam, 1.0])
        world_point = self.camera_to_robot_transform @ camera_point

        return (float(world_point[0]), float(world_point[1]), float(world_point[2]))

    def world_to_pixel(self,
                       world_x: float,
                       world_y: float,
                       world_z: float) -> Tuple[int, int]:
        """
        Convert world coordinates to pixel coordinates

        Returns:
            Tuple of (pixel_x, pixel_y) coordinates

        Raises:
            ValueError: if calibration is missing or the point is not in
                front of the camera.
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")

        # Map the world point back into the camera frame.
        world_point = np.array([world_x, world_y, world_z, 1.0])
        camera_point = np.linalg.inv(self.camera_to_robot_transform) @ world_point

        x_cam, y_cam, z_cam = camera_point[:3]

        # A non-positive camera-frame depth cannot be projected meaningfully
        # (would divide by zero or mirror the point through the origin).
        if z_cam <= 0:
            raise ValueError(f"Point is behind the camera (z={z_cam})")

        fx, fy = self.camera_calibration.camera_matrix[0, 0], self.camera_calibration.camera_matrix[1, 1]
        cx, cy = self.camera_calibration.principal_point

        pixel_x = int(x_cam * fx / z_cam + cx)
        pixel_y = int(y_cam * fy / z_cam + cy)

        return (pixel_x, pixel_y)

    def _estimate_depth(self, pixel_x: int, pixel_y: int, image_shape: Tuple[int, int, int]) -> float:
        """Heuristic depth estimate for a single image.

        True depth cannot be recovered from one pixel of a single image, so a
        simple prior is used: pixels lower in the frame are assumed closer.

        Returns:
            Estimated depth in meters, linearly ramping from 0.5 (top of the
            image) down to 0.2 (bottom).
        """
        image_height = image_shape[0]

        # 0.0 at the top row, 1.0 at the bottom row.
        normalized_y = pixel_y / image_height
        estimated_depth = 0.2 + (0.3 * (1.0 - normalized_y))

        return estimated_depth

    def calculate_depth_from_stereo(self,
                                    left_image: np.ndarray,
                                    right_image: np.ndarray,
                                    pixel_x: int,
                                    pixel_y: int) -> Optional[float]:
        """Calculate depth at a pixel from a rectified stereo image pair.

        Args:
            left_image, right_image: Rectified images as expected by the
                configured StereoSGBM matcher.
            pixel_x, pixel_y: Pixel in the left image to query.

        Returns:
            Depth in the calibration's units, or None when the matcher or
            calibration is missing, the pixel is out of bounds, or no valid
            disparity was found there.
        """
        if not self.stereo_matcher or not self.stereo_calibration:
            return None

        try:
            disparity = self.stereo_matcher.compute(left_image, right_image)

            if not (0 <= pixel_y < disparity.shape[0] and 0 <= pixel_x < disparity.shape[1]):
                return None

            # StereoSGBM produces fixed-point disparities scaled by 16.
            disp_value = disparity[pixel_y, pixel_x] / 16.0
            if disp_value <= 0:
                return None

            # Reproject through the 4x4 Q matrix from stereo rectification:
            # [X, Y, Z, W]^T = Q @ [x, y, d, 1]^T and depth = Z / W.
            q_matrix = self.stereo_calibration.disparity_to_depth_map
            homogeneous = q_matrix @ np.array([pixel_x, pixel_y, disp_value, 1.0])
            if abs(homogeneous[3]) < 1e-12:
                return None

            return float(homogeneous[2] / homogeneous[3])

        except Exception as e:
            logger.error(f"Error calculating stereo depth: {e}")
            return None

    def undistort_point(self, pixel_x: int, pixel_y: int) -> Tuple[int, int]:
        """Undistort pixel coordinates using camera calibration.

        Returns:
            The undistorted (pixel_x, pixel_y); the input is returned
            unchanged when calibration is missing or undistortion fails.
        """
        if not self.camera_calibration:
            return pixel_x, pixel_y

        try:
            points = np.array([[pixel_x, pixel_y]], dtype=np.float32)

            # Passing P= re-projects the result back into pixel coordinates;
            # without it, undistortPoints returns normalized camera
            # coordinates, which int() would collapse to ~0.
            undistorted_points = cv2.undistortPoints(
                points,
                self.camera_calibration.camera_matrix,
                self.camera_calibration.distortion_coeffs,
                P=self.camera_calibration.camera_matrix
            )

            return int(undistorted_points[0][0][0]), int(undistorted_points[0][0][1])

        except Exception as e:
            logger.error(f"Error undistorting point: {e}")
            return pixel_x, pixel_y

    def set_camera_to_robot_transform(self, transform_matrix: np.ndarray):
        """Set the transformation matrix from camera to robot coordinates.

        Args:
            transform_matrix: 4x4 homogeneous transform applied to
                camera-frame points to obtain robot/world-frame points.

        Raises:
            ValueError: if the matrix is not 4x4.
        """
        if transform_matrix.shape != (4, 4):
            raise ValueError("Transform matrix must be 4x4")

        self.camera_to_robot_transform = transform_matrix
        logger.info("Camera to robot transform updated")

    def calibrate_camera_to_robot(self,
                                  world_points: List[Tuple[float, float, float]],
                                  pixel_points: List[Tuple[int, int]]) -> bool:
        """
        Calibrate camera to robot transformation using known correspondences

        Args:
            world_points: List of (x, y, z) world coordinates
            pixel_points: List of (pixel_x, pixel_y) pixel coordinates

        Returns:
            True if calibration successful
        """
        # Intrinsics are required by solvePnP; fail fast with a clear message
        # instead of an AttributeError swallowed by the broad except below.
        if not self.camera_calibration:
            logger.error("Camera calibration must be loaded before camera-to-robot calibration")
            return False

        if len(world_points) != len(pixel_points) or len(world_points) < 4:
            logger.error("Need at least 4 corresponding points for calibration")
            return False

        try:
            world_points_3d = np.array(world_points, dtype=np.float32)
            pixel_points_2d = np.array(pixel_points, dtype=np.float32)

            success, rotation_vector, translation_vector, inliers = cv2.solvePnPRansac(
                world_points_3d,
                pixel_points_2d,
                self.camera_calibration.camera_matrix,
                self.camera_calibration.distortion_coeffs
            )

            if not success:
                logger.error("PnP calibration failed")
                return False

            rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

            # solvePnP yields world->camera extrinsics (p_cam = R p_world + t),
            # but pixel_to_world applies camera_to_robot_transform to
            # camera-frame points, so store the inverse (camera -> world).
            world_to_camera = np.eye(4)
            world_to_camera[:3, :3] = rotation_matrix
            world_to_camera[:3, 3] = translation_vector.flatten()

            self.set_camera_to_robot_transform(np.linalg.inv(world_to_camera))

            logger.info("Camera to robot calibration successful")
            return True

        except Exception as e:
            logger.error(f"Calibration error: {e}")
            return False

    def get_workspace_bounds(self) -> Dict[str, float]:
        """Get the bounds of the robot workspace in world coordinates (meters)."""
        # Static bounds; replace with robot-specific limits as needed.
        return {
            'x_min': -0.5, 'x_max': 0.5,
            'y_min': -0.5, 'y_max': 0.5,
            'z_min': 0.0, 'z_max': 0.5
        }

    def is_point_in_workspace(self, x: float, y: float, z: float) -> bool:
        """Check if a point is within the robot workspace"""
        bounds = self.get_workspace_bounds()

        return (bounds['x_min'] <= x <= bounds['x_max'] and
                bounds['y_min'] <= y <= bounds['y_max'] and
                bounds['z_min'] <= z <= bounds['z_max'])

    def project_3d_to_2d(self,
                         world_points: List[Tuple[float, float, float]],
                         image_shape: Tuple[int, int]) -> List[Tuple[int, int]]:
        """Project 3D world points to 2D image coordinates.

        Points that fall outside the image, or behind the camera, map to the
        sentinel (-1, -1).

        Raises:
            ValueError: if no camera calibration has been loaded.
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")

        projected_points = []

        for world_point in world_points:
            try:
                pixel_x, pixel_y = self.world_to_pixel(*world_point)
            except ValueError:
                # Behind the camera: not projectable.
                projected_points.append((-1, -1))
                continue

            if (0 <= pixel_x < image_shape[1] and 0 <= pixel_y < image_shape[0]):
                projected_points.append((pixel_x, pixel_y))
            else:
                projected_points.append((-1, -1))

        return projected_points

    def save_calibration(self, filepath: str):
        """Save current calibration to a JSON file.

        Only a subset of the stereo fields is persisted (see load_calibration).
        """
        cam = self.camera_calibration
        stereo = self.stereo_calibration

        calibration_data = {
            'camera_calibration': {
                'camera_matrix': cam.camera_matrix.tolist() if cam else None,
                'distortion_coeffs': cam.distortion_coeffs.tolist() if cam else None,
                'image_width': cam.image_width if cam else None,
                'image_height': cam.image_height if cam else None,
                'focal_length': cam.focal_length if cam else None,
                'principal_point': cam.principal_point if cam else None
            },
            'stereo_calibration': {
                'left_camera_matrix': stereo.left_camera_matrix.tolist() if stereo else None,
                'right_camera_matrix': stereo.right_camera_matrix.tolist() if stereo else None,
                'rotation_matrix': stereo.rotation_matrix.tolist() if stereo else None,
                'translation_vector': stereo.translation_vector.tolist() if stereo else None
            },
            'camera_to_robot_transform': self.camera_to_robot_transform.tolist(),
            'robot_origin': self.robot_origin
        }

        with open(filepath, 'w') as f:
            json.dump(calibration_data, f, indent=2)

        logger.info(f"Calibration saved to {filepath}")

    def load_calibration(self, filepath: str):
        """Load calibration previously written by save_calibration.

        Raises:
            Exception: any read/parse failure is logged and re-raised.
        """
        try:
            with open(filepath, 'r') as f:
                calibration_data = json.load(f)

            if calibration_data['camera_calibration']['camera_matrix']:
                cam_data = calibration_data['camera_calibration']
                self.camera_calibration = CameraCalibration(
                    camera_matrix=np.array(cam_data['camera_matrix']),
                    distortion_coeffs=np.array(cam_data['distortion_coeffs']),
                    image_width=cam_data['image_width'],
                    image_height=cam_data['image_height'],
                    focal_length=cam_data['focal_length'],
                    principal_point=tuple(cam_data['principal_point'])
                )

            if calibration_data['stereo_calibration']['left_camera_matrix']:
                # save_calibration serializes only a subset of stereo fields,
                # so a full StereoCalibration cannot be reconstructed here.
                logger.warning("Stereo calibration loading not fully implemented")

            self.camera_to_robot_transform = np.array(calibration_data['camera_to_robot_transform'])
            self.robot_origin = tuple(calibration_data['robot_origin'])

            logger.info(f"Calibration loaded from {filepath}")

        except Exception as e:
            logger.error(f"Failed to load calibration: {e}")
            raise
|
|
|
|
|
def main():
    """Command-line smoke test for the CoordinateTransformer."""
    import argparse

    cli = argparse.ArgumentParser(description='Test Coordinate Transformer')
    cli.add_argument('--camera-matrix', help='Camera matrix file path')
    cli.add_argument('--distortion', help='Distortion coefficients file path')
    cli.add_argument('--stereo', help='Stereo calibration file path')
    cli.add_argument('--test-pixel', nargs=2, type=int, metavar=('X', 'Y'),
                     help='Test pixel coordinates')
    opts = cli.parse_args()

    try:
        transformer = CoordinateTransformer(opts.camera_matrix, opts.distortion, opts.stereo)
        print("Coordinate Transformer initialized")

        if opts.test_pixel:
            px, py = opts.test_pixel
            # Forward transform on a nominal 640x480 RGB frame, then round-trip back.
            world = transformer.pixel_to_world(px, py, (480, 640, 3))
            print(f"Pixel ({px}, {py}) -> World {world}")
            back_x, back_y = transformer.world_to_pixel(*world)
            print(f"World {world} -> Pixel ({back_x}, {back_y})")

        print(f"Workspace bounds: {transformer.get_workspace_bounds()}")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()