#!/usr/bin/env python3
"""
Coordinate Transformer - Pixel to Robot Coordinate System

Handles camera calibration, stereo vision, and coordinate transformations
between image (pixel) space and the robot's world frame.

Author: AI Assistant
Date: 2025-12-15
"""

import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import cv2
import numpy as np

logger = logging.getLogger(__name__)


@dataclass
class CameraCalibration:
    """Single (monocular) camera calibration parameters."""
    camera_matrix: np.ndarray              # 3x3 intrinsic matrix [[fx,0,cx],[0,fy,cy],[0,0,1]]
    distortion_coeffs: np.ndarray          # lens distortion coefficients (OpenCV layout)
    image_width: int
    image_height: int
    focal_length: float                    # mean of fx and fy, in pixels
    principal_point: Tuple[float, float]   # (cx, cy) in pixels


@dataclass
class StereoCalibration:
    """Stereo camera pair calibration parameters (OpenCV stereoCalibrate/stereoRectify outputs)."""
    left_camera_matrix: np.ndarray
    right_camera_matrix: np.ndarray
    left_distortion: np.ndarray
    right_distortion: np.ndarray
    rotation_matrix: np.ndarray            # rotation from left to right camera
    translation_vector: np.ndarray         # translation from left to right camera
    essential_matrix: np.ndarray
    fundamental_matrix: np.ndarray
    rectification_matrix_left: np.ndarray
    rectification_matrix_right: np.ndarray
    projection_matrix_left: np.ndarray
    projection_matrix_right: np.ndarray
    disparity_to_depth_map: np.ndarray     # 4x4 Q reprojection matrix (disparity -> depth)


class CoordinateTransformer:
    """
    Handles coordinate transformations between pixel coordinates and robot
    world coordinates.

    Supports both monocular and stereo camera systems.
    """

    def __init__(self,
                 camera_matrix_path: Optional[str] = None,
                 distortion_coeffs_path: Optional[str] = None,
                 stereo_calibration_path: Optional[str] = None):
        """
        Initialize coordinate transformer.

        Args:
            camera_matrix_path: Path to a .npy file holding the 3x3 camera matrix.
            distortion_coeffs_path: Path to a .npy file holding distortion coefficients.
            stereo_calibration_path: Path to a .npz file holding stereo calibration arrays.
        """
        self.camera_calibration: Optional[CameraCalibration] = None
        self.stereo_calibration: Optional[StereoCalibration] = None
        self.stereo_matcher: Optional[cv2.StereoSGBM] = None

        # Robot coordinate system parameters
        self.robot_origin = (0.0, 0.0, 0.0)            # Robot base position
        self.camera_to_robot_transform = np.eye(4)     # 4x4 homogeneous camera->robot transform

        # Load calibrations if provided (monocular requires both files)
        if camera_matrix_path and distortion_coeffs_path:
            self.load_camera_calibration(camera_matrix_path, distortion_coeffs_path)

        if stereo_calibration_path:
            self.load_stereo_calibration(stereo_calibration_path)

        logger.info("Coordinate Transformer initialized")

    def load_camera_calibration(self, camera_matrix_path: str, distortion_coeffs_path: str):
        """
        Load single camera calibration from .npy files.

        Raises:
            Exception: Re-raises any load/parse failure after logging it.
        """
        try:
            camera_matrix = np.load(camera_matrix_path)
            distortion_coeffs = np.load(distortion_coeffs_path)

            # Extract calibration parameters from the intrinsic matrix
            fx, fy = camera_matrix[0, 0], camera_matrix[1, 1]
            cx, cy = camera_matrix[0, 2], camera_matrix[1, 2]

            # Image size is not stored in the matrix; approximate it from the
            # principal point (assumes the principal point is near the center).
            self.camera_calibration = CameraCalibration(
                camera_matrix=camera_matrix,
                distortion_coeffs=distortion_coeffs,
                image_width=int(cx * 2),    # Approximate
                image_height=int(cy * 2),   # Approximate
                focal_length=(fx + fy) / 2,
                principal_point=(cx, cy)
            )

            logger.info("Camera calibration loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load camera calibration: {e}")
            raise

    def load_stereo_calibration(self, stereo_calibration_path: str):
        """
        Load stereo camera calibration from an .npz archive keyed by parameter name.

        Raises:
            Exception: Re-raises any load/parse failure after logging it.
        """
        try:
            calibration_data = np.load(stereo_calibration_path)

            self.stereo_calibration = StereoCalibration(
                left_camera_matrix=calibration_data['left_camera_matrix'],
                right_camera_matrix=calibration_data['right_camera_matrix'],
                left_distortion=calibration_data['left_distortion'],
                right_distortion=calibration_data['right_distortion'],
                rotation_matrix=calibration_data['rotation_matrix'],
                translation_vector=calibration_data['translation_vector'],
                essential_matrix=calibration_data['essential_matrix'],
                fundamental_matrix=calibration_data['fundamental_matrix'],
                rectification_matrix_left=calibration_data['rectification_matrix_left'],
                rectification_matrix_right=calibration_data['rectification_matrix_right'],
                projection_matrix_left=calibration_data['projection_matrix_left'],
                projection_matrix_right=calibration_data['projection_matrix_right'],
                disparity_to_depth_map=calibration_data['disparity_to_depth_map']
            )

            # Initialize stereo matcher for real-time depth calculation
            self._initialize_stereo_matcher()

            logger.info("Stereo calibration loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load stereo calibration: {e}")
            raise

    def _initialize_stereo_matcher(self):
        """Initialize semi-global block matching (SGBM) for depth calculation."""
        if not self.stereo_calibration:
            return

        # Create stereo block matcher. P1/P2 follow the OpenCV-recommended
        # 8*channels*blockSize^2 / 32*channels*blockSize^2 smoothness penalties.
        self.stereo_matcher = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=64,
            blockSize=9,
            P1=8 * 9 * 9,
            P2=32 * 9 * 9,
            disp12MaxDiff=1,
            uniquenessRatio=10,
            speckleWindowSize=100,
            speckleRange=32
        )

        logger.info("Stereo matcher initialized")

    def pixel_to_world(self, pixel_x: int, pixel_y: int,
                       image_shape: Tuple[int, int, int],
                       depth: Optional[float] = None) -> Tuple[float, float, float]:
        """
        Convert pixel coordinates to world coordinates.

        Args:
            pixel_x, pixel_y: Pixel coordinates.
            image_shape: Shape of the image (height, width, channels).
            depth: Depth in meters (if None, uses stereo or default depth).

        Returns:
            Tuple of (x, y, z) world coordinates in meters.

        Raises:
            ValueError: If no camera calibration has been loaded.
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")

        # Get depth if not provided
        if depth is None:
            depth = self._estimate_depth(pixel_x, pixel_y, image_shape)

        # Intrinsics for the pinhole back-projection
        fx = self.camera_calibration.camera_matrix[0, 0]
        fy = self.camera_calibration.camera_matrix[1, 1]
        cx, cy = self.camera_calibration.principal_point

        # Back-project pixel to camera coordinates at the given depth
        x_cam = (pixel_x - cx) * depth / fx
        y_cam = (pixel_y - cy) * depth / fy
        z_cam = depth

        # Transform to robot world coordinates (homogeneous 4-vector)
        camera_point = np.array([x_cam, y_cam, z_cam, 1.0])
        world_point = self.camera_to_robot_transform @ camera_point

        return (float(world_point[0]), float(world_point[1]), float(world_point[2]))

    def world_to_pixel(self, world_x: float, world_y: float, world_z: float) -> Tuple[int, int]:
        """
        Convert world coordinates to pixel coordinates.

        Returns:
            Tuple of (pixel_x, pixel_y) coordinates.

        Raises:
            ValueError: If no camera calibration is loaded, or the point
                projects to a non-positive camera depth (behind the camera).
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")

        # Transform world point to camera coordinates
        world_point = np.array([world_x, world_y, world_z, 1.0])
        camera_point = np.linalg.inv(self.camera_to_robot_transform) @ world_point
        x_cam, y_cam, z_cam = camera_point[:3]

        # FIX: guard against division by zero / points behind the camera,
        # which previously produced inf and an OverflowError inside int().
        if z_cam <= 0:
            raise ValueError("Point is behind the camera (non-positive depth)")

        # Project onto the image plane using the pinhole model
        fx = self.camera_calibration.camera_matrix[0, 0]
        fy = self.camera_calibration.camera_matrix[1, 1]
        cx, cy = self.camera_calibration.principal_point

        pixel_x = int(x_cam * fx / z_cam + cx)
        pixel_y = int(y_cam * fy / z_cam + cy)

        return (pixel_x, pixel_y)

    def _estimate_depth(self, pixel_x: int, pixel_y: int,
                        image_shape: Tuple[int, int, int]) -> float:
        """
        Estimate depth using stereo vision or a position-based heuristic.

        Returns:
            Estimated depth in meters (0.2-0.5 m heuristic range).
        """
        # If stereo calibration is available, depth could come from disparity;
        # that requires a stereo image pair, so it is not done here.
        if self.stereo_calibration and len(image_shape) == 3:
            pass

        # Default depth estimation based on image position.
        # Assume strawberries are typically 20-50cm from camera:
        # lower in the image = closer to the camera.
        image_height = image_shape[0]
        normalized_y = pixel_y / image_height
        estimated_depth = 0.2 + (0.3 * (1.0 - normalized_y))  # 20-50cm range

        return estimated_depth

    def calculate_depth_from_stereo(self, left_image: np.ndarray, right_image: np.ndarray,
                                    pixel_x: int, pixel_y: int) -> Optional[float]:
        """
        Calculate metric depth at a pixel from a stereo image pair.

        Returns:
            Depth in the Q matrix's units at (pixel_x, pixel_y), or None if
            unavailable (no matcher/calibration, out of bounds, invalid disparity).
        """
        if not self.stereo_matcher or not self.stereo_calibration:
            return None

        try:
            # Compute disparity map.
            # FIX: StereoSGBM.compute returns fixed-point disparities scaled
            # by 16; divide to obtain the true disparity in pixels.
            disparity = self.stereo_matcher.compute(left_image, right_image)

            if 0 <= pixel_y < disparity.shape[0] and 0 <= pixel_x < disparity.shape[1]:
                disp_value = disparity[pixel_y, pixel_x] / 16.0

                if disp_value > 0:
                    # FIX: the original indexed the 4x4 Q matrix with the
                    # disparity value. Q is a reprojection matrix: reproject
                    # [x, y, d, 1] through Q and dehomogenize to get depth Z.
                    q = self.stereo_calibration.disparity_to_depth_map
                    point = q @ np.array([pixel_x, pixel_y, disp_value, 1.0])
                    if point[3] != 0:
                        return float(point[2] / point[3])

            return None

        except Exception as e:
            logger.error(f"Error calculating stereo depth: {e}")
            return None

    def undistort_point(self, pixel_x: int, pixel_y: int) -> Tuple[int, int]:
        """
        Undistort pixel coordinates using the camera calibration.

        Returns:
            Undistorted (pixel_x, pixel_y); the input unchanged if no
            calibration is loaded or undistortion fails.
        """
        if not self.camera_calibration:
            return pixel_x, pixel_y

        try:
            points = np.array([[pixel_x, pixel_y]], dtype=np.float32)

            # FIX: without P=, cv2.undistortPoints returns *normalized* image
            # coordinates (~[-1, 1]) which truncate to 0 when cast to int.
            # Passing P=camera_matrix re-projects back into pixel coordinates.
            undistorted_points = cv2.undistortPoints(
                points,
                self.camera_calibration.camera_matrix,
                self.camera_calibration.distortion_coeffs,
                P=self.camera_calibration.camera_matrix
            )

            return int(undistorted_points[0][0][0]), int(undistorted_points[0][0][1])

        except Exception as e:
            logger.error(f"Error undistorting point: {e}")
            return pixel_x, pixel_y

    def set_camera_to_robot_transform(self, transform_matrix: np.ndarray):
        """
        Set the transformation matrix from camera to robot coordinates.

        Raises:
            ValueError: If the matrix is not 4x4.
        """
        if transform_matrix.shape != (4, 4):
            raise ValueError("Transform matrix must be 4x4")

        self.camera_to_robot_transform = transform_matrix
        logger.info("Camera to robot transform updated")

    def calibrate_camera_to_robot(self, world_points: List[Tuple[float, float, float]],
                                  pixel_points: List[Tuple[int, int]]) -> bool:
        """
        Calibrate camera to robot transformation using known correspondences.

        Args:
            world_points: List of (x, y, z) world coordinates.
            pixel_points: List of (pixel_x, pixel_y) pixel coordinates.

        Returns:
            True if calibration successful.
        """
        if len(world_points) != len(pixel_points) or len(world_points) < 4:
            logger.error("Need at least 4 corresponding points for calibration")
            return False

        # FIX: the original dereferenced self.camera_calibration unchecked and
        # raised AttributeError when intrinsics were missing.
        if not self.camera_calibration:
            logger.error("Camera calibration must be loaded before robot calibration")
            return False

        try:
            world_points_3d = np.array(world_points, dtype=np.float32)
            pixel_points_2d = np.array(pixel_points, dtype=np.float32)

            # Solve the Perspective-n-Point problem (RANSAC for outlier rejection)
            success, rotation_vector, translation_vector, inliers = cv2.solvePnPRansac(
                world_points_3d,
                pixel_points_2d,
                self.camera_calibration.camera_matrix,
                self.camera_calibration.distortion_coeffs
            )

            if success:
                rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

                # solvePnP yields the transform that maps WORLD points into the
                # CAMERA frame. Build it, then invert to obtain camera->robot,
                # which is what pixel_to_world applies.
                world_to_camera = np.eye(4)
                world_to_camera[:3, :3] = rotation_matrix
                world_to_camera[:3, 3] = translation_vector.flatten()

                # FIX: original stored world->camera directly, so pixel_to_world
                # applied the transform in the wrong direction.
                self.set_camera_to_robot_transform(np.linalg.inv(world_to_camera))
                logger.info("Camera to robot calibration successful")
                return True
            else:
                logger.error("PnP calibration failed")
                return False

        except Exception as e:
            logger.error(f"Calibration error: {e}")
            return False

    def get_workspace_bounds(self) -> Dict[str, Tuple[float, float]]:
        """Get the bounds of the robot workspace in world coordinates."""
        # This should be calibrated for your specific robot setup
        return {
            'x_min': -0.5, 'x_max': 0.5,
            'y_min': -0.5, 'y_max': 0.5,
            'z_min': 0.0, 'z_max': 0.5
        }

    def is_point_in_workspace(self, x: float, y: float, z: float) -> bool:
        """Check if a point is within the robot workspace."""
        bounds = self.get_workspace_bounds()
        return (bounds['x_min'] <= x <= bounds['x_max'] and
                bounds['y_min'] <= y <= bounds['y_max'] and
                bounds['z_min'] <= z <= bounds['z_max'])

    def project_3d_to_2d(self, world_points: List[Tuple[float, float, float]],
                         image_shape: Tuple[int, int]) -> List[Tuple[int, int]]:
        """
        Project 3D world points to 2D image coordinates.

        Points falling outside the image are reported as (-1, -1).

        Raises:
            ValueError: If no camera calibration has been loaded.
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")

        projected_points = []
        for world_point in world_points:
            pixel_x, pixel_y = self.world_to_pixel(*world_point)

            # Check if point is within image bounds
            if (0 <= pixel_x < image_shape[1] and 0 <= pixel_y < image_shape[0]):
                projected_points.append((pixel_x, pixel_y))
            else:
                projected_points.append((-1, -1))  # Outside image

        return projected_points

    def save_calibration(self, filepath: str):
        """Save current calibration to a JSON file."""
        calibration_data = {
            'camera_calibration': {
                'camera_matrix': self.camera_calibration.camera_matrix.tolist() if self.camera_calibration else None,
                'distortion_coeffs': self.camera_calibration.distortion_coeffs.tolist() if self.camera_calibration else None,
                'image_width': self.camera_calibration.image_width if self.camera_calibration else None,
                'image_height': self.camera_calibration.image_height if self.camera_calibration else None,
                'focal_length': self.camera_calibration.focal_length if self.camera_calibration else None,
                'principal_point': self.camera_calibration.principal_point if self.camera_calibration else None
            },
            'stereo_calibration': {
                'left_camera_matrix': self.stereo_calibration.left_camera_matrix.tolist() if self.stereo_calibration else None,
                'right_camera_matrix': self.stereo_calibration.right_camera_matrix.tolist() if self.stereo_calibration else None,
                'rotation_matrix': self.stereo_calibration.rotation_matrix.tolist() if self.stereo_calibration else None,
                'translation_vector': self.stereo_calibration.translation_vector.tolist() if self.stereo_calibration else None
            },
            'camera_to_robot_transform': self.camera_to_robot_transform.tolist(),
            'robot_origin': self.robot_origin
        }

        with open(filepath, 'w') as f:
            json.dump(calibration_data, f, indent=2)

        logger.info(f"Calibration saved to {filepath}")

    def load_calibration(self, filepath: str):
        """
        Load calibration from a JSON file previously written by save_calibration.

        Raises:
            Exception: Re-raises any load/parse failure after logging it.
        """
        try:
            with open(filepath, 'r') as f:
                calibration_data = json.load(f)

            # Load camera calibration
            if calibration_data['camera_calibration']['camera_matrix']:
                cam_data = calibration_data['camera_calibration']
                self.camera_calibration = CameraCalibration(
                    camera_matrix=np.array(cam_data['camera_matrix']),
                    distortion_coeffs=np.array(cam_data['distortion_coeffs']),
                    image_width=cam_data['image_width'],
                    image_height=cam_data['image_height'],
                    focal_length=cam_data['focal_length'],
                    principal_point=tuple(cam_data['principal_point'])
                )

            # Load stereo calibration
            if calibration_data['stereo_calibration']['left_camera_matrix']:
                stereo_data = calibration_data['stereo_calibration']
                # Note: This is simplified - you'd need to load all stereo parameters
                logger.warning("Stereo calibration loading not fully implemented")

            # Load transformation matrix
            self.camera_to_robot_transform = np.array(calibration_data['camera_to_robot_transform'])
            self.robot_origin = tuple(calibration_data['robot_origin'])

            logger.info(f"Calibration loaded from {filepath}")

        except Exception as e:
            logger.error(f"Failed to load calibration: {e}")
            raise


def main():
    """Test coordinate transformer functionality."""
    import argparse

    parser = argparse.ArgumentParser(description='Test Coordinate Transformer')
    parser.add_argument('--camera-matrix', help='Camera matrix file path')
    parser.add_argument('--distortion', help='Distortion coefficients file path')
    parser.add_argument('--stereo', help='Stereo calibration file path')
    parser.add_argument('--test-pixel', nargs=2, type=int, metavar=('X', 'Y'),
                        help='Test pixel coordinates')

    args = parser.parse_args()

    try:
        # Create transformer
        transformer = CoordinateTransformer(
            args.camera_matrix,
            args.distortion,
            args.stereo
        )

        print("Coordinate Transformer initialized")

        if args.test_pixel:
            pixel_x, pixel_y = args.test_pixel
            world_coords = transformer.pixel_to_world(pixel_x, pixel_y, (480, 640, 3))
            print(f"Pixel ({pixel_x}, {pixel_y}) -> World {world_coords}")

            # Convert back
            pixel_x_back, pixel_y_back = transformer.world_to_pixel(*world_coords)
            print(f"World {world_coords} -> Pixel ({pixel_x_back}, {pixel_y_back})")

        # Print workspace bounds
        bounds = transformer.get_workspace_bounds()
        print(f"Workspace bounds: {bounds}")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()