File size: 20,128 Bytes

efb1801

#!/usr/bin/env python3
"""
Coordinate Transformer - Pixel to Robot Coordinate System
Handles camera calibration, stereo vision, and coordinate transformations

Author: AI Assistant
Date: 2025-12-15
"""

import numpy as np
import cv2
import logging
from pathlib import Path
from typing import Tuple, Optional, Dict, List
from dataclasses import dataclass
import json

logger = logging.getLogger(__name__)

@dataclass
class CameraCalibration:
    """Camera calibration parameters"""
    camera_matrix: np.ndarray
    distortion_coeffs: np.ndarray
    image_width: int
    image_height: int
    focal_length: float
    principal_point: Tuple[float, float]

@dataclass
class StereoCalibration:
    """Stereo camera calibration parameters"""
    left_camera_matrix: np.ndarray
    right_camera_matrix: np.ndarray
    left_distortion: np.ndarray
    right_distortion: np.ndarray
    rotation_matrix: np.ndarray
    translation_vector: np.ndarray
    essential_matrix: np.ndarray
    fundamental_matrix: np.ndarray
    rectification_matrix_left: np.ndarray
    rectification_matrix_right: np.ndarray
    projection_matrix_left: np.ndarray
    projection_matrix_right: np.ndarray
    disparity_to_depth_map: np.ndarray

class CoordinateTransformer:
    """
    Handles coordinate transformations between pixel coordinates and robot world coordinates
    Supports both monocular and stereo camera systems
    """
    
    def __init__(self, 
                 camera_matrix_path: Optional[str] = None,
                 distortion_coeffs_path: Optional[str] = None,
                 stereo_calibration_path: Optional[str] = None):
        """Initialize coordinate transformer"""
        
        self.camera_calibration: Optional[CameraCalibration] = None
        self.stereo_calibration: Optional[StereoCalibration] = None
        self.stereo_matcher: Optional[cv2.StereoSGBM] = None
        
        # Robot coordinate system parameters
        self.robot_origin = (0.0, 0.0, 0.0)  # Robot base position
        self.camera_to_robot_transform = np.eye(4)  # 4x4 transformation matrix
        
        # Load calibrations if provided
        if camera_matrix_path and distortion_coeffs_path:
            self.load_camera_calibration(camera_matrix_path, distortion_coeffs_path)
        
        if stereo_calibration_path:
            self.load_stereo_calibration(stereo_calibration_path)
        
        logger.info("Coordinate Transformer initialized")
    
    def load_camera_calibration(self, camera_matrix_path: str, distortion_coeffs_path: str):
        """Load single camera calibration"""
        try:
            camera_matrix = np.load(camera_matrix_path)
            distortion_coeffs = np.load(distortion_coeffs_path)
            
            # Extract calibration parameters
            fx, fy = camera_matrix[0, 0], camera_matrix[1, 1]
            cx, cy = camera_matrix[0, 2], camera_matrix[1, 2]
            
            self.camera_calibration = CameraCalibration(
                camera_matrix=camera_matrix,
                distortion_coeffs=distortion_coeffs,
                image_width=int(camera_matrix[0, 2] * 2),  # Approximate
                image_height=int(camera_matrix[1, 2] * 2),  # Approximate
                focal_length=(fx + fy) / 2,
                principal_point=(cx, cy)
            )
            
            logger.info("Camera calibration loaded successfully")
            
        except Exception as e:
            logger.error(f"Failed to load camera calibration: {e}")
            raise
    
    def load_stereo_calibration(self, stereo_calibration_path: str):
        """Load stereo camera calibration"""
        try:
            calibration_data = np.load(stereo_calibration_path)
            
            self.stereo_calibration = StereoCalibration(
                left_camera_matrix=calibration_data['left_camera_matrix'],
                right_camera_matrix=calibration_data['right_camera_matrix'],
                left_distortion=calibration_data['left_distortion'],
                right_distortion=calibration_data['right_distortion'],
                rotation_matrix=calibration_data['rotation_matrix'],
                translation_vector=calibration_data['translation_vector'],
                essential_matrix=calibration_data['essential_matrix'],
                fundamental_matrix=calibration_data['fundamental_matrix'],
                rectification_matrix_left=calibration_data['rectification_matrix_left'],
                rectification_matrix_right=calibration_data['rectification_matrix_right'],
                projection_matrix_left=calibration_data['projection_matrix_left'],
                projection_matrix_right=calibration_data['projection_matrix_right'],
                disparity_to_depth_map=calibration_data['disparity_to_depth_map']
            )
            
            # Initialize stereo matcher for real-time depth calculation
            self._initialize_stereo_matcher()
            
            logger.info("Stereo calibration loaded successfully")
            
        except Exception as e:
            logger.error(f"Failed to load stereo calibration: {e}")
            raise
    
    def _initialize_stereo_matcher(self):
        """Initialize stereo matching for depth calculation"""
        if not self.stereo_calibration:
            return
        
        # Create stereo block matcher
        self.stereo_matcher = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=64,
            blockSize=9,
            P1=8 * 9 * 9,
            P2=32 * 9 * 9,
            disp12MaxDiff=1,
            uniquenessRatio=10,
            speckleWindowSize=100,
            speckleRange=32
        )
        
        logger.info("Stereo matcher initialized")
    
    def pixel_to_world(self, 
                      pixel_x: int, 
                      pixel_y: int, 
                      image_shape: Tuple[int, int, int],
                      depth: Optional[float] = None) -> Tuple[float, float, float]:
        """
        Convert pixel coordinates to world coordinates
        
        Args:
            pixel_x, pixel_y: Pixel coordinates
            image_shape: Shape of the image (height, width, channels)
            depth: Depth in meters (if None, uses stereo or default depth)
        
        Returns:
            Tuple of (x, y, z) world coordinates in meters
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")
        
        # Get depth if not provided
        if depth is None:
            depth = self._estimate_depth(pixel_x, pixel_y, image_shape)
        
        # Convert pixel to normalized coordinates
        fx, fy = self.camera_calibration.camera_matrix[0, 0], self.camera_calibration.camera_matrix[1, 1]
        cx, cy = self.camera_calibration.principal_point
        
        # Convert to camera coordinates
        x_cam = (pixel_x - cx) * depth / fx
        y_cam = (pixel_y - cy) * depth / fy
        z_cam = depth
        
        # Transform to robot world coordinates
        camera_point = np.array([x_cam, y_cam, z_cam, 1.0])
        world_point = self.camera_to_robot_transform @ camera_point
        
        return (float(world_point[0]), float(world_point[1]), float(world_point[2]))
    
    def world_to_pixel(self, 
                      world_x: float, 
                      world_y: float, 
                      world_z: float) -> Tuple[int, int]:
        """
        Convert world coordinates to pixel coordinates
        
        Returns:
            Tuple of (pixel_x, pixel_y) coordinates
        """
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")
        
        # Transform world point to camera coordinates
        world_point = np.array([world_x, world_y, world_z, 1.0])
        camera_point = np.linalg.inv(self.camera_to_robot_transform) @ world_point
        
        x_cam, y_cam, z_cam = camera_point[:3]
        
        # Convert to pixel coordinates
        fx, fy = self.camera_calibration.camera_matrix[0, 0], self.camera_calibration.camera_matrix[1, 1]
        cx, cy = self.camera_calibration.principal_point
        
        pixel_x = int(x_cam * fx / z_cam + cx)
        pixel_y = int(y_cam * fy / z_cam + cy)
        
        return (pixel_x, pixel_y)
    
    def _estimate_depth(self, pixel_x: int, pixel_y: int, image_shape: Tuple[int, int, int]) -> float:
        """Estimate depth using stereo vision or default depth"""
        
        # If stereo calibration is available, try to get depth from disparity
        if self.stereo_calibration and len(image_shape) == 3:
            # This would require stereo images - simplified for now
            pass
        
        # Default depth estimation based on image position
        # Assume strawberries are typically 20-50cm from camera
        image_height = image_shape[0]
        
        # Simple depth estimation based on vertical position
        # Lower in image = closer to camera
        normalized_y = pixel_y / image_height
        estimated_depth = 0.2 + (0.3 * (1.0 - normalized_y))  # 20-50cm range
        
        return estimated_depth
    
    def calculate_depth_from_stereo(self, 
                                   left_image: np.ndarray, 
                                   right_image: np.ndarray,
                                   pixel_x: int, 
                                   pixel_y: int) -> Optional[float]:
        """Calculate depth from stereo images"""
        if not self.stereo_matcher or not self.stereo_calibration:
            return None
        
        try:
            # Compute disparity map
            disparity = self.stereo_matcher.compute(left_image, right_image)
            
            # Get disparity at specific pixel
            if 0 <= pixel_y < disparity.shape[0] and 0 <= pixel_x < disparity.shape[1]:
                disp_value = disparity[pixel_y, pixel_x]
                
                if disp_value > 0:
                    # Convert disparity to depth
                    depth = self.stereo_calibration.disparity_to_depth_map[disp_value]
                    return float(depth)
            
            return None
            
        except Exception as e:
            logger.error(f"Error calculating stereo depth: {e}")
            return None
    
    def undistort_point(self, pixel_x: int, pixel_y: int) -> Tuple[int, int]:
        """Undistort pixel coordinates using camera calibration"""
        if not self.camera_calibration:
            return pixel_x, pixel_y
        
        try:
            # Create point array
            points = np.array([[pixel_x, pixel_y]], dtype=np.float32)
            
            # Undistort points
            undistorted_points = cv2.undistortPoints(
                points,
                self.camera_calibration.camera_matrix,
                self.camera_calibration.distortion_coeffs
            )
            
            return int(undistorted_points[0][0][0]), int(undistorted_points[0][0][1])
            
        except Exception as e:
            logger.error(f"Error undistorting point: {e}")
            return pixel_x, pixel_y
    
    def set_camera_to_robot_transform(self, transform_matrix: np.ndarray):
        """Set the transformation matrix from camera to robot coordinates"""
        if transform_matrix.shape != (4, 4):
            raise ValueError("Transform matrix must be 4x4")
        
        self.camera_to_robot_transform = transform_matrix
        logger.info("Camera to robot transform updated")
    
    def calibrate_camera_to_robot(self, 
                                 world_points: List[Tuple[float, float, float]],
                                 pixel_points: List[Tuple[int, int]]) -> bool:
        """
        Calibrate camera to robot transformation using known correspondences
        
        Args:
            world_points: List of (x, y, z) world coordinates
            pixel_points: List of (pixel_x, pixel_y) pixel coordinates
        
        Returns:
            True if calibration successful
        """
        if len(world_points) != len(pixel_points) or len(world_points) < 4:
            logger.error("Need at least 4 corresponding points for calibration")
            return False
        
        try:
            # Prepare point correspondences
            world_points_3d = np.array(world_points, dtype=np.float32)
            pixel_points_2d = np.array(pixel_points, dtype=np.float32)
            
            # Solve PnP problem
            success, rotation_vector, translation_vector, inliers = cv2.solvePnPRansac(
                world_points_3d,
                pixel_points_2d,
                self.camera_calibration.camera_matrix,
                self.camera_calibration.distortion_coeffs
            )
            
            if success:
                # Convert rotation vector to rotation matrix
                rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
                
                # Create 4x4 transformation matrix
                transform_matrix = np.eye(4)
                transform_matrix[:3, :3] = rotation_matrix
                transform_matrix[:3, 3] = translation_vector.flatten()
                
                self.set_camera_to_robot_transform(transform_matrix)
                
                logger.info("Camera to robot calibration successful")
                return True
            else:
                logger.error("PnP calibration failed")
                return False
                
        except Exception as e:
            logger.error(f"Calibration error: {e}")
            return False
    
    def get_workspace_bounds(self) -> Dict[str, Tuple[float, float]]:
        """Get the bounds of the robot workspace in world coordinates"""
        # This should be calibrated for your specific robot setup
        return {
            'x_min': -0.5, 'x_max': 0.5,
            'y_min': -0.5, 'y_max': 0.5,
            'z_min': 0.0, 'z_max': 0.5
        }
    
    def is_point_in_workspace(self, x: float, y: float, z: float) -> bool:
        """Check if a point is within the robot workspace"""
        bounds = self.get_workspace_bounds()
        
        return (bounds['x_min'] <= x <= bounds['x_max'] and
                bounds['y_min'] <= y <= bounds['y_max'] and
                bounds['z_min'] <= z <= bounds['z_max'])
    
    def project_3d_to_2d(self, 
                        world_points: List[Tuple[float, float, float]],
                        image_shape: Tuple[int, int]) -> List[Tuple[int, int]]:
        """Project 3D world points to 2D image coordinates"""
        if not self.camera_calibration:
            raise ValueError("Camera calibration not loaded")
        
        projected_points = []
        
        for world_point in world_points:
            pixel_x, pixel_y = self.world_to_pixel(*world_point)
            
            # Check if point is within image bounds
            if (0 <= pixel_x < image_shape[1] and 0 <= pixel_y < image_shape[0]):
                projected_points.append((pixel_x, pixel_y))
            else:
                projected_points.append((-1, -1))  # Outside image
        
        return projected_points
    
    def save_calibration(self, filepath: str):
        """Save current calibration to file"""
        calibration_data = {
            'camera_calibration': {
                'camera_matrix': self.camera_calibration.camera_matrix.tolist() if self.camera_calibration else None,
                'distortion_coeffs': self.camera_calibration.distortion_coeffs.tolist() if self.camera_calibration else None,
                'image_width': self.camera_calibration.image_width if self.camera_calibration else None,
                'image_height': self.camera_calibration.image_height if self.camera_calibration else None,
                'focal_length': self.camera_calibration.focal_length if self.camera_calibration else None,
                'principal_point': self.camera_calibration.principal_point if self.camera_calibration else None
            },
            'stereo_calibration': {
                'left_camera_matrix': self.stereo_calibration.left_camera_matrix.tolist() if self.stereo_calibration else None,
                'right_camera_matrix': self.stereo_calibration.right_camera_matrix.tolist() if self.stereo_calibration else None,
                'rotation_matrix': self.stereo_calibration.rotation_matrix.tolist() if self.stereo_calibration else None,
                'translation_vector': self.stereo_calibration.translation_vector.tolist() if self.stereo_calibration else None
            },
            'camera_to_robot_transform': self.camera_to_robot_transform.tolist(),
            'robot_origin': self.robot_origin
        }
        
        with open(filepath, 'w') as f:
            json.dump(calibration_data, f, indent=2)
        
        logger.info(f"Calibration saved to {filepath}")
    
    def load_calibration(self, filepath: str):
        """Load calibration from file"""
        try:
            with open(filepath, 'r') as f:
                calibration_data = json.load(f)
            
            # Load camera calibration
            if calibration_data['camera_calibration']['camera_matrix']:
                cam_data = calibration_data['camera_calibration']
                self.camera_calibration = CameraCalibration(
                    camera_matrix=np.array(cam_data['camera_matrix']),
                    distortion_coeffs=np.array(cam_data['distortion_coeffs']),
                    image_width=cam_data['image_width'],
                    image_height=cam_data['image_height'],
                    focal_length=cam_data['focal_length'],
                    principal_point=tuple(cam_data['principal_point'])
                )
            
            # Load stereo calibration
            if calibration_data['stereo_calibration']['left_camera_matrix']:
                stereo_data = calibration_data['stereo_calibration']
                # Note: This is simplified - you'd need to load all stereo parameters
                logger.warning("Stereo calibration loading not fully implemented")
            
            # Load transformation matrix
            self.camera_to_robot_transform = np.array(calibration_data['camera_to_robot_transform'])
            self.robot_origin = tuple(calibration_data['robot_origin'])
            
            logger.info(f"Calibration loaded from {filepath}")
            
        except Exception as e:
            logger.error(f"Failed to load calibration: {e}")
            raise

def main():
    """Test coordinate transformer functionality"""
    import argparse
    
    parser = argparse.ArgumentParser(description='Test Coordinate Transformer')
    parser.add_argument('--camera-matrix', help='Camera matrix file path')
    parser.add_argument('--distortion', help='Distortion coefficients file path')
    parser.add_argument('--stereo', help='Stereo calibration file path')
    parser.add_argument('--test-pixel', nargs=2, type=int, metavar=('X', 'Y'), 
                       help='Test pixel coordinates')
    
    args = parser.parse_args()
    
    try:
        # Create transformer
        transformer = CoordinateTransformer(
            args.camera_matrix,
            args.distortion,
            args.stereo
        )
        
        print("Coordinate Transformer initialized")
        
        if args.test_pixel:
            pixel_x, pixel_y = args.test_pixel
            world_coords = transformer.pixel_to_world(pixel_x, pixel_y, (480, 640, 3))
            print(f"Pixel ({pixel_x}, {pixel_y}) -> World {world_coords}")
            
            # Convert back
            pixel_x_back, pixel_y_back = transformer.world_to_pixel(*world_coords)
            print(f"World {world_coords} -> Pixel ({pixel_x_back}, {pixel_y_back})")
        
        # Print workspace bounds
        bounds = transformer.get_workspace_bounds()
        print(f"Workspace bounds: {bounds}")
        
    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()