# strawberryPicker / src / coordinate_transformer.py
# Gareth — "Initial clean commit for Hugging Face" (commit efb1801)
#!/usr/bin/env python3
"""
Coordinate Transformer - Pixel to Robot Coordinate System
Handles camera calibration, stereo vision, and coordinate transformations
Author: AI Assistant
Date: 2025-12-15
"""
import numpy as np
import cv2
import logging
from pathlib import Path
from typing import Tuple, Optional, Dict, List
from dataclasses import dataclass
import json
logger = logging.getLogger(__name__)
@dataclass
class CameraCalibration:
    """Camera calibration parameters for a single (monocular) camera.

    Populated from a 3x3 intrinsic matrix in
    CoordinateTransformer.load_camera_calibration.
    """
    camera_matrix: np.ndarray  # 3x3 intrinsic matrix [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]
    distortion_coeffs: np.ndarray  # lens distortion coefficients (fed to cv2.undistortPoints)
    image_width: int  # approximated as 2*cx at load time — not the true sensor width
    image_height: int  # approximated as 2*cy at load time — not the true sensor height
    focal_length: float  # mean of fx and fy, in pixels
    principal_point: Tuple[float, float]  # (cx, cy) optical centre, in pixels
@dataclass
class StereoCalibration:
    """Stereo camera calibration parameters.

    Arrays are loaded verbatim from an .npz archive in
    CoordinateTransformer.load_stereo_calibration; field names follow the
    usual OpenCV stereoCalibrate/stereoRectify outputs.
    """
    left_camera_matrix: np.ndarray  # 3x3 intrinsics of the left camera
    right_camera_matrix: np.ndarray  # 3x3 intrinsics of the right camera
    left_distortion: np.ndarray  # left-camera distortion coefficients
    right_distortion: np.ndarray  # right-camera distortion coefficients
    rotation_matrix: np.ndarray  # presumably rotation between the two camera frames — confirm against the calibration tool
    translation_vector: np.ndarray  # presumably the inter-camera translation (baseline) — confirm
    essential_matrix: np.ndarray  # essential matrix E from stereo calibration
    fundamental_matrix: np.ndarray  # fundamental matrix F from stereo calibration
    rectification_matrix_left: np.ndarray  # rectification rotation for the left image
    rectification_matrix_right: np.ndarray  # rectification rotation for the right image
    projection_matrix_left: np.ndarray  # 3x4 projection matrix in the rectified left frame
    projection_matrix_right: np.ndarray  # 3x4 projection matrix in the rectified right frame
    disparity_to_depth_map: np.ndarray  # presumably the 4x4 Q matrix from stereoRectify — confirm
class CoordinateTransformer:
"""
Handles coordinate transformations between pixel coordinates and robot world coordinates
Supports both monocular and stereo camera systems
"""
def __init__(self,
camera_matrix_path: Optional[str] = None,
distortion_coeffs_path: Optional[str] = None,
stereo_calibration_path: Optional[str] = None):
"""Initialize coordinate transformer"""
self.camera_calibration: Optional[CameraCalibration] = None
self.stereo_calibration: Optional[StereoCalibration] = None
self.stereo_matcher: Optional[cv2.StereoSGBM] = None
# Robot coordinate system parameters
self.robot_origin = (0.0, 0.0, 0.0) # Robot base position
self.camera_to_robot_transform = np.eye(4) # 4x4 transformation matrix
# Load calibrations if provided
if camera_matrix_path and distortion_coeffs_path:
self.load_camera_calibration(camera_matrix_path, distortion_coeffs_path)
if stereo_calibration_path:
self.load_stereo_calibration(stereo_calibration_path)
logger.info("Coordinate Transformer initialized")
def load_camera_calibration(self, camera_matrix_path: str, distortion_coeffs_path: str):
"""Load single camera calibration"""
try:
camera_matrix = np.load(camera_matrix_path)
distortion_coeffs = np.load(distortion_coeffs_path)
# Extract calibration parameters
fx, fy = camera_matrix[0, 0], camera_matrix[1, 1]
cx, cy = camera_matrix[0, 2], camera_matrix[1, 2]
self.camera_calibration = CameraCalibration(
camera_matrix=camera_matrix,
distortion_coeffs=distortion_coeffs,
image_width=int(camera_matrix[0, 2] * 2), # Approximate
image_height=int(camera_matrix[1, 2] * 2), # Approximate
focal_length=(fx + fy) / 2,
principal_point=(cx, cy)
)
logger.info("Camera calibration loaded successfully")
except Exception as e:
logger.error(f"Failed to load camera calibration: {e}")
raise
def load_stereo_calibration(self, stereo_calibration_path: str):
"""Load stereo camera calibration"""
try:
calibration_data = np.load(stereo_calibration_path)
self.stereo_calibration = StereoCalibration(
left_camera_matrix=calibration_data['left_camera_matrix'],
right_camera_matrix=calibration_data['right_camera_matrix'],
left_distortion=calibration_data['left_distortion'],
right_distortion=calibration_data['right_distortion'],
rotation_matrix=calibration_data['rotation_matrix'],
translation_vector=calibration_data['translation_vector'],
essential_matrix=calibration_data['essential_matrix'],
fundamental_matrix=calibration_data['fundamental_matrix'],
rectification_matrix_left=calibration_data['rectification_matrix_left'],
rectification_matrix_right=calibration_data['rectification_matrix_right'],
projection_matrix_left=calibration_data['projection_matrix_left'],
projection_matrix_right=calibration_data['projection_matrix_right'],
disparity_to_depth_map=calibration_data['disparity_to_depth_map']
)
# Initialize stereo matcher for real-time depth calculation
self._initialize_stereo_matcher()
logger.info("Stereo calibration loaded successfully")
except Exception as e:
logger.error(f"Failed to load stereo calibration: {e}")
raise
def _initialize_stereo_matcher(self):
"""Initialize stereo matching for depth calculation"""
if not self.stereo_calibration:
return
# Create stereo block matcher
self.stereo_matcher = cv2.StereoSGBM_create(
minDisparity=0,
numDisparities=64,
blockSize=9,
P1=8 * 9 * 9,
P2=32 * 9 * 9,
disp12MaxDiff=1,
uniquenessRatio=10,
speckleWindowSize=100,
speckleRange=32
)
logger.info("Stereo matcher initialized")
def pixel_to_world(self,
pixel_x: int,
pixel_y: int,
image_shape: Tuple[int, int, int],
depth: Optional[float] = None) -> Tuple[float, float, float]:
"""
Convert pixel coordinates to world coordinates
Args:
pixel_x, pixel_y: Pixel coordinates
image_shape: Shape of the image (height, width, channels)
depth: Depth in meters (if None, uses stereo or default depth)
Returns:
Tuple of (x, y, z) world coordinates in meters
"""
if not self.camera_calibration:
raise ValueError("Camera calibration not loaded")
# Get depth if not provided
if depth is None:
depth = self._estimate_depth(pixel_x, pixel_y, image_shape)
# Convert pixel to normalized coordinates
fx, fy = self.camera_calibration.camera_matrix[0, 0], self.camera_calibration.camera_matrix[1, 1]
cx, cy = self.camera_calibration.principal_point
# Convert to camera coordinates
x_cam = (pixel_x - cx) * depth / fx
y_cam = (pixel_y - cy) * depth / fy
z_cam = depth
# Transform to robot world coordinates
camera_point = np.array([x_cam, y_cam, z_cam, 1.0])
world_point = self.camera_to_robot_transform @ camera_point
return (float(world_point[0]), float(world_point[1]), float(world_point[2]))
def world_to_pixel(self,
world_x: float,
world_y: float,
world_z: float) -> Tuple[int, int]:
"""
Convert world coordinates to pixel coordinates
Returns:
Tuple of (pixel_x, pixel_y) coordinates
"""
if not self.camera_calibration:
raise ValueError("Camera calibration not loaded")
# Transform world point to camera coordinates
world_point = np.array([world_x, world_y, world_z, 1.0])
camera_point = np.linalg.inv(self.camera_to_robot_transform) @ world_point
x_cam, y_cam, z_cam = camera_point[:3]
# Convert to pixel coordinates
fx, fy = self.camera_calibration.camera_matrix[0, 0], self.camera_calibration.camera_matrix[1, 1]
cx, cy = self.camera_calibration.principal_point
pixel_x = int(x_cam * fx / z_cam + cx)
pixel_y = int(y_cam * fy / z_cam + cy)
return (pixel_x, pixel_y)
def _estimate_depth(self, pixel_x: int, pixel_y: int, image_shape: Tuple[int, int, int]) -> float:
"""Estimate depth using stereo vision or default depth"""
# If stereo calibration is available, try to get depth from disparity
if self.stereo_calibration and len(image_shape) == 3:
# This would require stereo images - simplified for now
pass
# Default depth estimation based on image position
# Assume strawberries are typically 20-50cm from camera
image_height = image_shape[0]
# Simple depth estimation based on vertical position
# Lower in image = closer to camera
normalized_y = pixel_y / image_height
estimated_depth = 0.2 + (0.3 * (1.0 - normalized_y)) # 20-50cm range
return estimated_depth
def calculate_depth_from_stereo(self,
left_image: np.ndarray,
right_image: np.ndarray,
pixel_x: int,
pixel_y: int) -> Optional[float]:
"""Calculate depth from stereo images"""
if not self.stereo_matcher or not self.stereo_calibration:
return None
try:
# Compute disparity map
disparity = self.stereo_matcher.compute(left_image, right_image)
# Get disparity at specific pixel
if 0 <= pixel_y < disparity.shape[0] and 0 <= pixel_x < disparity.shape[1]:
disp_value = disparity[pixel_y, pixel_x]
if disp_value > 0:
# Convert disparity to depth
depth = self.stereo_calibration.disparity_to_depth_map[disp_value]
return float(depth)
return None
except Exception as e:
logger.error(f"Error calculating stereo depth: {e}")
return None
def undistort_point(self, pixel_x: int, pixel_y: int) -> Tuple[int, int]:
"""Undistort pixel coordinates using camera calibration"""
if not self.camera_calibration:
return pixel_x, pixel_y
try:
# Create point array
points = np.array([[pixel_x, pixel_y]], dtype=np.float32)
# Undistort points
undistorted_points = cv2.undistortPoints(
points,
self.camera_calibration.camera_matrix,
self.camera_calibration.distortion_coeffs
)
return int(undistorted_points[0][0][0]), int(undistorted_points[0][0][1])
except Exception as e:
logger.error(f"Error undistorting point: {e}")
return pixel_x, pixel_y
def set_camera_to_robot_transform(self, transform_matrix: np.ndarray):
"""Set the transformation matrix from camera to robot coordinates"""
if transform_matrix.shape != (4, 4):
raise ValueError("Transform matrix must be 4x4")
self.camera_to_robot_transform = transform_matrix
logger.info("Camera to robot transform updated")
def calibrate_camera_to_robot(self,
world_points: List[Tuple[float, float, float]],
pixel_points: List[Tuple[int, int]]) -> bool:
"""
Calibrate camera to robot transformation using known correspondences
Args:
world_points: List of (x, y, z) world coordinates
pixel_points: List of (pixel_x, pixel_y) pixel coordinates
Returns:
True if calibration successful
"""
if len(world_points) != len(pixel_points) or len(world_points) < 4:
logger.error("Need at least 4 corresponding points for calibration")
return False
try:
# Prepare point correspondences
world_points_3d = np.array(world_points, dtype=np.float32)
pixel_points_2d = np.array(pixel_points, dtype=np.float32)
# Solve PnP problem
success, rotation_vector, translation_vector, inliers = cv2.solvePnPRansac(
world_points_3d,
pixel_points_2d,
self.camera_calibration.camera_matrix,
self.camera_calibration.distortion_coeffs
)
if success:
# Convert rotation vector to rotation matrix
rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
# Create 4x4 transformation matrix
transform_matrix = np.eye(4)
transform_matrix[:3, :3] = rotation_matrix
transform_matrix[:3, 3] = translation_vector.flatten()
self.set_camera_to_robot_transform(transform_matrix)
logger.info("Camera to robot calibration successful")
return True
else:
logger.error("PnP calibration failed")
return False
except Exception as e:
logger.error(f"Calibration error: {e}")
return False
def get_workspace_bounds(self) -> Dict[str, Tuple[float, float]]:
"""Get the bounds of the robot workspace in world coordinates"""
# This should be calibrated for your specific robot setup
return {
'x_min': -0.5, 'x_max': 0.5,
'y_min': -0.5, 'y_max': 0.5,
'z_min': 0.0, 'z_max': 0.5
}
def is_point_in_workspace(self, x: float, y: float, z: float) -> bool:
"""Check if a point is within the robot workspace"""
bounds = self.get_workspace_bounds()
return (bounds['x_min'] <= x <= bounds['x_max'] and
bounds['y_min'] <= y <= bounds['y_max'] and
bounds['z_min'] <= z <= bounds['z_max'])
def project_3d_to_2d(self,
world_points: List[Tuple[float, float, float]],
image_shape: Tuple[int, int]) -> List[Tuple[int, int]]:
"""Project 3D world points to 2D image coordinates"""
if not self.camera_calibration:
raise ValueError("Camera calibration not loaded")
projected_points = []
for world_point in world_points:
pixel_x, pixel_y = self.world_to_pixel(*world_point)
# Check if point is within image bounds
if (0 <= pixel_x < image_shape[1] and 0 <= pixel_y < image_shape[0]):
projected_points.append((pixel_x, pixel_y))
else:
projected_points.append((-1, -1)) # Outside image
return projected_points
def save_calibration(self, filepath: str):
"""Save current calibration to file"""
calibration_data = {
'camera_calibration': {
'camera_matrix': self.camera_calibration.camera_matrix.tolist() if self.camera_calibration else None,
'distortion_coeffs': self.camera_calibration.distortion_coeffs.tolist() if self.camera_calibration else None,
'image_width': self.camera_calibration.image_width if self.camera_calibration else None,
'image_height': self.camera_calibration.image_height if self.camera_calibration else None,
'focal_length': self.camera_calibration.focal_length if self.camera_calibration else None,
'principal_point': self.camera_calibration.principal_point if self.camera_calibration else None
},
'stereo_calibration': {
'left_camera_matrix': self.stereo_calibration.left_camera_matrix.tolist() if self.stereo_calibration else None,
'right_camera_matrix': self.stereo_calibration.right_camera_matrix.tolist() if self.stereo_calibration else None,
'rotation_matrix': self.stereo_calibration.rotation_matrix.tolist() if self.stereo_calibration else None,
'translation_vector': self.stereo_calibration.translation_vector.tolist() if self.stereo_calibration else None
},
'camera_to_robot_transform': self.camera_to_robot_transform.tolist(),
'robot_origin': self.robot_origin
}
with open(filepath, 'w') as f:
json.dump(calibration_data, f, indent=2)
logger.info(f"Calibration saved to {filepath}")
def load_calibration(self, filepath: str):
"""Load calibration from file"""
try:
with open(filepath, 'r') as f:
calibration_data = json.load(f)
# Load camera calibration
if calibration_data['camera_calibration']['camera_matrix']:
cam_data = calibration_data['camera_calibration']
self.camera_calibration = CameraCalibration(
camera_matrix=np.array(cam_data['camera_matrix']),
distortion_coeffs=np.array(cam_data['distortion_coeffs']),
image_width=cam_data['image_width'],
image_height=cam_data['image_height'],
focal_length=cam_data['focal_length'],
principal_point=tuple(cam_data['principal_point'])
)
# Load stereo calibration
if calibration_data['stereo_calibration']['left_camera_matrix']:
stereo_data = calibration_data['stereo_calibration']
# Note: This is simplified - you'd need to load all stereo parameters
logger.warning("Stereo calibration loading not fully implemented")
# Load transformation matrix
self.camera_to_robot_transform = np.array(calibration_data['camera_to_robot_transform'])
self.robot_origin = tuple(calibration_data['robot_origin'])
logger.info(f"Calibration loaded from {filepath}")
except Exception as e:
logger.error(f"Failed to load calibration: {e}")
raise
def main():
    """Command-line smoke test for CoordinateTransformer."""
    import argparse

    parser = argparse.ArgumentParser(description='Test Coordinate Transformer')
    parser.add_argument('--camera-matrix', help='Camera matrix file path')
    parser.add_argument('--distortion', help='Distortion coefficients file path')
    parser.add_argument('--stereo', help='Stereo calibration file path')
    parser.add_argument('--test-pixel', nargs=2, type=int, metavar=('X', 'Y'),
                        help='Test pixel coordinates')
    args = parser.parse_args()

    try:
        # Build the transformer from whichever calibration files were given
        transformer = CoordinateTransformer(args.camera_matrix, args.distortion, args.stereo)
        print("Coordinate Transformer initialized")

        if args.test_pixel:
            px, py = args.test_pixel
            # Forward: pixel -> world, then back again as a sanity check
            world_coords = transformer.pixel_to_world(px, py, (480, 640, 3))
            print(f"Pixel ({px}, {py}) -> World {world_coords}")
            back_x, back_y = transformer.world_to_pixel(*world_coords)
            print(f"World {world_coords} -> Pixel ({back_x}, {back_y})")

        bounds = transformer.get_workspace_bounds()
        print(f"Workspace bounds: {bounds}")
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()