File size: 8,808 Bytes
806bdda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
"""
Perspective Transformation Module
==================================
Implements perspective transformation for converting camera view coordinates
to real-world top-down coordinates, essential for accurate speed calculation.
Authors:
- Abhay Gupta (0205CC221005)
- Aditi Lakhera (0205CC221011)
- Balraj Patel (0205CC221049)
- Bhumika Patel (0205CC221050)
Mathematical Background:
Perspective transformation uses a 3x3 homography matrix to map points
from one plane to another. This is crucial for converting pixel coordinates
to real-world measurements.
"""
import logging
from typing import Optional, Tuple

import cv2
import numpy as np
logger = logging.getLogger(__name__)
class PerspectiveTransformer:
    """
    Handles perspective transformation between camera view and real-world coordinates.

    This class computes and applies homography transformations to convert
    image coordinates to a top-down view with real-world measurements.

    Attributes:
        source_pts: Source quadrilateral corners as float32, shape (4, 2).
        target_pts: Target quadrilateral corners as float32, shape (4, 2).
        matrix: 3x3 forward homography (source -> target).
        inverse_matrix: 3x3 inverse homography (target -> source), or None
            if it could not be computed.
    """

    def __init__(
        self,
        source_points: np.ndarray,
        target_points: np.ndarray
    ):
        """
        Initialize the perspective transformer.

        Args:
            source_points: 4 points in source image (camera view).
                Shape: (4, 2) as [[x1,y1], [x2,y2], [x3,y3], [x4,y4]].
            target_points: 4 corresponding points in target space (real-world).
                Shape: (4, 2) with same format.

        Raises:
            ValueError: If points are invalid or transformation cannot be computed.
        """
        self._validate_points(source_points, "source")
        self._validate_points(target_points, "target")

        # OpenCV's getPerspectiveTransform requires float32 input.
        self.source_pts = source_points.astype(np.float32)
        self.target_pts = target_points.astype(np.float32)

        self.matrix = self._compute_transformation_matrix()
        # Inverse mapping (target -> source); may be None on failure, so
        # apply_inverse_transformation checks before using it.
        self.inverse_matrix = self._compute_inverse_matrix()

        logger.info("Perspective transformer initialized successfully")
        # Lazy %-style args: formatting is skipped when DEBUG is disabled.
        logger.debug("Source points:\n%s", self.source_pts)
        logger.debug("Target points:\n%s", self.target_pts)

    def _validate_points(self, points: np.ndarray, name: str) -> None:
        """
        Validate point array format and values.

        Args:
            points: Points array to validate.
            name: Name used in error messages.

        Raises:
            ValueError: If points are not a (4, 2) numpy array of finite values.
        """
        if not isinstance(points, np.ndarray):
            raise ValueError(f"{name} points must be a numpy array")
        if points.shape != (4, 2):
            raise ValueError(
                f"{name} points must have shape (4, 2), got {points.shape}"
            )
        if not np.isfinite(points).all():
            raise ValueError(f"{name} points contain invalid values (NaN or Inf)")

    def _compute_transformation_matrix(self) -> np.ndarray:
        """
        Compute the perspective transformation matrix.

        Returns:
            3x3 homography matrix mapping source points onto target points.

        Raises:
            ValueError: If the transformation cannot be computed.
        """
        try:
            matrix = cv2.getPerspectiveTransform(
                self.source_pts,
                self.target_pts
            )
        except cv2.error as e:
            # Chain the OpenCV error so the original cause is preserved.
            raise ValueError(f"Failed to compute perspective transform: {e}") from e
        # Guard against degenerate (e.g. collinear) point configurations that
        # yield a non-finite or missing matrix.
        if matrix is None or not np.isfinite(matrix).all():
            raise ValueError("Invalid transformation matrix computed")
        logger.debug("Transformation matrix:\n%s", matrix)
        return matrix

    def _compute_inverse_matrix(self) -> Optional[np.ndarray]:
        """
        Compute the inverse transformation matrix.

        Returns:
            3x3 inverse homography matrix, or None if it cannot be computed
            (failure is logged, not raised, since the inverse is optional).
        """
        try:
            # Swapping source and target yields the inverse homography.
            return cv2.getPerspectiveTransform(
                self.target_pts,
                self.source_pts
            )
        except cv2.error as e:
            logger.warning(f"Could not compute inverse matrix: {e}")
            return None

    def apply_transformation(self, points: np.ndarray) -> np.ndarray:
        """
        Transform points from source to target coordinate system.

        Args:
            points: Array of points to transform.
                Shape: (N, 2) where N is number of points.

        Returns:
            Transformed points in target coordinate system, shape (N, 2).
            An empty input array is returned unchanged.

        Raises:
            ValueError: If points have invalid shape or transformation fails.
        """
        # Handle empty input: nothing to transform.
        if points.size == 0:
            return points
        if len(points.shape) != 2 or points.shape[1] != 2:
            raise ValueError(
                f"Points must have shape (N, 2), got {points.shape}"
            )
        try:
            # cv2.perspectiveTransform expects shape (N, 1, 2) float32.
            points_reshaped = points.reshape(-1, 1, 2).astype(np.float32)
            transformed = cv2.perspectiveTransform(
                points_reshaped,
                self.matrix
            )
            return transformed.reshape(-1, 2)
        except Exception as e:
            logger.error(f"Error applying transformation: {e}")
            raise ValueError(f"Transformation failed: {e}") from e

    def apply_inverse_transformation(self, points: np.ndarray) -> np.ndarray:
        """
        Transform points from target back to source coordinate system.

        Args:
            points: Array of points in target coordinates, shape (N, 2).

        Returns:
            Points in source coordinate system, shape (N, 2).
            An empty input array is returned unchanged.

        Raises:
            ValueError: If inverse matrix not available or transformation fails.
        """
        if self.inverse_matrix is None:
            raise ValueError("Inverse transformation matrix not available")
        if points.size == 0:
            return points
        try:
            points_reshaped = points.reshape(-1, 1, 2).astype(np.float32)
            transformed = cv2.perspectiveTransform(
                points_reshaped,
                self.inverse_matrix
            )
            return transformed.reshape(-1, 2)
        except Exception as e:
            logger.error(f"Error applying inverse transformation: {e}")
            raise ValueError(f"Inverse transformation failed: {e}") from e

    def transform_single_point(self, x: float, y: float) -> Tuple[float, float]:
        """
        Transform a single point (convenience method).

        Args:
            x: X coordinate in source system.
            y: Y coordinate in source system.

        Returns:
            Tuple of (x, y) in target system as plain Python floats.
        """
        point = np.array([[x, y]], dtype=np.float32)
        tx, ty = self.apply_transformation(point)[0]
        # Cast from np.float32 so the return matches the annotated floats.
        return float(tx), float(ty)

    def get_transformation_matrix(self) -> np.ndarray:
        """
        Get the transformation matrix.

        Returns:
            Copy of the 3x3 homography matrix (callers cannot mutate state).
        """
        return self.matrix.copy()

    def get_scale_factors(self) -> Tuple[float, float]:
        """
        Estimate scale factors in x and y directions.

        NOTE(review): assumes points are ordered so that pts[1]-pts[0] spans
        the width edge and pts[3]-pts[0] the height edge — confirm against
        callers. "Pixels per meter" holds only if target units are meters.

        Returns:
            Tuple of (scale_x, scale_y) representing pixels per meter.
        """
        source_width = np.linalg.norm(self.source_pts[1] - self.source_pts[0])
        source_height = np.linalg.norm(self.source_pts[3] - self.source_pts[0])
        target_width = np.linalg.norm(self.target_pts[1] - self.target_pts[0])
        target_height = np.linalg.norm(self.target_pts[3] - self.target_pts[0])
        # Fall back to 1.0 for degenerate (zero-length) target edges.
        scale_x = source_width / target_width if target_width > 0 else 1.0
        scale_y = source_height / target_height if target_height > 0 else 1.0
        return scale_x, scale_y
|