Spaces:
Sleeping
Sleeping
| """ | |
| Card boundary detection and perspective transformation | |
| Detects Pokemon card boundaries in images and extracts the card | |
| using perspective transformation. | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from typing import Optional, Tuple | |
| from ..utils.logger import get_logger | |
| logger = get_logger(__name__) | |
# Detector parameters shared by every Pokemon card boundary check.
# Both the pre-DL validation layers and the DL preprocessing step read these
# values, which keeps the two code paths behaving the same way.
POKEMON_CARD_DETECTION_CONFIG = dict(
    # Accepted contour/quad area, as a fraction of the image area.
    min_area_ratio=0.001,
    max_area_ratio=0.999,
    # Accepted short-side / long-side ratio.
    aspect_ratio_range=(0.65, 0.78),
    # Minimum contour area / convex hull area.
    solidity_threshold=0.60,
    # Minimum contour area / fitted quadrilateral area.
    fill_ratio_threshold=0.40,
)
def detect_card_boundary_strict(
    image: np.ndarray,
    debug: bool = False,
    min_area_ratio: float = 0.05,
    max_area_ratio: float = 0.95,
    aspect_ratio_range: Tuple[float, float] = (0.55, 0.90),
    solidity_threshold: float = 0.90,
    fill_ratio_threshold: float = 0.75,
    max_contours: int = 10,
) -> Optional[np.ndarray]:
    """
    Locate a plausible card outline and return its four corners.

    Unlike :func:`detect_card_boundary`, this variant never substitutes
    fallback corners: when no candidate passes every check it returns ``None``.

    Candidates are the largest external contours of a Canny edge map computed
    on a copy of the image downscaled to at most 800 px on its longest side.
    Each candidate must pass the area, solidity, quadrilateral validation
    (:func:`validate_card_detection`) and fill-ratio checks; the survivor with
    the highest ``fill_ratio * area_ratio`` wins.

    Args:
        image: Input image (BGR format from cv2.imread)
        debug: If True, log intermediate steps
        min_area_ratio: Minimum candidate contour area as fraction of image area
        max_area_ratio: Maximum candidate contour area as fraction of image area
        aspect_ratio_range: Acceptable range for min(width, height)/max(width, height)
        solidity_threshold: Minimum contour solidity (area/convex_hull_area)
        fill_ratio_threshold: Minimum contour-to-quad area ratio (area/quad_area)
        max_contours: Number of largest contours to evaluate

    Returns:
        float32 array of 4 corner points [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
        in original-image coordinates, or None if no plausible card detected
    """
    if image is None or image.size == 0:
        logger.warning("Empty or None image provided")
        return None

    src_h, src_w = image.shape[:2]
    if src_h <= 0 or src_w <= 0:
        return None

    # Contour detection runs on a bounded-size copy: faster and less noisy.
    work_limit = 800
    longest_side = max(src_h, src_w)
    if longest_side > work_limit:
        scale = work_limit / float(longest_side)
        resized_dims = (max(1, int(src_w * scale)), max(1, int(src_h * scale)))
        work_image = cv2.resize(image, resized_dims, interpolation=cv2.INTER_AREA)
    else:
        scale = 1.0
        work_image = image

    image_area = float(work_image.shape[0] * work_image.shape[1])
    if image_area <= 0:
        return None

    # Document-scanner style pipeline: grayscale -> blur -> Canny.
    grayscale = cv2.cvtColor(work_image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edge_map = cv2.Canny(smoothed, 20, 80)

    # Morphological closing bridges small gaps so outlines form closed contours.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    edge_map = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, closing_kernel, iterations=2)

    # External contours only, so shapes nested inside the card are ignored.
    contours, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        if debug:
            logger.debug("No contours found in image (strict)")
        return None

    # Only the largest `max_contours` contours are worth evaluating.
    ranked = sorted(contours, key=cv2.contourArea, reverse=True)

    best_corners: Optional[np.ndarray] = None
    best_score = -1.0

    for candidate in ranked[:max_contours]:
        candidate_area = float(cv2.contourArea(candidate))
        if candidate_area <= 0:
            continue

        area_ratio = candidate_area / image_area
        too_small = area_ratio < min_area_ratio
        too_large = area_ratio > max_area_ratio
        if too_small or too_large:
            continue

        # Solidity: how much of the convex hull the contour actually covers.
        hull_area = float(cv2.contourArea(cv2.convexHull(candidate)))
        solidity = candidate_area / hull_area if hull_area > 0 else 0.0
        if solidity < solidity_threshold:
            continue

        # Try several approximation tolerances (lazily, in this order) and keep
        # the first one that yields a convex quadrilateral.
        perimeter = cv2.arcLength(candidate, True)
        polygons = (
            cv2.approxPolyDP(candidate, factor * perimeter, True)
            for factor in (0.02, 0.015, 0.03)
        )
        quad = next(
            (poly for poly in polygons if len(poly) == 4 and cv2.isContourConvex(poly)),
            None,
        )
        if quad is not None:
            corners = quad.reshape(4, 2).astype(np.float32)
        else:
            # No clean quadrilateral: use the minimum-area rotated rectangle.
            corners = cv2.boxPoints(cv2.minAreaRect(candidate)).astype(np.float32)

        is_card_like = validate_card_detection(
            work_image,
            corners,
            min_area_ratio=min_area_ratio,
            max_area_ratio=max_area_ratio,
            aspect_ratio_range=aspect_ratio_range,
        )
        if not is_card_like:
            continue

        quad_area = float(cv2.contourArea(corners))
        if quad_area <= 0:
            continue

        # Fill ratio: how much of the fitted quadrilateral the contour covers.
        fill_ratio = candidate_area / quad_area
        if fill_ratio < fill_ratio_threshold:
            continue

        # Larger and more rectangular candidates score higher.
        score = fill_ratio * area_ratio
        if score > best_score:
            best_score = score
            best_corners = corners

    if best_corners is None:
        if debug:
            logger.debug("Strict detection: no plausible card found")
        return None

    if debug:
        logger.debug(f"Strict detection: best score={best_score:.3f}")

    # Corners were found on the downscaled copy; map them back to the input.
    if scale != 1.0:
        best_corners = (best_corners / scale).astype(np.float32)
    return best_corners
def detect_skin_mask(image: np.ndarray) -> np.ndarray:
    """
    Build a dilated binary mask of skin-coloured pixels.

    The image is converted from BGR to YCrCb and thresholded with fixed
    bounds: any Y (0-255), Cr in [133, 173], Cb in [77, 127]. The resulting
    mask is then grown by two passes of a 15x15 dilation.

    Args:
        image: Input image (BGR format)

    Returns:
        Single-channel mask as produced by cv2.inRange followed by cv2.dilate
        (non-zero where a pixel, or a nearby pixel, matched the thresholds)
    """
    lower_bound = np.array([0, 133, 77])
    upper_bound = np.array([255, 173, 127])
    skin = cv2.inRange(cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb), lower_bound, upper_bound)

    # Dilation spreads the mask past the matched pixels themselves.
    structuring = np.ones((15, 15), np.uint8)
    return cv2.dilate(skin, structuring, iterations=2)
def detect_card_boundary_with_hand(image: np.ndarray, **kwargs) -> Optional[np.ndarray]:
    """
    Strict detection first; if that fails, retry with skin pixels neutralised to grey.

    For hand-held cards the fingers/skin tones (and warm background surfaces
    that share YCrCb values with skin) can confuse Canny edge detection.
    Three-tier strategy:

        Tier 1 - normal strict path on the untouched image.
        Tier 2 - skin removal + the caller's strict params.
        Tier 3 - skin removal + relaxed geometry params.

    Tiers 2-3 are only entered when the skin mask is non-empty, so images
    without skin-coloured pixels either succeed at Tier 1 or return None.

    Args:
        image: Input image (BGR format).
        **kwargs: Forwarded to :func:`detect_card_boundary_strict` for Tiers 1 & 2.
            Tier 3 overrides aspect_ratio_range, solidity_threshold, and
            fill_ratio_threshold with hand-tolerant values.

    Returns:
        Array of 4 corner points, or None if no card was found after all
        tiers (including when ``image`` is None or empty).
    """
    # Tier 1: normal strict path; a success here is returned untouched.
    corners = detect_card_boundary_strict(image, **kwargs)
    if corners is not None:
        return corners

    # Tier 1 already rejected (and logged) a None/empty image. Stop here so the
    # skin-mask colour conversion below is never handed an invalid image.
    if image is None or image.size == 0:
        return None

    skin_mask = detect_skin_mask(image)
    if not skin_mask.any():
        return None  # No skin present; detection simply failed

    # Paint skin-coloured regions neutral grey so their edges vanish from the
    # edge map. Work on a copy: the caller's image is left unmodified.
    masked = image.copy()
    masked[skin_mask > 0] = [128, 128, 128]

    # Tier 2: skin removal + caller's strict params
    corners = detect_card_boundary_strict(masked, **kwargs)
    if corners is not None:
        return corners

    # Tier 3: skin removal + relaxed params.
    # A hand holding the card makes the detected outline wider/shorter than the
    # bare card and can push its aspect ratio below the strict lower bound, so
    # all three geometry thresholds are loosened. Other caller kwargs are kept.
    relaxed = dict(kwargs)
    relaxed['aspect_ratio_range'] = (0.40, 0.95)
    relaxed['solidity_threshold'] = 0.40
    relaxed['fill_ratio_threshold'] = 0.20
    return detect_card_boundary_strict(masked, **relaxed)
def detect_card_boundary(
    image: np.ndarray,
    debug: bool = False,
    aspect_ratio_range: Tuple[float, float] = (0.50, 0.90),
) -> Optional[np.ndarray]:
    """
    Find the card outline in an image and return its corner points.

    Runs a bilateral filter and Canny edge detection, then walks the ten
    largest contours (largest first). The first contour whose polygon
    approximation has exactly 4 points and whose axis-aligned bounding-box
    aspect ratio (min/max side) lies within ``aspect_ratio_range`` is taken
    as the card, so clearly non-card-shaped contours (e.g. wide background
    rectangles) are skipped. When nothing qualifies, the result of
    ``_fallback_corners()`` is returned instead.

    Args:
        image: Input image (BGR format from cv2.imread)
        debug: If True, log intermediate steps
        aspect_ratio_range: Acceptable (min, max) for min(w,h)/max(w,h).
            Default (0.50, 0.90) accepts Sample-1's AR≈0.589 while
            rejecting Sample-6's bad background contour AR≈0.377.

    Returns:
        float32 array of 4 corner points [[x1,y1], [x2,y2], [x3,y3], [x4,y4]].
        None is returned only when the input image is None or empty; a failed
        detection yields the fallback corners.
    """
    if image is None or image.size == 0:
        logger.warning("Empty or None image provided")
        return None

    # Bilateral filtering suppresses noise while keeping edges sharp for Canny.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    denoised = cv2.bilateralFilter(grayscale, 11, 17, 17)
    edge_map = cv2.Canny(denoised, 30, 200)

    contours, _ = cv2.findContours(edge_map, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        logger.warning("No contours found in image")
        return _fallback_corners(image)

    # Examine only the ten largest contours, biggest first.
    largest = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

    corners = None
    for candidate in largest:
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) != 4:
            continue

        vertices = polygon.reshape(4, 2).astype(np.float32)
        span = vertices.max(axis=0) - vertices.min(axis=0)
        box_w = float(span[0])
        box_h = float(span[1])
        if not (box_w > 0 and box_h > 0):
            continue  # Degenerate bounding box: no aspect ratio to test

        ar = min(box_w, box_h) / max(box_w, box_h)
        if aspect_ratio_range[0] <= ar <= aspect_ratio_range[1]:
            if debug:
                logger.debug(f"Accepted contour AR={ar:.3f}")
            corners = vertices
            break

        if debug:
            logger.debug(f"Skipped contour AR={ar:.3f} (outside {aspect_ratio_range})")

    if corners is None:
        logger.warning("No rectangular contour found, using fallback")
        return _fallback_corners(image)

    if debug:
        logger.debug(f"Detected card corners: {corners}")
    return corners
def _fallback_corners(image: np.ndarray) -> np.ndarray:
    """
    Produce stand-in corners covering almost the whole image.

    Used when card detection fails: the returned rectangle is the image
    bounds shrunk by a 5% margin on every side.

    Args:
        image: Input image

    Returns:
        float32 array of 4 points ordered top-left, top-right,
        bottom-right, bottom-left
    """
    margin = 0.05  # Fraction of each dimension trimmed from every side
    height, width = image.shape[:2]

    left, right = width * margin, width * (1 - margin)
    top, bottom = height * margin, height * (1 - margin)

    logger.debug("Using fallback corners (full image)")
    return np.array(
        [
            [left, top],
            [right, top],
            [right, bottom],
            [left, bottom],
        ],
        dtype=np.float32,
    )
def crop_to_card(
    image: np.ndarray,
    corners: np.ndarray,
    target_width: int = 714,
    target_height: int = 1000
) -> np.ndarray:
    """
    Warp the quadrilateral described by ``corners`` into an upright rectangle.

    The four corners are ordered (top-left, top-right, bottom-right,
    bottom-left) and mapped onto the corners of a
    ``target_width`` x ``target_height`` output via a perspective transform.

    Pokemon cards measure 2.5" x 3.5" (aspect ratio ~0.714); the default
    output size keeps that ratio.

    Args:
        image: Input image (BGR format)
        corners: 4 corner points of the card [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
        target_width: Output width in pixels (default: 714)
        target_height: Output height in pixels (default: 1000)

    Returns:
        Perspective-corrected card image. If ``corners`` is None or does not
        contain exactly 4 entries, an error is logged and the input image is
        returned unchanged.
    """
    has_four_corners = corners is not None and len(corners) == 4
    if not has_four_corners:
        logger.error(f"Invalid corners provided: {corners}")
        return image

    # Source quadrilateral in a known order so it lines up with the target.
    source_quad = _order_points(corners)

    # Target rectangle, same order: TL, TR, BR, BL (last pixel index is size - 1).
    right_edge = target_width - 1
    bottom_edge = target_height - 1
    target_quad = np.array(
        [
            [0, 0],
            [right_edge, 0],
            [right_edge, bottom_edge],
            [0, bottom_edge],
        ],
        dtype=np.float32,
    )

    homography = cv2.getPerspectiveTransform(source_quad, target_quad)
    card_view = cv2.warpPerspective(image, homography, (target_width, target_height))

    logger.debug(f"Cropped card to {target_width}×{target_height}")
    return card_view
| def _order_points(pts: np.ndarray) -> np.ndarray: | |
| """ | |
| Order points in clockwise order starting from top-left | |
| Args: | |
| pts: 4 corner points in any order | |
| Returns: | |
| Ordered points: [top-left, top-right, bottom-right, bottom-left] | |
| """ | |
| # Initialize ordered points | |
| rect = np.zeros((4, 2), dtype=np.float32) | |
| # Sum and diff of coordinates | |
| s = pts.sum(axis=1) | |
| diff = np.diff(pts, axis=1) | |
| # Top-left point will have smallest sum | |
| rect[0] = pts[np.argmin(s)] | |
| # Bottom-right point will have largest sum | |
| rect[2] = pts[np.argmax(s)] | |
| # Top-right point will have smallest difference (y - x) | |
| rect[1] = pts[np.argmin(diff)] | |
| # Bottom-left point will have largest difference | |
| rect[3] = pts[np.argmax(diff)] | |
| return rect | |
def get_card_region_mask(image: np.ndarray, corners: np.ndarray) -> np.ndarray:
    """
    Rasterise the card quadrilateral into a binary mask.

    Args:
        image: Input image; only its first two dimensions are used, to size
            the mask
        corners: 4 corner points of the card (cast to int32 before filling,
            so fractional coordinates are truncated)

    Returns:
        uint8 mask with the image's height and width
        (255 inside card, 0 outside)
    """
    region = np.zeros(image.shape[:2], dtype=np.uint8)
    polygon = corners.astype(np.int32)  # fillPoly needs integer vertices
    cv2.fillPoly(region, [polygon], 255)
    return region
def validate_card_detection(
    image: np.ndarray,
    corners: np.ndarray,
    min_area_ratio: float = 0.3,
    max_area_ratio: float = 0.95,
    aspect_ratio_range: Tuple[float, float] = (0.55, 0.90),
) -> bool:
    """
    Validate that detected corners represent a reasonable card region.

    Three checks are applied in order; the first failure logs a warning and
    returns False:

    1. The quadrilateral's area, as a fraction of the image area, lies in
       [min_area_ratio, max_area_ratio].
    2. Opposite sides have similar lengths (shorter/longer >= 0.7).
    3. The short-side/long-side ratio lies in ``aspect_ratio_range``
       (orientation-invariant).

    Args:
        image: Input image (only its height and width are used)
        corners: Detected corner points; 4 points reshapeable to (4, 2)
        min_area_ratio: Minimum card area as fraction of image (default: 0.3)
        max_area_ratio: Maximum card area as fraction of image (default: 0.95)
        aspect_ratio_range: Acceptable (min, max) for
            min(width, height)/max(width, height) (default: (0.55, 0.90))

    Returns:
        True if detection seems valid, False otherwise (including when the
        image is None/empty or the corners are missing or malformed)
    """
    if corners is None or len(corners) != 4:
        return False
    if image is None:
        return False

    # Normalise to a float32 (4, 2) array: cv2.contourArea rejects float64
    # input, and contour-style (4, 1, 2) arrays would break point ordering.
    quad = np.asarray(corners, dtype=np.float32)
    if quad.size != 8:
        return False
    quad = quad.reshape(4, 2)

    # Guard the division below against a zero-area image.
    image_area = image.shape[0] * image.shape[1]
    if image_area <= 0:
        logger.warning("Empty image provided for card validation")
        return False

    # Check area ratio
    area_ratio = cv2.contourArea(quad) / image_area
    if not (min_area_ratio <= area_ratio <= max_area_ratio):
        logger.warning(f"Card area ratio {area_ratio:.2f} outside valid range")
        return False

    # Side lengths of the ordered quadrilateral (TL, TR, BR, BL).
    ordered = _order_points(quad)
    top = np.linalg.norm(ordered[1] - ordered[0])
    right = np.linalg.norm(ordered[2] - ordered[1])
    bottom = np.linalg.norm(ordered[3] - ordered[2])
    left = np.linalg.norm(ordered[0] - ordered[3])

    # Check if opposite sides are similar length (shorter side >= 70% of longer)
    longest_horizontal = max(top, bottom)
    longest_vertical = max(right, left)
    horizontal_ratio = min(top, bottom) / longest_horizontal if longest_horizontal > 0 else 0
    vertical_ratio = min(right, left) / longest_vertical if longest_vertical > 0 else 0
    if horizontal_ratio < 0.7 or vertical_ratio < 0.7:
        logger.warning("Detected shape is not rectangular enough")
        return False

    # Check expected card aspect ratio (orientation-invariant). Both extents
    # are strictly positive here: a zero-length pair of opposite sides would
    # already have failed the similarity check above.
    width = longest_horizontal
    height = longest_vertical
    ratio = min(width, height) / max(width, height)
    if not (aspect_ratio_range[0] <= ratio <= aspect_ratio_range[1]):
        logger.warning(f"Detected aspect ratio {ratio:.2f} outside valid range")
        return False

    return True