Spaces:

feng-x
/

ring-sizer

Running

File size: 20,635 Bytes

347d1a8

"""
Credit card detection and scale calibration utilities.

This module handles:
- Detecting credit card contour in an image
- Verifying aspect ratio matches standard credit card
- Perspective rectification
- Computing pixels-per-cm scale factor
"""

import cv2
import numpy as np
from typing import Optional, Tuple, Dict, Any, List
from pathlib import Path

# Import debug observer and drawing functions
from .debug_observer import DebugObserver, draw_contours_overlay, draw_candidates_with_scores

# Import shared visualization constants
from .viz_constants import (
    FONT_FACE,
    Color,
    StrategyColor,
    FontScale,
    FontThickness,
    Size,
    Layout,
)

# Standard credit card dimensions (ISO/IEC 7810 ID-1)
# The millimetre values are the source of truth; the centimetre values and
# the aspect ratio below are derived from them.
CARD_WIDTH_MM = 85.60
CARD_HEIGHT_MM = 53.98
# Same dimensions in centimetres (used by compute_scale_factor for px/cm).
CARD_WIDTH_CM = CARD_WIDTH_MM / 10
CARD_HEIGHT_CM = CARD_HEIGHT_MM / 10
# Long side divided by short side; candidates are compared against this.
CARD_ASPECT_RATIO = CARD_WIDTH_MM / CARD_HEIGHT_MM  # ~1.586

# Detection parameters
# Bounds on (candidate area / image area); score_card_candidate rejects
# candidates outside this range with a score of 0.
MIN_CARD_AREA_RATIO = 0.01  # Card must be at least 1% of image area
MAX_CARD_AREA_RATIO = 0.5   # Card must be at most 50% of image area


def order_corners(corners: np.ndarray) -> np.ndarray:
    """
    Order corners as: top-left, top-right, bottom-right, bottom-left.

    Top-left and bottom-right are the points with the smallest and largest
    x+y. The two remaining points are then split by y-x (smaller value =
    top-right, larger value = bottom-left).

    TR/BL are chosen only among the points that are left after TL/BR have
    been picked. Picking all four extremes independently can select the same
    point twice when sums or differences tie (for example a rectangle rotated
    by 45 degrees), which silently drops one corner. With this scheme every
    input point appears exactly once in the result.

    Args:
        corners: Array holding 4 corner points (anything reshapeable to 4x2)

    Returns:
        Ordered 4x2 float32 array of corners (TL, TR, BR, BL)
    """
    pts = np.asarray(corners).reshape(4, 2).astype(np.float32)

    # Sum (x+y): smallest = top-left, largest = bottom-right
    sums = pts.sum(axis=1)
    tl_idx = int(np.argmin(sums))

    # Exclude TL from the search so BR is a different point even when
    # every sum is identical.
    masked_sums = sums.copy()
    masked_sums[tl_idx] = -np.inf
    br_idx = int(np.argmax(masked_sums))

    # Diff (y-x) over the two leftover points: smaller = top-right,
    # larger = bottom-left
    diffs = pts[:, 1] - pts[:, 0]
    first, second = [i for i in range(4) if i not in (tl_idx, br_idx)]
    if diffs[first] <= diffs[second]:
        tr_idx, bl_idx = first, second
    else:
        tr_idx, bl_idx = second, first

    return pts[[tl_idx, tr_idx, br_idx, bl_idx]]


def get_quad_dimensions(corners: np.ndarray) -> Tuple[float, float]:
    """
    Measure a quadrilateral given its corners in TL, TR, BR, BL order.

    Opposite edges are averaged, so the result is the mean horizontal and
    mean vertical edge length of the quad.

    Args:
        corners: Ordered 4x2 array (TL, TR, BR, BL)

    Returns:
        Tuple of (width, height) in pixels
    """
    top_left, top_right = corners[0], corners[1]
    bottom_right, bottom_left = corners[2], corners[3]

    def edge_length(start, end):
        # Euclidean distance between two corner points
        return np.linalg.norm(end - start)

    # Mean of the top and bottom edges
    width = (edge_length(top_left, top_right)
             + edge_length(bottom_left, bottom_right)) / 2

    # Mean of the left and right edges
    height = (edge_length(top_left, bottom_left)
              + edge_length(top_right, bottom_right)) / 2

    return width, height


def score_card_candidate(
    contour: np.ndarray,
    corners: np.ndarray,
    image_area: float,
    aspect_ratio_tolerance: float = 0.15,
) -> Tuple[float, Dict[str, Any]]:
    """
    Rate how well a quadrilateral candidate matches a credit card.

    Candidates are minAreaRect boxes, i.e. perfect rectangles, so the score
    only looks at how much of the image the box covers and how close its
    aspect ratio is to the standard card ratio. A rejected candidate gets a
    score of 0.0 and a "reject_reason" entry in the details.

    Args:
        contour: Original contour (minAreaRect box points); not used for scoring
        corners: 4 corner points
        image_area: Total image area for relative sizing
        aspect_ratio_tolerance: Allowed relative deviation from standard ratio

    Returns:
        Tuple of (score, details_dict)
    """
    ordered = order_corners(corners)
    width, height = get_quad_dimensions(ordered)
    area = cv2.contourArea(corners)

    details = {
        "corners": ordered,
        "width": width,
        "height": height,
        "area": area,
    }

    def rejected(reason):
        # Record why the candidate failed and give it the lowest score
        details["reject_reason"] = reason
        return 0.0, details

    # Gate 1: the box must cover a plausible share of the image
    area_ratio = area / image_area
    too_small = area_ratio < MIN_CARD_AREA_RATIO
    too_large = area_ratio > MAX_CARD_AREA_RATIO
    if too_small or too_large:
        return rejected(f"area_ratio={area_ratio:.3f}")

    # Gate 2: both dimensions must be positive before dividing
    if width <= 0 or height <= 0:
        return rejected("invalid_dimensions")

    # Orientation-independent aspect ratio (long side over short side)
    aspect_ratio = width / height if width > height else height / width
    details["aspect_ratio"] = aspect_ratio

    # Gate 3: relative deviation from the standard card ratio
    ratio_diff = abs(aspect_ratio - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO
    if ratio_diff > aspect_ratio_tolerance:
        return rejected(
            f"aspect_ratio={aspect_ratio:.3f}, expected~{CARD_ASPECT_RATIO:.3f}"
        )

    # Score (higher is better): equal weight on coverage and ratio match.
    # Coverage saturates once the box reaches 10% of the image.
    area_score = min(area_ratio / 0.1, 1.0)
    ratio_score = 1.0 - ratio_diff / aspect_ratio_tolerance
    details["score_components"] = {
        "area": area_score,
        "ratio": ratio_score,
    }

    score = 0.5 * area_score + 0.5 * ratio_score
    return score, details


def find_card_contours(
    image: np.ndarray,
    image_area: float,
    aspect_ratio_tolerance: float = 0.15,
    min_score: float = 0.3,
    debug_dir: Optional[str] = None,
) -> List[np.ndarray]:
    """
    Find potential card contours using a waterfall of detection strategies.

    Strategies are tried in order: Canny → Adaptive → Otsu → Color.
    As soon as a strategy produces a candidate scoring at least min_score,
    that strategy's de-duplicated candidates are returned and the remaining
    strategies are skipped. If no strategy reaches min_score, the candidates
    of all strategies are returned together as a last resort.

    Each candidate is the minAreaRect box of a contour that passed the
    area, rectangularity and aspect-ratio filters. The aspect-ratio filter
    uses aspect_ratio_tolerance, so the same tolerance applies to candidate
    extraction and to scoring.

    Args:
        image: Input BGR image
        image_area: Total image area in pixels (used for scoring)
        aspect_ratio_tolerance: Allowed deviation from standard aspect ratio
        min_score: Minimum score to accept a strategy's candidates
        debug_dir: Optional directory to save debug images

    Returns:
        List of 4x1x2 float32 box-point arrays from the first successful
        strategy, or from all strategies combined if none succeeded
    """
    # Create debug observer if debug mode enabled
    observer = DebugObserver(debug_dir) if debug_dir else None

    h, w = image.shape[:2]
    # Same area bounds that score_card_candidate enforces
    min_area = h * w * MIN_CARD_AREA_RATIO
    max_area = h * w * MAX_CARD_AREA_RATIO

    # Save original image
    if observer:
        observer.save_stage("01_original", image)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if observer:
        observer.save_stage("02_grayscale", gray)

    # Apply bilateral filter to reduce noise while keeping edges
    filtered = cv2.bilateralFilter(gray, 11, 75, 75)
    if observer:
        observer.save_stage("03_bilateral_filtered", filtered)

    def extract_quads(contours, epsilon_factor=0.02, min_rectangularity=0.7,
                      aspect_tolerance=aspect_ratio_tolerance):
        """Extract quadrilaterals from contours using minAreaRect.

        Shape constraints:
        - Area: contour area must lie within [min_area, max_area]
        - Vertex count: polygon approximation must have at least 4 points
        - Rectangularity (contour_area / rect_area): rejects irregular shapes
        - Aspect ratio: rejects rectangles that don't match card proportions
        """
        quads = []
        for contour in contours:
            contour_area = cv2.contourArea(contour)
            if contour_area < min_area or contour_area > max_area:
                continue

            # The approximation is only used to discard shapes with fewer
            # than four vertices; the candidate itself is the minAreaRect.
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon_factor * peri, True)
            if len(approx) < 4:
                continue

            rect = cv2.minAreaRect(contour)
            box = cv2.boxPoints(rect).astype(np.float32)

            rect_area = cv2.contourArea(box)
            if rect_area <= 0:
                continue
            if contour_area / rect_area < min_rectangularity:
                continue

            (_, _), (bw, bh), _ = rect
            if bw <= 0 or bh <= 0:
                continue
            aspect = max(bw, bh) / min(bw, bh)
            if abs(aspect - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO > aspect_tolerance:
                continue

            quads.append(box.reshape(4, 1, 2))

        return quads

    def dedup_quads(quads, center_threshold=50):
        """Remove near-duplicate boxes, keeping the largest when centers overlap.

        Two boxes are considered duplicates if their centers are within
        center_threshold pixels of each other.
        """
        if len(quads) <= 1:
            return quads

        kept = []
        kept_centers = []
        # Largest first, so the biggest box of each cluster is the one kept
        for quad in sorted(quads, key=cv2.contourArea, reverse=True):
            center = quad.reshape(4, 2).mean(axis=0)
            if any(np.linalg.norm(center - other) < center_threshold
                   for other in kept_centers):
                continue
            kept.append(quad)
            kept_centers.append(center)

        return kept

    def score_best(quads):
        """Return the best score among quads."""
        best = 0.0
        for q in quads:
            score, _ = score_card_candidate(
                q, q.reshape(4, 2), image_area, aspect_ratio_tolerance
            )
            best = max(best, score)
        return best

    def finish_strategy(quads, stage_name, title, color):
        """De-duplicate a strategy's quads, save the overlay, test the score.

        Returns (deduplicated_quads, accepted).
        """
        quads = dedup_quads(quads)
        if observer and quads:
            observer.draw_and_save(stage_name, image,
                                   draw_contours_overlay, quads, title, color)
        accepted = bool(quads) and score_best(quads) >= min_score
        return quads, accepted

    # --- Waterfall: try strategies in order, stop on first success ---

    # Strategy 1: Canny edge detection with various thresholds
    canny_candidates = []
    canny_configs = [(20, 60), (30, 100), (50, 150), (75, 200), (100, 250)]
    saved_canny_indices = [0, 2, 4]
    # Closing bridges small gaps in the edge map so contours are closed
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    for idx, (canny_low, canny_high) in enumerate(canny_configs):
        edges = cv2.Canny(filtered, canny_low, canny_high)

        if idx in saved_canny_indices and observer:
            observer.save_stage(f"04_canny_{canny_low}_{canny_high}", edges)

        edges_morphed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, close_kernel)

        if idx == 2 and observer:
            observer.save_stage("07_canny_morphology", edges_morphed)

        contours, _ = cv2.findContours(edges_morphed, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        canny_candidates.extend(extract_quads(contours))

    canny_candidates, accepted = finish_strategy(
        canny_candidates, "08_canny_contours",
        "Canny Edge Detection", StrategyColor.CANNY)
    if accepted:
        return canny_candidates

    # Strategy 2: Adaptive thresholding (for varying lighting)
    adaptive_candidates = []
    adaptive_configs = [(11, 2), (21, 5), (31, 10), (51, 10)]
    # Only these configurations are written out as debug stages
    saved_adaptive_stages = {0: "09_adaptive_11_2", 2: "10_adaptive_31_10"}

    for idx, (block_size, C) in enumerate(adaptive_configs):
        thresh = cv2.adaptiveThreshold(
            filtered, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, block_size, C
        )

        if idx in saved_adaptive_stages and observer:
            observer.save_stage(saved_adaptive_stages[idx], thresh)

        # Search both polarities: the card may be lighter or darker
        # than its surroundings.
        for img in [thresh, 255 - thresh]:
            contours, _ = cv2.findContours(img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
            adaptive_candidates.extend(extract_quads(contours))

    adaptive_candidates, accepted = finish_strategy(
        adaptive_candidates, "11_adaptive_contours",
        "Adaptive Thresholding", StrategyColor.ADAPTIVE)
    if accepted:
        return adaptive_candidates

    # Strategy 3: Otsu's thresholding
    otsu_candidates = []
    _, otsu = cv2.threshold(filtered, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    if observer:
        observer.save_stage("12_otsu_binary", otsu)

    otsu_inverted = 255 - otsu
    if observer:
        observer.save_stage("13_otsu_inverted", otsu_inverted)

    otsu_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    for img in [otsu, otsu_inverted]:
        img_morphed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, otsu_kernel)
        contours, _ = cv2.findContours(img_morphed, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        otsu_candidates.extend(extract_quads(contours))

    otsu_candidates, accepted = finish_strategy(
        otsu_candidates, "14_otsu_contours",
        "Otsu Thresholding", StrategyColor.OTSU)
    if accepted:
        return otsu_candidates

    # Strategy 4: Color-based segmentation (gray card on light background)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    sat = hsv[:, :, 1]
    if observer:
        observer.save_stage("15_hsv_saturation", sat)

    _, low_sat_mask = cv2.threshold(sat, 30, 255, cv2.THRESH_BINARY_INV)
    if observer:
        observer.save_stage("16_low_sat_mask", low_sat_mask)

    # Keep pixels that are both low-saturation and mid-brightness
    val = hsv[:, :, 2]
    gray_mask = cv2.bitwise_and(low_sat_mask, cv2.inRange(val, 80, 200))

    mask_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_CLOSE, mask_kernel)
    gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_OPEN, mask_kernel)
    if observer:
        observer.save_stage("17_gray_mask", gray_mask)

    contours, _ = cv2.findContours(gray_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    color_candidates, accepted = finish_strategy(
        extract_quads(contours, epsilon_factor=0.03), "18_color_contours",
        "Color-Based Detection", StrategyColor.COLOR_BASED)
    if accepted:
        return color_candidates

    # No strategy succeeded — return all collected candidates as last resort
    all_candidates = canny_candidates + adaptive_candidates + otsu_candidates + color_candidates
    if observer and all_candidates:
        observer.draw_and_save("19_all_candidates", image,
                               draw_contours_overlay, all_candidates,
                               "All Candidates (fallback)", StrategyColor.ALL_CANDIDATES)
    return all_candidates


def detect_credit_card(
    image: np.ndarray,
    aspect_ratio_tolerance: float = 0.15,
    debug_dir: Optional[str] = None,
    min_score: float = 0.3,
) -> Optional[Dict[str, Any]]:
    """
    Detect a credit card in the image.

    Candidates come from find_card_contours; each one is scored with
    score_card_candidate and the highest-scoring candidate is returned,
    provided its score reaches min_score.

    Args:
        image: Input BGR image
        aspect_ratio_tolerance: Allowed deviation from standard aspect ratio
        debug_dir: Optional directory to save debug images
        min_score: Minimum score a candidate needs to be accepted. The same
            value is forwarded to find_card_contours so both stages use one
            threshold.

    Returns:
        Dictionary containing:
        - corners: 4x2 array of corner points (ordered)
        - contour: Same ordered corner points as "corners"
        - confidence: Detection confidence score
        - width_px, height_px: Detected dimensions
        - aspect_ratio: Detected aspect ratio
        Or None if no card detected
    """
    # Create debug observer if debug mode enabled
    observer = DebugObserver(debug_dir) if debug_dir else None

    if observer:
        print(f"Saving card detection debug images to: {debug_dir}")

    h, w = image.shape[:2]
    image_area = h * w

    # Find candidate contours (waterfall: stops after first successful strategy)
    candidates = find_card_contours(
        image, image_area=image_area,
        aspect_ratio_tolerance=aspect_ratio_tolerance,
        min_score=min_score,
        debug_dir=debug_dir,
    )

    if not candidates:
        if observer:
            print("  No candidates found")
        return None

    # Score each candidate. Only candidates with a strictly positive score
    # can become the best result, so a rejected candidate (score 0.0, no
    # "aspect_ratio" entry) is never selected.
    best_score = 0.0
    best_result = None
    all_scored = []

    for contour in candidates:
        corners = contour.reshape(4, 2)
        score, details = score_card_candidate(
            contour, corners, image_area, aspect_ratio_tolerance
        )

        all_scored.append((corners, score, details))

        if score > best_score:
            best_score = score
            best_result = details

    # Sort by score (descending) and take top 5
    all_scored.sort(key=lambda x: x[1], reverse=True)
    top_candidates = all_scored[:5]

    # Save scored candidates visualization
    if observer and top_candidates:
        observer.draw_and_save("20_scored_candidates", image,
                               draw_candidates_with_scores, top_candidates, "Top 5 Candidates")

    if best_result is None or best_score < min_score:
        if observer:
            print(f"  Best score {best_score:.2f} below threshold {min_score}")
        return None

    # Save final detection
    if observer:
        final_overlay = image.copy()
        corners = best_result["corners"].astype(np.int32)
        cv2.polylines(final_overlay, [corners], True, Color.GREEN, Size.CONTOUR_THICK)

        # Draw corners. Pass plain Python ints as the center so the call
        # does not depend on how the OpenCV bindings treat NumPy scalars.
        for pt in corners:
            center = (int(pt[0]), int(pt[1]))
            cv2.circle(final_overlay, center, Size.CORNER_RADIUS + 2, Color.RED, -1)

        # Add details text
        text_y = Layout.TITLE_Y
        details_text = [
            "Final Detection",
            f"Score: {best_score:.3f}",
            f"Aspect Ratio: {best_result['aspect_ratio']:.3f}",
            f"Dimensions: {best_result['width']:.0f}x{best_result['height']:.0f}px",
        ]

        for text in details_text:
            # Draw each line twice: a white outline pass first, then the
            # green text on top of it.
            cv2.putText(
                final_overlay, text, (Layout.TEXT_OFFSET_X, text_y),
                FONT_FACE, FontScale.SUBTITLE, Color.WHITE,
                FontThickness.SUBTITLE_OUTLINE, cv2.LINE_AA
            )
            cv2.putText(
                final_overlay, text, (Layout.TEXT_OFFSET_X, text_y),
                FONT_FACE, FontScale.SUBTITLE, Color.GREEN,
                FontThickness.SUBTITLE, cv2.LINE_AA
            )
            text_y += Layout.LINE_SPACING

        observer.save_stage("21_final_detection", final_overlay)
        print("  Saved 21 debug images")

    return {
        "corners": best_result["corners"],
        "contour": best_result["corners"],
        "confidence": best_score,
        "width_px": best_result["width"],
        "height_px": best_result["height"],
        "aspect_ratio": best_result["aspect_ratio"],
    }


def rectify_card(
    image: np.ndarray,
    corners: np.ndarray,
    output_width: int = 856,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Warp the card region to an axis-aligned, front-facing rectangle.

    The long side of the output is output_width pixels; the short side is
    derived from the standard card aspect ratio. Which output axis gets the
    long side depends on whether the detected quad is wider than it is tall.

    Args:
        image: Input BGR image
        corners: Ordered 4x2 array of corner points (TL, TR, BR, BL)
        output_width: Length in pixels of the output's long side

    Returns:
        Tuple of (rectified_image, transform_matrix)
    """
    src = corners.astype(np.float32)
    quad_w, quad_h = get_quad_dimensions(src)

    # Short side follows from the card's aspect ratio (truncated to int)
    short_side = int(output_width / CARD_ASPECT_RATIO)
    if quad_w > quad_h:
        # Landscape: long side runs horizontally
        out_w, out_h = output_width, short_side
    else:
        # Portrait (rotated 90°): long side runs vertically
        out_w, out_h = short_side, output_width

    # Target rectangle in the same TL, TR, BR, BL order as the source
    right, bottom = out_w - 1, out_h - 1
    dst = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype=np.float32,
    )

    M = cv2.getPerspectiveTransform(src, dst)
    rectified = cv2.warpPerspective(image, M, (out_w, out_h))
    return rectified, M


def compute_scale_factor(
    corners: np.ndarray,
) -> Tuple[float, float]:
    """
    Compute pixels-per-cm scale factor from detected card corners.

    Two independent estimates are made, one from the quad's width and one
    from its height, each divided by the matching physical card dimension.
    The returned scale is their mean; the confidence reflects how closely
    the two estimates agree.

    A degenerate quad with no measurable extent returns (0.0, 0.0) rather
    than dividing zero by zero, which would otherwise produce NaN and end
    up clamped to a confidence of 1.0.

    Args:
        corners: Ordered 4x2 array of corner points

    Returns:
        Tuple of (px_per_cm, confidence), confidence in [0.0, 1.0]
    """
    width_px, height_px = get_quad_dimensions(corners)

    # Determine orientation and compute scale
    if width_px > height_px:
        # Landscape: width corresponds to card width (8.56 cm)
        px_per_cm_w = width_px / CARD_WIDTH_CM
        px_per_cm_h = height_px / CARD_HEIGHT_CM
    else:
        # Portrait: width corresponds to card height (5.398 cm)
        px_per_cm_w = width_px / CARD_HEIGHT_CM
        px_per_cm_h = height_px / CARD_WIDTH_CM

    # No usable scale if neither estimate is positive ("not > 0" also
    # catches NaN coming from invalid corner data).
    larger = max(px_per_cm_w, px_per_cm_h)
    if not larger > 0:
        return 0.0, 0.0

    # Average the two estimates
    px_per_cm = (px_per_cm_w + px_per_cm_h) / 2

    # Confidence based on consistency between width and height estimates
    consistency = 1.0 - abs(px_per_cm_w - px_per_cm_h) / larger
    confidence = max(0.0, min(1.0, consistency))

    return px_per_cm, confidence