# ring-sizer/src/card_detection.py
# Uploaded via huggingface_hub by feng-x (commit 347d1a8, verified)
"""
Credit card detection and scale calibration utilities.
This module handles:
- Detecting credit card contour in an image
- Verifying aspect ratio matches standard credit card
- Perspective rectification
- Computing pixels-per-cm scale factor
"""
import cv2
import numpy as np
from typing import Optional, Tuple, Dict, Any, List
from pathlib import Path
# Import debug observer and drawing functions
from .debug_observer import DebugObserver, draw_contours_overlay, draw_candidates_with_scores
# Import shared visualization constants
from .viz_constants import (
FONT_FACE,
Color,
StrategyColor,
FontScale,
FontThickness,
Size,
Layout,
)
# Standard credit card dimensions (ISO/IEC 7810 ID-1).
# Every ID-1 card (credit/debit, most gift cards) shares these physical
# dimensions, which is what makes the card a reliable scale reference.
CARD_WIDTH_MM = 85.60   # long edge, millimetres
CARD_HEIGHT_MM = 53.98  # short edge, millimetres
CARD_WIDTH_CM = CARD_WIDTH_MM / 10   # 8.560 cm
CARD_HEIGHT_CM = CARD_HEIGHT_MM / 10  # 5.398 cm
CARD_ASPECT_RATIO = CARD_WIDTH_MM / CARD_HEIGHT_MM # ~1.586 (long/short)
# Detection parameters: plausible card size relative to the whole frame
MIN_CARD_AREA_RATIO = 0.01 # Card must be at least 1% of image area
MAX_CARD_AREA_RATIO = 0.5 # Card must be at most 50% of image area
def order_corners(corners: np.ndarray) -> np.ndarray:
    """
    Arrange four corner points in canonical order: TL, TR, BR, BL.

    Uses the classic coordinate-sum/difference heuristic: the top-left
    corner minimizes x+y and the bottom-right maximizes it, while the
    top-right minimizes y-x and the bottom-left maximizes it.

    Args:
        corners: array reshapable to 4x2 corner points, any order

    Returns:
        Ordered 4x2 float32 array: top-left, top-right, bottom-right,
        bottom-left
    """
    pts = corners.reshape(4, 2).astype(np.float32)
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1).flatten()  # y - x for each point
    ordered = [
        pts[np.argmin(coord_sum)],   # top-left: smallest x+y
        pts[np.argmin(coord_diff)],  # top-right: smallest y-x
        pts[np.argmax(coord_sum)],   # bottom-right: largest x+y
        pts[np.argmax(coord_diff)],  # bottom-left: largest y-x
    ]
    return np.array(ordered, dtype=np.float32)
def get_quad_dimensions(corners: np.ndarray) -> Tuple[float, float]:
    """
    Compute width and height of a quadrilateral from ordered corners.

    Width is the mean length of the top and bottom edges; height is the
    mean length of the left and right edges, so a mild perspective skew
    is averaged out.

    Args:
        corners: Ordered 4x2 array (TL, TR, BR, BL)

    Returns:
        Tuple of (width, height) in pixels
    """
    tl, tr, br, bl = corners[0], corners[1], corners[2], corners[3]
    edge_len = np.linalg.norm
    width = 0.5 * (edge_len(tr - tl) + edge_len(br - bl))
    height = 0.5 * (edge_len(bl - tl) + edge_len(br - tr))
    return width, height
def score_card_candidate(
    contour: np.ndarray,
    corners: np.ndarray,
    image_area: float,
    aspect_ratio_tolerance: float = 0.15,
) -> Tuple[float, Dict[str, Any]]:
    """
    Score a quadrilateral candidate for being a credit card.

    Candidates originate from minAreaRect, so the corners always form a
    perfect rectangle; scoring therefore only weighs how well the aspect
    ratio matches the ISO card ratio and how much of the frame is covered.

    Args:
        contour: Original contour (minAreaRect box points)
        corners: 4 corner points
        image_area: Total image area for relative sizing
        aspect_ratio_tolerance: Allowed deviation from standard ratio

    Returns:
        Tuple of (score, details_dict). Rejected candidates score 0.0 and
        carry a "reject_reason" entry in the details.
    """
    ordered = order_corners(corners)
    width, height = get_quad_dimensions(ordered)
    quad_area = cv2.contourArea(corners)
    details: Dict[str, Any] = {
        "corners": ordered,
        "width": width,
        "height": height,
        "area": quad_area,
    }
    # Reject candidates covering implausibly little or much of the frame
    area_ratio = quad_area / image_area
    if not (MIN_CARD_AREA_RATIO <= area_ratio <= MAX_CARD_AREA_RATIO):
        details["reject_reason"] = f"area_ratio={area_ratio:.3f}"
        return 0.0, details
    # Degenerate rectangles cannot be scored
    if width <= 0 or height <= 0:
        details["reject_reason"] = "invalid_dimensions"
        return 0.0, details
    # Normalize so the ratio is always >= 1 (long side over short side)
    aspect_ratio = max(width, height) / min(width, height)
    details["aspect_ratio"] = aspect_ratio
    # Reject when the ratio deviates too far from the ISO card ratio
    ratio_diff = abs(aspect_ratio - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO
    if ratio_diff > aspect_ratio_tolerance:
        details["reject_reason"] = f"aspect_ratio={aspect_ratio:.3f}, expected~{CARD_ASPECT_RATIO:.3f}"
        return 0.0, details
    # Blend the two signals equally: size saturates at 10% of the image,
    # ratio match decays linearly within the tolerance band.
    # (No angle check needed — minAreaRect boxes are perfect rectangles.)
    area_score = min(area_ratio / 0.1, 1.0)
    ratio_score = 1.0 - ratio_diff / aspect_ratio_tolerance
    details["score_components"] = {
        "area": area_score,
        "ratio": ratio_score,
    }
    return 0.5 * area_score + 0.5 * ratio_score, details
def find_card_contours(
    image: np.ndarray,
    image_area: float,
    aspect_ratio_tolerance: float = 0.15,
    min_score: float = 0.3,
    debug_dir: Optional[str] = None,
) -> List[np.ndarray]:
    """
    Find potential card contours using a waterfall of detection strategies.
    Strategies are tried in order: Canny → Adaptive → Otsu → Color.
    If a strategy produces a candidate scoring above min_score, subsequent
    strategies are skipped.
    Args:
        image: Input BGR image
        image_area: Total image area in pixels
        aspect_ratio_tolerance: Allowed deviation from standard aspect ratio
        min_score: Minimum score to accept a strategy's candidates
        debug_dir: Optional directory to save debug images
    Returns:
        List of 4-point contour approximations from the first successful
        strategy, or all collected candidates if every strategy fails
    """
    # Create debug observer if debug mode enabled
    observer = DebugObserver(debug_dir) if debug_dir else None
    h, w = image.shape[:2]
    # Derive absolute pixel bounds from the shared module-level ratios so
    # this filter stays consistent with score_card_candidate's area check
    min_area = h * w * MIN_CARD_AREA_RATIO  # At least 1% of image
    max_area = h * w * MAX_CARD_AREA_RATIO  # At most 50% of image
    # Save original image
    if observer:
        observer.save_stage("01_original", image)
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if observer:
        observer.save_stage("02_grayscale", gray)
    # Apply bilateral filter to reduce noise while keeping edges
    filtered = cv2.bilateralFilter(gray, 11, 75, 75)
    if observer:
        observer.save_stage("03_bilateral_filtered", filtered)
    # NOTE: aspect_tolerance defaults to the caller-supplied tolerance.
    # (Bug fix: it was previously hard-coded to 0.15, so a non-default
    # aspect_ratio_tolerance was applied in scoring but silently ignored
    # when filtering candidate rectangles here.)
    def extract_quads(contours, epsilon_factor=0.02, min_rectangularity=0.7,
                      aspect_tolerance=aspect_ratio_tolerance):
        """Extract quadrilaterals from contours using minAreaRect.
        Shape constraints:
        - Rectangularity (contour_area / rect_area): rejects irregular shapes
        - Aspect ratio: rejects rectangles that don't match card proportions
        """
        quads = []
        for contour in contours:
            contour_area = cv2.contourArea(contour)
            if contour_area < min_area or contour_area > max_area:
                continue
            # Require the simplified polygon to have at least 4 vertices
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon_factor * peri, True)
            if len(approx) < 4:
                continue
            rect = cv2.minAreaRect(contour)
            box = cv2.boxPoints(rect).astype(np.float32)
            rect_area = cv2.contourArea(box)
            if rect_area <= 0:
                continue
            # How well the contour fills its minimal bounding rectangle
            rectangularity = contour_area / rect_area
            if rectangularity < min_rectangularity:
                continue
            (_, _), (bw, bh), _ = rect
            if bw <= 0 or bh <= 0:
                continue
            aspect = max(bw, bh) / min(bw, bh)
            if abs(aspect - CARD_ASPECT_RATIO) / CARD_ASPECT_RATIO > aspect_tolerance:
                continue
            quads.append(box.reshape(4, 1, 2))
        return quads
    def dedup_quads(quads, center_threshold=50):
        """Remove near-duplicate boxes, keeping the largest when centers overlap.
        Two boxes are considered duplicates if their centers are within
        center_threshold pixels of each other.
        """
        if len(quads) <= 1:
            return quads
        # Sort by area descending so largest comes first
        quads_with_area = [(q, cv2.contourArea(q)) for q in quads]
        quads_with_area.sort(key=lambda x: x[1], reverse=True)
        kept = []
        for quad, area in quads_with_area:
            center = quad.reshape(4, 2).mean(axis=0)
            is_dup = False
            for kept_quad in kept:
                kept_center = kept_quad.reshape(4, 2).mean(axis=0)
                dist = np.linalg.norm(center - kept_center)
                if dist < center_threshold:
                    is_dup = True
                    break
            if not is_dup:
                kept.append(quad)
        return kept
    def score_best(quads):
        """Return the best score among quads."""
        best = 0.0
        for q in quads:
            corners = q.reshape(4, 2)
            score, _ = score_card_candidate(
                q, corners, image_area, aspect_ratio_tolerance
            )
            best = max(best, score)
        return best
    # --- Waterfall: try strategies in order, stop on first success ---
    # Strategy 1: Canny edge detection with various thresholds
    canny_candidates = []
    canny_configs = [(20, 60), (30, 100), (50, 150), (75, 200), (100, 250)]
    saved_canny_indices = [0, 2, 4]
    for idx, (canny_low, canny_high) in enumerate(canny_configs):
        edges = cv2.Canny(filtered, canny_low, canny_high)
        if idx in saved_canny_indices and observer:
            observer.save_stage(f"04_canny_{canny_low}_{canny_high}", edges)
        # Close small gaps so the card border forms a complete loop
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        edges_morphed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
        if idx == 2 and observer:
            observer.save_stage("07_canny_morphology", edges_morphed)
        contours, _ = cv2.findContours(edges_morphed, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        canny_candidates.extend(extract_quads(contours))
    canny_candidates = dedup_quads(canny_candidates)
    if observer and canny_candidates:
        observer.draw_and_save("08_canny_contours", image,
            draw_contours_overlay, canny_candidates, "Canny Edge Detection", StrategyColor.CANNY)
    if canny_candidates and score_best(canny_candidates) >= min_score:
        return canny_candidates
    # Strategy 2: Adaptive thresholding (for varying lighting)
    adaptive_candidates = []
    adaptive_configs = [(11, 2), (21, 5), (31, 10), (51, 10)]
    saved_adaptive = [0, 2]
    for idx, (block_size, C) in enumerate(adaptive_configs):
        thresh = cv2.adaptiveThreshold(
            filtered, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, block_size, C
        )
        if idx in saved_adaptive and observer:
            if idx == 0:
                observer.save_stage("09_adaptive_11_2", thresh)
            elif idx == 2:
                observer.save_stage("10_adaptive_31_10", thresh)
        # Search both polarities: the card may be lighter or darker than
        # its surroundings
        for img in [thresh, 255 - thresh]:
            contours, _ = cv2.findContours(img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
            adaptive_candidates.extend(extract_quads(contours))
    adaptive_candidates = dedup_quads(adaptive_candidates)
    if observer and adaptive_candidates:
        observer.draw_and_save("11_adaptive_contours", image,
            draw_contours_overlay, adaptive_candidates, "Adaptive Thresholding", StrategyColor.ADAPTIVE)
    if adaptive_candidates and score_best(adaptive_candidates) >= min_score:
        return adaptive_candidates
    # Strategy 3: Otsu's thresholding
    otsu_candidates = []
    _, otsu = cv2.threshold(filtered, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    if observer:
        observer.save_stage("12_otsu_binary", otsu)
    otsu_inverted = 255 - otsu
    if observer:
        observer.save_stage("13_otsu_inverted", otsu_inverted)
    for img in [otsu, otsu_inverted]:
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        img_morphed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        contours, _ = cv2.findContours(img_morphed, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        otsu_candidates.extend(extract_quads(contours))
    otsu_candidates = dedup_quads(otsu_candidates)
    if observer and otsu_candidates:
        observer.draw_and_save("14_otsu_contours", image,
            draw_contours_overlay, otsu_candidates, "Otsu Thresholding", StrategyColor.OTSU)
    if otsu_candidates and score_best(otsu_candidates) >= min_score:
        return otsu_candidates
    # Strategy 4: Color-based segmentation (gray card on light background)
    color_candidates = []
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    sat = hsv[:, :, 1]
    if observer:
        observer.save_stage("15_hsv_saturation", sat)
    # Low saturation selects gray/neutral regions
    _, low_sat_mask = cv2.threshold(sat, 30, 255, cv2.THRESH_BINARY_INV)
    if observer:
        observer.save_stage("16_low_sat_mask", low_sat_mask)
    # Restrict to mid-brightness values to exclude shadows and highlights
    val = hsv[:, :, 2]
    gray_mask = cv2.bitwise_and(low_sat_mask, cv2.inRange(val, 80, 200))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_CLOSE, kernel)
    gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_OPEN, kernel)
    if observer:
        observer.save_stage("17_gray_mask", gray_mask)
    contours, _ = cv2.findContours(gray_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    color_candidates = dedup_quads(extract_quads(contours, epsilon_factor=0.03))
    if observer and color_candidates:
        observer.draw_and_save("18_color_contours", image,
            draw_contours_overlay, color_candidates, "Color-Based Detection", StrategyColor.COLOR_BASED)
    if color_candidates and score_best(color_candidates) >= min_score:
        return color_candidates
    # No strategy succeeded — return all collected candidates as last resort
    all_candidates = canny_candidates + adaptive_candidates + otsu_candidates + color_candidates
    if observer and all_candidates:
        observer.draw_and_save("19_all_candidates", image,
            draw_contours_overlay, all_candidates, "All Candidates (fallback)", StrategyColor.ALL_CANDIDATES)
    return all_candidates
def detect_credit_card(
    image: np.ndarray,
    aspect_ratio_tolerance: float = 0.15,
    debug_dir: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Detect a credit card in the image.

    Runs the strategy waterfall to collect candidate quadrilaterals, scores
    each one, and returns the best match if it clears the 0.3 threshold.

    Args:
        image: Input BGR image
        aspect_ratio_tolerance: Allowed deviation from standard aspect ratio
        debug_dir: Optional directory to save debug images

    Returns:
        Dictionary containing:
            - corners: 4x2 array of corner points (ordered)
            - contour: Full contour points
            - confidence: Detection confidence score
            - width_px, height_px: Detected dimensions
            - aspect_ratio: Detected aspect ratio
        Or None if no card detected
    """
    # Create debug observer if debug mode enabled
    observer = DebugObserver(debug_dir) if debug_dir else None
    if observer:
        print(f"Saving card detection debug images to: {debug_dir}")
    h, w = image.shape[:2]
    image_area = h * w
    # Find candidate contours (waterfall: stops after first successful strategy)
    candidates = find_card_contours(
        image, image_area=image_area,
        aspect_ratio_tolerance=aspect_ratio_tolerance,
        debug_dir=debug_dir,
    )
    if not candidates:
        if observer:
            print(" No candidates found")
        return None
    # Score every candidate while tracking the best one seen so far
    best_score = 0.0
    best_result = None
    all_scored = []
    for quad in candidates:
        quad_corners = quad.reshape(4, 2)
        score, details = score_card_candidate(
            quad, quad_corners, image_area, aspect_ratio_tolerance
        )
        all_scored.append((quad_corners, score, details))
        if score > best_score:
            best_score, best_result = score, details
    # Keep only the five highest-scoring candidates for visualization
    all_scored.sort(key=lambda item: item[1], reverse=True)
    top_candidates = all_scored[:5]
    if observer and top_candidates:
        observer.draw_and_save("20_scored_candidates", image,
            draw_candidates_with_scores, top_candidates, "Top 5 Candidates")
    if best_result is None or best_score < 0.3:
        if observer:
            print(f" Best score {best_score:.2f} below threshold 0.3")
        return None
    # Render the final detection overlay for debugging
    if observer:
        overlay = image.copy()
        pts = best_result["corners"].astype(np.int32)
        cv2.polylines(overlay, [pts], True, Color.GREEN, Size.CONTOUR_THICK)
        # Mark each corner
        for pt in pts:
            cv2.circle(overlay, tuple(pt), Size.CORNER_RADIUS + 2, Color.RED, -1)
        # Annotate with detection details
        info_lines = [
            "Final Detection",
            f"Score: {best_score:.3f}",
            f"Aspect Ratio: {best_result['aspect_ratio']:.3f}",
            f"Dimensions: {best_result['width']:.0f}x{best_result['height']:.0f}px",
        ]
        text_y = Layout.TITLE_Y
        for line in info_lines:
            # White outline first, then green fill for readability
            for text_color, thickness in (
                (Color.WHITE, FontThickness.SUBTITLE_OUTLINE),
                (Color.GREEN, FontThickness.SUBTITLE),
            ):
                cv2.putText(
                    overlay, line, (Layout.TEXT_OFFSET_X, text_y),
                    FONT_FACE, FontScale.SUBTITLE, text_color,
                    thickness, cv2.LINE_AA
                )
            text_y += Layout.LINE_SPACING
        observer.save_stage("21_final_detection", overlay)
        print(f" Saved 21 debug images")
    return {
        "corners": best_result["corners"],
        "contour": best_result["corners"],
        "confidence": best_score,
        "width_px": best_result["width"],
        "height_px": best_result["height"],
        "aspect_ratio": best_result["aspect_ratio"],
    }
def rectify_card(
    image: np.ndarray,
    corners: np.ndarray,
    output_width: int = 856,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Apply perspective transform to rectify the card region.

    The output dimensions preserve the standard card aspect ratio; the
    card's orientation (landscape vs portrait) decides which output axis
    gets output_width.

    Args:
        image: Input BGR image
        corners: Ordered 4x2 array of corner points (TL, TR, BR, BL)
        output_width: Width of output image (height computed from aspect ratio)

    Returns:
        Tuple of (rectified_image, transform_matrix)
    """
    src = corners.astype(np.float32)
    width, height = get_quad_dimensions(src)
    if width > height:
        # Landscape orientation: long edge runs horizontally
        out_w, out_h = output_width, int(output_width / CARD_ASPECT_RATIO)
    else:
        # Portrait orientation (rotated 90°): long edge runs vertically
        out_w, out_h = int(output_width / CARD_ASPECT_RATIO), output_width
    # Destination rectangle corners in the same TL, TR, BR, BL order
    dst = np.array(
        [[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
        dtype=np.float32,
    )
    transform = cv2.getPerspectiveTransform(src, dst)
    rectified = cv2.warpPerspective(image, transform, (out_w, out_h))
    return rectified, transform
def compute_scale_factor(
    corners: np.ndarray,
) -> Tuple[float, float]:
    """
    Compute pixels-per-cm scale factor from detected card corners.

    Maps the measured pixel dimensions onto the known physical card edges
    (8.56 cm long edge, 5.398 cm short edge) and averages the two
    resulting estimates.

    Args:
        corners: Ordered 4x2 array of corner points

    Returns:
        Tuple of (px_per_cm, confidence), where confidence in [0, 1]
        reflects how closely the two independent estimates agree
    """
    width_px, height_px = get_quad_dimensions(corners)
    # Orientation decides which pixel dimension maps to the card's long edge
    if width_px > height_px:
        # Landscape: width corresponds to card width (8.56 cm)
        est_w = width_px / CARD_WIDTH_CM
        est_h = height_px / CARD_HEIGHT_CM
    else:
        # Portrait: width corresponds to card height (5.398 cm)
        est_w = width_px / CARD_HEIGHT_CM
        est_h = height_px / CARD_WIDTH_CM
    px_per_cm = (est_w + est_h) / 2
    # Relative agreement between the two estimates, clamped to [0, 1]
    consistency = 1.0 - abs(est_w - est_h) / max(est_w, est_h)
    confidence = max(0.0, min(1.0, consistency))
    return px_per_cm, confidence