Spaces:

stevelohwc
/

tcg-space

Runtime error

tcg-space / Code /Backend /src /validators /feature_based_validator.py

github-actions[bot]

deploy: backend bundle from 1d16245f820820e68c5c9bcc9de14421d201efc9

380a8c4 17 days ago

10.2 kB

	"""
	Feature-Based Pokemon Card Validator

	Validates that extracted features match Pokemon card characteristics.
	This acts as a safety net to catch non-Pokemon cards that pass geometric
	and back pattern validation.
	"""

	import numpy as np
	from typing import Dict, Tuple, Any, List, Union
	import logging

	logger = logging.getLogger(__name__)


	class FeatureBasedValidator:
	    """
	    Validate Pokemon cards by checking extracted features against known ranges.

	    Each feature listed in ``POKEMON_FEATURE_RANGES`` that is present in the
	    input is compared with its inclusive ``(min, max)`` range. The fraction of
	    passing checks is the Pokemon likelihood score, which is compared with
	    ``confidence_threshold``.

	    The validator is deliberately fail-open: when nothing can be checked, or
	    when an unexpected error occurs, the card is passed through rather than
	    rejected.
	    """

	    # Expected inclusive (min, max) ranges for Pokemon cards (based on training
	    # data). Tightened to better distinguish Pokemon from other trading cards.
	    POKEMON_FEATURE_RANGES = {
	        # Geometric features - Pokemon cards have a very consistent aspect ratio
	        'aspect_ratio_accuracy': (0.90, 1.00),  # Very close to the 0.716 target (tightened from 0.85)

	        # Color features (moderate-high saturation, blue backs)
	        'mean_saturation': (35.0, 160.0),  # HSV saturation (tightened lower bound)
	        'dominant_color_blue': (0.10, 0.65),  # Blue dominance of card backs (tightened)
	        # NOTE(review): hue scale presumably matches OpenCV's 0-179 units used in
	        # validate_pokemon_back_colors - confirm against the feature extractor.
	        'mean_hue': (100.0, 130.0),  # Blue hue band of card backs

	        # Texture features (fine, complex print quality)
	        'glcm_contrast': (15.0, 220.0),  # GLCM texture contrast (tightened)
	        'glcm_energy': (0.001, 0.40),  # Energy/uniformity (tightened upper bound)
	        'glcm_homogeneity': (0.30, 0.90),  # Texture homogeneity

	        # Print quality features (distinctive print patterns)
	        'mean_fft_magnitude': (8.0, 55.0),  # Frequency domain patterns (tightened)
	        'blur_score': (100.0, 4000.0),  # Laplacian variance (tightened lower bound)
	    }

	    def __init__(self, confidence_threshold: float = 0.75, feature_extractor=None):
	        """
	        Initialize the feature-based validator.

	        Args:
	            confidence_threshold: Minimum Pokemon likelihood score (0.0-1.0)
	            feature_extractor: Optional object exposing ``get_feature_names()``;
	                used to name the values when features arrive as a list/array
	        """
	        self.confidence_threshold = confidence_threshold
	        self.feature_extractor = feature_extractor

	    def _features_to_dict(self, features: Union[List[float], np.ndarray],
	                          feature_names: List[str]) -> Dict[str, float]:
	        """
	        Convert a feature list/array to a dictionary keyed by feature name.

	        A length mismatch is logged as a warning; pairing then stops at the
	        shorter of the two sequences (``zip`` semantics).

	        Args:
	            features: List or array of feature values
	            feature_names: List of feature names, in the same order as ``features``

	        Returns:
	            Dictionary mapping feature names to ``float`` values
	        """
	        if len(features) != len(feature_names):
	            logger.warning(
	                "Feature count mismatch: %d features vs %d names",
	                len(features), len(feature_names),
	            )

	        return {name: float(value) for name, value in zip(feature_names, features)}

	    def _evaluate_ranges(self, features: Dict[str, Any]) -> List[Dict[str, Any]]:
	        """
	        Compare every known feature present in ``features`` with its range.

	        Features missing from ``features`` are skipped, not counted as failures.

	        Returns:
	            One dict per checked feature with keys ``feature``, ``value``,
	            ``range`` and ``match``, in ``POKEMON_FEATURE_RANGES`` order
	        """
	        results = []
	        for name, (low, high) in self.POKEMON_FEATURE_RANGES.items():
	            if name not in features:
	                continue
	            value = features[name]
	            results.append({
	                'feature': name,
	                'value': value,
	                'range': (low, high),
	                # bool() turns a numpy.bool_ (produced when value is a NumPy
	                # scalar) into a plain Python bool.
	                'match': bool(low <= value <= high),
	            })
	        return results

	    def validate_features(self, features: Union[Dict[str, Any], List[float], np.ndarray],
	                          feature_names: Union[List[str], None] = None) -> Tuple[bool, float, str]:
	        """
	        Validate that features match Pokemon card characteristics.

	        Args:
	            features: Dictionary of extracted features, or list/array of feature values
	            feature_names: Optional list of feature names. When ``features`` is a
	                list/array and this is omitted, the names are taken from
	                ``self.feature_extractor``; if that is also missing, validation
	                is skipped.

	        Returns:
	            Tuple of (is_pokemon_like, confidence, reason):
	            - is_pokemon_like: True if features match Pokemon patterns
	            - confidence: Pokemon likelihood score (0.0-1.0)
	            - reason: Human-readable explanation

	            All three are plain Python values (``bool``, ``float``, ``str``),
	            even when the input contains NumPy scalars.
	        """
	        try:
	            # Convert list/array to dict if needed
	            if isinstance(features, (list, np.ndarray)):
	                if feature_names is None:
	                    if self.feature_extractor is None:
	                        # Cannot validate without feature names
	                        return True, 1.0, "Feature validation skipped (no feature names available)"
	                    feature_names = self.feature_extractor.get_feature_names()

	                features = self._features_to_dict(features, feature_names)

	            checks = self._evaluate_ranges(features)
	            if not checks:
	                # No features available, pass through
	                return True, 1.0, "Feature validation skipped (no features extracted)"

	            # Pokemon likelihood score = fraction of checks that passed
	            total = len(checks)
	            passed = sum(1 for check in checks if check['match'])
	            confidence = passed / total
	            is_pokemon_like = bool(confidence >= self.confidence_threshold)

	            if is_pokemon_like:
	                reason = (
	                    f"Features match Pokemon card patterns "
	                    f"({passed}/{total} checks passed, "
	                    f"score: {confidence:.2%})"
	                )
	            else:
	                # Report only the first 3 failures to keep the message short
	                failing = [check for check in checks if not check['match']]
	                failure_text = ", ".join(
	                    f"{c['feature']}={c['value']:.2f} "
	                    f"(expected {c['range'][0]:.2f}-{c['range'][1]:.2f})"
	                    for c in failing[:3]
	                )
	                reason = (
	                    f"Features do not match Pokemon card patterns "
	                    f"({passed}/{total} checks passed, "
	                    f"score: {confidence:.2%}, threshold: {self.confidence_threshold:.2%}). "
	                    f"Failing checks: {failure_text}"
	                )

	            return is_pokemon_like, confidence, reason

	        except Exception as e:
	            logger.error("Error during feature-based validation: %s", e)
	            # On error, pass through to avoid blocking legitimate cards
	            return True, 0.0, f"Feature validation error: {str(e)}"

	    def get_feature_summary(self, features: Dict[str, Any]) -> Dict[str, Any]:
	        """
	        Get a summary of feature validation results.

	        Unlike ``validate_features`` this method does not catch exceptions, so
	        a non-numeric feature value raises.

	        Args:
	            features: Dictionary of extracted features

	        Returns:
	            Dictionary with ``total_checks``, ``passed_checks``,
	            ``failed_checks``, per-feature ``details`` and ``confidence``
	            (fraction passed, or 1.0 when nothing could be checked)
	        """
	        checks = self._evaluate_ranges(features)
	        total = len(checks)
	        passed = sum(1 for check in checks if check['match'])

	        return {
	            'total_checks': total,
	            'passed_checks': passed,
	            'failed_checks': total - passed,
	            'details': [
	                {
	                    'feature': check['feature'],
	                    'value': float(check['value']),
	                    'expected_range': (float(check['range'][0]), float(check['range'][1])),
	                    'passed': check['match'],
	                }
	                for check in checks
	            ],
	            'confidence': passed / total if total > 0 else 1.0,
	        }

	    def validate_pokemon_back_colors(self, back_image, blue_threshold: float = 0.08) -> Tuple[bool, float, str]:
	        """
	        Validate that back image has Pokemon-specific color characteristics.

	        Counts the fraction of pixels whose OpenCV hue lies in 100-125 with
	        saturation >= 30. For 8-bit images OpenCV stores hue as degrees / 2
	        (0-179), so this window is roughly 200-250 degrees: blue, excluding
	        purple/violet. This is a quick heuristic check that doesn't require
	        TensorFlow.

	        Args:
	            back_image: Back card image (BGR numpy array; assumed 8-bit - confirm
	                against callers)
	            blue_threshold: Minimum fraction of blue pixels required (default 0.08).
	                Use a higher value (e.g. 0.25) when checking whether a front image
	                is erroneously a back, to avoid false positives on blue-heavy fronts.

	        Returns:
	            Tuple of (is_pokemon_back, confidence, reason). ``confidence`` is
	            the blue fraction relative to the threshold, capped at 1.0. On an
	            unexpected error the check passes with confidence 0.0.
	        """
	        # Imported here rather than at module level, so importing this module
	        # does not itself import OpenCV.
	        import cv2

	        try:
	            # Convert to HSV
	            hsv = cv2.cvtColor(back_image, cv2.COLOR_BGR2HSV)
	            hue = hsv[:, :, 0]
	            saturation = hsv[:, :, 1]

	            # Blue pixels: hue 100-125 (cyan-blue, NOT purple which is 130-160)
	            # Pokemon card backs are distinctively blue, not purple/violet
	            blue_mask = (hue >= 100) & (hue <= 125) & (saturation >= 30)
	            blue_percentage = float(np.sum(blue_mask) / blue_mask.size)

	            is_pokemon_back = bool(blue_percentage >= blue_threshold)
	            if blue_threshold > 0:
	                confidence = float(min(blue_percentage / blue_threshold, 1.0))
	            else:
	                # A non-positive threshold is met by any image; avoid dividing by it.
	                confidence = 1.0

	            if is_pokemon_back:
	                reason = f"Back image has Pokemon blue color pattern ({blue_percentage:.1%} blue pixels)"
	            else:
	                reason = (
	                    f"Back image lacks Pokemon blue color pattern "
	                    f"({blue_percentage:.1%} blue pixels, expected ≥{blue_threshold:.0%})"
	                )

	            return is_pokemon_back, confidence, reason

	        except Exception as e:
	            logger.error("Error during back color validation: %s", e)
	            # Fail open, consistent with validate_features
	            return True, 0.0, f"Back color validation error: {str(e)}"