tcg-space / Code /Backend /src /validators /feature_based_validator.py
github-actions[bot]
deploy: backend bundle from 1d16245f820820e68c5c9bcc9de14421d201efc9
380a8c4
"""
Feature-Based Pokemon Card Validator
Validates that extracted features match Pokemon card characteristics.
This acts as a safety net to catch non-Pokemon cards that pass geometric
and back pattern validation.
"""
import numpy as np
from typing import Dict, Tuple, Any, List, Union
import logging
logger = logging.getLogger(__name__)


class FeatureBasedValidator:
    """
    Validates Pokemon cards using extracted feature ranges.

    Each known feature is compared against an expected (min, max) range and
    the fraction of features that fall inside their range is used as a
    "Pokemon likelihood" score:
    - Aspect ratio should be close to 0.716 (standard Pokemon card)
    - Saturation should be moderate (colorful fronts, blue backs)
    - Texture and print quality should match Pokemon cards

    The validator is deliberately fail-open: when validation cannot be
    performed (no feature names, no known features, unexpected error) the
    card is reported as valid so that legitimate cards are never blocked by
    a validator fault.
    """

    # Expected feature ranges for Pokemon cards (based on training data).
    # Tightened ranges to better distinguish Pokemon from other trading cards.
    # Each entry maps a feature name to an inclusive (min, max) range.
    POKEMON_FEATURE_RANGES = {
        # Geometric features - Pokemon cards have very consistent aspect ratio
        'aspect_ratio_accuracy': (0.90, 1.00),  # Very close to 0.716 target (tightened from 0.85)
        # Color features (Pokemon cards have moderate-high saturation and blue backs)
        'mean_saturation': (35.0, 160.0),  # HSV saturation (tightened lower bound)
        'dominant_color_blue': (0.10, 0.65),  # Pokemon backs have blue dominance (tightened)
        # NOTE(review): presumably OpenCV hue units (0-179), where 100-130 is
        # roughly 200-260 degrees - confirm against the feature extractor.
        'mean_hue': (100.0, 130.0),  # Pokemon backs have a blue hue
        # Texture features (Pokemon cards have fine, complex print quality)
        'glcm_contrast': (15.0, 220.0),  # GLCM texture contrast (tightened)
        'glcm_energy': (0.001, 0.40),  # Energy/uniformity (tightened upper bound)
        'glcm_homogeneity': (0.30, 0.90),  # Texture homogeneity
        # Print quality features (Pokemon cards have distinctive print patterns)
        'mean_fft_magnitude': (8.0, 55.0),  # Frequency domain patterns (tightened)
        'blur_score': (100.0, 4000.0),  # Laplacian variance (tightened lower bound)
    }

    def __init__(self, confidence_threshold: float = 0.75, feature_extractor=None):
        """
        Initialize the feature-based validator.

        Args:
            confidence_threshold: Minimum Pokemon likelihood score (0.0-1.0)
            feature_extractor: Optional FeatureExtractor instance for converting
                lists to dicts. Only its ``get_feature_names()`` method is used.
        """
        self.confidence_threshold = confidence_threshold
        self.feature_extractor = feature_extractor

    def _features_to_dict(self, features: Union[List[float], np.ndarray],
                          feature_names: List[str]) -> Dict[str, float]:
        """
        Convert feature list to dictionary using feature names.

        Args:
            features: List or array of feature values
            feature_names: List of feature names

        Returns:
            Dictionary mapping feature names to values. When the two inputs
            differ in length a warning is logged and the extra entries of the
            longer one are dropped (``zip`` semantics).
        """
        if len(features) != len(feature_names):
            logger.warning(
                "Feature count mismatch: %d features vs %d names",
                len(features), len(feature_names),
            )
        return {name: float(value) for name, value in zip(feature_names, features)}

    def _evaluate_ranges(self, features: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Compare every known feature present in ``features`` with its range.

        Shared by ``validate_features`` and ``get_feature_summary`` so both
        always agree on which checks ran and which passed.

        Args:
            features: Mapping of feature name to extracted value.

        Returns:
            One dict per evaluated feature, in ``POKEMON_FEATURE_RANGES``
            order, with keys ``feature``, ``value`` (float), ``range``
            ((min, max) tuple) and ``match`` (plain ``bool``).
        """
        checks = []
        for feature_name, (min_val, max_val) in self.POKEMON_FEATURE_RANGES.items():
            if feature_name not in features:
                continue
            raw_value = features[feature_name]
            try:
                value = float(raw_value)
            except (TypeError, ValueError, OverflowError):
                # A value that is not a number (None, text, multi-element
                # array, ...) carries no evidence either way. Skip it instead
                # of letting it abort the whole validation.
                logger.warning(
                    "Skipping feature %r: value %r is not numeric",
                    feature_name, raw_value,
                )
                continue
            checks.append({
                'feature': feature_name,
                'value': value,
                'range': (min_val, max_val),
                # bool() keeps NumPy booleans out of the results; NaN compares
                # False on both sides and therefore counts as a failed check.
                'match': bool(min_val <= value <= max_val),
            })
        return checks

    def validate_features(self, features: Union[Dict[str, Any], List[float], np.ndarray],
                          feature_names: Union[List[str], None] = None) -> Tuple[bool, float, str]:
        """
        Validate that features match Pokemon card characteristics.

        Args:
            features: Dictionary of extracted features, or list/tuple/array of
                feature values
            feature_names: Optional list of feature names (required if features
                is a sequence and no feature extractor was supplied)

        Returns:
            Tuple of (is_pokemon_like, confidence, reason):
            - is_pokemon_like: True if features match Pokemon patterns
            - confidence: Pokemon likelihood score (0.0-1.0), the fraction of
              evaluated features inside their expected range
            - reason: Human-readable explanation

            Fail-open cases: returns ``(True, 1.0, ...)`` when validation is
            skipped (no names, or no known numeric features) and
            ``(True, 0.0, ...)`` when an unexpected error occurs.
        """
        try:
            # Convert list/tuple/array to dict if needed
            if isinstance(features, (list, tuple, np.ndarray)):
                if feature_names is None:
                    if self.feature_extractor is None:
                        # Cannot validate without feature names
                        return True, 1.0, "Feature validation skipped (no feature names available)"
                    feature_names = self.feature_extractor.get_feature_names()
                features = self._features_to_dict(features, feature_names)

            checks = self._evaluate_ranges(features)
            if not checks:
                # No features available, pass through
                return True, 1.0, "Feature validation skipped (no features extracted)"

            # Pokemon likelihood score = fraction of checks that passed
            total = len(checks)
            passed = sum(1 for check in checks if check['match'])
            confidence = passed / total
            is_pokemon_like = bool(confidence >= self.confidence_threshold)

            if is_pokemon_like:
                reason = (
                    f"Features match Pokemon card patterns "
                    f"({passed}/{total} checks passed, "
                    f"score: {confidence:.2%})"
                )
            else:
                # Only the first 3 failures are listed to keep the message short
                failing_checks = [check for check in checks if not check['match']]
                failure_text = ", ".join(
                    f"{c['feature']}={c['value']:.2f} "
                    f"(expected {c['range'][0]:.2f}-{c['range'][1]:.2f})"
                    for c in failing_checks[:3]
                )
                reason = (
                    f"Features do not match Pokemon card patterns "
                    f"({passed}/{total} checks passed, "
                    f"score: {confidence:.2%}, threshold: {self.confidence_threshold:.2%}). "
                    f"Failing checks: {failure_text}"
                )
            return is_pokemon_like, confidence, reason
        except Exception as e:
            logger.error("Error during feature-based validation: %s", e)
            # On error, pass through to avoid blocking legitimate cards
            return True, 0.0, f"Feature validation error: {e}"

    def get_feature_summary(self, features: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get a summary of feature validation results.

        Args:
            features: Dictionary of extracted features

        Returns:
            Dictionary with keys ``total_checks``, ``passed_checks``,
            ``failed_checks``, ``details`` (one entry per evaluated feature with
            ``feature``, ``value``, ``expected_range`` and ``passed``) and
            ``confidence`` (passed / total, or 1.0 when nothing was checked).
            Non-numeric feature values are skipped and not counted.
        """
        checks = self._evaluate_ranges(features)
        total = len(checks)
        passed = sum(1 for check in checks if check['match'])
        return {
            'total_checks': total,
            'passed_checks': passed,
            'failed_checks': total - passed,
            'details': [
                {
                    'feature': check['feature'],
                    'value': check['value'],
                    'expected_range': (float(check['range'][0]), float(check['range'][1])),
                    'passed': check['match'],
                }
                for check in checks
            ],
            'confidence': passed / total if total else 1.0,
        }

    def validate_pokemon_back_colors(self, back_image, blue_threshold: float = 0.08) -> Tuple[bool, float, str]:
        """
        Validate that back image has Pokemon-specific color characteristics.

        Counts the fraction of pixels whose OpenCV HSV hue lies in 100-125 and
        whose saturation is at least 30. Assuming an 8-bit image, OpenCV stores
        hue as degrees / 2, so this band is roughly 200-250 degrees (blue).
        This is a quick heuristic check that doesn't require TensorFlow.

        Args:
            back_image: Back card image (BGR numpy array)
            blue_threshold: Minimum fraction of blue pixels required (default 0.08).
                Use a higher value (e.g. 0.25) when checking whether a *front* image
                is erroneously a back, to avoid false positives on blue-heavy fronts.
                A threshold <= 0 accepts every image with confidence 1.0.

        Returns:
            Tuple of (is_pokemon_back, confidence, reason). ``confidence`` is
            the blue fraction relative to the threshold, capped at 1.0. On an
            unexpected error the check fails open with ``(True, 0.0, ...)``.
        """
        # Imported lazily; presumably so the feature-range checks stay usable
        # when OpenCV is not installed.
        import cv2

        try:
            hsv = cv2.cvtColor(back_image, cv2.COLOR_BGR2HSV)
            hue = hsv[:, :, 0]
            saturation = hsv[:, :, 1]
            # Blue pixels: hue 100-125 (cyan-blue, NOT purple which is 130-160)
            # Pokemon card backs are distinctively blue, not purple/violet
            blue_mask = (hue >= 100) & (hue <= 125) & (saturation >= 30)
            blue_percentage = float(np.count_nonzero(blue_mask)) / blue_mask.size

            is_pokemon_back = bool(blue_percentage >= blue_threshold)
            if blue_threshold > 0:
                confidence = float(min(blue_percentage / blue_threshold, 1.0))
            else:
                # Nothing to scale against: every image satisfies the threshold.
                confidence = 1.0

            if is_pokemon_back:
                reason = f"Back image has Pokemon blue color pattern ({blue_percentage:.1%} blue pixels)"
            else:
                reason = (
                    f"Back image lacks Pokemon blue color pattern "
                    f"({blue_percentage:.1%} blue pixels, expected ≥{blue_threshold:.0%})"
                )
            return is_pokemon_back, confidence, reason
        except Exception as e:
            logger.error("Error during back color validation: %s", e)
            # Fail open, consistent with validate_features
            return True, 0.0, f"Back color validation error: {e}"