Spaces:

arij155
/

cin-validator-service

Running

cin-validator-service / enhanced /detector.py

t544h

Fix: Ajout conf=0.25 pour YOLO et prints de debug

f9cf21d 13 days ago

9.6 kB

	"""
	Modern ID Card Detector using YOLOv11 (Ultralytics) or D-FINE.

	Replaces: TFLite EfficientNet (axis-aligned, no rotation awareness)
	Upgrade path: YOLOv11n → YOLOv11n-OBB → D-FINE-S (fine-tuned)

	References:
	- YOLOv11: Ultralytics (2024), 2.6M params, ~2ms on T4
	- D-FINE-S: arxiv:2410.13842, 10M params, 3.5ms T4 TRT FP16
	- IWPOD-Net: arxiv:2509.06246, 1.8M params, direct 4-corner output
	"""

	import numpy as np
	from dataclasses import dataclass
	from typing import List, Optional, Tuple
	import logging

	logger = logging.getLogger(__name__)


	@dataclass
	class Detection:
	    """Single card detection result.

	    All derived geometry (``center``, ``width``, ``height``, ``area``,
	    ``aspect_ratio``) is returned as built-in ``float`` values, whatever the
	    dtype of ``bbox`` is. Coordinates are cast *before* any arithmetic so
	    that narrow or unsigned array dtypes cannot leak into (or distort) the
	    results.
	    """

	    bbox: np.ndarray  # [x1, y1, x2, y2] in pixel coordinates
	    confidence: float  # Detection confidence [0, 1]
	    class_id: int = 0  # 0 = id_card
	    corners: Optional[np.ndarray] = None  # [4, 2] corner points if available

	    @property
	    def center(self) -> Tuple[float, float]:
	        """Return the (x, y) midpoint of the bounding box."""
	        x1, y1 = float(self.bbox[0]), float(self.bbox[1])
	        x2, y2 = float(self.bbox[2]), float(self.bbox[3])
	        return ((x1 + x2) / 2, (y1 + y2) / 2)

	    @property
	    def width(self) -> float:
	        """Return the horizontal extent of the box (x2 - x1)."""
	        return float(self.bbox[2]) - float(self.bbox[0])

	    @property
	    def height(self) -> float:
	        """Return the vertical extent of the box (y2 - y1)."""
	        return float(self.bbox[3]) - float(self.bbox[1])

	    @property
	    def area(self) -> float:
	        """Return width * height."""
	        return self.width * self.height

	    @property
	    def aspect_ratio(self) -> float:
	        """Return width / height, with the height floored at 1e-6."""
	        # The floor avoids a division by zero for degenerate (flat) boxes.
	        return self.width / max(self.height, 1e-6)

	    def crop_from(self, frame: np.ndarray, padding: float = 0.05) -> np.ndarray:
	        """Extract card crop from frame with optional padding.

	        Args:
	            frame: Image array; only its first two dimensions (rows, columns)
	                are used for clamping.
	            padding: Fraction of the box width/height added on each side.

	        Returns:
	            A copy of the padded box region, clamped to the frame bounds.
	        """
	        h, w = frame.shape[:2]
	        pad_x = int(self.width * padding)
	        pad_y = int(self.height * padding)
	        # Clamp to the frame so the padded box never indexes out of range.
	        x1 = max(0, int(self.bbox[0]) - pad_x)
	        y1 = max(0, int(self.bbox[1]) - pad_y)
	        x2 = min(w, int(self.bbox[2]) + pad_x)
	        y2 = min(h, int(self.bbox[3]) + pad_y)
	        # .copy() detaches the crop from the caller's frame buffer.
	        return frame[y1:y2, x1:x2].copy()


	class IDCardDetector:
	    """
	    ID card detector using YOLOv11 (default) or D-FINE.

	    Improvements over TFLite EfficientNet:
	    - OBB support for rotated cards (YOLOv11-OBB)
	    - Better small object detection
	    - Built-in NMS
	    - Dynamic input sizes
	    - GPU acceleration
	    - Easy fine-tuning with Ultralytics ecosystem

	    If the configured model cannot be loaded, ``detect`` falls back to a mock
	    detector that returns one centered box. Diagnostic output goes through
	    the module ``logger`` at DEBUG level.
	    """

	    # Confidence passed to the YOLO call.
	    # NOTE(review): the RT-DETR path reads config.confidence_threshold while
	    # YOLO uses this fixed value - confirm whether that is intended.
	    YOLO_CONFIDENCE = 0.30
	    # Inference image size passed to the YOLO call.
	    YOLO_IMAGE_SIZE = 640
	    # Geometric filters applied by _validate_detection.
	    MIN_SIDE_PX = 40
	    MIN_AREA_RATIO = 0.05
	    MAX_ASPECT_RATIO = 2.3
	    # Width/height ratio of the box produced by the mock detector.
	    CARD_ASPECT_RATIO = 1.585  # ISO/IEC 7810 ratio

	    def __init__(self, config):
	        """
	        Args:
	            config: DetectorConfig from enhanced/config.py
	        """
	        self.config = config
	        self.model = None
	        # Only the RT-DETR loader sets this; defined here so the attribute
	        # always exists.
	        self.processor = None
	        self._load_model()

	    def _load_model(self):
	        """Load the detection model for the configured backend.

	        Raises:
	            ValueError: If the backend is not "yolo11", "dfine" or "rtdetr".
	        """
	        backend = self.config.backend.value
	        if backend == "yolo11":
	            self._load_yolo()
	        elif backend == "dfine":
	            self._load_dfine()
	        elif backend == "rtdetr":
	            self._load_rtdetr()
	        else:
	            raise ValueError(f"Unknown detector backend: {self.config.backend}")

	    def _load_yolo(self):
	        """Load YOLOv11 model via Ultralytics; leave model as None on failure."""
	        try:
	            from ultralytics import YOLO
	            self.model = YOLO(self.config.model_path)
	            logger.info("Loaded YOLO model from %s", self.config.model_path)
	        except ImportError:
	            logger.warning("ultralytics not installed. Run: pip install ultralytics. Using mock detector.")
	            self.model = None
	        except Exception as e:
	            # Best effort: any load failure degrades to the mock detector.
	            logger.warning("Could not load YOLO model: %s. Using mock detector.", e)
	            self.model = None

	    def _load_dfine(self):
	        """Load D-FINE model via ONNX Runtime; leave model as None on failure."""
	        try:
	            import onnxruntime as ort
	            self.model = ort.InferenceSession(self.config.model_path)
	            logger.info("Loaded D-FINE ONNX model from %s", self.config.model_path)
	            # detect() has no D-FINE inference branch, so even a successfully
	            # loaded session ends up on the mock path. Say so once, at load.
	            logger.warning(
	                "D-FINE inference is not implemented; detect() will return mock detections."
	            )
	        except Exception as e:
	            logger.warning("Could not load D-FINE model: %s. Using mock detector.", e)
	            self.model = None

	    def _load_rtdetr(self):
	        """Load RT-DETR model via HuggingFace transformers; None on failure."""
	        try:
	            from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
	            import torch  # noqa: F401 - imported so a missing torch fails here
	            self.processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
	            self.model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
	            self.model.eval()
	            logger.info("Loaded RT-DETR model from PekingU/rtdetr_r50vd")
	        except Exception as e:
	            logger.warning("Could not load RT-DETR: %s. Using mock detector.", e)
	            self.model = None

	    def detect(self, frame: np.ndarray) -> List[Detection]:
	        """
	        Detect ID cards in a frame.

	        Args:
	            frame: BGR image as numpy array (H, W, 3)

	        Returns:
	            List of Detection objects. Empty when ``frame`` is None or has
	            no pixels.
	        """
	        # Nothing can be detected in a missing or empty image.
	        if frame is None or getattr(frame, "size", 0) == 0:
	            return []

	        if self.model is None:
	            return self._mock_detect(frame)

	        backend = self.config.backend.value
	        if backend == "yolo11":
	            return self._detect_yolo(frame)
	        if backend == "rtdetr":
	            return self._detect_rtdetr(frame)
	        # Backends without an inference path (currently "dfine").
	        return self._mock_detect(frame)

	    def _detect_yolo(self, frame: np.ndarray) -> List[Detection]:
	        """Run YOLO detection and keep boxes that pass the geometric filters."""
	        results = self.model(
	            frame,
	            conf=self.YOLO_CONFIDENCE,
	            iou=self.config.nms_iou_threshold,
	            imgsz=self.YOLO_IMAGE_SIZE,
	            verbose=False,
	        )

	        detections = []
	        for r in results:
	            if r.boxes is None:
	                continue
	            for box in r.boxes:
	                bbox = box.xyxy[0].cpu().numpy()
	                conf = float(box.conf[0])
	                cls_id = int(box.cls[0])

	                det = Detection(bbox=bbox, confidence=conf, class_id=cls_id)

	                # Debug log showing what YOLO found before geometric filtering.
	                logger.debug(
	                    "--- [DEBUG YOLO] Carte trouvée avec confiance: %.2f | Bbox: %s",
	                    conf,
	                    bbox,
	                )

	                # Apply aspect ratio and area filters
	                if self._validate_detection(det, frame.shape):
	                    detections.append(det)
	                else:
	                    logger.debug(
	                        "--- [DEBUG FILTER] Carte REJETÉE par les filtres géométriques (Taille/Ratio)"
	                    )

	        logger.debug(
	            "--- [DEBUG FINAL] Nombre de cartes retenues après filtres : %d",
	            len(detections),
	        )

	        return detections

	    def _detect_rtdetr(self, frame: np.ndarray) -> List[Detection]:
	        """Run RT-DETR detection via HuggingFace transformers."""
	        import torch
	        from PIL import Image

	        image = Image.fromarray(frame[:, :, ::-1])  # BGR to RGB
	        inputs = self.processor(images=image, return_tensors="pt")

	        with torch.no_grad():
	            outputs = self.model(**inputs)

	        # Post-processing rescales boxes to the original (height, width).
	        target_sizes = torch.tensor([(frame.shape[0], frame.shape[1])])
	        results = self.processor.post_process_object_detection(
	            outputs, target_sizes=target_sizes, threshold=self.config.confidence_threshold
	        )

	        detections = []
	        for result in results:
	            for score, label, box in zip(result["scores"], result["labels"], result["boxes"]):
	                det = Detection(
	                    bbox=box.cpu().numpy(),
	                    confidence=float(score),
	                    class_id=int(label),
	                )
	                if self._validate_detection(det, frame.shape):
	                    detections.append(det)

	        return detections

	    def _validate_detection(self, det: Detection, frame_shape: Tuple[int, ...]) -> bool:
	        """
	        Validate a detection using geometric constraints.

	        More permissive than old system (old: 1.2-2.0 aspect ratio).
	        Now handles perspective-distorted cards (apparent aspect ratio varies widely).

	        Args:
	            det: Candidate detection.
	            frame_shape: Shape of the source frame; the first two entries are
	                read as (height, width).

	        Returns:
	            True when the box is at least MIN_SIDE_PX on both sides, covers
	            at least MIN_AREA_RATIO of the frame, and its orientation-
	            independent aspect ratio does not exceed MAX_ASPECT_RATIO.
	        """
	        h, w = frame_shape[:2]
	        frame_area = h * w
	        # A frame without pixels cannot contain a valid card; this also
	        # protects the area-ratio division below.
	        if frame_area <= 0:
	            return False

	        if det.width < self.MIN_SIDE_PX or det.height < self.MIN_SIDE_PX:
	            logger.debug(" -> Échec: Carte trop petite (%.1fx%.1f)", det.width, det.height)
	            return False

	        # Area check
	        area_ratio = det.area / frame_area
	        if area_ratio < self.MIN_AREA_RATIO:
	            logger.debug(
	                " -> Échec: La carte occupe trop peu de place sur la photo (%.1f%%)",
	                area_ratio * 100,
	            )
	            return False

	        # Aspect ratio check (more permissive for tilted/perspective cards).
	        # Taking the larger of ar and 1/ar handles both landscape and portrait
	        # and is always >= 1, so only the upper bound needs testing.
	        ar = det.aspect_ratio
	        effective_ar = max(ar, 1.0 / max(ar, 1e-6))
	        logger.debug(
	            " -> Stats géométriques : Aspect Ratio effectif = %.2f | Ratio Aire = %.1f%%",
	            effective_ar,
	            area_ratio * 100,
	        )
	        if effective_ar > self.MAX_ASPECT_RATIO:
	            logger.debug(" -> Échec: Aspect Ratio hors limites (%.2f)", effective_ar)
	            return False

	        return True

	    def _mock_detect(self, frame: np.ndarray) -> List[Detection]:
	        """
	        Mock detector for testing without a real model.
	        Generates a reasonable detection in the center of the frame.

	        The box spans 40% of the frame width, uses CARD_ASPECT_RATIO for its
	        height, and carries a fixed confidence of 0.85.
	        """
	        h, w = frame.shape[:2]
	        # Simulate a card detection in the center
	        card_w = w * 0.4
	        card_h = card_w / self.CARD_ASPECT_RATIO
	        cx, cy = w / 2, h / 2

	        return [Detection(
	            bbox=np.array([cx - card_w/2, cy - card_h/2, cx + card_w/2, cy + card_h/2]),
	            confidence=0.85,
	            class_id=0,
	        )]