t544h
Fix: Ajout conf=0.25 pour YOLO et prints de debug
f9cf21d
"""
Modern ID Card Detector using YOLOv11 (Ultralytics) or D-FINE.
Replaces: TFLite EfficientNet (axis-aligned, no rotation awareness)
Upgrade path: YOLOv11n → YOLOv11n-OBB → D-FINE-S (fine-tuned)
References:
- YOLOv11: Ultralytics (2024), 2.6M params, ~2ms on T4
- D-FINE-S: arxiv:2410.13842, 10M params, 3.5ms T4 TRT FP16
- IWPOD-Net: arxiv:2509.06246, 1.8M params, direct 4-corner output
"""
import numpy as np
from dataclasses import dataclass
from typing import List, Optional, Tuple
import logging
logger = logging.getLogger(__name__)
@dataclass
class Detection:
    """Single card detection result.

    Attributes:
        bbox: Axis-aligned box as ``[x1, y1, x2, y2]`` in pixel coordinates.
        confidence: Detection confidence in ``[0, 1]``.
        class_id: Class index reported by the model (0 = id_card).
        corners: Optional ``[4, 2]`` array of corner points, if available.
    """

    bbox: np.ndarray  # [x1, y1, x2, y2] in pixel coordinates
    confidence: float  # Detection confidence [0, 1]
    class_id: int = 0  # 0 = id_card
    corners: Optional[np.ndarray] = None  # [4, 2] corner points if available

    @property
    def center(self) -> Tuple[float, float]:
        """Return the ``(x, y)`` midpoint of the bounding box."""
        return (
            float((self.bbox[0] + self.bbox[2]) / 2),
            float((self.bbox[1] + self.bbox[3]) / 2),
        )

    @property
    def width(self) -> float:
        """Return the horizontal extent ``x2 - x1`` of the bounding box."""
        return float(self.bbox[2] - self.bbox[0])

    @property
    def height(self) -> float:
        """Return the vertical extent ``y2 - y1`` of the bounding box."""
        return float(self.bbox[3] - self.bbox[1])

    @property
    def area(self) -> float:
        """Return ``width * height`` of the bounding box."""
        return self.width * self.height

    @property
    def aspect_ratio(self) -> float:
        """Return ``width / height``; the height is floored at 1e-6."""
        # The floor keeps a degenerate (zero-height) box from dividing by zero.
        return self.width / max(self.height, 1e-6)

    def crop_from(self, frame: np.ndarray, padding: float = 0.05) -> np.ndarray:
        """Extract card crop from frame with optional padding.

        Args:
            frame: Image array whose first two axes are (height, width).
            padding: Fraction of the box width/height added on each side.

        Returns:
            A copy of the padded box region, clipped to the frame. The crop
            is empty when the box does not overlap the frame.
        """
        h, w = frame.shape[:2]
        pad_x = int(self.width * padding)
        pad_y = int(self.height * padding)
        x1 = max(0, int(self.bbox[0]) - pad_x)
        y1 = max(0, int(self.bbox[1]) - pad_y)
        # Never let the far edge fall below the near edge: a negative stop
        # index would be read by NumPy as "counted from the end" and return
        # most of the frame for a box that lies outside it.
        x2 = max(x1, min(w, int(self.bbox[2]) + pad_x))
        y2 = max(y1, min(h, int(self.bbox[3]) + pad_y))
        return frame[y1:y2, x1:x2].copy()
class IDCardDetector:
    """
    ID card detector using YOLOv11 (default) or D-FINE.

    Improvements over TFLite EfficientNet:
    - OBB support for rotated cards (YOLOv11-OBB)
    - Better small object detection
    - Built-in NMS
    - Dynamic input sizes
    - GPU acceleration
    - Easy fine-tuning with Ultralytics ecosystem

    When a backend cannot be loaded, ``self.model`` stays ``None`` and
    ``detect`` returns the synthetic result of ``_mock_detect``.

    NOTE(review): ``_detect_yolo`` only reads ``r.boxes``; results of an OBB
    checkpoint are presumably exposed elsewhere and would be skipped here -
    confirm before relying on rotated-card support.
    """

    # Thresholds passed to the YOLO call.
    # NOTE(review): the RT-DETR path reads config.confidence_threshold
    # instead - confirm whether YOLO should share it.
    YOLO_CONFIDENCE = 0.30
    YOLO_IMAGE_SIZE = 640

    # Geometric acceptance limits used by _validate_detection.
    MIN_SIDE_PX = 40  # minimum box width and height, in pixels
    MIN_AREA_RATIO = 0.05  # minimum box area as a fraction of the frame
    MAX_ASPECT_RATIO = 2.3  # maximum long-side / short-side ratio

    def __init__(self, config):
        """
        Args:
            config: DetectorConfig from enhanced/config.py
        """
        self.config = config
        self.model = None
        # Only populated by the RT-DETR loader; defined up front so the
        # attribute always exists.
        self.processor = None
        # Lets the "no inference path" warning fire once instead of per frame.
        self._warned_no_inference = False
        self._load_model()

    def _load_model(self):
        """Load the detection model selected by ``config.backend``.

        Raises:
            ValueError: If the backend is not yolo11, dfine or rtdetr.
        """
        backend = self.config.backend.value
        if backend == "yolo11":
            self._load_yolo()
        elif backend == "dfine":
            self._load_dfine()
        elif backend == "rtdetr":
            self._load_rtdetr()
        else:
            raise ValueError(f"Unknown detector backend: {self.config.backend}")

    def _load_yolo(self):
        """Load YOLOv11 model via Ultralytics; leave the model unset on failure."""
        try:
            from ultralytics import YOLO
            self.model = YOLO(self.config.model_path)
            logger.info(f"Loaded YOLO model from {self.config.model_path}")
        except ImportError:
            logger.warning("ultralytics not installed. Run: pip install ultralytics. Using mock detector.")
            self.model = None
        except Exception as e:
            # Deliberate best-effort: any load failure falls back to the mock.
            logger.warning(f"Could not load YOLO model: {e}. Using mock detector.")
            self.model = None

    def _load_dfine(self):
        """Load D-FINE model via ONNX Runtime; leave the model unset on failure."""
        try:
            import onnxruntime as ort
            self.model = ort.InferenceSession(self.config.model_path)
            logger.info(f"Loaded D-FINE ONNX model from {self.config.model_path}")
        except Exception as e:
            logger.warning(f"Could not load D-FINE model: {e}. Using mock detector.")
            self.model = None

    def _load_rtdetr(self):
        """Load RT-DETR model via HuggingFace transformers.

        Loads the ``PekingU/rtdetr_r50vd`` checkpoint and its image
        processor; ``config.model_path`` is not used on this path.
        """
        try:
            from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
            # Imported inside the try so a missing torch install also
            # triggers the mock fallback below.
            import torch
            self.processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
            self.model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
            self.model.eval()
            logger.info("Loaded RT-DETR model from PekingU/rtdetr_r50vd")
        except Exception as e:
            logger.warning(f"Could not load RT-DETR: {e}. Using mock detector.")
            self.model = None

    def detect(self, frame: np.ndarray) -> List["Detection"]:
        """
        Detect ID cards in a frame.

        Args:
            frame: BGR image as numpy array (H, W, 3)

        Returns:
            List of Detection objects. When no model is loaded, or the
            backend has no inference path, this is the mock detection.
        """
        if self.model is None:
            return self._mock_detect(frame)

        backend = self.config.backend.value
        if backend == "yolo11":
            return self._detect_yolo(frame)
        if backend == "rtdetr":
            return self._detect_rtdetr(frame)

        # A model was loaded (e.g. dfine) but nothing here can run it. Keep
        # the mock fallback, but say so instead of returning it silently.
        if not self._warned_no_inference:
            logger.warning(
                "No inference path implemented for backend %r; "
                "returning mock detections.",
                backend,
            )
            self._warned_no_inference = True
        return self._mock_detect(frame)

    def _detect_yolo(self, frame: np.ndarray) -> List["Detection"]:
        """Run YOLO detection and keep the boxes passing the geometric filters."""
        results = self.model(
            frame,
            conf=self.YOLO_CONFIDENCE,
            iou=self.config.nms_iou_threshold,
            imgsz=self.YOLO_IMAGE_SIZE,
            verbose=False,
        )
        detections = []
        for r in results:
            boxes = r.boxes
            if boxes is None:
                continue
            # Move each tensor to host memory once per result, not once per box.
            xyxy = boxes.xyxy.cpu().numpy()
            scores = boxes.conf.cpu().numpy()
            class_ids = boxes.cls.cpu().numpy()
            for bbox, score, cls_id in zip(xyxy, scores, class_ids):
                conf = float(score)
                det = Detection(bbox=bbox, confidence=conf, class_id=int(cls_id))
                # Debug log showing what YOLO found before the geometric filter
                logger.debug(
                    "--- [DEBUG YOLO] Carte trouvée avec confiance: %.2f | Bbox: %s",
                    conf,
                    bbox,
                )
                # Apply aspect ratio and area filters
                if self._validate_detection(det, frame.shape):
                    detections.append(det)
                else:
                    logger.debug(
                        "--- [DEBUG FILTER] Carte REJETÉE par les filtres géométriques (Taille/Ratio)"
                    )
        logger.debug(
            "--- [DEBUG FINAL] Nombre de cartes retenues après filtres : %d",
            len(detections),
        )
        return detections

    def _detect_rtdetr(self, frame: np.ndarray) -> List["Detection"]:
        """Run RT-DETR detection via HuggingFace transformers."""
        import torch
        from PIL import Image

        image = Image.fromarray(frame[:, :, ::-1])  # BGR to RGB
        inputs = self.processor(images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Post-processing rescales boxes to the original (height, width).
        target_sizes = torch.tensor([(frame.shape[0], frame.shape[1])])
        results = self.processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=self.config.confidence_threshold
        )
        detections = []
        for result in results:
            for score, label, box in zip(result["scores"], result["labels"], result["boxes"]):
                det = Detection(
                    bbox=box.cpu().numpy(),
                    confidence=float(score),
                    class_id=int(label),
                )
                if self._validate_detection(det, frame.shape):
                    detections.append(det)
        return detections

    def _validate_detection(self, det: "Detection", frame_shape: Tuple[int, ...]) -> bool:
        """
        Validate a detection using geometric constraints.

        More permissive than old system (old: 1.2-2.0 aspect ratio).
        Now handles perspective-distorted cards (apparent aspect ratio varies widely).

        Args:
            det: Detection to check; its width, height, area and
                aspect_ratio are read.
            frame_shape: Shape of the source frame; the first two entries
                are taken as (height, width).

        Returns:
            True when the box is at least MIN_SIDE_PX on both sides, covers
            at least MIN_AREA_RATIO of the frame and its long/short side
            ratio does not exceed MAX_ASPECT_RATIO.
        """
        h, w = frame_shape[:2]
        frame_area = h * w
        if frame_area <= 0:
            # An empty frame cannot contain a card; also avoids dividing by zero.
            return False

        if det.width < self.MIN_SIDE_PX or det.height < self.MIN_SIDE_PX:
            logger.debug(
                " -> Échec: Carte trop petite (%.1fx%.1f)", det.width, det.height
            )
            return False

        # Area check
        area_ratio = det.area / frame_area
        if area_ratio < self.MIN_AREA_RATIO:
            logger.debug(
                " -> Échec: La carte occupe trop peu de place sur la photo (%.1f%%)",
                area_ratio * 100,
            )
            return False

        # Aspect ratio check (more permissive for tilted/perspective cards)
        ar = det.aspect_ratio
        inv_ar = 1.0 / max(ar, 1e-6)
        effective_ar = max(ar, inv_ar)  # Handle both landscape and portrait
        logger.debug(
            " -> Stats géométriques : Aspect Ratio effectif = %.2f | Ratio Aire = %.1f%%",
            effective_ar,
            area_ratio * 100,
        )
        # Written as a positive range test so that NaN geometry is rejected too.
        if not 1.0 <= effective_ar <= self.MAX_ASPECT_RATIO:
            logger.debug(
                " -> Échec: Aspect Ratio hors limites (%.2f)", effective_ar
            )
            return False
        return True

    def _mock_detect(self, frame: np.ndarray) -> List["Detection"]:
        """
        Mock detector for testing without a real model.

        Generates a single detection centered in the frame, 40% of the
        frame width, with a fixed confidence of 0.85.
        """
        h, w = frame.shape[:2]
        # Simulate a card detection in the center
        card_w = w * 0.4
        card_h = card_w / 1.585  # ISO/IEC 7810 ratio
        cx, cy = w / 2, h / 2
        return [Detection(
            bbox=np.array([cx - card_w/2, cy - card_h/2, cx + card_w/2, cy + card_h/2]),
            confidence=0.85,
            class_id=0,
        )]