import cv2
import numpy as np
import json
from typing import Tuple, List, Dict, Any
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def load_detection_models() -> Tuple[Any, Any, List[str]]:
    """
    Load face detection and object detection models.

    Returns:
        Tuple of (face_cascade, object_net, class_names)
    """
    try:
        # Load face detection cascade
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )

        # Load object detection model (MobileNet SSD)
        model_path = "MobileNetSSD_deploy.prototxt"
        weights_path = "MobileNetSSD_deploy.caffemodel"
        try:
            object_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
        except cv2.error:
            # If the model files are missing, fall back to a None placeholder
            logger.warning("Object detection model files not found. Using placeholder.")
            object_net = None

        # PASCAL VOC class names used by the Caffe MobileNet SSD model
        class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat",
            "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
            "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
            "sofa", "train", "tvmonitor"
        ]

        return face_cascade, object_net, class_names
    except Exception as e:
        logger.error(f"Error loading models: {e}")
        return None, None, []


def process_image(
    image: np.ndarray,
    face_cascade: Any,
    object_net: Any,
    class_names: List[str],
    enable_face_detection: bool,
    enable_object_detection: bool,
    face_confidence: float,
    object_confidence: float
) -> Tuple[np.ndarray, List[Dict], List[Dict]]:
    """
    Process the input image for face and object detection.

    Args:
        image: Input image (RGB)
        face_cascade: Face detection cascade
        object_net: Object detection network
        class_names: List of class names
        enable_face_detection: Whether to detect faces
        enable_object_detection: Whether to detect objects
        face_confidence: Face detection confidence threshold
        object_confidence: Object detection confidence threshold

    Returns:
        Tuple of (original image, face_results, object_results)
    """
    # Convert RGB to BGR for OpenCV processing
    if len(image.shape) == 3 and image.shape[2] == 3:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        image_bgr = image.copy()

    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)

    face_results = []
    object_results = []

    # Face detection
    if enable_face_detection and face_cascade is not None:
        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )
        for i, (x, y, w, h) in enumerate(faces):
            face_results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascades do not report a confidence score
                "label": "face"
            })

    # Object detection
    if enable_object_detection and object_net is not None:
        try:
            h, w = image_bgr.shape[:2]
            blob = cv2.dnn.blobFromImage(
                image_bgr, 0.007843, (300, 300), 127.5
            )
            object_net.setInput(blob)
            detections = object_net.forward()

            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence > object_confidence:
                    idx = int(detections[0, 0, i, 1])
                    if idx < len(class_names):
                        x1 = int(detections[0, 0, i, 3] * w)
                        y1 = int(detections[0, 0, i, 4] * h)
                        x2 = int(detections[0, 0, i, 5] * w)
                        y2 = int(detections[0, 0, i, 6] * h)
                        object_results.append({
                            "id": i,
                            "bbox": [x1, y1, x2 - x1, y2 - y1],
                            "confidence": float(confidence),
                            "label": class_names[idx]
                        })
        except Exception as e:
            logger.warning(f"Object detection failed: {e}")

    return image, face_results, object_results


def draw_detections(
    image: np.ndarray,
    face_results: List[Dict],
    object_results: List[Dict],
    show_labels: bool,
    box_color: str
) -> np.ndarray:
    """
    Draw bounding boxes and labels on the image.

    Args:
        image: Input image (RGB)
        face_results: Face detection results
        object_results: Object detection results
        show_labels: Whether to show labels
        box_color: Color for bounding boxes

    Returns:
        Image with drawn detections (RGB)
    """
    # Convert to BGR for OpenCV drawing
    if len(image.shape) == 3 and image.shape[2] == 3:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        image_bgr = image.copy()

    # Color mapping (BGR)
    color_map = {
        "red": (0, 0, 255),
        "green": (0, 255, 0),
        "blue": (255, 0, 0),
        "yellow": (0, 255, 255),
        "purple": (255, 0, 255),
        "orange": (0, 165, 255)
    }
    color = color_map.get(box_color, (0, 0, 255))

    # Draw face detections
    for face in face_results:
        x, y, w, h = face["bbox"]
        cv2.rectangle(image_bgr, (x, y), (x + w, y + h), color, 2)

        if show_labels:
            label = f"Face {face['id']}"
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
            cv2.rectangle(
                image_bgr,
                (x, y - label_size[1] - 10),
                (x + label_size[0], y),
                color, -1
            )
            cv2.putText(
                image_bgr, label, (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
            )

    # Draw object detections
    for obj in object_results:
        x, y, w, h = obj["bbox"]
        cv2.rectangle(image_bgr, (x, y), (x + w, y + h), color, 2)

        if show_labels:
            label = f"{obj['label']}: {obj['confidence']:.2f}"
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
            cv2.rectangle(
                image_bgr,
                (x, y - label_size[1] - 10),
                (x + label_size[0], y),
                color, -1
            )
            cv2.putText(
                image_bgr, label, (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
            )

    # Convert back to RGB
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)


def format_results(results: List[Dict], result_type: str) -> str:
    """
    Format detection results as a readable string.

    Args:
        results: Detection results
        result_type: Type of results (face/object)

    Returns:
        Formatted string
    """
    if not results:
        return f"No {result_type}s detected"

    output = [f"Detected {len(results)} {result_type}s:"]
    for result in results:
        bbox = result["bbox"]
        output.append(
            f"  - {result_type.capitalize()} {result['id']}: "
            f"Position({bbox[0]}, {bbox[1]}), Size({bbox[2]}x{bbox[3]})"
        )
        if "confidence" in result:
            output.append(f"    Confidence: {result['confidence']:.2f}")
        if "label" in result and result["label"] != result_type:
            output.append(f"    Label: {result['label']}")

    return "\n".join(output)
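

# Example usage: a minimal sketch showing how the functions above compose,
# not part of the original module. It assumes the MobileNet SSD files named
# in load_detection_models() sit next to this script and that a local image
# "sample.jpg" exists; both paths are placeholders.
if __name__ == "__main__":
    face_cascade, object_net, class_names = load_detection_models()

    # OpenCV loads images as BGR; convert to RGB to match the convention
    # expected by process_image() and draw_detections().
    bgr = cv2.imread("sample.jpg")
    if bgr is None:
        raise SystemExit("sample.jpg not found")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    _, faces, objects = process_image(
        rgb, face_cascade, object_net, class_names,
        enable_face_detection=True,
        enable_object_detection=True,
        face_confidence=0.5,
        object_confidence=0.5,
    )

    annotated = draw_detections(rgb, faces, objects, show_labels=True, box_color="green")

    print(format_results(faces, "face"))
    print(format_results(objects, "object"))

    # draw_detections() returns RGB, so convert back to BGR before saving.
    cv2.imwrite("annotated.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))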