Spaces:
Build error
Build error
| import cv2 | |
| import numpy as np | |
| import json | |
| from typing import Tuple, List, Dict, Any | |
| import logging | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
def load_detection_models() -> Tuple[Any, Any, List[str]]:
    """
    Load face detection and object detection models.

    The Haar cascade is read from the data directory shipped with OpenCV.
    The MobileNet SSD Caffe files are looked up by relative path; when they
    cannot be loaded the network slot is ``None`` so callers can skip
    object detection.

    Returns:
        Tuple of (face_cascade, object_net, class_names). ``face_cascade``
        is ``None`` when the cascade file could not be loaded,
        ``object_net`` is ``None`` when the Caffe model could not be
        loaded, and the whole result is ``(None, None, [])`` if anything
        else goes wrong.
    """
    try:
        # Load face detection cascade
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        face_cascade = cv2.CascadeClassifier(cascade_path)
        # CascadeClassifier does not raise on a missing or unreadable file;
        # it just stays empty and would only fail later in detectMultiScale.
        if face_cascade.empty():
            logger.warning("Face cascade could not be loaded from %s", cascade_path)
            face_cascade = None

        # Load object detection model (MobileNet SSD)
        model_path = "MobileNetSSD_deploy.prototxt"
        weights_path = "MobileNetSSD_deploy.caffemodel"
        try:
            object_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
        except Exception as exc:
            # Best effort: object detection is optional, so fall back to a
            # placeholder instead of failing the whole load.
            logger.warning("Object detection model files not found. Using placeholder.")
            logger.debug("readNetFromCaffe failed: %s", exc)
            object_net = None

        # PASCAL VOC class names (index 0 is the background class)
        class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
        ]
        return face_cascade, object_net, class_names
    except Exception as e:
        logger.error("Error loading models: %s", e)
        return None, None, []
def process_image(
    image: np.ndarray,
    face_cascade: Any,
    object_net: Any,
    class_names: List[str],
    enable_face_detection: bool,
    enable_object_detection: bool,
    face_confidence: float,
    object_confidence: float
) -> Tuple[np.ndarray, List[Dict], List[Dict]]:
    """
    Process the input image for face and object detection.

    Args:
        image: Input image. Three-channel input is treated as RGB;
            single-channel (grayscale) and four-channel (assumed RGBA)
            input is also accepted.
        face_cascade: Face detection cascade, or None to skip faces
        object_net: Object detection network, or None to skip objects
        class_names: List of class names indexed by the network's class id
        enable_face_detection: Whether to detect faces
        enable_object_detection: Whether to detect objects
        face_confidence: Face detection confidence threshold. Currently
            unused: the cascade call below yields no score to compare.
        object_confidence: Object detection confidence threshold

    Returns:
        Tuple of (image, face_results, object_results). The image is the
        unmodified input; each result is a dict with "id", "bbox"
        ([x, y, width, height] in pixels), "confidence" and "label".
    """
    face_results = []
    object_results = []

    run_faces = enable_face_detection and face_cascade is not None
    run_objects = enable_object_detection and object_net is not None
    if not (run_faces or run_objects):
        # Nothing to run, so skip the colour conversions entirely.
        return image, face_results, object_results

    # Build a 3-channel BGR working copy for OpenCV, whatever the input layout
    if image.ndim == 2 or image.shape[2] == 1:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 4:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    else:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Face detection
    if run_faces:
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )
        for i, (x, y, w, h) in enumerate(faces):
            face_results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascade doesn't provide confidence
                "label": "face"
            })

    # Object detection
    if run_objects:
        try:
            img_h, img_w = image_bgr.shape[:2]
            blob = cv2.dnn.blobFromImage(
                image_bgr, 0.007843, (300, 300), 127.5
            )
            object_net.setInput(blob)
            detections = object_net.forward()
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if not confidence > object_confidence:
                    continue
                idx = int(detections[0, 0, i, 1])
                if not 0 <= idx < len(class_names):
                    continue
                # Corners are scaled by the image size, then clamped so a
                # box that overshoots the frame cannot yield negative or
                # out-of-range pixel coordinates.
                left, top, right, bottom = detections[0, 0, i, 3:7]
                x1 = min(max(int(left * img_w), 0), img_w)
                y1 = min(max(int(top * img_h), 0), img_h)
                x2 = min(max(int(right * img_w), 0), img_w)
                y2 = min(max(int(bottom * img_h), 0), img_h)
                if x2 <= x1 or y2 <= y1:
                    continue  # degenerate box after clamping
                object_results.append({
                    "id": i,
                    "bbox": [x1, y1, x2 - x1, y2 - y1],
                    "confidence": float(confidence),
                    "label": class_names[idx]
                })
        except Exception as e:
            # Best effort: a failing network must not lose the face results.
            logger.warning("Object detection failed: %s", e)

    return image, face_results, object_results
def _draw_labeled_box(
    canvas: np.ndarray,
    bbox: List[int],
    label: str,
    show_label: bool,
    color: Tuple[int, int, int]
) -> None:
    """Draw one box on the BGR canvas in place, plus a filled label tag if requested."""
    x, y, w, h = bbox
    cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 2)
    if not show_label:
        return
    font = cv2.FONT_HERSHEY_SIMPLEX
    text_w, text_h = cv2.getTextSize(label, font, 0.5, 2)[0]
    tag_h = text_h + 10
    # The tag normally sits above the box; when that would leave the image
    # it is drawn just inside the top edge of the box instead.
    tag_bottom = y if y - tag_h >= 0 else y + tag_h
    cv2.rectangle(
        canvas,
        (x, tag_bottom - tag_h),
        (x + text_w, tag_bottom),
        color,
        -1
    )
    cv2.putText(
        canvas, label, (x, tag_bottom - 5),
        font, 0.5, (255, 255, 255), 2
    )


def draw_detections(
    image: np.ndarray,
    face_results: List[Dict],
    object_results: List[Dict],
    show_labels: bool,
    box_color: str
) -> np.ndarray:
    """
    Draw bounding boxes and labels on the image.

    Args:
        image: Input image. Three-channel input is treated as RGB;
            single-channel (grayscale) and four-channel (assumed RGBA,
            alpha is dropped) input is also accepted. Not modified.
        face_results: Face detection results (dicts with "id" and "bbox")
        object_results: Object detection results (dicts with "bbox",
            "label" and "confidence")
        show_labels: Whether to show labels
        box_color: Color name for bounding boxes; unknown names fall back
            to red

    Returns:
        New three-channel RGB image with drawn detections
    """
    # Convert to a 3-channel BGR canvas for OpenCV drawing
    if image.ndim == 2 or image.shape[2] == 1:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 4:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    else:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Color mapping (tuples are in BGR order)
    color_map = {
        "red": (0, 0, 255),
        "green": (0, 255, 0),
        "blue": (255, 0, 0),
        "yellow": (0, 255, 255),
        "purple": (255, 0, 255),
        "orange": (0, 165, 255)
    }
    color = color_map.get(box_color, (0, 0, 255))

    # Draw face detections
    for face in face_results:
        _draw_labeled_box(
            image_bgr, face["bbox"], f"Face {face['id']}", show_labels, color
        )

    # Draw object detections
    for obj in object_results:
        _draw_labeled_box(
            image_bgr,
            obj["bbox"],
            f"{obj['label']}: {obj['confidence']:.2f}",
            show_labels,
            color
        )

    # Convert back to RGB
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
def format_results(results: List[Dict], result_type: str) -> str:
    """
    Format detection results as a readable string.

    Args:
        results: Detection results; each dict needs "id" and "bbox"
            ([x, y, width, height]) and may carry "confidence" and "label"
        result_type: Type of results (face/object), in singular form

    Returns:
        Formatted string: a header line followed by one entry per result,
        or a "No ...s detected" message when the list is empty
    """
    if not results:
        return f"No {result_type}s detected"

    # Use the singular noun for exactly one result ("Detected 1 face:").
    noun = result_type if len(results) == 1 else f"{result_type}s"
    output = [f"Detected {len(results)} {noun}:"]
    for result in results:
        bbox = result["bbox"]
        output.append(
            f" - {result_type.capitalize()} {result['id']}: "
            f"Position({bbox[0]}, {bbox[1]}), Size({bbox[2]}x{bbox[3]})"
        )
        if "confidence" in result:
            output.append(f" Confidence: {result['confidence']:.2f}")
        # The label is only worth a line when it adds information beyond
        # the result type itself (e.g. "cat" for an object).
        if "label" in result and result["label"] != result_type:
            output.append(f" Label: {result['label']}")
    return "\n".join(output)